1*061da546Spatrick"""
2*061da546SpatrickLLDB AppKit formatters
3*061da546Spatrick
4*061da546SpatrickPart of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5*061da546SpatrickSee https://llvm.org/LICENSE.txt for license information.
6*061da546SpatrickSPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7*061da546Spatrick"""
8*061da546Spatrick# example synthetic children and summary provider for CFString (and related NSString class)
9*061da546Spatrick# the real code is part of the LLDB core
10*061da546Spatrickimport lldb
11*061da546Spatrickimport lldb.runtime.objc.objc_runtime
12*061da546Spatrickimport lldb.formatters.Logger
13*061da546Spatrick
# Python 3 has no unichr() builtin: referencing the name raises NameError
# there, in which case chr() is bound to the module-level name `unichr` so
# the rest of the module can call unichr() on either Python version.
try:
    unichr
except NameError:
    unichr = chr
18*061da546Spatrick
def CFString_SummaryProvider(valobj, dict):
    """Return the LLDB summary string for an NSString/CFString value.

    The "content" child of a CFStringSynthProvider built on `valobj` is
    either an lldb.SBValue (whose own summary is used) or a plain Python
    string holding the decoded characters (which is wrapped in quotes).

    Args:
        valobj: the lldb.SBValue being summarized.
        dict: LLDB's internal dictionary; only forwarded to the provider.

    Returns:
        '@' followed by the quoted string, '@<variable is not NSString>'
        when the content cannot be obtained, or '' when the provider could
        not read the object's info bits at all.
    """
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    try:
        summary = provider.get_child_at_index(
            provider.get_child_index("content"))
        if isinstance(summary, lldb.SBValue):
            summary = summary.GetSummary()
        else:
            # On Python 3 the provider may hand back UTF-8 encoded bytes;
            # these cannot be concatenated with str, so decode them first.
            # (On Python 2 bytes *is* str and nothing needs to be done.)
            if isinstance(summary, bytes) and not isinstance(summary, str):
                summary = summary.decode('utf-8', 'replace')
            summary = '"' + summary + '"'
    except Exception:
        # best effort: any failure while reading is reported as "not a string"
        summary = None
    if summary is None:
        summary = '<variable is not NSString>'
    return '@' + summary
36*061da546Spatrick
37*061da546Spatrick
def CFAttributedString_SummaryProvider(valobj, dict):
    """Return the LLDB summary string for an NSAttributedString value.

    The value stored one pointer-size past the address held in `valobj` is
    read and used as the address of the underlying string object, which is
    then decoded through CFStringSynthProvider in the same way
    CFString_SummaryProvider does.

    Args:
        valobj: the lldb.SBValue being summarized.
        dict: LLDB's internal dictionary; only forwarded to the provider.

    Returns:
        '@' followed by the quoted string, or
        '@<variable is not NSAttributedString>' when the string cannot be
        read (null pointer, unreadable info bits, or any error on the way).
    """
    pointer_size = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = None
    if pointee:
        # the slot right after the first pointer-sized field holds the
        # address of the backing string
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee + pointer_size, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data",
            child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            try:
                summary = provider.get_child_at_index(
                    provider.get_child_index("content"))
                if isinstance(summary, lldb.SBValue):
                    summary = summary.GetSummary()
                else:
                    # the provider returns a plain Python string for the
                    # UTF-16 readers; on Python 3 it may be UTF-8 bytes
                    if isinstance(summary, bytes) and \
                            not isinstance(summary, str):
                        summary = summary.decode('utf-8', 'replace')
                    summary = '"' + summary + '"'
            except Exception:
                # best effort: fall through to the "not a string" text
                summary = None
    if summary is None:
        summary = '<variable is not NSAttributedString>'
    return '@' + summary
61*061da546Spatrick
62*061da546Spatrick
def __lldb_init_module(debugger, dict):
    """Register this module's summary providers with LLDB.

    Issues one `type summary add` command per provider through
    `debugger.HandleCommand`.  `dict` is not used.
    """
    registration_commands = (
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef",
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString",
    )
    for command in registration_commands:
        debugger.HandleCommand(command)
68*061da546Spatrick
69*061da546Spatrick
class CFStringSynthProvider:
    """Synthetic children provider that reads a CFString straight from memory.

    The provider reads the info bits stored in the object's CFRuntimeBase
    header, derives a handful of flags from them and uses those flags to
    pick a reader for the character data.  Six children are exposed: the
    flags "mutable", "inline", "explicit", "unicode" and "special"
    (indices 0-4) and the string itself, "content" (index 5).

    Children other than "content" are for debugging only and must not be
    used in production code.

    Note that "content" is an lldb.SBValue for most layouts but a plain
    Python string (the native `str` type) for the UTF-16 readers.
    """

    # names of the flag children in child-index order; "content" comes next
    _FLAG_CHILDREN = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        """Remember `valobj` and compute the flags; `dict` is not used."""
        self.valobj = valobj
        self.update()

    def num_children(self):
        """Return 6, or 0 when the info bits could not be read."""
        if self.invalid:
            return 0
        return 6

    @staticmethod
    def _decode_utf16(raw, is_little):
        """Decode a bytearray of UTF-16 code units into a unicode string.

        Decoding the whole buffer at once (rather than mapping each 16-bit
        unit to a character) combines surrogate pairs properly; units that
        cannot be decoded are replaced instead of raising.
        """
        codec = 'utf-16-le' if is_little else 'utf-16-be'
        return raw.decode(codec, 'replace')

    @staticmethod
    def _native_str(text):
        """Convert unicode `text` to the interpreter's native `str` type.

        Python 2 `str` is a byte string, so the text is UTF-8 encoded there;
        on Python 3 `str` is already unicode and is returned unchanged.
        """
        if str is bytes:
            return text.encode('utf-8')
        return text

    @staticmethod
    def _c_string_expression(text):
        """Build the expression `(char*)"<text>"` with `text` escaped.

        Backslashes, double quotes and line breaks would otherwise end or
        corrupt the C string literal handed to the expression parser.
        """
        escaped = text.replace('\\', '\\\\').replace('"', '\\"')
        escaped = escaped.replace('\n', '\\n').replace('\r', '\\r')
        return '(char*)"' + escaped + '"'

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string from the process memory.

        Args:
            pointer: address of the first 16-bit code unit.
            max_len: maximum number of code units (not bytes) to read.

        Returns:
            The decoded unicode string, without the terminator.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        # NOTE(review): `error` is never inspected; a failed read surfaces
        # as an exception from bytearray()/indexing below, which the summary
        # providers catch -- confirm this is the intended failure mode
        while max_len > 0:
            unit = bytearray(process.ReadMemory(pointer, 2, error))
            b0 = unit[0]
            b1 = unit[1]
            pointer += 2
            if b0 == 0 and b1 == 0:
                break
            raw.append(b0)
            raw.append(b1)
            # read max_len unicode values, not max_len bytes
            max_len -= 1
        # bytes are interpreted according to the target's endianness
        # (do we really need this or is Cocoa going to use
        #  Windows-compatible little-endian even if the target is big endian?)
        return self._decode_utf16(raw, self.is_little)

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Read a "special" string (info bits == 0) as an SBValue."""
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        # pointer (8) + 4 bytes: the Unicode data starts at the padding word
        pointer = self.valobj.GetValueAsUnsigned(0) + 12
        text = self.read_unicode(pointer)
        return self.valobj.CreateValueFromExpression(
            "content", self._c_string_expression(self._native_str(text)))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Build "content" from the object description of the value."""
        # NOTE(review): presumably GetObjectDescription() can return None,
        # in which case this raises and the summary providers report the
        # value as not being a string -- confirm
        return self.valobj.CreateValueFromExpression(
            "content",
            self._c_string_expression(self.valobj.GetObjectDescription()))

    def handle_unicode_string(self):
        """Read a UTF-16 string; returns a native `str`, or an SBValue on
        the fallback paths."""
        # step 1: find offset
        pointer = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # a full pointer is skipped here before getting to the live
            # data
            pointer += self.pointer_size
        else:
            # read a pointer-sized value here and make an address out of it
            try:
                char_ptr_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                holder = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, char_ptr_type)
                pointer = holder.GetValueAsUnsigned(0)
            except Exception:
                return self.valobj.CreateValueFromExpression(
                    "content",
                    self._c_string_expression('@"invalid NSString"'))
        # step 2: read Unicode data at pointer
        # step 3: return it
        return self._native_str(self.read_unicode(pointer))

    def handle_inline_explicit(self):
        """Return "content" as a char* located three pointers into the
        object."""
        address = 3 * self.pointer_size + self.valobj.GetValueAsUnsigned(0)
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(address) + ")")

    def handle_mutable_string(self):
        """Read a mutable string through the data pointer stored two
        pointers into the object; returns a native `str` for UTF-16 data
        and an SBValue otherwise."""
        offset = 2 * self.pointer_size
        data = self.valobj.CreateChildAtOffset(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar).GetPointerType())
        data_value = data.GetValueAsUnsigned(0)
        if self.explicit and self.unicode:
            return self._native_str(self.read_unicode(data_value))
        # one byte is skipped before the characters
        # (presumably the length byte -- TODO confirm)
        data_value += 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    def handle_UTF8_inline(self):
        """Return "content" as the address of the characters stored right
        after the CFRuntimeBase header."""
        offset = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if not self.explicit:
            # skip the length byte that precedes the data
            offset += 1
        return self.valobj.CreateValueFromAddress(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar)).AddressOf()

    def handle_UTF8_not_inline(self):
        """Return "content" as the char* stored right after the
        CFRuntimeBase header."""
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar).GetPointerType())

    def get_child_at_index(self, index):
        """Return the child at `index` (0-4: flags, 5: "content").

        Returns None for any other index.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child [" + str(index) + "]"
        for position, flag_name in enumerate(self._FLAG_CHILDREN):
            if index == position:
                return self.valobj.CreateValueFromExpression(
                    flag_name, str(int(getattr(self, flag_name))))
        if index != 5:
            return None
        # we are handling the several possible combinations of flags.
        # for each known combination we have a function that knows how to
        # go fetch the data from memory instead of running code. if a string
        # is not correctly displayed, one should start by finding a
        # combination of flags that makes it different from these known
        # cases, and provide a new reader function. if this is not possible,
        # a new flag might have to be made up (like the "special" flag below,
        # which is not a real flag in CFString), or alternatively one might
        # need to use the ObjC runtime helper to detect the new class and
        # deal with it accordingly
        if self.mutable:
            return self.handle_mutable_string()
        if self.inline and self.explicit and \
                not self.unicode and not self.special:
            return self.handle_inline_explicit()
        if self.unicode:
            return self.handle_unicode_string()
        if self.special:
            return self.handle_special()
        if self.inline:
            return self.handle_UTF8_inline()
        return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called `name`, or None if there is
        no such child.  "content" is always the last child."""
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child ['" + str(name) + "']"
        if name == "content":
            return self.num_children() - 1
        for position, flag_name in enumerate(self._FLAG_CHILDREN):
            if name == flag_name:
                return position
        return None

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes of the CFRuntimeBase header."""
        return self.pointer_size + 4 + (4 if self.is_64_bit else 0)

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the offset of the info byte from the start of the object."""
        offset = self.pointer_size
        if not self.is_little:
            offset += 3
        return offset

    def read_info_bits(self):
        """Read the info byte and set `self.invalid` accordingly.

        Returns:
            The info bits as an int, or None when they cannot be read.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar))
        # NOTE(review): 11 looks like lldb.eFormatHex; the textual value is
        # parsed with int(info, 0) below, which accepts a 0x prefix -- confirm
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is None:
            self.invalid = True
            return None
        self.invalid = False
        return int(info, 0)

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """Return True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """Return True when neither bit of the 0x60 mask is set."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """Return False only when bit 2 is set and bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify
    # this instead of reading the info_bits
    def is_special_case(self):
        """Return True when the info bits are all zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """Return True when the 0x10 bit of the info bits is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache the pointer size and byte order of the process."""
        process = self.valobj.GetTarget().GetProcess()
        self.pointer_size = process.GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = process.GetByteOrder() == lldb.eByteOrderLittle

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the flag attributes from them.

        When the info bits cannot be read the flag attributes are left
        unset; `self.invalid` is True in that case.
        """
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and the flags."""
        self.adjust_for_architecture()
        self.compute_flags()
356