"""
LLDB AppKit formatters

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""
# example synthetic children and summary provider for CFString (and related NSString class)
# the real code is part of the LLDB core
import lldb
import lldb.runtime.objc.objc_runtime
import lldb.formatters.Logger

# Python 2/3 compatibility shim. It is no longer used by the code below
# (UTF-16 data is decoded in one go), but the module-level name is retained.
try:
    unichr
except NameError:
    unichr = chr

_NOT_NSSTRING = '<variable is not NSString>'
_NOT_NSATTRIBUTEDSTRING = '<variable is not NSAttributedString>'


def _to_native_str(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 3 text is passed through and ``bytes`` are decoded as UTF-8;
    on Python 2 ``unicode`` is encoded to a UTF-8 byte string. This keeps
    plain ``'...' + value`` concatenation working on both interpreters.
    """
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        # only reachable on Python 3: on Python 2 ``bytes`` is ``str``
        return text.decode('utf-8', 'replace')
    return text.encode('utf-8')


def _c_string_expression(text):
    """Build a ``(char*)"..."`` expression whose string literal holds *text*.

    Backslashes, double quotes and line breaks are escaped so that the
    content cannot terminate the literal early or corrupt the expression.
    """
    literal = _to_native_str(text)
    # the backslash must be escaped first, or it would re-escape the others
    for raw, escaped in (('\\', '\\\\'), ('"', '\\"'),
                         ('\n', '\\n'), ('\r', '\\r')):
        literal = literal.replace(raw, escaped)
    return '(char*)"' + literal + '"'


def _content_summary(provider):
    """Return the display text of *provider*'s "content" child, or None.

    The content child is either an lldb.SBValue (its summary is used) or a
    plain string read straight from memory (it gets wrapped in quotes).
    Any failure while fetching it is reported as None so that the callers
    can substitute their own placeholder text.
    """
    try:
        content = provider.get_child_at_index(
            provider.get_child_index("content"))
        if isinstance(content, lldb.SBValue):
            return content.GetSummary()
        if content is None:
            return None
        return '"' + _to_native_str(content) + '"'
    except Exception:
        # best effort: a formatter must never take the debugger down
        return None


def CFString_SummaryProvider(valobj, dict):
    """Summary provider for NSString / CFStringRef / CFMutableStringRef.

    Returns '@' followed by the string content, '@<variable is not
    NSString>' when the content cannot be obtained, or '' when the info
    bits of the object could not be read at all.
    """
    provider = CFStringSynthProvider(valobj, dict)
    if provider.invalid:
        return ''
    summary = _content_summary(provider)
    if summary is None:
        summary = _NOT_NSSTRING
    return '@' + summary


def CFAttributedString_SummaryProvider(valobj, dict):
    """Summary provider for NSAttributedString.

    Skips one pointer-sized word from the object address, follows the
    pointer stored there and formats what it points to with
    CFStringSynthProvider. Returns '@' followed by the content, or
    '@<variable is not NSAttributedString>' on any failure.
    """
    offset = valobj.GetTarget().GetProcess().GetAddressByteSize()
    pointee = valobj.GetValueAsUnsigned(0)
    summary = None
    if pointee:
        pointee = pointee + offset
        child_ptr = valobj.CreateValueFromAddress(
            "string_ptr", pointee, valobj.GetType())
        child = child_ptr.CreateValueFromAddress(
            "string_data",
            child_ptr.GetValueAsUnsigned(),
            valobj.GetType()).AddressOf()
        provider = CFStringSynthProvider(child, dict)
        if not provider.invalid:
            summary = _content_summary(provider)
    if summary is None:
        summary = _NOT_NSATTRIBUTEDSTRING
    return '@' + summary


def __lldb_init_module(debugger, dict):
    """Register both summary providers when the module is imported in LLDB."""
    debugger.HandleCommand(
        "type summary add -F CFString.CFString_SummaryProvider NSString CFStringRef CFMutableStringRef")
    debugger.HandleCommand(
        "type summary add -F CFString.CFAttributedString_SummaryProvider NSAttributedString")


class CFStringSynthProvider:
    """Synthetic children provider that decodes a CFString by reading memory.

    Children 0-4 expose the decoded flag bits; the last child, "content",
    is the string data itself (an lldb.SBValue or a plain string).
    """

    # names of the flag children, in child-index order; "content" follows them
    _FLAG_CHILDREN = ("mutable", "inline", "explicit", "unicode", "special")

    def __init__(self, valobj, dict):
        self.valobj = valobj
        self.update()

    # children other than "content" are for debugging only and must not be
    # used in production code
    def num_children(self):
        """Return 6 (five flags plus content), or 0 for an invalid object."""
        if self.invalid:
            return 0
        return len(self._FLAG_CHILDREN) + 1

    def read_unicode(self, pointer, max_len=2048):
        """Read a NUL-terminated UTF-16 string starting at *pointer*.

        At most *max_len* 16-bit code units are read (code units, not
        bytes). Reading stops early at a zero code unit or as soon as a
        memory read fails. Returns the decoded text; code units that do
        not form valid UTF-16 are replaced with U+FFFD.
        """
        process = self.valobj.GetTarget().GetProcess()
        error = lldb.SBError()
        raw = bytearray()
        # cannot do the read at once because the length value has
        # a weird encoding. better play it safe here
        for _ in range(max_len):
            content = process.ReadMemory(pointer, 2, error)
            if error.Fail() or not content or len(content) < 2:
                # unreadable memory: return what was collected so far
                break
            unit = bytearray(content)
            pointer = pointer + 2
            if unit[0] == 0 and unit[1] == 0:
                break
            raw.extend(unit)
        # interpret the bytes depending on endianness
        # (do we really need this or is Cocoa going to
        # use Windows-compatible little-endian even
        # if the target is big endian?)
        codec = 'utf-16-le' if self.is_little else 'utf-16-be'
        return raw.decode(codec, 'replace')

    # handle the special case strings
    # only use the custom code for the tested LP64 case
    def handle_special(self):
        """Content reader for the "special" case (info bits all zero)."""
        if not self.is_64_bit:
            # for 32bit targets, use safe ObjC code
            return self.handle_unicode_string_safe()
        offset = 12
        pointer = self.valobj.GetValueAsUnsigned(0) + offset
        return self.valobj.CreateValueFromExpression(
            "content", _c_string_expression(self.read_unicode(pointer)))

    # last resort call, use ObjC code to read; the final aim is to
    # be able to strip this call away entirely and only do the read
    # ourselves
    def handle_unicode_string_safe(self):
        """Build the content from the object description.

        Returns None when no description is available.
        """
        description = self.valobj.GetObjectDescription()
        if description is None:
            return None
        return self.valobj.CreateValueFromExpression(
            "content", _c_string_expression(description))

    def handle_unicode_string(self):
        """Content reader for Unicode strings.

        Returns the text as a plain string, or an lldb.SBValue when one of
        the fallback paths is taken.
        """
        # step 1: find offset
        pointer = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if self.inline:
            if not self.explicit:
                # untested, use the safe code path
                return self.handle_unicode_string_safe()
            # a full pointer is skipped here before getting to the live
            # data
            pointer = pointer + self.pointer_size
        else:
            # read a pointer-sized value here and make an address out of it
            try:
                char_type = self.valobj.GetType().GetBasicType(
                    lldb.eBasicTypeChar).GetPointerType()
                vopointer = self.valobj.CreateValueFromAddress(
                    "dummy", pointer, char_type)
                pointer = vopointer.GetValueAsUnsigned(0)
            except Exception:
                # raw string: the backslashes must reach the expression
                # evaluator so the inner quotes stay escaped in the literal
                return self.valobj.CreateValueFromExpression(
                    "content", r'(char*)"@\"invalid NSString\""')
        # step 2: read Unicode data at pointer
        # step 3: return it
        return _to_native_str(self.read_unicode(pointer))

    def handle_inline_explicit(self):
        """Content reader for inline, explicit-length, non-Unicode strings.

        The characters start three pointer-sized words into the object.
        """
        address = self.valobj.GetValueAsUnsigned(0) + 3 * self.pointer_size
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(address) + ")")

    def handle_mutable_string(self):
        """Content reader for mutable strings.

        The data pointer lives two pointer-sized words into the object.
        Returns a plain string for explicit-length Unicode data, otherwise
        an lldb.SBValue pointing one byte past the data pointer.
        """
        offset = 2 * self.pointer_size
        data = self.valobj.CreateChildAtOffset(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar).GetPointerType())
        data_value = data.GetValueAsUnsigned(0)
        if self.explicit and self.unicode:
            return _to_native_str(self.read_unicode(data_value))
        # skip one byte before the characters
        data_value = data_value + 1
        return self.valobj.CreateValueFromExpression(
            "content", "(char*)(" + str(data_value) + ")")

    def handle_UTF8_inline(self):
        """Content reader for inline 8-bit strings."""
        offset = self.valobj.GetValueAsUnsigned(
            0) + self.size_of_cfruntime_base()
        if not self.explicit:
            # a length byte precedes the data when the flag is False
            offset = offset + 1
        return self.valobj.CreateValueFromAddress(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar)).AddressOf()

    def handle_UTF8_not_inline(self):
        """Content reader for out-of-line 8-bit strings."""
        offset = self.size_of_cfruntime_base()
        return self.valobj.CreateChildAtOffset(
            "content", offset, self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar).GetPointerType())

    def get_child_at_index(self, index):
        """Return the synthetic child at *index*.

        Indices 0-4 yield the flag values as 0/1 expressions, index 5 the
        string content. Returns None for any other index, and for every
        index when the object is invalid.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child [" + str(index) + "]"
        if self.invalid:
            return None
        content_index = len(self._FLAG_CHILDREN)
        if index == content_index:
            return self._read_content()
        if index in range(content_index):
            name = self._FLAG_CHILDREN[int(index)]
            return self.valobj.CreateValueFromExpression(
                name, str(int(getattr(self, name))))
        return None

    def _read_content(self):
        """Pick the reader matching the flag combination and run it."""
        # we are handling the several possible combinations of flags.
        # for each known combination we have a function that knows how to
        # go fetch the data from memory instead of running code. if a string is not
        # correctly displayed, one should start by finding a combination of flags that
        # makes it different from these known cases, and provide a new reader function
        # if this is not possible, a new flag might have to be made up (like the "special" flag
        # below, which is not a real flag in CFString), or alternatively one might need to use
        # the ObjC runtime helper to detect the new class and deal with it accordingly
        if self.mutable:
            return self.handle_mutable_string()
        if (self.inline and self.explicit
                and not self.unicode and not self.special):
            return self.handle_inline_explicit()
        if self.unicode:
            return self.handle_unicode_string()
        if self.special:
            return self.handle_special()
        if self.inline:
            return self.handle_UTF8_inline()
        return self.handle_UTF8_not_inline()

    def get_child_index(self, name):
        """Return the index of the child called *name*, or None if unknown.

        "content" is always the last child, i.e. num_children() - 1.
        """
        logger = lldb.formatters.Logger.Logger()
        logger >> "Querying for child ['" + str(name) + "']"
        if name == "content":
            return self.num_children() - 1
        if name in self._FLAG_CHILDREN:
            return self._FLAG_CHILDREN.index(name)
        return None

    # CFRuntimeBase is defined as having an additional
    # 4 bytes (padding?) on LP64 architectures
    # to get its size we add up sizeof(pointer)+4
    # and then add 4 more bytes if we are on a 64bit system
    def size_of_cfruntime_base(self):
        """Return the size in bytes used for the CFRuntimeBase header."""
        return self.pointer_size + 4 + (4 if self.is_64_bit else 0)

    # the info bits are part of the CFRuntimeBase structure
    # to get at them we have to skip a uintptr_t and then get
    # at the least-significant byte of a 4 byte array. If we are
    # on big-endian this means going to byte 3, if we are on
    # little endian (OSX & iOS), this means reading byte 0
    def offset_of_info_bits(self):
        """Return the byte offset of the info bits within the object."""
        offset = self.pointer_size
        if not self.is_little:
            offset = offset + 3
        return offset

    def read_info_bits(self):
        """Read the info-bits byte and record whether the read succeeded.

        Sets self.invalid and returns the byte as an int, or None when the
        value cannot be read or parsed.
        """
        cfinfo = self.valobj.CreateChildAtOffset(
            "cfinfo",
            self.offset_of_info_bits(),
            self.valobj.GetType().GetBasicType(
                lldb.eBasicTypeChar))
        # NOTE(review): 11 is presumably lldb.eFormatHex - confirm; the
        # value is parsed with base auto-detection right below
        cfinfo.SetFormat(11)
        info = cfinfo.GetValue()
        if info is not None:
            try:
                bits = int(info, 0)
            except ValueError:
                bits = None
            if bits is not None:
                self.invalid = False
                return bits
        self.invalid = True
        return None

    # calculating internal flag bits of the CFString object
    # this stuff is defined and discussed in CFString.c
    def is_mutable(self):
        """True when bit 0 of the info bits is set."""
        return (self.info_bits & 1) == 1

    def is_inline(self):
        """True when both bits of mask 0x60 are clear."""
        return (self.info_bits & 0x60) == 0

    # this flag's name is ambiguous, it turns out
    # we must skip a length byte to get at the data
    # when this flag is False
    def has_explicit_length(self):
        """False only when bit 2 is set while bit 0 is clear."""
        return (self.info_bits & (1 | 4)) != 4

    # probably a subclass of NSString. obtained this from [str pathExtension]
    # here info_bits = 0 and Unicode data at the start of the padding word
    # in the long run using the isa value might be safer as a way to identify this
    # instead of reading the info_bits
    def is_special_case(self):
        """True when the info bits are all zero."""
        return self.info_bits == 0

    def is_unicode(self):
        """True when bit 4 (mask 0x10) of the info bits is set."""
        return (self.info_bits & 0x10) == 0x10

    # preparing ourselves to read into memory
    # by adjusting architecture-specific info
    def adjust_for_architecture(self):
        """Cache pointer size, 64-bitness and byte order of the process."""
        process = self.valobj.GetTarget().GetProcess()
        self.pointer_size = process.GetAddressByteSize()
        self.is_64_bit = self.pointer_size == 8
        self.is_little = process.GetByteOrder() == lldb.eByteOrderLittle

    # reading info bits out of the CFString and computing
    # useful values to get at the real data
    def compute_flags(self):
        """Read the info bits and derive the five flag attributes from them.

        Every flag is False when the info bits cannot be read, so the
        attributes always exist.
        """
        self.mutable = False
        self.inline = False
        self.explicit = False
        self.unicode = False
        self.special = False
        self.info_bits = self.read_info_bits()
        if self.info_bits is None:
            return
        self.mutable = self.is_mutable()
        self.inline = self.is_inline()
        self.explicit = self.has_explicit_length()
        self.unicode = self.is_unicode()
        self.special = self.is_special_case()

    def update(self):
        """Refresh the cached architecture info and flags from the value."""
        self.adjust_for_architecture()
        self.compute_flags()