1// Copyright 2018 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:build !js && !nacl && !plan9 && !solaris && !windows 6// +build !js,!nacl,!plan9,!solaris,!windows 7 8/* 9 10Splitdwarf uncompresses and copies the DWARF segment of a Mach-O 11executable into the "dSYM" file expected by lldb and ports of gdb 12on OSX. 13 14Usage: splitdwarf osxMachoFile [ osxDsymFile ] 15 16Unless a dSYM file name is provided on the command line, 17splitdwarf will place it where the OSX tools expect it, in 18"<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>", 19creating directories as necessary. 20 21*/ 22package main // import "golang.org/x/tools/cmd/splitdwarf" 23 24import ( 25 "crypto/sha256" 26 "fmt" 27 "io" 28 "os" 29 "path/filepath" 30 "strings" 31 "syscall" 32 33 "golang.org/x/tools/cmd/splitdwarf/internal/macho" 34) 35 36const ( 37 pageAlign = 12 // 4096 = 1 << 12 38) 39 40func note(format string, why ...interface{}) { 41 fmt.Fprintf(os.Stderr, format+"\n", why...) 42} 43 44func fail(format string, why ...interface{}) { 45 note(format, why...) 46 os.Exit(1) 47} 48 49// splitdwarf inputexe [ outputdwarf ] 50func main() { 51 if len(os.Args) < 2 || len(os.Args) > 3 { 52 fmt.Printf(` 53Usage: %s input_exe [ output_dsym ] 54Reads the executable input_exe, uncompresses and copies debugging 55information into output_dsym. If output_dsym is not specified, 56the path 57 input_exe.dSYM/Contents/Resources/DWARF/input_exe 58is used instead. That is the path that gdb and lldb expect 59on OSX. Input_exe needs a UUID segment; if that is missing, 60then one is created and added. In that case, the permissions 61for input_exe need to allow writing. 62`, os.Args[0]) 63 return 64 } 65 66 // Read input, find DWARF, be sure it looks right 67 inputExe := os.Args[1] 68 exeFile, err := os.Open(inputExe) 69 if err != nil { 70 fail("%v", err) 71 } 72 exeMacho, err := macho.NewFile(exeFile) 73 if err != nil { 74 fail("(internal) Couldn't create macho, %v", err) 75 } 76 // Postpone dealing with output till input is known-good 77 78 // describe(&exeMacho.FileTOC) 79 80 // Offsets into __LINKEDIT: 81 // 82 // Command LC_SYMTAB = 83 // (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries 84 // struct { 85 // StringTableIndex uint32 86 // Type, SectionIndex uint8 87 // Description uint16 88 // Value uint64 89 // } 90 // 91 // (2) string table offset and size. Strings are zero-byte terminated. First must be " ". 92 // 93 // Command LC_DYSYMTAB = indices within symtab (above), except for IndSym 94 // IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab. 95 // 96 // Section __TEXT.__symbol_stub1. 97 // Offset and size (Reserved2) locate and describe a table for thios section. 98 // Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table. 99 // (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline]) 100 // 101 // Section __DATA.__nl_symbol_ptr. 102 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) 103 // Some of these symbols appear to be duplicates of other indirect symbols appearing early 104 // 105 // Section __DATA.__la_symbol_ptr. 106 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) 107 // Some of these symbols appear to be duplicates of other indirect symbols appearing early 108 // 109 110 // Create a File for the output dwarf. 111 // Copy header, file type is MH_DSYM 112 // Copy the relevant load commands 113 114 // LoadCmdUuid 115 // Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these). 116 // Segment __PAGEZERO 117 // Segment __TEXT (zero the size, zero the offset of each section) 118 // Segment __DATA (zero the size, zero the offset of each section) 119 // Segment __LINKEDIT (contains the symbols and strings from Symtab) 120 // Segment __DWARF (uncompressed) 121 122 var uuid *macho.Uuid 123 for _, l := range exeMacho.Loads { 124 switch l.Command() { 125 case macho.LcUuid: 126 uuid = l.(*macho.Uuid) 127 } 128 } 129 130 // Ensure a given load is not nil 131 nonnilC := func(l macho.Load, s string) { 132 if l == nil { 133 fail("input file %s lacks load command %s", inputExe, s) 134 } 135 } 136 137 // Find a segment by name and ensure it is not nil 138 nonnilS := func(s string) *macho.Segment { 139 l := exeMacho.Segment(s) 140 if l == nil { 141 fail("input file %s lacks segment %s", inputExe, s) 142 } 143 return l 144 } 145 146 newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0) 147 148 symtab := exeMacho.Symtab 149 dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output 150 nonnilC(symtab, "symtab") 151 nonnilC(dysymtab, "dysymtab") 152 text := nonnilS("__TEXT") 153 data := nonnilS("__DATA") 154 linkedit := nonnilS("__LINKEDIT") 155 pagezero := nonnilS("__PAGEZERO") 156 157 newtext := text.CopyZeroed() 158 newdata := data.CopyZeroed() 159 newsymtab := symtab.Copy() 160 161 // Linkedit segment contain symbols and strings; 162 // Symtab refers to offsets into linkedit. 163 // This next bit initializes newsymtab and sets up data structures for the linkedit segment 164 linkeditsyms := []macho.Nlist64{} 165 linkeditstrings := []string{} 166 167 // Linkedit will begin at the second page, i.e., offset is one page from beginning 168 // Symbols come first 169 linkeditsymbase := uint32(1) << pageAlign 170 171 // Strings come second, offset by the number of symbols times their size. 172 // Only those symbols from dysymtab.defsym are written into the debugging information. 173 linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym 174 175 // The first two bytes of the strings are reserved for space, null (' ', \000) 176 linkeditstringcur := uint32(2) 177 178 newsymtab.Syms = newsymtab.Syms[:0] 179 newsymtab.Symoff = linkeditsymbase 180 newsymtab.Stroff = linkeditstringbase 181 newsymtab.Nsyms = dysymtab.Nextdefsym 182 for i := uint32(0); i < dysymtab.Nextdefsym; i++ { 183 ii := i + dysymtab.Iextdefsym 184 oldsym := symtab.Syms[ii] 185 newsymtab.Syms = append(newsymtab.Syms, oldsym) 186 187 linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: uint32(linkeditstringcur), 188 Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value}) 189 linkeditstringcur += uint32(len(oldsym.Name)) + 1 190 linkeditstrings = append(linkeditstrings, oldsym.Name) 191 } 192 newsymtab.Strsize = linkeditstringcur 193 194 exeNeedsUuid := uuid == nil 195 if exeNeedsUuid { 196 uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}} 197 uuid.Len = uuid.LoadSize(newtoc) 198 copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16]) 199 uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3 200 uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1 201 } 202 newtoc.AddLoad(uuid) 203 204 // For the specified segment (assumed to be in exeMacho) make a copy of its 205 // sections with appropriate fields zeroed out, and append them to the 206 // currently-last segment in newtoc. 207 copyZOdSections := func(g *macho.Segment) { 208 for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ { 209 s := exeMacho.Sections[i].Copy() 210 s.Offset = 0 211 s.Reloff = 0 212 s.Nreloc = 0 213 newtoc.AddSection(s) 214 } 215 } 216 217 newtoc.AddLoad(newsymtab) 218 newtoc.AddSegment(pagezero) 219 newtoc.AddSegment(newtext) 220 copyZOdSections(text) 221 newtoc.AddSegment(newdata) 222 copyZOdSections(data) 223 224 newlinkedit := linkedit.Copy() 225 newlinkedit.Offset = uint64(linkeditsymbase) 226 newlinkedit.Filesz = uint64(linkeditstringcur) 227 newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file 228 newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign) 229 // The rest should copy over fine. 230 newtoc.AddSegment(newlinkedit) 231 232 dwarf := nonnilS("__DWARF") 233 newdwarf := dwarf.CopyZeroed() 234 newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign) 235 newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1) 236 newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file. 237 newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign) 238 newtoc.AddSegment(newdwarf) 239 240 // Map out Dwarf sections (that is, this is section descriptors, not their contents). 241 offset := uint32(newdwarf.Offset) 242 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { 243 o := exeMacho.Sections[i] 244 s := o.Copy() 245 s.Offset = offset 246 us := o.UncompressedSize() 247 if s.Size < us { 248 s.Size = uint64(us) 249 s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes. 250 } 251 offset += uint32(us) 252 if strings.HasPrefix(s.Name, "__z") { 253 s.Name = "__" + s.Name[3:] // remove "z" 254 } 255 s.Reloff = 0 256 s.Nreloc = 0 257 newtoc.AddSection(s) 258 } 259 260 // Write segments/sections. 261 // Only dwarf and linkedit contain anything interesting. 262 263 // Memory map the output file to get the buffer directly. 264 outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF" 265 if len(os.Args) > 2 { 266 outDwarf = os.Args[2] 267 } else { 268 err := os.MkdirAll(outDwarf, 0755) 269 if err != nil { 270 fail("%v", err) 271 } 272 outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe)) 273 } 274 dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize())) 275 276 // (1) Linkedit segment 277 // Symbol table 278 offset = uint32(newlinkedit.Offset) 279 for i := range linkeditsyms { 280 if exeMacho.Magic == macho.Magic64 { 281 offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder) 282 } else { 283 offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder) 284 } 285 } 286 287 // Initial two bytes of string table, followed by actual zero-terminated strings. 288 buffer[linkeditstringbase] = ' ' 289 buffer[linkeditstringbase+1] = 0 290 offset = linkeditstringbase + 2 291 for _, str := range linkeditstrings { 292 for i := 0; i < len(str); i++ { 293 buffer[offset] = str[i] 294 offset++ 295 } 296 buffer[offset] = 0 297 offset++ 298 } 299 300 // (2) DWARF segment 301 ioff := newdwarf.Firstsect - dwarf.Firstsect 302 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { 303 s := exeMacho.Sections[i] 304 j := i + ioff 305 s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:]) 306 } 307 308 // Because "text" overlaps the header and the loads, write them afterwards, just in case. 309 // Write header. 310 newtoc.Put(buffer) 311 312 err = syscall.Munmap(buffer) 313 if err != nil { 314 fail("Munmap %s for dwarf output failed, %v", outDwarf, err) 315 } 316 err = dwarfFile.Close() 317 if err != nil { 318 fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err) 319 } 320 321 if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command 322 hdr := exeMacho.FileTOC.FileHeader 323 oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize() 324 hdr.NCommands += 1 325 hdr.SizeCommands += uuid.LoadSize(newtoc) 326 327 mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0) 328 if err != nil { 329 fail("Updating UUID in binary failed, %v", err) 330 } 331 exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)), 332 syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) 333 if err != nil { 334 fail("Mmap of %s for UUID update failed, %v", inputExe, err) 335 } 336 _ = hdr.Put(exebuf, newtoc.ByteOrder) 337 _ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder) 338 err = syscall.Munmap(exebuf) 339 if err != nil { 340 fail("Munmap of %s for UUID update failed, %v", inputExe, err) 341 } 342 } 343} 344 345// CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file, 346// and returns the file descriptor and mapped buffer. 347func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) { 348 dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) 349 if err != nil { 350 fail("Open for mmap failed, %v", err) 351 } 352 err = os.Truncate(outDwarf, size) 353 if err != nil { 354 fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err) 355 } 356 buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) 357 if err != nil { 358 fail("Mmap %s for dwarf output update failed, %v", outDwarf, err) 359 } 360 return dwarfFile, buffer 361} 362 363func describe(exem *macho.FileTOC) { 364 note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags)) 365 for i, l := range exem.Loads { 366 if s, ok := l.(*macho.Segment); ok { 367 fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name, 368 s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect) 369 for j := uint32(0); j < s.Nsect; j++ { 370 c := exem.Sections[j+s.Firstsect] 371 fmt.Printf(" Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3) 372 } 373 } else { 374 fmt.Printf("Load %d is %v\n", i, l) 375 } 376 } 377 if exem.SizeCommands != exem.LoadSize() { 378 fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize()) 379 } else { 380 note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize()) 381 } 382 note("File size is %d", exem.FileSize()) 383} 384 385// contentuuid returns a UUID derived from (some of) the content of an executable. 386// specifically included are the non-DWARF sections, specifically excluded are things 387// that surely depend on the presence or absence of DWARF sections (e.g., section 388// numbers, positions with file, number of load commands). 389// (It was considered desirable if this was insensitive to the presence of the 390// __DWARF segment, however because it is not last, it moves other segments, 391// whose contents appear to contain file offset references.) 392func contentuuid(exem *macho.FileTOC) []byte { 393 h := sha256.New() 394 for _, l := range exem.Loads { 395 if l.Command() == macho.LcUuid { 396 continue 397 } 398 if s, ok := l.(*macho.Segment); ok { 399 if s.Name == "__DWARF" || s.Name == "__PAGEZERO" { 400 continue 401 } 402 for j := uint32(0); j < s.Nsect; j++ { 403 c := exem.Sections[j+s.Firstsect] 404 io.Copy(h, c.Open()) 405 } 406 } // Getting dependence on other load commands right is fiddly. 407 } 408 return h.Sum(nil) 409} 410