1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build !js && !nacl && !plan9 && !solaris && !windows
6// +build !js,!nacl,!plan9,!solaris,!windows
7
8/*
9
10Splitdwarf uncompresses and copies the DWARF segment of a Mach-O
11executable into the "dSYM" file expected by lldb and ports of gdb
12on OSX.
13
14Usage: splitdwarf osxMachoFile [ osxDsymFile ]
15
16Unless a dSYM file name is provided on the command line,
17splitdwarf will place it where the OSX tools expect it, in
18"<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>",
19creating directories as necessary.
20
21*/
22package main // import "golang.org/x/tools/cmd/splitdwarf"
23
24import (
25	"crypto/sha256"
26	"fmt"
27	"io"
28	"os"
29	"path/filepath"
30	"strings"
31	"syscall"
32
33	"golang.org/x/tools/cmd/splitdwarf/internal/macho"
34)
35
// pageAlign is the base-2 logarithm of the page size (1<<pageAlign == 4096).
// Offsets and sizes in the output file are rounded up to this boundary.
const pageAlign = 12
39
// note formats a printf-style message, appends a newline, and writes the
// result to standard error.
func note(format string, why ...interface{}) {
	msg := fmt.Sprintf(format+"\n", why...)
	// Diagnostics are best-effort: a failed write to stderr is not reported.
	_, _ = os.Stderr.WriteString(msg)
}
43
44func fail(format string, why ...interface{}) {
45	note(format, why...)
46	os.Exit(1)
47}
48
49// splitdwarf inputexe [ outputdwarf ]
50func main() {
51	if len(os.Args) < 2 || len(os.Args) > 3 {
52		fmt.Printf(`
53Usage: %s input_exe [ output_dsym ]
54Reads the executable input_exe, uncompresses and copies debugging
55information into output_dsym. If output_dsym is not specified,
56the path
57      input_exe.dSYM/Contents/Resources/DWARF/input_exe
58is used instead.  That is the path that gdb and lldb expect
59on OSX.  Input_exe needs a UUID segment; if that is missing,
60then one is created and added.  In that case, the permissions
61for input_exe need to allow writing.
62`, os.Args[0])
63		return
64	}
65
66	// Read input, find DWARF, be sure it looks right
67	inputExe := os.Args[1]
68	exeFile, err := os.Open(inputExe)
69	if err != nil {
70		fail("%v", err)
71	}
72	exeMacho, err := macho.NewFile(exeFile)
73	if err != nil {
74		fail("(internal) Couldn't create macho, %v", err)
75	}
76	// Postpone dealing with output till input is known-good
77
78	// describe(&exeMacho.FileTOC)
79
80	// Offsets into __LINKEDIT:
81	//
82	// Command LC_SYMTAB =
83	//  (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries
84	// struct {
85	//  StringTableIndex uint32
86	//  Type, SectionIndex uint8
87	//  Description uint16
88	//  Value uint64
89	// }
90	//
91	// (2) string table offset and size.  Strings are zero-byte terminated.  First must be " ".
92	//
93	// Command LC_DYSYMTAB = indices within symtab (above), except for IndSym
94	//   IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab.
95	//
96	// Section __TEXT.__symbol_stub1.
97	//   Offset and size (Reserved2) locate and describe a table for thios section.
98	//   Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table.
99	//   (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline])
100	//
101	// Section __DATA.__nl_symbol_ptr.
102	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
103	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
104	//
105	// Section __DATA.__la_symbol_ptr.
106	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
107	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
108	//
109
110	// Create a File for the output dwarf.
111	// Copy header, file type is MH_DSYM
112	// Copy the relevant load commands
113
114	// LoadCmdUuid
115	// Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these).
116	// Segment __PAGEZERO
117	// Segment __TEXT (zero the size, zero the offset of each section)
118	// Segment __DATA (zero the size, zero the offset of each section)
119	// Segment __LINKEDIT (contains the symbols and strings from Symtab)
120	// Segment __DWARF (uncompressed)
121
122	var uuid *macho.Uuid
123	for _, l := range exeMacho.Loads {
124		switch l.Command() {
125		case macho.LcUuid:
126			uuid = l.(*macho.Uuid)
127		}
128	}
129
130	// Ensure a given load is not nil
131	nonnilC := func(l macho.Load, s string) {
132		if l == nil {
133			fail("input file %s lacks load command %s", inputExe, s)
134		}
135	}
136
137	// Find a segment by name and ensure it is not nil
138	nonnilS := func(s string) *macho.Segment {
139		l := exeMacho.Segment(s)
140		if l == nil {
141			fail("input file %s lacks segment %s", inputExe, s)
142		}
143		return l
144	}
145
146	newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0)
147
148	symtab := exeMacho.Symtab
149	dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output
150	nonnilC(symtab, "symtab")
151	nonnilC(dysymtab, "dysymtab")
152	text := nonnilS("__TEXT")
153	data := nonnilS("__DATA")
154	linkedit := nonnilS("__LINKEDIT")
155	pagezero := nonnilS("__PAGEZERO")
156
157	newtext := text.CopyZeroed()
158	newdata := data.CopyZeroed()
159	newsymtab := symtab.Copy()
160
161	// Linkedit segment contain symbols and strings;
162	// Symtab refers to offsets into linkedit.
163	// This next bit initializes newsymtab and sets up data structures for the linkedit segment
164	linkeditsyms := []macho.Nlist64{}
165	linkeditstrings := []string{}
166
167	// Linkedit will begin at the second page, i.e., offset is one page from beginning
168	// Symbols come first
169	linkeditsymbase := uint32(1) << pageAlign
170
171	// Strings come second, offset by the number of symbols times their size.
172	// Only those symbols from dysymtab.defsym are written into the debugging information.
173	linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym
174
175	// The first two bytes of the strings are reserved for space, null (' ', \000)
176	linkeditstringcur := uint32(2)
177
178	newsymtab.Syms = newsymtab.Syms[:0]
179	newsymtab.Symoff = linkeditsymbase
180	newsymtab.Stroff = linkeditstringbase
181	newsymtab.Nsyms = dysymtab.Nextdefsym
182	for i := uint32(0); i < dysymtab.Nextdefsym; i++ {
183		ii := i + dysymtab.Iextdefsym
184		oldsym := symtab.Syms[ii]
185		newsymtab.Syms = append(newsymtab.Syms, oldsym)
186
187		linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: uint32(linkeditstringcur),
188			Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value})
189		linkeditstringcur += uint32(len(oldsym.Name)) + 1
190		linkeditstrings = append(linkeditstrings, oldsym.Name)
191	}
192	newsymtab.Strsize = linkeditstringcur
193
194	exeNeedsUuid := uuid == nil
195	if exeNeedsUuid {
196		uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}}
197		uuid.Len = uuid.LoadSize(newtoc)
198		copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16])
199		uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3
200		uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1
201	}
202	newtoc.AddLoad(uuid)
203
204	// For the specified segment (assumed to be in exeMacho) make a copy of its
205	// sections with appropriate fields zeroed out, and append them to the
206	// currently-last segment in newtoc.
207	copyZOdSections := func(g *macho.Segment) {
208		for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ {
209			s := exeMacho.Sections[i].Copy()
210			s.Offset = 0
211			s.Reloff = 0
212			s.Nreloc = 0
213			newtoc.AddSection(s)
214		}
215	}
216
217	newtoc.AddLoad(newsymtab)
218	newtoc.AddSegment(pagezero)
219	newtoc.AddSegment(newtext)
220	copyZOdSections(text)
221	newtoc.AddSegment(newdata)
222	copyZOdSections(data)
223
224	newlinkedit := linkedit.Copy()
225	newlinkedit.Offset = uint64(linkeditsymbase)
226	newlinkedit.Filesz = uint64(linkeditstringcur)
227	newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file
228	newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign)
229	// The rest should copy over fine.
230	newtoc.AddSegment(newlinkedit)
231
232	dwarf := nonnilS("__DWARF")
233	newdwarf := dwarf.CopyZeroed()
234	newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign)
235	newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1)
236	newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file.
237	newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign)
238	newtoc.AddSegment(newdwarf)
239
240	// Map out Dwarf sections (that is, this is section descriptors, not their contents).
241	offset := uint32(newdwarf.Offset)
242	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
243		o := exeMacho.Sections[i]
244		s := o.Copy()
245		s.Offset = offset
246		us := o.UncompressedSize()
247		if s.Size < us {
248			s.Size = uint64(us)
249			s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes.
250		}
251		offset += uint32(us)
252		if strings.HasPrefix(s.Name, "__z") {
253			s.Name = "__" + s.Name[3:] // remove "z"
254		}
255		s.Reloff = 0
256		s.Nreloc = 0
257		newtoc.AddSection(s)
258	}
259
260	// Write segments/sections.
261	// Only dwarf and linkedit contain anything interesting.
262
263	// Memory map the output file to get the buffer directly.
264	outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF"
265	if len(os.Args) > 2 {
266		outDwarf = os.Args[2]
267	} else {
268		err := os.MkdirAll(outDwarf, 0755)
269		if err != nil {
270			fail("%v", err)
271		}
272		outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe))
273	}
274	dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize()))
275
276	// (1) Linkedit segment
277	// Symbol table
278	offset = uint32(newlinkedit.Offset)
279	for i := range linkeditsyms {
280		if exeMacho.Magic == macho.Magic64 {
281			offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder)
282		} else {
283			offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder)
284		}
285	}
286
287	// Initial two bytes of string table, followed by actual zero-terminated strings.
288	buffer[linkeditstringbase] = ' '
289	buffer[linkeditstringbase+1] = 0
290	offset = linkeditstringbase + 2
291	for _, str := range linkeditstrings {
292		for i := 0; i < len(str); i++ {
293			buffer[offset] = str[i]
294			offset++
295		}
296		buffer[offset] = 0
297		offset++
298	}
299
300	// (2) DWARF segment
301	ioff := newdwarf.Firstsect - dwarf.Firstsect
302	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
303		s := exeMacho.Sections[i]
304		j := i + ioff
305		s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:])
306	}
307
308	// Because "text" overlaps the header and the loads, write them afterwards, just in case.
309	// Write header.
310	newtoc.Put(buffer)
311
312	err = syscall.Munmap(buffer)
313	if err != nil {
314		fail("Munmap %s for dwarf output failed, %v", outDwarf, err)
315	}
316	err = dwarfFile.Close()
317	if err != nil {
318		fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err)
319	}
320
321	if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command
322		hdr := exeMacho.FileTOC.FileHeader
323		oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
324		hdr.NCommands += 1
325		hdr.SizeCommands += uuid.LoadSize(newtoc)
326
327		mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0)
328		if err != nil {
329			fail("Updating UUID in binary failed, %v", err)
330		}
331		exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)),
332			syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
333		if err != nil {
334			fail("Mmap of %s for UUID update failed, %v", inputExe, err)
335		}
336		_ = hdr.Put(exebuf, newtoc.ByteOrder)
337		_ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder)
338		err = syscall.Munmap(exebuf)
339		if err != nil {
340			fail("Munmap of %s for UUID update failed, %v", inputExe, err)
341		}
342	}
343}
344
345// CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file,
346// and returns the file descriptor and mapped buffer.
347func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) {
348	dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
349	if err != nil {
350		fail("Open for mmap failed, %v", err)
351	}
352	err = os.Truncate(outDwarf, size)
353	if err != nil {
354		fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err)
355	}
356	buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
357	if err != nil {
358		fail("Mmap %s for dwarf output update failed, %v", outDwarf, err)
359	}
360	return dwarfFile, buffer
361}
362
363func describe(exem *macho.FileTOC) {
364	note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags))
365	for i, l := range exem.Loads {
366		if s, ok := l.(*macho.Segment); ok {
367			fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name,
368				s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect)
369			for j := uint32(0); j < s.Nsect; j++ {
370				c := exem.Sections[j+s.Firstsect]
371				fmt.Printf("   Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3)
372			}
373		} else {
374			fmt.Printf("Load %d is %v\n", i, l)
375		}
376	}
377	if exem.SizeCommands != exem.LoadSize() {
378		fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize())
379	} else {
380		note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize())
381	}
382	note("File size is %d", exem.FileSize())
383}
384
385// contentuuid returns a UUID derived from (some of) the content of an executable.
386// specifically included are the non-DWARF sections, specifically excluded are things
387// that surely depend on the presence or absence of DWARF sections (e.g., section
388// numbers, positions with file, number of load commands).
389// (It was considered desirable if this was insensitive to the presence of the
390// __DWARF segment, however because it is not last, it moves other segments,
391// whose contents appear to contain file offset references.)
392func contentuuid(exem *macho.FileTOC) []byte {
393	h := sha256.New()
394	for _, l := range exem.Loads {
395		if l.Command() == macho.LcUuid {
396			continue
397		}
398		if s, ok := l.(*macho.Segment); ok {
399			if s.Name == "__DWARF" || s.Name == "__PAGEZERO" {
400				continue
401			}
402			for j := uint32(0); j < s.Nsect; j++ {
403				c := exem.Sections[j+s.Firstsect]
404				io.Copy(h, c.Open())
405			}
406		} // Getting dependence on other load commands right is fiddly.
407	}
408	return h.Sum(nil)
409}
410