1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package macho implements access to Mach-O object files.
6package macho
7
8// High level access to low level data structures.
9
10import (
11	"bytes"
12	"compress/zlib"
13	"debug/dwarf"
14	"encoding/binary"
15	"fmt"
16	"io"
17	"os"
18	"strings"
19)
20
// A File represents an open Mach-O file.
// NewFile populates it by decoding the file header and then each load
// command in turn.
type File struct {
	FileHeader
	// ByteOrder is the byte order selected from the file's magic number.
	ByteOrder binary.ByteOrder
	// Loads holds one entry per load command, in the order they appear.
	Loads []Load
	// Sections collects the sections of every segment command, in the
	// order they were decoded.
	Sections []*Section

	// Symtab is the most recently decoded symbol table command, or nil
	// if none was seen.
	Symtab *Symtab
	// Dysymtab is the most recently decoded dynamic symbol table command,
	// or nil if none was seen.
	Dysymtab *Dysymtab

	// closer is the underlying file opened by Open; it is nil when the
	// File was built directly with NewFile.
	closer io.Closer
}
33
// A Load represents any Mach-O load command.
// Every decoded command type in this file satisfies it by embedding LoadBytes.
type Load interface {
	// Raw returns the bytes of the load command.
	Raw() []byte
}
38
// A LoadBytes is the uninterpreted bytes of a Mach-O load command.
type LoadBytes []byte

// Raw returns the command's bytes as a plain byte slice.
// The result shares its backing storage with b; nothing is copied.
func (b LoadBytes) Raw() []byte {
	return []byte(b)
}
43
// A SegmentHeader is the header for a Mach-O 32-bit or 64-bit load segment command.
// NewFile fills it from the on-disk segment command; for the 32-bit form
// the address, size and offset fields are widened to uint64.
type SegmentHeader struct {
	Cmd     LoadCmd // command code read from the first word of the command
	Len     uint32  // command size in bytes, as recorded in the command
	Name    string  // name field, cut at its first NUL byte
	Addr    uint64
	Memsz   uint64
	Offset  uint64 // file offset at which the segment's reader starts
	Filesz  uint64 // number of bytes covered by the segment's reader
	Maxprot uint32
	Prot    uint32
	Nsect   uint32 // number of section headers decoded after the segment header
	Flag    uint32
}
58
// A Segment represents a Mach-O 32-bit or 64-bit load segment command.
// It carries the raw command bytes, the decoded header, and a reader
// over the Filesz bytes that start at Offset in the underlying file.
type Segment struct {
	LoadBytes
	SegmentHeader

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	// sr is the reader the embedded ReaderAt refers to; NewFile sets both
	// to the same value.
	sr *io.SectionReader
}
73
74// Data reads and returns the contents of the segment.
75func (s *Segment) Data() ([]byte, error) {
76	dat := make([]byte, s.sr.Size())
77	n, err := s.sr.ReadAt(dat, 0)
78	if n == len(dat) {
79		err = nil
80	}
81	return dat[0:n], err
82}
83
84// Open returns a new ReadSeeker reading the segment.
85func (s *Segment) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
86
// A SectionHeader is the header for a Mach-O 32-bit or 64-bit section.
// NewFile fills it from the section headers that follow a segment
// command; for the 32-bit form Addr and Size are widened to uint64.
type SectionHeader struct {
	Name   string // section name field, cut at its first NUL byte
	Seg    string // segment name field, cut at its first NUL byte
	Addr   uint64
	Size   uint64 // number of bytes covered by the section's reader
	Offset uint32 // file offset at which the section's reader starts
	Align  uint32
	Reloff uint32 // file offset from which relocation entries are read
	Nreloc uint32 // number of 8-byte relocation entries to read
	Flags  uint32
}
98
// A Reloc represents a Mach-O relocation.
// Each one is decoded from an 8-byte on-disk entry made of two 32-bit words.
type Reloc struct {
	// Addr is the entry's first word; for a scattered entry only its
	// low 24 bits are kept.
	Addr  uint32
	Value uint32
	// when Scattered == false && Extern == true, Value is the symbol number.
	// when Scattered == false && Extern == false, Value is the section number.
	// when Scattered == true, Value is the value that this reloc refers to.
	Type      uint8
	Len       uint8 // 0=byte, 1=word, 2=long, 3=quad
	Pcrel     bool
	Extern    bool // valid if Scattered == false
	// Scattered is true when bit 31 of the entry's first word is set.
	Scattered bool
}
112
// A Section represents a single Mach-O section.
// It carries the decoded header, the section's relocations, and a reader
// over the Size bytes that start at Offset in the underlying file.
type Section struct {
	SectionHeader
	// Relocs holds the decoded relocation entries; it is left nil when
	// the header's Nreloc is zero.
	Relocs []Reloc

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	// sr is the reader the embedded ReaderAt refers to.
	sr *io.SectionReader
}
126
127// Data reads and returns the contents of the Mach-O section.
128func (s *Section) Data() ([]byte, error) {
129	dat := make([]byte, s.sr.Size())
130	n, err := s.sr.ReadAt(dat, 0)
131	if n == len(dat) {
132		err = nil
133	}
134	return dat[0:n], err
135}
136
137// Open returns a new ReadSeeker reading the Mach-O section.
138func (s *Section) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
139
// A Dylib represents a Mach-O load dynamic library command.
type Dylib struct {
	LoadBytes
	// Name is the NUL-terminated string found at the name offset
	// recorded in the command.
	Name string
	// Time, CurrentVersion and CompatVersion are copied unchanged from
	// the decoded command header.
	Time           uint32
	CurrentVersion uint32
	CompatVersion  uint32
}
148
// A Symtab represents a Mach-O symbol table command.
type Symtab struct {
	LoadBytes
	SymtabCmd
	// Syms holds the decoded symbol table entries, in table order.
	Syms []Symbol
}
155
// A Dysymtab represents a Mach-O dynamic symbol table command.
// NewFile stores the decoded command header in the embedded DysymtabCmd.
type Dysymtab struct {
	LoadBytes
	DysymtabCmd
	IndirectSyms []uint32 // indices into Symtab.Syms
}
162
// A Rpath represents a Mach-O rpath command.
type Rpath struct {
	LoadBytes
	// Path is the NUL-terminated string found at the path offset
	// recorded in the command.
	Path string
}
168
// A Symbol is a Mach-O 32-bit or 64-bit symbol table entry.
// For 32-bit files the entry's value is widened to uint64.
type Symbol struct {
	Name  string // NUL-terminated string at the entry's offset into the string table
	Type  uint8
	Sect  uint8
	Desc  uint16
	Value uint64
}
177
178/*
179 * Mach-O reader
180 */
181
// FormatError is returned by some operations if the data does
// not have the correct format for an object file.
type FormatError struct {
	off int64
	msg string
	val interface{}
}

// Error renders the message, then the offending value in single quotes
// when one was recorded, then the byte offset in hexadecimal.
func (e *FormatError) Error() string {
	if e.val == nil {
		return fmt.Sprintf("%s in record at byte %#x", e.msg, e.off)
	}
	return fmt.Sprintf("%s '%v' in record at byte %#x", e.msg, e.val, e.off)
}
198
199// Open opens the named file using os.Open and prepares it for use as a Mach-O binary.
200func Open(name string) (*File, error) {
201	f, err := os.Open(name)
202	if err != nil {
203		return nil, err
204	}
205	ff, err := NewFile(f)
206	if err != nil {
207		f.Close()
208		return nil, err
209	}
210	ff.closer = f
211	return ff, nil
212}
213
214// Close closes the File.
215// If the File was created using NewFile directly instead of Open,
216// Close has no effect.
217func (f *File) Close() error {
218	var err error
219	if f.closer != nil {
220		err = f.closer.Close()
221		f.closer = nil
222	}
223	return err
224}
225
226// NewFile creates a new File for accessing a Mach-O binary in an underlying reader.
227// The Mach-O binary is expected to start at position 0 in the ReaderAt.
228func NewFile(r io.ReaderAt) (*File, error) {
229	f := new(File)
230	sr := io.NewSectionReader(r, 0, 1<<63-1)
231
232	// Read and decode Mach magic to determine byte order, size.
233	// Magic32 and Magic64 differ only in the bottom bit.
234	var ident [4]byte
235	if _, err := r.ReadAt(ident[0:], 0); err != nil {
236		return nil, err
237	}
238	be := binary.BigEndian.Uint32(ident[0:])
239	le := binary.LittleEndian.Uint32(ident[0:])
240	switch Magic32 &^ 1 {
241	case be &^ 1:
242		f.ByteOrder = binary.BigEndian
243		f.Magic = be
244	case le &^ 1:
245		f.ByteOrder = binary.LittleEndian
246		f.Magic = le
247	default:
248		return nil, &FormatError{0, "invalid magic number", nil}
249	}
250
251	// Read entire file header.
252	if err := binary.Read(sr, f.ByteOrder, &f.FileHeader); err != nil {
253		return nil, err
254	}
255
256	// Then load commands.
257	offset := int64(fileHeaderSize32)
258	if f.Magic == Magic64 {
259		offset = fileHeaderSize64
260	}
261	dat := make([]byte, f.Cmdsz)
262	if _, err := r.ReadAt(dat, offset); err != nil {
263		return nil, err
264	}
265	f.Loads = make([]Load, f.Ncmd)
266	bo := f.ByteOrder
267	for i := range f.Loads {
268		// Each load command begins with uint32 command and length.
269		if len(dat) < 8 {
270			return nil, &FormatError{offset, "command block too small", nil}
271		}
272		cmd, siz := LoadCmd(bo.Uint32(dat[0:4])), bo.Uint32(dat[4:8])
273		if siz < 8 || siz > uint32(len(dat)) {
274			return nil, &FormatError{offset, "invalid command block size", nil}
275		}
276		var cmddat []byte
277		cmddat, dat = dat[0:siz], dat[siz:]
278		offset += int64(siz)
279		var s *Segment
280		switch cmd {
281		default:
282			f.Loads[i] = LoadBytes(cmddat)
283
284		case LoadCmdRpath:
285			var hdr RpathCmd
286			b := bytes.NewReader(cmddat)
287			if err := binary.Read(b, bo, &hdr); err != nil {
288				return nil, err
289			}
290			l := new(Rpath)
291			if hdr.Path >= uint32(len(cmddat)) {
292				return nil, &FormatError{offset, "invalid path in rpath command", hdr.Path}
293			}
294			l.Path = cstring(cmddat[hdr.Path:])
295			l.LoadBytes = LoadBytes(cmddat)
296			f.Loads[i] = l
297
298		case LoadCmdDylib:
299			var hdr DylibCmd
300			b := bytes.NewReader(cmddat)
301			if err := binary.Read(b, bo, &hdr); err != nil {
302				return nil, err
303			}
304			l := new(Dylib)
305			if hdr.Name >= uint32(len(cmddat)) {
306				return nil, &FormatError{offset, "invalid name in dynamic library command", hdr.Name}
307			}
308			l.Name = cstring(cmddat[hdr.Name:])
309			l.Time = hdr.Time
310			l.CurrentVersion = hdr.CurrentVersion
311			l.CompatVersion = hdr.CompatVersion
312			l.LoadBytes = LoadBytes(cmddat)
313			f.Loads[i] = l
314
315		case LoadCmdSymtab:
316			var hdr SymtabCmd
317			b := bytes.NewReader(cmddat)
318			if err := binary.Read(b, bo, &hdr); err != nil {
319				return nil, err
320			}
321			strtab := make([]byte, hdr.Strsize)
322			if _, err := r.ReadAt(strtab, int64(hdr.Stroff)); err != nil {
323				return nil, err
324			}
325			var symsz int
326			if f.Magic == Magic64 {
327				symsz = 16
328			} else {
329				symsz = 12
330			}
331			symdat := make([]byte, int(hdr.Nsyms)*symsz)
332			if _, err := r.ReadAt(symdat, int64(hdr.Symoff)); err != nil {
333				return nil, err
334			}
335			st, err := f.parseSymtab(symdat, strtab, cmddat, &hdr, offset)
336			if err != nil {
337				return nil, err
338			}
339			f.Loads[i] = st
340			f.Symtab = st
341
342		case LoadCmdDysymtab:
343			var hdr DysymtabCmd
344			b := bytes.NewReader(cmddat)
345			if err := binary.Read(b, bo, &hdr); err != nil {
346				return nil, err
347			}
348			dat := make([]byte, hdr.Nindirectsyms*4)
349			if _, err := r.ReadAt(dat, int64(hdr.Indirectsymoff)); err != nil {
350				return nil, err
351			}
352			x := make([]uint32, hdr.Nindirectsyms)
353			if err := binary.Read(bytes.NewReader(dat), bo, x); err != nil {
354				return nil, err
355			}
356			st := new(Dysymtab)
357			st.LoadBytes = LoadBytes(cmddat)
358			st.DysymtabCmd = hdr
359			st.IndirectSyms = x
360			f.Loads[i] = st
361			f.Dysymtab = st
362
363		case LoadCmdSegment:
364			var seg32 Segment32
365			b := bytes.NewReader(cmddat)
366			if err := binary.Read(b, bo, &seg32); err != nil {
367				return nil, err
368			}
369			s = new(Segment)
370			s.LoadBytes = cmddat
371			s.Cmd = cmd
372			s.Len = siz
373			s.Name = cstring(seg32.Name[0:])
374			s.Addr = uint64(seg32.Addr)
375			s.Memsz = uint64(seg32.Memsz)
376			s.Offset = uint64(seg32.Offset)
377			s.Filesz = uint64(seg32.Filesz)
378			s.Maxprot = seg32.Maxprot
379			s.Prot = seg32.Prot
380			s.Nsect = seg32.Nsect
381			s.Flag = seg32.Flag
382			f.Loads[i] = s
383			for i := 0; i < int(s.Nsect); i++ {
384				var sh32 Section32
385				if err := binary.Read(b, bo, &sh32); err != nil {
386					return nil, err
387				}
388				sh := new(Section)
389				sh.Name = cstring(sh32.Name[0:])
390				sh.Seg = cstring(sh32.Seg[0:])
391				sh.Addr = uint64(sh32.Addr)
392				sh.Size = uint64(sh32.Size)
393				sh.Offset = sh32.Offset
394				sh.Align = sh32.Align
395				sh.Reloff = sh32.Reloff
396				sh.Nreloc = sh32.Nreloc
397				sh.Flags = sh32.Flags
398				if err := f.pushSection(sh, r); err != nil {
399					return nil, err
400				}
401			}
402
403		case LoadCmdSegment64:
404			var seg64 Segment64
405			b := bytes.NewReader(cmddat)
406			if err := binary.Read(b, bo, &seg64); err != nil {
407				return nil, err
408			}
409			s = new(Segment)
410			s.LoadBytes = cmddat
411			s.Cmd = cmd
412			s.Len = siz
413			s.Name = cstring(seg64.Name[0:])
414			s.Addr = seg64.Addr
415			s.Memsz = seg64.Memsz
416			s.Offset = seg64.Offset
417			s.Filesz = seg64.Filesz
418			s.Maxprot = seg64.Maxprot
419			s.Prot = seg64.Prot
420			s.Nsect = seg64.Nsect
421			s.Flag = seg64.Flag
422			f.Loads[i] = s
423			for i := 0; i < int(s.Nsect); i++ {
424				var sh64 Section64
425				if err := binary.Read(b, bo, &sh64); err != nil {
426					return nil, err
427				}
428				sh := new(Section)
429				sh.Name = cstring(sh64.Name[0:])
430				sh.Seg = cstring(sh64.Seg[0:])
431				sh.Addr = sh64.Addr
432				sh.Size = sh64.Size
433				sh.Offset = sh64.Offset
434				sh.Align = sh64.Align
435				sh.Reloff = sh64.Reloff
436				sh.Nreloc = sh64.Nreloc
437				sh.Flags = sh64.Flags
438				if err := f.pushSection(sh, r); err != nil {
439					return nil, err
440				}
441			}
442		}
443		if s != nil {
444			s.sr = io.NewSectionReader(r, int64(s.Offset), int64(s.Filesz))
445			s.ReaderAt = s.sr
446		}
447	}
448	return f, nil
449}
450
451func (f *File) parseSymtab(symdat, strtab, cmddat []byte, hdr *SymtabCmd, offset int64) (*Symtab, error) {
452	bo := f.ByteOrder
453	symtab := make([]Symbol, hdr.Nsyms)
454	b := bytes.NewReader(symdat)
455	for i := range symtab {
456		var n Nlist64
457		if f.Magic == Magic64 {
458			if err := binary.Read(b, bo, &n); err != nil {
459				return nil, err
460			}
461		} else {
462			var n32 Nlist32
463			if err := binary.Read(b, bo, &n32); err != nil {
464				return nil, err
465			}
466			n.Name = n32.Name
467			n.Type = n32.Type
468			n.Sect = n32.Sect
469			n.Desc = n32.Desc
470			n.Value = uint64(n32.Value)
471		}
472		sym := &symtab[i]
473		if n.Name >= uint32(len(strtab)) {
474			return nil, &FormatError{offset, "invalid name in symbol table", n.Name}
475		}
476		sym.Name = cstring(strtab[n.Name:])
477		sym.Type = n.Type
478		sym.Sect = n.Sect
479		sym.Desc = n.Desc
480		sym.Value = n.Value
481	}
482	st := new(Symtab)
483	st.LoadBytes = LoadBytes(cmddat)
484	st.Syms = symtab
485	return st, nil
486}
487
// relocInfo is the raw 8-byte on-disk form of a relocation entry: two
// 32-bit words that pushSection unpacks into a Reloc.
type relocInfo struct {
	Addr   uint32 // first word; bit 31 set marks a scattered entry
	Symnum uint32 // second word; packed bit fields, or the value of a scattered entry
}
492
493func (f *File) pushSection(sh *Section, r io.ReaderAt) error {
494	f.Sections = append(f.Sections, sh)
495	sh.sr = io.NewSectionReader(r, int64(sh.Offset), int64(sh.Size))
496	sh.ReaderAt = sh.sr
497
498	if sh.Nreloc > 0 {
499		reldat := make([]byte, int(sh.Nreloc)*8)
500		if _, err := r.ReadAt(reldat, int64(sh.Reloff)); err != nil {
501			return err
502		}
503		b := bytes.NewReader(reldat)
504
505		bo := f.ByteOrder
506
507		sh.Relocs = make([]Reloc, sh.Nreloc)
508		for i := range sh.Relocs {
509			rel := &sh.Relocs[i]
510
511			var ri relocInfo
512			if err := binary.Read(b, bo, &ri); err != nil {
513				return err
514			}
515
516			if ri.Addr&(1<<31) != 0 { // scattered
517				rel.Addr = ri.Addr & (1<<24 - 1)
518				rel.Type = uint8((ri.Addr >> 24) & (1<<4 - 1))
519				rel.Len = uint8((ri.Addr >> 28) & (1<<2 - 1))
520				rel.Pcrel = ri.Addr&(1<<30) != 0
521				rel.Value = ri.Symnum
522				rel.Scattered = true
523			} else {
524				switch bo {
525				case binary.LittleEndian:
526					rel.Addr = ri.Addr
527					rel.Value = ri.Symnum & (1<<24 - 1)
528					rel.Pcrel = ri.Symnum&(1<<24) != 0
529					rel.Len = uint8((ri.Symnum >> 25) & (1<<2 - 1))
530					rel.Extern = ri.Symnum&(1<<27) != 0
531					rel.Type = uint8((ri.Symnum >> 28) & (1<<4 - 1))
532				case binary.BigEndian:
533					rel.Addr = ri.Addr
534					rel.Value = ri.Symnum >> 8
535					rel.Pcrel = ri.Symnum&(1<<7) != 0
536					rel.Len = uint8((ri.Symnum >> 5) & (1<<2 - 1))
537					rel.Extern = ri.Symnum&(1<<4) != 0
538					rel.Type = uint8(ri.Symnum & (1<<4 - 1))
539				default:
540					panic("unreachable")
541				}
542			}
543		}
544	}
545
546	return nil
547}
548
// cstring returns b up to, but not including, its first NUL byte as a
// string. When b holds no NUL byte the whole slice is converted.
func cstring(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		return string(b[:end])
	}
	return string(b)
}
556
557// Segment returns the first Segment with the given name, or nil if no such segment exists.
558func (f *File) Segment(name string) *Segment {
559	for _, l := range f.Loads {
560		if s, ok := l.(*Segment); ok && s.Name == name {
561			return s
562		}
563	}
564	return nil
565}
566
567// Section returns the first section with the given name, or nil if no such
568// section exists.
569func (f *File) Section(name string) *Section {
570	for _, s := range f.Sections {
571		if s.Name == name {
572			return s
573		}
574	}
575	return nil
576}
577
578// DWARF returns the DWARF debug information for the Mach-O file.
579func (f *File) DWARF() (*dwarf.Data, error) {
580	dwarfSuffix := func(s *Section) string {
581		switch {
582		case strings.HasPrefix(s.Name, "__debug_"):
583			return s.Name[8:]
584		case strings.HasPrefix(s.Name, "__zdebug_"):
585			return s.Name[9:]
586		default:
587			return ""
588		}
589
590	}
591	sectionData := func(s *Section) ([]byte, error) {
592		b, err := s.Data()
593		if err != nil && uint64(len(b)) < s.Size {
594			return nil, err
595		}
596
597		if len(b) >= 12 && string(b[:4]) == "ZLIB" {
598			dlen := binary.BigEndian.Uint64(b[4:12])
599			dbuf := make([]byte, dlen)
600			r, err := zlib.NewReader(bytes.NewBuffer(b[12:]))
601			if err != nil {
602				return nil, err
603			}
604			if _, err := io.ReadFull(r, dbuf); err != nil {
605				return nil, err
606			}
607			if err := r.Close(); err != nil {
608				return nil, err
609			}
610			b = dbuf
611		}
612		return b, nil
613	}
614
615	// There are many other DWARF sections, but these
616	// are the ones the debug/dwarf package uses.
617	// Don't bother loading others.
618	var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil}
619	for _, s := range f.Sections {
620		suffix := dwarfSuffix(s)
621		if suffix == "" {
622			continue
623		}
624		if _, ok := dat[suffix]; !ok {
625			continue
626		}
627		b, err := sectionData(s)
628		if err != nil {
629			return nil, err
630		}
631		dat[suffix] = b
632	}
633
634	d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"])
635	if err != nil {
636		return nil, err
637	}
638
639	// Look for DWARF4 .debug_types sections.
640	for i, s := range f.Sections {
641		suffix := dwarfSuffix(s)
642		if suffix != "types" {
643			continue
644		}
645
646		b, err := sectionData(s)
647		if err != nil {
648			return nil, err
649		}
650
651		err = d.AddTypes(fmt.Sprintf("types-%d", i), b)
652		if err != nil {
653			return nil, err
654		}
655	}
656
657	return d, nil
658}
659
660// ImportedSymbols returns the names of all symbols
661// referred to by the binary f that are expected to be
662// satisfied by other libraries at dynamic load time.
663func (f *File) ImportedSymbols() ([]string, error) {
664	if f.Dysymtab == nil || f.Symtab == nil {
665		return nil, &FormatError{0, "missing symbol table", nil}
666	}
667
668	st := f.Symtab
669	dt := f.Dysymtab
670	var all []string
671	for _, s := range st.Syms[dt.Iundefsym : dt.Iundefsym+dt.Nundefsym] {
672		all = append(all, s.Name)
673	}
674	return all, nil
675}
676
677// ImportedLibraries returns the paths of all libraries
678// referred to by the binary f that are expected to be
679// linked with the binary at dynamic link time.
680func (f *File) ImportedLibraries() ([]string, error) {
681	var all []string
682	for _, l := range f.Loads {
683		if lib, ok := l.(*Dylib); ok {
684			all = append(all, lib.Name)
685		}
686	}
687	return all, nil
688}
689