1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package macho implements access to Mach-O object files.
6package macho
7
8// High level access to low level data structures.
9
10import (
11	"bytes"
12	"debug/dwarf"
13	"encoding/binary"
14	"fmt"
15	"io"
16	"os"
17)
18
// A File represents an open Mach-O file.
// It is produced by NewFile (or Open), which fills in the header, the byte
// order, and the decoded load commands.
type File struct {
	FileHeader
	ByteOrder binary.ByteOrder // byte order selected by NewFile from the magic number
	Loads     []Load           // one entry per load command, in the order they appear in the file
	Sections  []*Section       // sections collected from the segment commands, in the order they were read

	Symtab   *Symtab   // most recently read symbol table command; nil if the file has none
	Dysymtab *Dysymtab // most recently read dynamic symbol table command; nil if the file has none

	closer io.Closer // set by Open so that Close can release the underlying file; nil otherwise
}
31
// A Load represents any Mach-O load command.
// Every decoded command type in this file embeds LoadBytes and therefore
// satisfies Load.
type Load interface {
	// Raw returns the bytes of the load command as they were read from the file.
	Raw() []byte
}
36
// A LoadBytes is the uninterpreted bytes of a Mach-O load command.
type LoadBytes []byte

// Raw returns the command's bytes as a plain byte slice.
// The result aliases b; no copy is made.
func (b LoadBytes) Raw() []byte {
	return []byte(b)
}
41
// A SegmentHeader is the header for a Mach-O 32-bit or 64-bit load segment command.
// NewFile fills it from either a Segment32 or a Segment64 record; the 32-bit
// address, size and offset fields are widened to uint64.
type SegmentHeader struct {
	Cmd     LoadCmd // load command type read from the command's first word
	Len     uint32  // total length of the load command in bytes
	Name    string  // segment name, cut at the first NUL byte
	Addr    uint64  // Addr field of the segment command
	Memsz   uint64  // Memsz field of the segment command
	Offset  uint64  // file offset at which the segment's data starts
	Filesz  uint64  // number of bytes of segment data stored in the file
	Maxprot uint32  // Maxprot field of the segment command
	Prot    uint32  // Prot field of the segment command
	Nsect   uint32  // number of section headers that follow the segment header
	Flag    uint32  // Flag field of the segment command
}
56
// A Segment represents a Mach-O 32-bit or 64-bit load segment command.
// It carries the raw command bytes (LoadBytes), the decoded header, and a
// reader over the segment's data in the file.
type Segment struct {
	LoadBytes
	SegmentHeader

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	sr *io.SectionReader // covers Filesz bytes starting at Offset; backs ReaderAt, Data and Open
}
71
72// Data reads and returns the contents of the segment.
73func (s *Segment) Data() ([]byte, error) {
74	dat := make([]byte, s.sr.Size())
75	n, err := s.sr.ReadAt(dat, 0)
76	if n == len(dat) {
77		err = nil
78	}
79	return dat[0:n], err
80}
81
82// Open returns a new ReadSeeker reading the segment.
83func (s *Segment) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
84
// A SectionHeader is the header for a Mach-O section. NewFile fills it from
// either a Section32 or a Section64 record found inside a segment command;
// the 32-bit address and size fields are widened to uint64.
type SectionHeader struct {
	Name   string // section name, cut at the first NUL byte
	Seg    string // segment name recorded in the section header, cut at the first NUL byte
	Addr   uint64 // Addr field of the section header
	Size   uint64 // size in bytes of the section's contents
	Offset uint32 // file offset at which the section's contents start
	Align  uint32 // Align field of the section header
	Reloff uint32 // file offset of the section's relocation entries
	Nreloc uint32 // number of relocation entries
	Flags  uint32 // Flags field of the section header
}
96
// A Reloc represents a Mach-O relocation.
// It is the decoded form of one 8-byte on-disk relocation entry; the meaning
// of Value depends on the Scattered and Extern flags as described below.
type Reloc struct {
	Addr  uint32
	Value uint32
	// when Scattered == false && Extern == true, Value is the symbol number.
	// when Scattered == false && Extern == false, Value is the section number.
	// when Scattered == true, Value is the value that this reloc refers to.
	Type      uint8
	Len       uint8 // 0=byte, 1=word, 2=long, 3=quad
	Pcrel     bool
	Extern    bool // valid if Scattered == false
	Scattered bool
}
110
// A Section represents a Mach-O section: its decoded header, its relocation
// entries, and a reader over its contents in the file.
type Section struct {
	SectionHeader
	Relocs []Reloc // decoded relocation entries; left nil when Nreloc is zero

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	sr *io.SectionReader // covers Size bytes starting at Offset; backs ReaderAt, Data and Open
}
124
125// Data reads and returns the contents of the Mach-O section.
126func (s *Section) Data() ([]byte, error) {
127	dat := make([]byte, s.sr.Size())
128	n, err := s.sr.ReadAt(dat, 0)
129	if n == len(dat) {
130		err = nil
131	}
132	return dat[0:n], err
133}
134
135// Open returns a new ReadSeeker reading the Mach-O section.
136func (s *Section) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
137
// A Dylib represents a Mach-O load dynamic library command.
type Dylib struct {
	LoadBytes
	Name           string // library name, read as a NUL-terminated string at the name offset inside the command
	Time           uint32 // Time field of the dylib command
	CurrentVersion uint32 // CurrentVersion field of the dylib command
	CompatVersion  uint32 // CompatVersion field of the dylib command
}
146
// A Symtab represents a Mach-O symbol table command.
// It embeds the raw command bytes and the SymtabCmd header type.
type Symtab struct {
	LoadBytes
	SymtabCmd
	Syms []Symbol // decoded symbol table entries, in file order
}
153
// A Dysymtab represents a Mach-O dynamic symbol table command.
// It embeds the raw command bytes and the decoded DysymtabCmd header.
type Dysymtab struct {
	LoadBytes
	DysymtabCmd
	IndirectSyms []uint32 // indices into Symtab.Syms
}
160
// A Rpath represents a Mach-O rpath command.
type Rpath struct {
	LoadBytes
	Path string // path, read as a NUL-terminated string at the path offset inside the command
}
166
// A Symbol is a Mach-O 32-bit or 64-bit symbol table entry.
// Entries from 32-bit files have their Value widened to uint64.
type Symbol struct {
	Name  string // NUL-terminated name found in the string table at the entry's name offset
	Type  uint8  // Type field of the symbol table entry
	Sect  uint8  // Sect field of the symbol table entry
	Desc  uint16 // Desc field of the symbol table entry
	Value uint64 // Value field of the symbol table entry
}
175
176/*
177 * Mach-O reader
178 */
179
// FormatError is returned by some operations if the data does
// not have the correct format for an object file.
type FormatError struct {
	off int64
	msg string
	val interface{}
}

// Error renders the message, followed by the quoted offending value when one
// was recorded, followed by the byte offset in hexadecimal.
func (e *FormatError) Error() string {
	where := fmt.Sprintf(" in record at byte %#x", e.off)
	if e.val == nil {
		return e.msg + where
	}
	return e.msg + fmt.Sprintf(" '%v'", e.val) + where
}
196
197// Open opens the named file using os.Open and prepares it for use as a Mach-O binary.
198func Open(name string) (*File, error) {
199	f, err := os.Open(name)
200	if err != nil {
201		return nil, err
202	}
203	ff, err := NewFile(f)
204	if err != nil {
205		f.Close()
206		return nil, err
207	}
208	ff.closer = f
209	return ff, nil
210}
211
212// Close closes the File.
213// If the File was created using NewFile directly instead of Open,
214// Close has no effect.
215func (f *File) Close() error {
216	var err error
217	if f.closer != nil {
218		err = f.closer.Close()
219		f.closer = nil
220	}
221	return err
222}
223
// NewFile creates a new File for accessing a Mach-O binary in an underlying reader.
// The Mach-O binary is expected to start at position 0 in the ReaderAt.
//
// NewFile reads the magic number to pick the byte order, decodes the file
// header, and then walks the load commands. Rpath, dylib, symtab, dysymtab
// and segment commands are decoded into their typed forms; every other
// command is stored as LoadBytes. Structurally invalid data is reported as a
// *FormatError; errors from the underlying reads are returned unchanged.
func NewFile(r io.ReaderAt) (*File, error) {
	f := new(File)
	// sr exposes r as a sequential io.Reader so binary.Read can consume the header.
	sr := io.NewSectionReader(r, 0, 1<<63-1)

	// Read and decode Mach magic to determine byte order, size.
	// Magic32 and Magic64 differ only in the bottom bit.
	var ident [4]byte
	if _, err := r.ReadAt(ident[0:], 0); err != nil {
		return nil, err
	}
	// Interpret the four bytes both ways and keep whichever matches the magic
	// once the bottom bit is masked off.
	be := binary.BigEndian.Uint32(ident[0:])
	le := binary.LittleEndian.Uint32(ident[0:])
	switch Magic32 &^ 1 {
	case be &^ 1:
		f.ByteOrder = binary.BigEndian
		f.Magic = be
	case le &^ 1:
		f.ByteOrder = binary.LittleEndian
		f.Magic = le
	default:
		return nil, &FormatError{0, "invalid magic number", nil}
	}

	// Read entire file header.
	if err := binary.Read(sr, f.ByteOrder, &f.FileHeader); err != nil {
		return nil, err
	}

	// Then load commands.
	// They start right after the header, whose size depends on the magic.
	offset := int64(fileHeaderSize32)
	if f.Magic == Magic64 {
		offset = fileHeaderSize64
	}
	// dat holds the not-yet-consumed tail of the load command area.
	dat := make([]byte, f.Cmdsz)
	if _, err := r.ReadAt(dat, offset); err != nil {
		return nil, err
	}
	f.Loads = make([]Load, f.Ncmd)
	bo := f.ByteOrder
	for i := range f.Loads {
		// Each load command begins with uint32 command and length.
		if len(dat) < 8 {
			return nil, &FormatError{offset, "command block too small", nil}
		}
		cmd, siz := LoadCmd(bo.Uint32(dat[0:4])), bo.Uint32(dat[4:8])
		if siz < 8 || siz > uint32(len(dat)) {
			return nil, &FormatError{offset, "invalid command block size", nil}
		}
		// Split off this command's bytes and advance past it. From here on
		// offset refers to the start of the next command, which is also the
		// value reported by the FormatErrors raised in the cases below.
		var cmddat []byte
		cmddat, dat = dat[0:siz], dat[siz:]
		offset += int64(siz)
		// s is set only by the two segment cases so that the reader setup
		// after the switch runs for them.
		var s *Segment
		switch cmd {
		default:
			// Commands without a dedicated decoder are kept as raw bytes.
			f.Loads[i] = LoadBytes(cmddat)

		case LoadCmdRpath:
			var hdr RpathCmd
			b := bytes.NewReader(cmddat)
			if err := binary.Read(b, bo, &hdr); err != nil {
				return nil, err
			}
			l := new(Rpath)
			// hdr.Path is an offset into the command; it must lie inside it.
			if hdr.Path >= uint32(len(cmddat)) {
				return nil, &FormatError{offset, "invalid path in rpath command", hdr.Path}
			}
			l.Path = cstring(cmddat[hdr.Path:])
			l.LoadBytes = LoadBytes(cmddat)
			f.Loads[i] = l

		case LoadCmdDylib:
			var hdr DylibCmd
			b := bytes.NewReader(cmddat)
			if err := binary.Read(b, bo, &hdr); err != nil {
				return nil, err
			}
			l := new(Dylib)
			// hdr.Name is an offset into the command; it must lie inside it.
			if hdr.Name >= uint32(len(cmddat)) {
				return nil, &FormatError{offset, "invalid name in dynamic library command", hdr.Name}
			}
			l.Name = cstring(cmddat[hdr.Name:])
			l.Time = hdr.Time
			l.CurrentVersion = hdr.CurrentVersion
			l.CompatVersion = hdr.CompatVersion
			l.LoadBytes = LoadBytes(cmddat)
			f.Loads[i] = l

		case LoadCmdSymtab:
			var hdr SymtabCmd
			b := bytes.NewReader(cmddat)
			if err := binary.Read(b, bo, &hdr); err != nil {
				return nil, err
			}
			// The string table and the symbol entries live elsewhere in the
			// file, at the offsets given by the command header.
			strtab := make([]byte, hdr.Strsize)
			if _, err := r.ReadAt(strtab, int64(hdr.Stroff)); err != nil {
				return nil, err
			}
			// On-disk size of one symbol entry: 16 bytes for 64-bit files, 12 otherwise.
			var symsz int
			if f.Magic == Magic64 {
				symsz = 16
			} else {
				symsz = 12
			}
			symdat := make([]byte, int(hdr.Nsyms)*symsz)
			if _, err := r.ReadAt(symdat, int64(hdr.Symoff)); err != nil {
				return nil, err
			}
			st, err := f.parseSymtab(symdat, strtab, cmddat, &hdr, offset)
			if err != nil {
				return nil, err
			}
			f.Loads[i] = st
			f.Symtab = st

		case LoadCmdDysymtab:
			var hdr DysymtabCmd
			b := bytes.NewReader(cmddat)
			if err := binary.Read(b, bo, &hdr); err != nil {
				return nil, err
			}
			// This dat shadows the load command buffer for the rest of the
			// case; it holds the indirect symbol table, 4 bytes per entry.
			dat := make([]byte, hdr.Nindirectsyms*4)
			if _, err := r.ReadAt(dat, int64(hdr.Indirectsymoff)); err != nil {
				return nil, err
			}
			x := make([]uint32, hdr.Nindirectsyms)
			if err := binary.Read(bytes.NewReader(dat), bo, x); err != nil {
				return nil, err
			}
			st := new(Dysymtab)
			st.LoadBytes = LoadBytes(cmddat)
			st.DysymtabCmd = hdr
			st.IndirectSyms = x
			f.Loads[i] = st
			f.Dysymtab = st

		case LoadCmdSegment:
			var seg32 Segment32
			b := bytes.NewReader(cmddat)
			if err := binary.Read(b, bo, &seg32); err != nil {
				return nil, err
			}
			s = new(Segment)
			s.LoadBytes = cmddat
			s.Cmd = cmd
			s.Len = siz
			s.Name = cstring(seg32.Name[0:])
			s.Addr = uint64(seg32.Addr)
			s.Memsz = uint64(seg32.Memsz)
			s.Offset = uint64(seg32.Offset)
			s.Filesz = uint64(seg32.Filesz)
			s.Maxprot = seg32.Maxprot
			s.Prot = seg32.Prot
			s.Nsect = seg32.Nsect
			s.Flag = seg32.Flag
			f.Loads[i] = s
			// The section headers follow the segment header inside the same
			// command, so keep reading from b. This i shadows the load
			// command index.
			for i := 0; i < int(s.Nsect); i++ {
				var sh32 Section32
				if err := binary.Read(b, bo, &sh32); err != nil {
					return nil, err
				}
				sh := new(Section)
				sh.Name = cstring(sh32.Name[0:])
				sh.Seg = cstring(sh32.Seg[0:])
				sh.Addr = uint64(sh32.Addr)
				sh.Size = uint64(sh32.Size)
				sh.Offset = sh32.Offset
				sh.Align = sh32.Align
				sh.Reloff = sh32.Reloff
				sh.Nreloc = sh32.Nreloc
				sh.Flags = sh32.Flags
				if err := f.pushSection(sh, r); err != nil {
					return nil, err
				}
			}

		case LoadCmdSegment64:
			var seg64 Segment64
			b := bytes.NewReader(cmddat)
			if err := binary.Read(b, bo, &seg64); err != nil {
				return nil, err
			}
			s = new(Segment)
			s.LoadBytes = cmddat
			s.Cmd = cmd
			s.Len = siz
			s.Name = cstring(seg64.Name[0:])
			s.Addr = seg64.Addr
			s.Memsz = seg64.Memsz
			s.Offset = seg64.Offset
			s.Filesz = seg64.Filesz
			s.Maxprot = seg64.Maxprot
			s.Prot = seg64.Prot
			s.Nsect = seg64.Nsect
			s.Flag = seg64.Flag
			f.Loads[i] = s
			// The section headers follow the segment header inside the same
			// command, so keep reading from b. This i shadows the load
			// command index.
			for i := 0; i < int(s.Nsect); i++ {
				var sh64 Section64
				if err := binary.Read(b, bo, &sh64); err != nil {
					return nil, err
				}
				sh := new(Section)
				sh.Name = cstring(sh64.Name[0:])
				sh.Seg = cstring(sh64.Seg[0:])
				sh.Addr = sh64.Addr
				sh.Size = sh64.Size
				sh.Offset = sh64.Offset
				sh.Align = sh64.Align
				sh.Reloff = sh64.Reloff
				sh.Nreloc = sh64.Nreloc
				sh.Flags = sh64.Flags
				if err := f.pushSection(sh, r); err != nil {
					return nil, err
				}
			}
		}
		// For segment commands, attach a reader over the segment's file data.
		if s != nil {
			s.sr = io.NewSectionReader(r, int64(s.Offset), int64(s.Filesz))
			s.ReaderAt = s.sr
		}
	}
	return f, nil
}
448
449func (f *File) parseSymtab(symdat, strtab, cmddat []byte, hdr *SymtabCmd, offset int64) (*Symtab, error) {
450	bo := f.ByteOrder
451	symtab := make([]Symbol, hdr.Nsyms)
452	b := bytes.NewReader(symdat)
453	for i := range symtab {
454		var n Nlist64
455		if f.Magic == Magic64 {
456			if err := binary.Read(b, bo, &n); err != nil {
457				return nil, err
458			}
459		} else {
460			var n32 Nlist32
461			if err := binary.Read(b, bo, &n32); err != nil {
462				return nil, err
463			}
464			n.Name = n32.Name
465			n.Type = n32.Type
466			n.Sect = n32.Sect
467			n.Desc = n32.Desc
468			n.Value = uint64(n32.Value)
469		}
470		sym := &symtab[i]
471		if n.Name >= uint32(len(strtab)) {
472			return nil, &FormatError{offset, "invalid name in symbol table", n.Name}
473		}
474		sym.Name = cstring(strtab[n.Name:])
475		sym.Type = n.Type
476		sym.Sect = n.Sect
477		sym.Desc = n.Desc
478		sym.Value = n.Value
479	}
480	st := new(Symtab)
481	st.LoadBytes = LoadBytes(cmddat)
482	st.Syms = symtab
483	return st, nil
484}
485
// relocInfo is the raw form of one relocation entry as read from the file:
// two consecutive uint32 words. pushSection unpacks the bit fields of these
// words into a Reloc.
type relocInfo struct {
	Addr   uint32 // first word; its top bit marks the entry as scattered
	Symnum uint32 // second word; packed fields, or the value for scattered entries
}
490
// pushSection appends sh to f.Sections, attaches a reader over the section's
// contents in r, and, when the header declares relocations, reads and
// decodes them into sh.Relocs.
//
// It returns any error from reading or decoding the relocation data. The
// section has already been appended to f.Sections when such an error occurs.
func (f *File) pushSection(sh *Section, r io.ReaderAt) error {
	f.Sections = append(f.Sections, sh)
	sh.sr = io.NewSectionReader(r, int64(sh.Offset), int64(sh.Size))
	sh.ReaderAt = sh.sr

	if sh.Nreloc > 0 {
		// Each relocation entry occupies 8 bytes in the file.
		reldat := make([]byte, int(sh.Nreloc)*8)
		if _, err := r.ReadAt(reldat, int64(sh.Reloff)); err != nil {
			return err
		}
		b := bytes.NewReader(reldat)

		bo := f.ByteOrder

		sh.Relocs = make([]Reloc, sh.Nreloc)
		for i := range sh.Relocs {
			rel := &sh.Relocs[i]

			var ri relocInfo
			if err := binary.Read(b, bo, &ri); err != nil {
				return err
			}

			// Bit 31 of the first word selects the scattered layout.
			if ri.Addr&(1<<31) != 0 { // scattered
				// First word: bits 0-23 address, 24-27 type, 28-29 length,
				// bit 30 pc-relative. The second word is the value itself.
				rel.Addr = ri.Addr & (1<<24 - 1)
				rel.Type = uint8((ri.Addr >> 24) & (1<<4 - 1))
				rel.Len = uint8((ri.Addr >> 28) & (1<<2 - 1))
				rel.Pcrel = ri.Addr&(1<<30) != 0
				rel.Value = ri.Symnum
				rel.Scattered = true
			} else {
				// The packed fields of the second word sit at different bit
				// positions depending on the file's byte order.
				switch bo {
				case binary.LittleEndian:
					// bits 0-23 value, bit 24 pc-relative, 25-26 length,
					// bit 27 extern, 28-31 type.
					rel.Addr = ri.Addr
					rel.Value = ri.Symnum & (1<<24 - 1)
					rel.Pcrel = ri.Symnum&(1<<24) != 0
					rel.Len = uint8((ri.Symnum >> 25) & (1<<2 - 1))
					rel.Extern = ri.Symnum&(1<<27) != 0
					rel.Type = uint8((ri.Symnum >> 28) & (1<<4 - 1))
				case binary.BigEndian:
					// bits 8-31 value, bit 7 pc-relative, 5-6 length,
					// bit 4 extern, 0-3 type.
					rel.Addr = ri.Addr
					rel.Value = ri.Symnum >> 8
					rel.Pcrel = ri.Symnum&(1<<7) != 0
					rel.Len = uint8((ri.Symnum >> 5) & (1<<2 - 1))
					rel.Extern = ri.Symnum&(1<<4) != 0
					rel.Type = uint8(ri.Symnum & (1<<4 - 1))
				default:
					// NewFile only ever selects one of the two orders above.
					panic("unreachable")
				}
			}
		}
	}

	return nil
}
546
// cstring returns the bytes of b up to, but not including, the first NUL
// byte as a string. If b contains no NUL byte, all of b is returned.
func cstring(b []byte) string {
	for end, c := range b {
		if c == 0 {
			return string(b[:end])
		}
	}
	return string(b)
}
553
554// Segment returns the first Segment with the given name, or nil if no such segment exists.
555func (f *File) Segment(name string) *Segment {
556	for _, l := range f.Loads {
557		if s, ok := l.(*Segment); ok && s.Name == name {
558			return s
559		}
560	}
561	return nil
562}
563
564// Section returns the first section with the given name, or nil if no such
565// section exists.
566func (f *File) Section(name string) *Section {
567	for _, s := range f.Sections {
568		if s.Name == name {
569			return s
570		}
571	}
572	return nil
573}
574
575// DWARF returns the DWARF debug information for the Mach-O file.
576func (f *File) DWARF() (*dwarf.Data, error) {
577	// There are many other DWARF sections, but these
578	// are the ones the debug/dwarf package uses.
579	// Don't bother loading others.
580	var names = [...]string{"abbrev", "info", "line", "ranges", "str"}
581	var dat [len(names)][]byte
582	for i, name := range names {
583		name = "__debug_" + name
584		s := f.Section(name)
585		if s == nil {
586			continue
587		}
588		b, err := s.Data()
589		if err != nil && uint64(len(b)) < s.Size {
590			return nil, err
591		}
592		dat[i] = b
593	}
594
595	abbrev, info, line, ranges, str := dat[0], dat[1], dat[2], dat[3], dat[4]
596	return dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
597}
598
599// ImportedSymbols returns the names of all symbols
600// referred to by the binary f that are expected to be
601// satisfied by other libraries at dynamic load time.
602func (f *File) ImportedSymbols() ([]string, error) {
603	if f.Dysymtab == nil || f.Symtab == nil {
604		return nil, &FormatError{0, "missing symbol table", nil}
605	}
606
607	st := f.Symtab
608	dt := f.Dysymtab
609	var all []string
610	for _, s := range st.Syms[dt.Iundefsym : dt.Iundefsym+dt.Nundefsym] {
611		all = append(all, s.Name)
612	}
613	return all, nil
614}
615
616// ImportedLibraries returns the paths of all libraries
617// referred to by the binary f that are expected to be
618// linked with the binary at dynamic link time.
619func (f *File) ImportedLibraries() ([]string, error) {
620	var all []string
621	for _, l := range f.Loads {
622		if lib, ok := l.(*Dylib); ok {
623			all = append(all, lib.Name)
624		}
625	}
626	return all, nil
627}
628