1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Go new object file format, reading and writing.
6
7package goobj2 // TODO: replace the goobj package?
8
9import (
10	"bytes"
11	"cmd/internal/bio"
12	"encoding/binary"
13	"errors"
14	"fmt"
15	"io"
16	"unsafe"
17)
18
19// New object file format.
20//
21//    Header struct {
22//       Magic   [...]byte   // "\x00go114LD"
23//       Flags   uint32
24//       // TODO: Fingerprint
25//       Offsets [...]uint32 // byte offset of each block below
26//    }
27//
28//    Strings [...]struct {
29//       Len  uint32
30//       Data [...]byte
31//    }
32//
33//    Autolib  [...]stringOff // imported packages (for file loading) // TODO: add fingerprints
34//    PkgIndex [...]stringOff // referenced packages by index
35//
36//    DwarfFiles [...]stringOff
37//
38//    SymbolDefs [...]struct {
39//       Name stringOff
40//       ABI  uint16
41//       Type uint8
42//       Flag uint8
43//       Size uint32
44//    }
45//    NonPkgDefs [...]struct { // non-pkg symbol definitions
46//       ... // same as SymbolDefs
47//    }
48//    NonPkgRefs [...]struct { // non-pkg symbol references
49//       ... // same as SymbolDefs
50//    }
51//
52//    RelocIndex [...]uint32 // index to Relocs
53//    AuxIndex   [...]uint32 // index to Aux
54//    DataIndex  [...]uint32 // offset to Data
55//
56//    Relocs [...]struct {
57//       Off  int32
58//       Size uint8
59//       Type uint8
60//       Add  int64
61//       Sym  symRef
62//    }
63//
64//    Aux [...]struct {
65//       Type uint8
66//       Sym  symRef
67//    }
68//
69//    Data   [...]byte
70//    Pcdata [...]byte
71//
// stringOff is a uint32 offset that points to the corresponding
73// string, which is a uint32 length followed by that number of bytes.
74//
75// symRef is struct { PkgIdx, SymIdx uint32 }.
76//
77// Slice type (e.g. []symRef) is encoded as a length prefix (uint32)
78// followed by that number of elements.
79//
80// The types below correspond to the encoded data structure in the
81// object file.
82
83// Symbol indexing.
84//
85// Each symbol is referenced with a pair of indices, { PkgIdx, SymIdx },
86// as the symRef struct above.
87//
88// PkgIdx is either a predeclared index (see PkgIdxNone below) or
89// an index of an imported package. For the latter case, PkgIdx is the
90// index of the package in the PkgIndex array. 0 is an invalid index.
91//
92// SymIdx is the index of the symbol in the given package.
93// - If PkgIdx is PkgIdxSelf, SymIdx is the index of the symbol in the
94//   SymbolDefs array.
95// - If PkgIdx is PkgIdxNone, SymIdx is the index of the symbol in the
//   NonPkgDefs array (could naturally overflow to NonPkgRefs array).
97// - Otherwise, SymIdx is the index of the symbol in some other package's
98//   SymbolDefs array.
99//
100// {0, 0} represents a nil symbol. Otherwise PkgIdx should not be 0.
101//
// RelocIndex, AuxIndex, and DataIndex contain indices/offsets to
103// Relocs/Aux/Data blocks, one element per symbol, first for all the
104// defined symbols, then all the defined non-package symbols, in the
105// same order of SymbolDefs/NonPkgDefs arrays. For N total defined
106// symbols, the array is of length N+1. The last element is the total
107// number of relocations (aux symbols, data blocks, etc.).
108//
109// They can be accessed by index. For the i-th symbol, its relocations
110// are the RelocIndex[i]-th (inclusive) to RelocIndex[i+1]-th (exclusive)
111// elements in the Relocs array. Aux/Data are likewise. (The index is
112// 0-based.)
113
114// Auxiliary symbols.
115//
116// Each symbol may (or may not) be associated with a number of auxiliary
117// symbols. They are described in the Aux block. See Aux struct below.
118// Currently a symbol's Gotype and FuncInfo are auxiliary symbols. We
119// may make use of aux symbols in more cases, e.g. DWARF symbols.
120
// Package Index.
//
// The predeclared PkgIdx values count down from 1<<31 - 1 (one per
// constant, via iota), while the indices of referenced packages count up
// from 1, so the two ranges are kept apart.
const (
	PkgIdxNone    = (1<<31 - 1) - iota // Non-package symbols
	PkgIdxBuiltin                      // Predefined symbols // TODO: not used for now, we could use it for compiler-generated symbols like runtime.newobject
	PkgIdxSelf                         // Symbols defined in the current package
	// 0 is never a valid package index; the symbol reference {0, 0}
	// stands for a nil symbol.
	PkgIdxInvalid = 0
	// The index of other referenced packages starts from 1.
)
129
// Blocks
//
// Each Blk* constant is the index of one block's entry in Header.Offsets.
// The constants are declared in the order the blocks are laid out, and the
// Reader computes a block's size as Offsets[Blk+1] - Offsets[Blk], so the
// order here must not change independently of the file layout.
const (
	BlkAutolib = iota
	BlkPkgIdx
	BlkDwarfFile
	BlkSymdef
	BlkNonpkgdef
	BlkNonpkgref
	BlkRelocIdx
	BlkAuxIdx
	BlkDataIdx
	BlkReloc
	BlkAux
	BlkData
	BlkPcdata
	// NBlk is the number of blocks; it sizes the Header.Offsets array.
	NBlk
)
147
// File header.
// TODO: probably no need to export this.
type Header struct {
	// Magic is the magic string at the start of the file. Read rejects
	// any value other than the Magic constant.
	Magic   string
	// Flags holds the flag bits of the object file.
	Flags   uint32
	// Offsets holds the byte offset of each block, indexed by the Blk*
	// constants.
	Offsets [NBlk]uint32
}
155
// Magic is the magic string that begins an object file in this format.
// Header.Read reports an error for input that does not start with it.
const Magic = "\x00go114LD"
157
158func (h *Header) Write(w *Writer) {
159	w.RawString(h.Magic)
160	w.Uint32(h.Flags)
161	for _, x := range h.Offsets {
162		w.Uint32(x)
163	}
164}
165
166func (h *Header) Read(r *Reader) error {
167	b := r.BytesAt(0, len(Magic))
168	h.Magic = string(b)
169	if h.Magic != Magic {
170		return errors.New("wrong magic, not a Go object file")
171	}
172	off := uint32(len(h.Magic))
173	h.Flags = r.uint32At(off)
174	off += 4
175	for i := range h.Offsets {
176		h.Offsets[i] = r.uint32At(off)
177		off += 4
178	}
179	return nil
180}
181
182func (h *Header) Size() int {
183	return len(h.Magic) + 4 + 4*len(h.Offsets)
184}
185
// Symbol definition.
//
// A Sym is encoded as a fixed-size record; see Write, Read and Size.
type Sym struct {
	// Name is the symbol name. It is encoded as a 4-byte reference to a
	// string written elsewhere in the file.
	Name string
	// ABI is encoded as 2 bytes.
	ABI  uint16
	// Type is encoded as 1 byte.
	Type uint8
	// Flag is a set of SymFlag* bits; the accessor methods on Sym test
	// the individual bits.
	Flag uint8
	// Siz is encoded as 4 bytes (named Size in the format description).
	Siz  uint32
}
194
// SymABIstatic is the all-ones uint16 value (0xFFFF).
// NOTE(review): presumably the Sym.ABI value that marks a static symbol —
// confirm against the code that sets it.
const SymABIstatic = ^uint16(0)

// Object file flag bits.
// NOTE(review): presumably the values stored in Header.Flags — confirm.
const (
	ObjFlagShared = 1 << iota
)

// Bits of Sym.Flag. Each constant is a distinct single bit; the eight
// constants together use every bit of the uint8 field. Each one has a
// matching accessor method on Sym.
const (
	SymFlagDupok = 1 << iota
	SymFlagLocal
	SymFlagTypelink
	SymFlagLeaf
	SymFlagCFunc
	SymFlagReflectMethod
	SymFlagGoType
	SymFlagTopFrame
)
211
212func (s *Sym) Write(w *Writer) {
213	w.StringRef(s.Name)
214	w.Uint16(s.ABI)
215	w.Uint8(s.Type)
216	w.Uint8(s.Flag)
217	w.Uint32(s.Siz)
218}
219
220func (s *Sym) Read(r *Reader, off uint32) {
221	s.Name = r.StringRef(off)
222	s.ABI = r.uint16At(off + 4)
223	s.Type = r.uint8At(off + 6)
224	s.Flag = r.uint8At(off + 7)
225	s.Siz = r.uint32At(off + 8)
226}
227
228func (s *Sym) Size() int {
229	return 4 + 2 + 1 + 1 + 4
230}
231
232func (s *Sym) Dupok() bool         { return s.Flag&SymFlagDupok != 0 }
233func (s *Sym) Local() bool         { return s.Flag&SymFlagLocal != 0 }
234func (s *Sym) Typelink() bool      { return s.Flag&SymFlagTypelink != 0 }
235func (s *Sym) Leaf() bool          { return s.Flag&SymFlagLeaf != 0 }
236func (s *Sym) CFunc() bool         { return s.Flag&SymFlagCFunc != 0 }
237func (s *Sym) ReflectMethod() bool { return s.Flag&SymFlagReflectMethod != 0 }
238func (s *Sym) IsGoType() bool      { return s.Flag&SymFlagGoType != 0 }
239func (s *Sym) TopFrame() bool      { return s.Flag&SymFlagTopFrame != 0 }
240
// Symbol reference.
//
// A symbol is referenced by the pair {PkgIdx, SymIdx}. The pair {0, 0}
// represents a nil symbol.
type SymRef struct {
	// PkgIdx is either a predeclared index (PkgIdxNone, PkgIdxBuiltin,
	// PkgIdxSelf) or the index of a referenced package.
	PkgIdx uint32
	// SymIdx is the index of the symbol within the package selected by
	// PkgIdx.
	SymIdx uint32
}
246
247func (s *SymRef) Write(w *Writer) {
248	w.Uint32(s.PkgIdx)
249	w.Uint32(s.SymIdx)
250}
251
252func (s *SymRef) Read(r *Reader, off uint32) {
253	s.PkgIdx = r.uint32At(off)
254	s.SymIdx = r.uint32At(off + 4)
255}
256
257func (s *SymRef) Size() int {
258	return 4 + 4
259}
260
// Relocation.
//
// A Reloc is encoded as a fixed-size record; see Write, Read and Size.
// NOTE(review): the per-field meanings below are inferred from the field
// names — confirm against the linker's use of them.
type Reloc struct {
	// Off is encoded as 4 bytes (presumably the offset being relocated).
	Off  int32
	// Siz is encoded as 1 byte (presumably the width of the relocated value).
	Siz  uint8
	// Type is encoded as 1 byte (presumably the relocation kind).
	Type uint8
	// Add is encoded as 8 bytes (presumably the addend).
	Add  int64
	// Sym is the symbol reference stored with the relocation.
	Sym  SymRef
}
269
270func (r *Reloc) Write(w *Writer) {
271	w.Uint32(uint32(r.Off))
272	w.Uint8(r.Siz)
273	w.Uint8(r.Type)
274	w.Uint64(uint64(r.Add))
275	r.Sym.Write(w)
276}
277
278func (o *Reloc) Read(r *Reader, off uint32) {
279	o.Off = r.int32At(off)
280	o.Siz = r.uint8At(off + 4)
281	o.Type = r.uint8At(off + 5)
282	o.Add = r.int64At(off + 6)
283	o.Sym.Read(r, off+14)
284}
285
286func (r *Reloc) Size() int {
287	return 4 + 1 + 1 + 8 + r.Sym.Size()
288}
289
// Aux symbol info.
//
// An Aux is encoded as a 1-byte type followed by a symbol reference; see
// Write, Read and Size.
type Aux struct {
	// Type is the kind of auxiliary symbol.
	// NOTE(review): presumably one of the Aux* constants — confirm.
	Type uint8
	// Sym is the reference to the auxiliary symbol.
	Sym  SymRef
}
295
// Aux Type
//
// The values are assigned by iota starting from 0, so the declaration
// order determines the numeric values.
// NOTE(review): presumably these are the values stored in Aux.Type, which
// would make the order part of the file format — confirm.
const (
	AuxGotype = iota
	AuxFuncInfo
	AuxFuncdata
	AuxDwarfInfo
	AuxDwarfLoc
	AuxDwarfRanges
	AuxDwarfLines

	// TODO: more. Pcdata?
)
308
309func (a *Aux) Write(w *Writer) {
310	w.Uint8(a.Type)
311	a.Sym.Write(w)
312}
313
314func (a *Aux) Read(r *Reader, off uint32) {
315	a.Type = r.uint8At(off)
316	a.Sym.Read(r, off+1)
317}
318
319func (a *Aux) Size() int {
320	return 1 + a.Sym.Size()
321}
322
// Writer writes object file data to an underlying bio.Writer, keeping
// track of how many bytes it has written and of where each string added
// with AddString was placed.
type Writer struct {
	// wr is the destination of all writes.
	wr        *bio.Writer
	// stringMap maps each string added with AddString to the offset at
	// which its length-prefixed encoding starts.
	stringMap map[string]uint32
	off       uint32 // running offset
}
328
329func NewWriter(wr *bio.Writer) *Writer {
330	return &Writer{wr: wr, stringMap: make(map[string]uint32)}
331}
332
333func (w *Writer) AddString(s string) {
334	if _, ok := w.stringMap[s]; ok {
335		return
336	}
337	w.stringMap[s] = w.off
338	w.Uint32(uint32(len(s)))
339	w.RawString(s)
340}
341
342func (w *Writer) StringRef(s string) {
343	off, ok := w.stringMap[s]
344	if !ok {
345		panic(fmt.Sprintf("writeStringRef: string not added: %q", s))
346	}
347	w.Uint32(off)
348}
349
350func (w *Writer) RawString(s string) {
351	w.wr.WriteString(s)
352	w.off += uint32(len(s))
353}
354
355func (w *Writer) Bytes(s []byte) {
356	w.wr.Write(s)
357	w.off += uint32(len(s))
358}
359
360func (w *Writer) Uint64(x uint64) {
361	var b [8]byte
362	binary.LittleEndian.PutUint64(b[:], x)
363	w.wr.Write(b[:])
364	w.off += 8
365}
366
367func (w *Writer) Uint32(x uint32) {
368	var b [4]byte
369	binary.LittleEndian.PutUint32(b[:], x)
370	w.wr.Write(b[:])
371	w.off += 4
372}
373
374func (w *Writer) Uint16(x uint16) {
375	var b [2]byte
376	binary.LittleEndian.PutUint16(b[:], x)
377	w.wr.Write(b[:])
378	w.off += 2
379}
380
381func (w *Writer) Uint8(x uint8) {
382	w.wr.WriteByte(x)
383	w.off++
384}
385
386func (w *Writer) Offset() uint32 {
387	return w.off
388}
389
// Reader provides access to the contents of an object file held in
// memory. Its accessors decode values directly out of b, at offsets
// derived from the block table in the header.
type Reader struct {
	b        []byte // mmapped bytes, if not nil
	readonly bool   // whether b is backed with read-only memory

	// rd is set by NewReaderFromBytes to a reader over b, and start to 0.
	// NOTE(review): no method visible in this part of the file reads rd
	// or start — confirm how they are used elsewhere.
	rd    io.ReaderAt
	start uint32
	h     Header // keep block offsets
}
398
399func NewReaderFromBytes(b []byte, readonly bool) *Reader {
400	r := &Reader{b: b, readonly: readonly, rd: bytes.NewReader(b), start: 0}
401	err := r.h.Read(r)
402	if err != nil {
403		return nil
404	}
405	return r
406}
407
408func (r *Reader) BytesAt(off uint32, len int) []byte {
409	if len == 0 {
410		return nil
411	}
412	end := int(off) + len
413	return r.b[int(off):end:end]
414}
415
416func (r *Reader) uint64At(off uint32) uint64 {
417	b := r.BytesAt(off, 8)
418	return binary.LittleEndian.Uint64(b)
419}
420
421func (r *Reader) int64At(off uint32) int64 {
422	return int64(r.uint64At(off))
423}
424
425func (r *Reader) uint32At(off uint32) uint32 {
426	b := r.BytesAt(off, 4)
427	return binary.LittleEndian.Uint32(b)
428}
429
430func (r *Reader) int32At(off uint32) int32 {
431	return int32(r.uint32At(off))
432}
433
434func (r *Reader) uint16At(off uint32) uint16 {
435	b := r.BytesAt(off, 2)
436	return binary.LittleEndian.Uint16(b)
437}
438
439func (r *Reader) uint8At(off uint32) uint8 {
440	b := r.BytesAt(off, 1)
441	return b[0]
442}
443
444func (r *Reader) StringAt(off uint32) string {
445	l := r.uint32At(off)
446	b := r.b[off+4 : off+4+l]
447	if r.readonly {
448		return toString(b) // backed by RO memory, ok to make unsafe string
449	}
450	return string(b)
451}
452
// toString returns a string that shares b's memory instead of copying it.
// The caller must make sure the bytes are never modified afterwards. An
// empty or nil b yields "".
func toString(b []byte) string {
	if len(b) == 0 {
		return ""
	}
	// A slice header begins with the same (data pointer, length) pair
	// that makes up a string header, so the slice header can be read as
	// a string directly.
	return *(*string)(unsafe.Pointer(&b))
}
466
467func (r *Reader) StringRef(off uint32) string {
468	return r.StringAt(r.uint32At(off))
469}
470
471func (r *Reader) Autolib() []string {
472	n := (r.h.Offsets[BlkAutolib+1] - r.h.Offsets[BlkAutolib]) / 4
473	s := make([]string, n)
474	for i := range s {
475		off := r.h.Offsets[BlkAutolib] + uint32(i)*4
476		s[i] = r.StringRef(off)
477	}
478	return s
479}
480
481func (r *Reader) Pkglist() []string {
482	n := (r.h.Offsets[BlkPkgIdx+1] - r.h.Offsets[BlkPkgIdx]) / 4
483	s := make([]string, n)
484	for i := range s {
485		off := r.h.Offsets[BlkPkgIdx] + uint32(i)*4
486		s[i] = r.StringRef(off)
487	}
488	return s
489}
490
491func (r *Reader) NPkg() int {
492	return int(r.h.Offsets[BlkPkgIdx+1]-r.h.Offsets[BlkPkgIdx]) / 4
493}
494
495func (r *Reader) Pkg(i int) string {
496	off := r.h.Offsets[BlkPkgIdx] + uint32(i)*4
497	return r.StringRef(off)
498}
499
500func (r *Reader) NDwarfFile() int {
501	return int(r.h.Offsets[BlkDwarfFile+1]-r.h.Offsets[BlkDwarfFile]) / 4
502}
503
504func (r *Reader) DwarfFile(i int) string {
505	off := r.h.Offsets[BlkDwarfFile] + uint32(i)*4
506	return r.StringRef(off)
507}
508
509func (r *Reader) NSym() int {
510	symsiz := (&Sym{}).Size()
511	return int(r.h.Offsets[BlkSymdef+1]-r.h.Offsets[BlkSymdef]) / symsiz
512}
513
514func (r *Reader) NNonpkgdef() int {
515	symsiz := (&Sym{}).Size()
516	return int(r.h.Offsets[BlkNonpkgdef+1]-r.h.Offsets[BlkNonpkgdef]) / symsiz
517}
518
519func (r *Reader) NNonpkgref() int {
520	symsiz := (&Sym{}).Size()
521	return int(r.h.Offsets[BlkNonpkgref+1]-r.h.Offsets[BlkNonpkgref]) / symsiz
522}
523
524// SymOff returns the offset of the i-th symbol.
525func (r *Reader) SymOff(i int) uint32 {
526	symsiz := (&Sym{}).Size()
527	return r.h.Offsets[BlkSymdef] + uint32(i*symsiz)
528}
529
530// NReloc returns the number of relocations of the i-th symbol.
531func (r *Reader) NReloc(i int) int {
532	relocIdxOff := r.h.Offsets[BlkRelocIdx] + uint32(i*4)
533	return int(r.uint32At(relocIdxOff+4) - r.uint32At(relocIdxOff))
534}
535
536// RelocOff returns the offset of the j-th relocation of the i-th symbol.
537func (r *Reader) RelocOff(i int, j int) uint32 {
538	relocIdxOff := r.h.Offsets[BlkRelocIdx] + uint32(i*4)
539	relocIdx := r.uint32At(relocIdxOff)
540	relocsiz := (&Reloc{}).Size()
541	return r.h.Offsets[BlkReloc] + (relocIdx+uint32(j))*uint32(relocsiz)
542}
543
544// NAux returns the number of aux symbols of the i-th symbol.
545func (r *Reader) NAux(i int) int {
546	auxIdxOff := r.h.Offsets[BlkAuxIdx] + uint32(i*4)
547	return int(r.uint32At(auxIdxOff+4) - r.uint32At(auxIdxOff))
548}
549
550// AuxOff returns the offset of the j-th aux symbol of the i-th symbol.
551func (r *Reader) AuxOff(i int, j int) uint32 {
552	auxIdxOff := r.h.Offsets[BlkAuxIdx] + uint32(i*4)
553	auxIdx := r.uint32At(auxIdxOff)
554	auxsiz := (&Aux{}).Size()
555	return r.h.Offsets[BlkAux] + (auxIdx+uint32(j))*uint32(auxsiz)
556}
557
558// DataOff returns the offset of the i-th symbol's data.
559func (r *Reader) DataOff(i int) uint32 {
560	dataIdxOff := r.h.Offsets[BlkDataIdx] + uint32(i*4)
561	return r.h.Offsets[BlkData] + r.uint32At(dataIdxOff)
562}
563
564// DataSize returns the size of the i-th symbol's data.
565func (r *Reader) DataSize(i int) int {
566	return int(r.DataOff(i+1) - r.DataOff(i))
567}
568
569// Data returns the i-th symbol's data.
570func (r *Reader) Data(i int) []byte {
571	return r.BytesAt(r.DataOff(i), r.DataSize(i))
572}
573
574// AuxDataBase returns the base offset of the aux data block.
575func (r *Reader) PcdataBase() uint32 {
576	return r.h.Offsets[BlkPcdata]
577}
578
579// ReadOnly returns whether r.BytesAt returns read-only bytes.
580func (r *Reader) ReadOnly() bool {
581	return r.readonly
582}
583
584// Flags returns the flag bits read from the object file header.
585func (r *Reader) Flags() uint32 {
586	return r.h.Flags
587}
588