1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Binary package export.
6// This file was derived from $GOROOT/src/cmd/compile/internal/gc/bexport.go;
7// see that file for specification of the format.
8
9package gcimporter
10
11import (
12	"bytes"
13	"encoding/binary"
14	"fmt"
15	"go/ast"
16	"go/constant"
17	"go/token"
18	"go/types"
19	"math"
20	"math/big"
21	"sort"
22	"strings"
23)
24
25// If debugFormat is set, each integer and string value is preceded by a marker
26// and position information in the encoding. This mechanism permits an importer
27// to recognize immediately when it is out of sync. The importer recognizes this
28// mode automatically (i.e., it can import export data produced with debugging
29// support even if debugFormat is not set at the time of import). This mode will
30// lead to massively larger export data (by a factor of 2 to 3) and should only
31// be enabled during development and debugging.
32//
33// NOTE: This flag is the first flag to enable if importing dies because of
34// (suspected) format errors, and whenever a change is made to the format.
35const debugFormat = false // default: false
36
37// If trace is set, debugging output is printed to std out.
38const trace = false // default: false
39
40// Current export format version. Increase with each format change.
41// Note: The latest binary (non-indexed) export format is at version 6.
42//       This exporter is still at level 4, but it doesn't matter since
43//       the binary importer can handle older versions just fine.
44// 6: package height (CL 105038) -- NOT IMPLEMENTED HERE
45// 5: improved position encoding efficiency (issue 20080, CL 41619) -- NOT IMPLEMEMTED HERE
46// 4: type name objects support type aliases, uses aliasTag
47// 3: Go1.8 encoding (same as version 2, aliasTag defined but never used)
48// 2: removed unused bool in ODCL export (compiler only)
49// 1: header format change (more regular), export package for _ struct fields
50// 0: Go1.7 encoding
51const exportVersion = 4
52
53// trackAllTypes enables cycle tracking for all types, not just named
54// types. The existing compiler invariants assume that unnamed types
55// that are not completely set up are not used, or else there are spurious
56// errors.
57// If disabled, only named types are tracked, possibly leading to slightly
58// less efficient encoding in rare cases. It also prevents the export of
59// some corner-case type declarations (but those are not handled correctly
60// with with the textual export format either).
61// TODO(gri) enable and remove once issues caused by it are fixed
62const trackAllTypes = false
63
64type exporter struct {
65	fset *token.FileSet
66	out  bytes.Buffer
67
68	// object -> index maps, indexed in order of serialization
69	strIndex map[string]int
70	pkgIndex map[*types.Package]int
71	typIndex map[types.Type]int
72
73	// position encoding
74	posInfoFormat bool
75	prevFile      string
76	prevLine      int
77
78	// debugging support
79	written int // bytes written
80	indent  int // for trace
81}
82
83// internalError represents an error generated inside this package.
84type internalError string
85
86func (e internalError) Error() string { return "gcimporter: " + string(e) }
87
88func internalErrorf(format string, args ...interface{}) error {
89	return internalError(fmt.Sprintf(format, args...))
90}
91
92// BExportData returns binary export data for pkg.
93// If no file set is provided, position info will be missing.
94func BExportData(fset *token.FileSet, pkg *types.Package) (b []byte, err error) {
95	defer func() {
96		if e := recover(); e != nil {
97			if ierr, ok := e.(internalError); ok {
98				err = ierr
99				return
100			}
101			// Not an internal error; panic again.
102			panic(e)
103		}
104	}()
105
106	p := exporter{
107		fset:          fset,
108		strIndex:      map[string]int{"": 0}, // empty string is mapped to 0
109		pkgIndex:      make(map[*types.Package]int),
110		typIndex:      make(map[types.Type]int),
111		posInfoFormat: true, // TODO(gri) might become a flag, eventually
112	}
113
114	// write version info
115	// The version string must start with "version %d" where %d is the version
116	// number. Additional debugging information may follow after a blank; that
117	// text is ignored by the importer.
118	p.rawStringln(fmt.Sprintf("version %d", exportVersion))
119	var debug string
120	if debugFormat {
121		debug = "debug"
122	}
123	p.rawStringln(debug) // cannot use p.bool since it's affected by debugFormat; also want to see this clearly
124	p.bool(trackAllTypes)
125	p.bool(p.posInfoFormat)
126
127	// --- generic export data ---
128
129	// populate type map with predeclared "known" types
130	for index, typ := range predeclared() {
131		p.typIndex[typ] = index
132	}
133	if len(p.typIndex) != len(predeclared()) {
134		return nil, internalError("duplicate entries in type map?")
135	}
136
137	// write package data
138	p.pkg(pkg, true)
139	if trace {
140		p.tracef("\n")
141	}
142
143	// write objects
144	objcount := 0
145	scope := pkg.Scope()
146	for _, name := range scope.Names() {
147		if !ast.IsExported(name) {
148			continue
149		}
150		if trace {
151			p.tracef("\n")
152		}
153		p.obj(scope.Lookup(name))
154		objcount++
155	}
156
157	// indicate end of list
158	if trace {
159		p.tracef("\n")
160	}
161	p.tag(endTag)
162
163	// for self-verification only (redundant)
164	p.int(objcount)
165
166	if trace {
167		p.tracef("\n")
168	}
169
170	// --- end of export data ---
171
172	return p.out.Bytes(), nil
173}
174
175func (p *exporter) pkg(pkg *types.Package, emptypath bool) {
176	if pkg == nil {
177		panic(internalError("unexpected nil pkg"))
178	}
179
180	// if we saw the package before, write its index (>= 0)
181	if i, ok := p.pkgIndex[pkg]; ok {
182		p.index('P', i)
183		return
184	}
185
186	// otherwise, remember the package, write the package tag (< 0) and package data
187	if trace {
188		p.tracef("P%d = { ", len(p.pkgIndex))
189		defer p.tracef("} ")
190	}
191	p.pkgIndex[pkg] = len(p.pkgIndex)
192
193	p.tag(packageTag)
194	p.string(pkg.Name())
195	if emptypath {
196		p.string("")
197	} else {
198		p.string(pkg.Path())
199	}
200}
201
202func (p *exporter) obj(obj types.Object) {
203	switch obj := obj.(type) {
204	case *types.Const:
205		p.tag(constTag)
206		p.pos(obj)
207		p.qualifiedName(obj)
208		p.typ(obj.Type())
209		p.value(obj.Val())
210
211	case *types.TypeName:
212		if obj.IsAlias() {
213			p.tag(aliasTag)
214			p.pos(obj)
215			p.qualifiedName(obj)
216		} else {
217			p.tag(typeTag)
218		}
219		p.typ(obj.Type())
220
221	case *types.Var:
222		p.tag(varTag)
223		p.pos(obj)
224		p.qualifiedName(obj)
225		p.typ(obj.Type())
226
227	case *types.Func:
228		p.tag(funcTag)
229		p.pos(obj)
230		p.qualifiedName(obj)
231		sig := obj.Type().(*types.Signature)
232		p.paramList(sig.Params(), sig.Variadic())
233		p.paramList(sig.Results(), false)
234
235	default:
236		panic(internalErrorf("unexpected object %v (%T)", obj, obj))
237	}
238}
239
240func (p *exporter) pos(obj types.Object) {
241	if !p.posInfoFormat {
242		return
243	}
244
245	file, line := p.fileLine(obj)
246	if file == p.prevFile {
247		// common case: write line delta
248		// delta == 0 means different file or no line change
249		delta := line - p.prevLine
250		p.int(delta)
251		if delta == 0 {
252			p.int(-1) // -1 means no file change
253		}
254	} else {
255		// different file
256		p.int(0)
257		// Encode filename as length of common prefix with previous
258		// filename, followed by (possibly empty) suffix. Filenames
259		// frequently share path prefixes, so this can save a lot
260		// of space and make export data size less dependent on file
261		// path length. The suffix is unlikely to be empty because
262		// file names tend to end in ".go".
263		n := commonPrefixLen(p.prevFile, file)
264		p.int(n)           // n >= 0
265		p.string(file[n:]) // write suffix only
266		p.prevFile = file
267		p.int(line)
268	}
269	p.prevLine = line
270}
271
272func (p *exporter) fileLine(obj types.Object) (file string, line int) {
273	if p.fset != nil {
274		pos := p.fset.Position(obj.Pos())
275		file = pos.Filename
276		line = pos.Line
277	}
278	return
279}
280
281func commonPrefixLen(a, b string) int {
282	if len(a) > len(b) {
283		a, b = b, a
284	}
285	// len(a) <= len(b)
286	i := 0
287	for i < len(a) && a[i] == b[i] {
288		i++
289	}
290	return i
291}
292
293func (p *exporter) qualifiedName(obj types.Object) {
294	p.string(obj.Name())
295	p.pkg(obj.Pkg(), false)
296}
297
298func (p *exporter) typ(t types.Type) {
299	if t == nil {
300		panic(internalError("nil type"))
301	}
302
303	// Possible optimization: Anonymous pointer types *T where
304	// T is a named type are common. We could canonicalize all
305	// such types *T to a single type PT = *T. This would lead
306	// to at most one *T entry in typIndex, and all future *T's
307	// would be encoded as the respective index directly. Would
308	// save 1 byte (pointerTag) per *T and reduce the typIndex
309	// size (at the cost of a canonicalization map). We can do
310	// this later, without encoding format change.
311
312	// if we saw the type before, write its index (>= 0)
313	if i, ok := p.typIndex[t]; ok {
314		p.index('T', i)
315		return
316	}
317
318	// otherwise, remember the type, write the type tag (< 0) and type data
319	if trackAllTypes {
320		if trace {
321			p.tracef("T%d = {>\n", len(p.typIndex))
322			defer p.tracef("<\n} ")
323		}
324		p.typIndex[t] = len(p.typIndex)
325	}
326
327	switch t := t.(type) {
328	case *types.Named:
329		if !trackAllTypes {
330			// if we don't track all types, track named types now
331			p.typIndex[t] = len(p.typIndex)
332		}
333
334		p.tag(namedTag)
335		p.pos(t.Obj())
336		p.qualifiedName(t.Obj())
337		p.typ(t.Underlying())
338		if !types.IsInterface(t) {
339			p.assocMethods(t)
340		}
341
342	case *types.Array:
343		p.tag(arrayTag)
344		p.int64(t.Len())
345		p.typ(t.Elem())
346
347	case *types.Slice:
348		p.tag(sliceTag)
349		p.typ(t.Elem())
350
351	case *dddSlice:
352		p.tag(dddTag)
353		p.typ(t.elem)
354
355	case *types.Struct:
356		p.tag(structTag)
357		p.fieldList(t)
358
359	case *types.Pointer:
360		p.tag(pointerTag)
361		p.typ(t.Elem())
362
363	case *types.Signature:
364		p.tag(signatureTag)
365		p.paramList(t.Params(), t.Variadic())
366		p.paramList(t.Results(), false)
367
368	case *types.Interface:
369		p.tag(interfaceTag)
370		p.iface(t)
371
372	case *types.Map:
373		p.tag(mapTag)
374		p.typ(t.Key())
375		p.typ(t.Elem())
376
377	case *types.Chan:
378		p.tag(chanTag)
379		p.int(int(3 - t.Dir())) // hack
380		p.typ(t.Elem())
381
382	default:
383		panic(internalErrorf("unexpected type %T: %s", t, t))
384	}
385}
386
387func (p *exporter) assocMethods(named *types.Named) {
388	// Sort methods (for determinism).
389	var methods []*types.Func
390	for i := 0; i < named.NumMethods(); i++ {
391		methods = append(methods, named.Method(i))
392	}
393	sort.Sort(methodsByName(methods))
394
395	p.int(len(methods))
396
397	if trace && methods != nil {
398		p.tracef("associated methods {>\n")
399	}
400
401	for i, m := range methods {
402		if trace && i > 0 {
403			p.tracef("\n")
404		}
405
406		p.pos(m)
407		name := m.Name()
408		p.string(name)
409		if !exported(name) {
410			p.pkg(m.Pkg(), false)
411		}
412
413		sig := m.Type().(*types.Signature)
414		p.paramList(types.NewTuple(sig.Recv()), false)
415		p.paramList(sig.Params(), sig.Variadic())
416		p.paramList(sig.Results(), false)
417		p.int(0) // dummy value for go:nointerface pragma - ignored by importer
418	}
419
420	if trace && methods != nil {
421		p.tracef("<\n} ")
422	}
423}
424
425type methodsByName []*types.Func
426
427func (x methodsByName) Len() int           { return len(x) }
428func (x methodsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
429func (x methodsByName) Less(i, j int) bool { return x[i].Name() < x[j].Name() }
430
431func (p *exporter) fieldList(t *types.Struct) {
432	if trace && t.NumFields() > 0 {
433		p.tracef("fields {>\n")
434		defer p.tracef("<\n} ")
435	}
436
437	p.int(t.NumFields())
438	for i := 0; i < t.NumFields(); i++ {
439		if trace && i > 0 {
440			p.tracef("\n")
441		}
442		p.field(t.Field(i))
443		p.string(t.Tag(i))
444	}
445}
446
447func (p *exporter) field(f *types.Var) {
448	if !f.IsField() {
449		panic(internalError("field expected"))
450	}
451
452	p.pos(f)
453	p.fieldName(f)
454	p.typ(f.Type())
455}
456
457func (p *exporter) iface(t *types.Interface) {
458	// TODO(gri): enable importer to load embedded interfaces,
459	// then emit Embeddeds and ExplicitMethods separately here.
460	p.int(0)
461
462	n := t.NumMethods()
463	if trace && n > 0 {
464		p.tracef("methods {>\n")
465		defer p.tracef("<\n} ")
466	}
467	p.int(n)
468	for i := 0; i < n; i++ {
469		if trace && i > 0 {
470			p.tracef("\n")
471		}
472		p.method(t.Method(i))
473	}
474}
475
476func (p *exporter) method(m *types.Func) {
477	sig := m.Type().(*types.Signature)
478	if sig.Recv() == nil {
479		panic(internalError("method expected"))
480	}
481
482	p.pos(m)
483	p.string(m.Name())
484	if m.Name() != "_" && !ast.IsExported(m.Name()) {
485		p.pkg(m.Pkg(), false)
486	}
487
488	// interface method; no need to encode receiver.
489	p.paramList(sig.Params(), sig.Variadic())
490	p.paramList(sig.Results(), false)
491}
492
493func (p *exporter) fieldName(f *types.Var) {
494	name := f.Name()
495
496	if f.Anonymous() {
497		// anonymous field - we distinguish between 3 cases:
498		// 1) field name matches base type name and is exported
499		// 2) field name matches base type name and is not exported
500		// 3) field name doesn't match base type name (alias name)
501		bname := basetypeName(f.Type())
502		if name == bname {
503			if ast.IsExported(name) {
504				name = "" // 1) we don't need to know the field name or package
505			} else {
506				name = "?" // 2) use unexported name "?" to force package export
507			}
508		} else {
509			// 3) indicate alias and export name as is
510			// (this requires an extra "@" but this is a rare case)
511			p.string("@")
512		}
513	}
514
515	p.string(name)
516	if name != "" && !ast.IsExported(name) {
517		p.pkg(f.Pkg(), false)
518	}
519}
520
521func basetypeName(typ types.Type) string {
522	switch typ := deref(typ).(type) {
523	case *types.Basic:
524		return typ.Name()
525	case *types.Named:
526		return typ.Obj().Name()
527	default:
528		return "" // unnamed type
529	}
530}
531
532func (p *exporter) paramList(params *types.Tuple, variadic bool) {
533	// use negative length to indicate unnamed parameters
534	// (look at the first parameter only since either all
535	// names are present or all are absent)
536	n := params.Len()
537	if n > 0 && params.At(0).Name() == "" {
538		n = -n
539	}
540	p.int(n)
541	for i := 0; i < params.Len(); i++ {
542		q := params.At(i)
543		t := q.Type()
544		if variadic && i == params.Len()-1 {
545			t = &dddSlice{t.(*types.Slice).Elem()}
546		}
547		p.typ(t)
548		if n > 0 {
549			name := q.Name()
550			p.string(name)
551			if name != "_" {
552				p.pkg(q.Pkg(), false)
553			}
554		}
555		p.string("") // no compiler-specific info
556	}
557}
558
559func (p *exporter) value(x constant.Value) {
560	if trace {
561		p.tracef("= ")
562	}
563
564	switch x.Kind() {
565	case constant.Bool:
566		tag := falseTag
567		if constant.BoolVal(x) {
568			tag = trueTag
569		}
570		p.tag(tag)
571
572	case constant.Int:
573		if v, exact := constant.Int64Val(x); exact {
574			// common case: x fits into an int64 - use compact encoding
575			p.tag(int64Tag)
576			p.int64(v)
577			return
578		}
579		// uncommon case: large x - use float encoding
580		// (powers of 2 will be encoded efficiently with exponent)
581		p.tag(floatTag)
582		p.float(constant.ToFloat(x))
583
584	case constant.Float:
585		p.tag(floatTag)
586		p.float(x)
587
588	case constant.Complex:
589		p.tag(complexTag)
590		p.float(constant.Real(x))
591		p.float(constant.Imag(x))
592
593	case constant.String:
594		p.tag(stringTag)
595		p.string(constant.StringVal(x))
596
597	case constant.Unknown:
598		// package contains type errors
599		p.tag(unknownTag)
600
601	default:
602		panic(internalErrorf("unexpected value %v (%T)", x, x))
603	}
604}
605
606func (p *exporter) float(x constant.Value) {
607	if x.Kind() != constant.Float {
608		panic(internalErrorf("unexpected constant %v, want float", x))
609	}
610	// extract sign (there is no -0)
611	sign := constant.Sign(x)
612	if sign == 0 {
613		// x == 0
614		p.int(0)
615		return
616	}
617	// x != 0
618
619	var f big.Float
620	if v, exact := constant.Float64Val(x); exact {
621		// float64
622		f.SetFloat64(v)
623	} else if num, denom := constant.Num(x), constant.Denom(x); num.Kind() == constant.Int {
624		// TODO(gri): add big.Rat accessor to constant.Value.
625		r := valueToRat(num)
626		f.SetRat(r.Quo(r, valueToRat(denom)))
627	} else {
628		// Value too large to represent as a fraction => inaccessible.
629		// TODO(gri): add big.Float accessor to constant.Value.
630		f.SetFloat64(math.MaxFloat64) // FIXME
631	}
632
633	// extract exponent such that 0.5 <= m < 1.0
634	var m big.Float
635	exp := f.MantExp(&m)
636
637	// extract mantissa as *big.Int
638	// - set exponent large enough so mant satisfies mant.IsInt()
639	// - get *big.Int from mant
640	m.SetMantExp(&m, int(m.MinPrec()))
641	mant, acc := m.Int(nil)
642	if acc != big.Exact {
643		panic(internalError("internal error"))
644	}
645
646	p.int(sign)
647	p.int(exp)
648	p.string(string(mant.Bytes()))
649}
650
651func valueToRat(x constant.Value) *big.Rat {
652	// Convert little-endian to big-endian.
653	// I can't believe this is necessary.
654	bytes := constant.Bytes(x)
655	for i := 0; i < len(bytes)/2; i++ {
656		bytes[i], bytes[len(bytes)-1-i] = bytes[len(bytes)-1-i], bytes[i]
657	}
658	return new(big.Rat).SetInt(new(big.Int).SetBytes(bytes))
659}
660
661func (p *exporter) bool(b bool) bool {
662	if trace {
663		p.tracef("[")
664		defer p.tracef("= %v] ", b)
665	}
666
667	x := 0
668	if b {
669		x = 1
670	}
671	p.int(x)
672	return b
673}
674
675// ----------------------------------------------------------------------------
676// Low-level encoders
677
678func (p *exporter) index(marker byte, index int) {
679	if index < 0 {
680		panic(internalError("invalid index < 0"))
681	}
682	if debugFormat {
683		p.marker('t')
684	}
685	if trace {
686		p.tracef("%c%d ", marker, index)
687	}
688	p.rawInt64(int64(index))
689}
690
691func (p *exporter) tag(tag int) {
692	if tag >= 0 {
693		panic(internalError("invalid tag >= 0"))
694	}
695	if debugFormat {
696		p.marker('t')
697	}
698	if trace {
699		p.tracef("%s ", tagString[-tag])
700	}
701	p.rawInt64(int64(tag))
702}
703
704func (p *exporter) int(x int) {
705	p.int64(int64(x))
706}
707
708func (p *exporter) int64(x int64) {
709	if debugFormat {
710		p.marker('i')
711	}
712	if trace {
713		p.tracef("%d ", x)
714	}
715	p.rawInt64(x)
716}
717
718func (p *exporter) string(s string) {
719	if debugFormat {
720		p.marker('s')
721	}
722	if trace {
723		p.tracef("%q ", s)
724	}
725	// if we saw the string before, write its index (>= 0)
726	// (the empty string is mapped to 0)
727	if i, ok := p.strIndex[s]; ok {
728		p.rawInt64(int64(i))
729		return
730	}
731	// otherwise, remember string and write its negative length and bytes
732	p.strIndex[s] = len(p.strIndex)
733	p.rawInt64(-int64(len(s)))
734	for i := 0; i < len(s); i++ {
735		p.rawByte(s[i])
736	}
737}
738
739// marker emits a marker byte and position information which makes
740// it easy for a reader to detect if it is "out of sync". Used for
741// debugFormat format only.
742func (p *exporter) marker(m byte) {
743	p.rawByte(m)
744	// Enable this for help tracking down the location
745	// of an incorrect marker when running in debugFormat.
746	if false && trace {
747		p.tracef("#%d ", p.written)
748	}
749	p.rawInt64(int64(p.written))
750}
751
752// rawInt64 should only be used by low-level encoders.
753func (p *exporter) rawInt64(x int64) {
754	var tmp [binary.MaxVarintLen64]byte
755	n := binary.PutVarint(tmp[:], x)
756	for i := 0; i < n; i++ {
757		p.rawByte(tmp[i])
758	}
759}
760
761// rawStringln should only be used to emit the initial version string.
762func (p *exporter) rawStringln(s string) {
763	for i := 0; i < len(s); i++ {
764		p.rawByte(s[i])
765	}
766	p.rawByte('\n')
767}
768
769// rawByte is the bottleneck interface to write to p.out.
770// rawByte escapes b as follows (any encoding does that
771// hides '$'):
772//
773//	'$'  => '|' 'S'
774//	'|'  => '|' '|'
775//
776// Necessary so other tools can find the end of the
777// export data by searching for "$$".
778// rawByte should only be used by low-level encoders.
779func (p *exporter) rawByte(b byte) {
780	switch b {
781	case '$':
782		// write '$' as '|' 'S'
783		b = 'S'
784		fallthrough
785	case '|':
786		// write '|' as '|' '|'
787		p.out.WriteByte('|')
788		p.written++
789	}
790	p.out.WriteByte(b)
791	p.written++
792}
793
794// tracef is like fmt.Printf but it rewrites the format string
795// to take care of indentation.
796func (p *exporter) tracef(format string, args ...interface{}) {
797	if strings.ContainsAny(format, "<>\n") {
798		var buf bytes.Buffer
799		for i := 0; i < len(format); i++ {
800			// no need to deal with runes
801			ch := format[i]
802			switch ch {
803			case '>':
804				p.indent++
805				continue
806			case '<':
807				p.indent--
808				continue
809			}
810			buf.WriteByte(ch)
811			if ch == '\n' {
812				for j := p.indent; j > 0; j-- {
813					buf.WriteString(".  ")
814				}
815			}
816		}
817		format = buf.String()
818	}
819	fmt.Printf(format, args...)
820}
821
822// Debugging support.
823// (tagString is only used when tracing is enabled)
824var tagString = [...]string{
825	// Packages
826	-packageTag: "package",
827
828	// Types
829	-namedTag:     "named type",
830	-arrayTag:     "array",
831	-sliceTag:     "slice",
832	-dddTag:       "ddd",
833	-structTag:    "struct",
834	-pointerTag:   "pointer",
835	-signatureTag: "signature",
836	-interfaceTag: "interface",
837	-mapTag:       "map",
838	-chanTag:      "chan",
839
840	// Values
841	-falseTag:    "false",
842	-trueTag:     "true",
843	-int64Tag:    "int64",
844	-floatTag:    "float",
845	-fractionTag: "fraction",
846	-complexTag:  "complex",
847	-stringTag:   "string",
848	-unknownTag:  "unknown",
849
850	// Type aliases
851	-aliasTag: "alias",
852}
853