1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Binary package export.
6// This file was derived from $GOROOT/src/cmd/compile/internal/gc/bexport.go;
7// see that file for specification of the format.
8
9package gcimporter
10
11import (
12	"bytes"
13	"encoding/binary"
14	"fmt"
15	"go/ast"
16	"go/constant"
17	"go/token"
18	"go/types"
19	"math"
20	"math/big"
21	"sort"
22	"strings"
23)
24
25// If debugFormat is set, each integer and string value is preceded by a marker
26// and position information in the encoding. This mechanism permits an importer
27// to recognize immediately when it is out of sync. The importer recognizes this
28// mode automatically (i.e., it can import export data produced with debugging
29// support even if debugFormat is not set at the time of import). This mode will
30// lead to massively larger export data (by a factor of 2 to 3) and should only
31// be enabled during development and debugging.
32//
33// NOTE: This flag is the first flag to enable if importing dies because of
34// (suspected) format errors, and whenever a change is made to the format.
35const debugFormat = false // default: false
36
37// Current export format version. Increase with each format change.
38// Note: The latest binary (non-indexed) export format is at version 6.
39//       This exporter is still at level 4, but it doesn't matter since
40//       the binary importer can handle older versions just fine.
41// 6: package height (CL 105038) -- NOT IMPLEMENTED HERE
42// 5: improved position encoding efficiency (issue 20080, CL 41619) -- NOT IMPLEMEMTED HERE
43// 4: type name objects support type aliases, uses aliasTag
44// 3: Go1.8 encoding (same as version 2, aliasTag defined but never used)
45// 2: removed unused bool in ODCL export (compiler only)
46// 1: header format change (more regular), export package for _ struct fields
47// 0: Go1.7 encoding
48const exportVersion = 4
49
50// trackAllTypes enables cycle tracking for all types, not just named
51// types. The existing compiler invariants assume that unnamed types
52// that are not completely set up are not used, or else there are spurious
53// errors.
54// If disabled, only named types are tracked, possibly leading to slightly
55// less efficient encoding in rare cases. It also prevents the export of
56// some corner-case type declarations (but those are not handled correctly
57// with with the textual export format either).
58// TODO(gri) enable and remove once issues caused by it are fixed
59const trackAllTypes = false
60
61type exporter struct {
62	fset *token.FileSet
63	out  bytes.Buffer
64
65	// object -> index maps, indexed in order of serialization
66	strIndex map[string]int
67	pkgIndex map[*types.Package]int
68	typIndex map[types.Type]int
69
70	// position encoding
71	posInfoFormat bool
72	prevFile      string
73	prevLine      int
74
75	// debugging support
76	written int // bytes written
77	indent  int // for trace
78}
79
80// internalError represents an error generated inside this package.
81type internalError string
82
83func (e internalError) Error() string { return "gcimporter: " + string(e) }
84
85func internalErrorf(format string, args ...interface{}) error {
86	return internalError(fmt.Sprintf(format, args...))
87}
88
89// BExportData returns binary export data for pkg.
90// If no file set is provided, position info will be missing.
91func BExportData(fset *token.FileSet, pkg *types.Package) (b []byte, err error) {
92	if !debug {
93		defer func() {
94			if e := recover(); e != nil {
95				if ierr, ok := e.(internalError); ok {
96					err = ierr
97					return
98				}
99				// Not an internal error; panic again.
100				panic(e)
101			}
102		}()
103	}
104
105	p := exporter{
106		fset:          fset,
107		strIndex:      map[string]int{"": 0}, // empty string is mapped to 0
108		pkgIndex:      make(map[*types.Package]int),
109		typIndex:      make(map[types.Type]int),
110		posInfoFormat: true, // TODO(gri) might become a flag, eventually
111	}
112
113	// write version info
114	// The version string must start with "version %d" where %d is the version
115	// number. Additional debugging information may follow after a blank; that
116	// text is ignored by the importer.
117	p.rawStringln(fmt.Sprintf("version %d", exportVersion))
118	var debug string
119	if debugFormat {
120		debug = "debug"
121	}
122	p.rawStringln(debug) // cannot use p.bool since it's affected by debugFormat; also want to see this clearly
123	p.bool(trackAllTypes)
124	p.bool(p.posInfoFormat)
125
126	// --- generic export data ---
127
128	// populate type map with predeclared "known" types
129	for index, typ := range predeclared() {
130		p.typIndex[typ] = index
131	}
132	if len(p.typIndex) != len(predeclared()) {
133		return nil, internalError("duplicate entries in type map?")
134	}
135
136	// write package data
137	p.pkg(pkg, true)
138	if trace {
139		p.tracef("\n")
140	}
141
142	// write objects
143	objcount := 0
144	scope := pkg.Scope()
145	for _, name := range scope.Names() {
146		if !ast.IsExported(name) {
147			continue
148		}
149		if trace {
150			p.tracef("\n")
151		}
152		p.obj(scope.Lookup(name))
153		objcount++
154	}
155
156	// indicate end of list
157	if trace {
158		p.tracef("\n")
159	}
160	p.tag(endTag)
161
162	// for self-verification only (redundant)
163	p.int(objcount)
164
165	if trace {
166		p.tracef("\n")
167	}
168
169	// --- end of export data ---
170
171	return p.out.Bytes(), nil
172}
173
174func (p *exporter) pkg(pkg *types.Package, emptypath bool) {
175	if pkg == nil {
176		panic(internalError("unexpected nil pkg"))
177	}
178
179	// if we saw the package before, write its index (>= 0)
180	if i, ok := p.pkgIndex[pkg]; ok {
181		p.index('P', i)
182		return
183	}
184
185	// otherwise, remember the package, write the package tag (< 0) and package data
186	if trace {
187		p.tracef("P%d = { ", len(p.pkgIndex))
188		defer p.tracef("} ")
189	}
190	p.pkgIndex[pkg] = len(p.pkgIndex)
191
192	p.tag(packageTag)
193	p.string(pkg.Name())
194	if emptypath {
195		p.string("")
196	} else {
197		p.string(pkg.Path())
198	}
199}
200
201func (p *exporter) obj(obj types.Object) {
202	switch obj := obj.(type) {
203	case *types.Const:
204		p.tag(constTag)
205		p.pos(obj)
206		p.qualifiedName(obj)
207		p.typ(obj.Type())
208		p.value(obj.Val())
209
210	case *types.TypeName:
211		if obj.IsAlias() {
212			p.tag(aliasTag)
213			p.pos(obj)
214			p.qualifiedName(obj)
215		} else {
216			p.tag(typeTag)
217		}
218		p.typ(obj.Type())
219
220	case *types.Var:
221		p.tag(varTag)
222		p.pos(obj)
223		p.qualifiedName(obj)
224		p.typ(obj.Type())
225
226	case *types.Func:
227		p.tag(funcTag)
228		p.pos(obj)
229		p.qualifiedName(obj)
230		sig := obj.Type().(*types.Signature)
231		p.paramList(sig.Params(), sig.Variadic())
232		p.paramList(sig.Results(), false)
233
234	default:
235		panic(internalErrorf("unexpected object %v (%T)", obj, obj))
236	}
237}
238
239func (p *exporter) pos(obj types.Object) {
240	if !p.posInfoFormat {
241		return
242	}
243
244	file, line := p.fileLine(obj)
245	if file == p.prevFile {
246		// common case: write line delta
247		// delta == 0 means different file or no line change
248		delta := line - p.prevLine
249		p.int(delta)
250		if delta == 0 {
251			p.int(-1) // -1 means no file change
252		}
253	} else {
254		// different file
255		p.int(0)
256		// Encode filename as length of common prefix with previous
257		// filename, followed by (possibly empty) suffix. Filenames
258		// frequently share path prefixes, so this can save a lot
259		// of space and make export data size less dependent on file
260		// path length. The suffix is unlikely to be empty because
261		// file names tend to end in ".go".
262		n := commonPrefixLen(p.prevFile, file)
263		p.int(n)           // n >= 0
264		p.string(file[n:]) // write suffix only
265		p.prevFile = file
266		p.int(line)
267	}
268	p.prevLine = line
269}
270
271func (p *exporter) fileLine(obj types.Object) (file string, line int) {
272	if p.fset != nil {
273		pos := p.fset.Position(obj.Pos())
274		file = pos.Filename
275		line = pos.Line
276	}
277	return
278}
279
280func commonPrefixLen(a, b string) int {
281	if len(a) > len(b) {
282		a, b = b, a
283	}
284	// len(a) <= len(b)
285	i := 0
286	for i < len(a) && a[i] == b[i] {
287		i++
288	}
289	return i
290}
291
292func (p *exporter) qualifiedName(obj types.Object) {
293	p.string(obj.Name())
294	p.pkg(obj.Pkg(), false)
295}
296
297func (p *exporter) typ(t types.Type) {
298	if t == nil {
299		panic(internalError("nil type"))
300	}
301
302	// Possible optimization: Anonymous pointer types *T where
303	// T is a named type are common. We could canonicalize all
304	// such types *T to a single type PT = *T. This would lead
305	// to at most one *T entry in typIndex, and all future *T's
306	// would be encoded as the respective index directly. Would
307	// save 1 byte (pointerTag) per *T and reduce the typIndex
308	// size (at the cost of a canonicalization map). We can do
309	// this later, without encoding format change.
310
311	// if we saw the type before, write its index (>= 0)
312	if i, ok := p.typIndex[t]; ok {
313		p.index('T', i)
314		return
315	}
316
317	// otherwise, remember the type, write the type tag (< 0) and type data
318	if trackAllTypes {
319		if trace {
320			p.tracef("T%d = {>\n", len(p.typIndex))
321			defer p.tracef("<\n} ")
322		}
323		p.typIndex[t] = len(p.typIndex)
324	}
325
326	switch t := t.(type) {
327	case *types.Named:
328		if !trackAllTypes {
329			// if we don't track all types, track named types now
330			p.typIndex[t] = len(p.typIndex)
331		}
332
333		p.tag(namedTag)
334		p.pos(t.Obj())
335		p.qualifiedName(t.Obj())
336		p.typ(t.Underlying())
337		if !types.IsInterface(t) {
338			p.assocMethods(t)
339		}
340
341	case *types.Array:
342		p.tag(arrayTag)
343		p.int64(t.Len())
344		p.typ(t.Elem())
345
346	case *types.Slice:
347		p.tag(sliceTag)
348		p.typ(t.Elem())
349
350	case *dddSlice:
351		p.tag(dddTag)
352		p.typ(t.elem)
353
354	case *types.Struct:
355		p.tag(structTag)
356		p.fieldList(t)
357
358	case *types.Pointer:
359		p.tag(pointerTag)
360		p.typ(t.Elem())
361
362	case *types.Signature:
363		p.tag(signatureTag)
364		p.paramList(t.Params(), t.Variadic())
365		p.paramList(t.Results(), false)
366
367	case *types.Interface:
368		p.tag(interfaceTag)
369		p.iface(t)
370
371	case *types.Map:
372		p.tag(mapTag)
373		p.typ(t.Key())
374		p.typ(t.Elem())
375
376	case *types.Chan:
377		p.tag(chanTag)
378		p.int(int(3 - t.Dir())) // hack
379		p.typ(t.Elem())
380
381	default:
382		panic(internalErrorf("unexpected type %T: %s", t, t))
383	}
384}
385
386func (p *exporter) assocMethods(named *types.Named) {
387	// Sort methods (for determinism).
388	var methods []*types.Func
389	for i := 0; i < named.NumMethods(); i++ {
390		methods = append(methods, named.Method(i))
391	}
392	sort.Sort(methodsByName(methods))
393
394	p.int(len(methods))
395
396	if trace && methods != nil {
397		p.tracef("associated methods {>\n")
398	}
399
400	for i, m := range methods {
401		if trace && i > 0 {
402			p.tracef("\n")
403		}
404
405		p.pos(m)
406		name := m.Name()
407		p.string(name)
408		if !exported(name) {
409			p.pkg(m.Pkg(), false)
410		}
411
412		sig := m.Type().(*types.Signature)
413		p.paramList(types.NewTuple(sig.Recv()), false)
414		p.paramList(sig.Params(), sig.Variadic())
415		p.paramList(sig.Results(), false)
416		p.int(0) // dummy value for go:nointerface pragma - ignored by importer
417	}
418
419	if trace && methods != nil {
420		p.tracef("<\n} ")
421	}
422}
423
424type methodsByName []*types.Func
425
426func (x methodsByName) Len() int           { return len(x) }
427func (x methodsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
428func (x methodsByName) Less(i, j int) bool { return x[i].Name() < x[j].Name() }
429
430func (p *exporter) fieldList(t *types.Struct) {
431	if trace && t.NumFields() > 0 {
432		p.tracef("fields {>\n")
433		defer p.tracef("<\n} ")
434	}
435
436	p.int(t.NumFields())
437	for i := 0; i < t.NumFields(); i++ {
438		if trace && i > 0 {
439			p.tracef("\n")
440		}
441		p.field(t.Field(i))
442		p.string(t.Tag(i))
443	}
444}
445
446func (p *exporter) field(f *types.Var) {
447	if !f.IsField() {
448		panic(internalError("field expected"))
449	}
450
451	p.pos(f)
452	p.fieldName(f)
453	p.typ(f.Type())
454}
455
456func (p *exporter) iface(t *types.Interface) {
457	// TODO(gri): enable importer to load embedded interfaces,
458	// then emit Embeddeds and ExplicitMethods separately here.
459	p.int(0)
460
461	n := t.NumMethods()
462	if trace && n > 0 {
463		p.tracef("methods {>\n")
464		defer p.tracef("<\n} ")
465	}
466	p.int(n)
467	for i := 0; i < n; i++ {
468		if trace && i > 0 {
469			p.tracef("\n")
470		}
471		p.method(t.Method(i))
472	}
473}
474
475func (p *exporter) method(m *types.Func) {
476	sig := m.Type().(*types.Signature)
477	if sig.Recv() == nil {
478		panic(internalError("method expected"))
479	}
480
481	p.pos(m)
482	p.string(m.Name())
483	if m.Name() != "_" && !ast.IsExported(m.Name()) {
484		p.pkg(m.Pkg(), false)
485	}
486
487	// interface method; no need to encode receiver.
488	p.paramList(sig.Params(), sig.Variadic())
489	p.paramList(sig.Results(), false)
490}
491
492func (p *exporter) fieldName(f *types.Var) {
493	name := f.Name()
494
495	if f.Anonymous() {
496		// anonymous field - we distinguish between 3 cases:
497		// 1) field name matches base type name and is exported
498		// 2) field name matches base type name and is not exported
499		// 3) field name doesn't match base type name (alias name)
500		bname := basetypeName(f.Type())
501		if name == bname {
502			if ast.IsExported(name) {
503				name = "" // 1) we don't need to know the field name or package
504			} else {
505				name = "?" // 2) use unexported name "?" to force package export
506			}
507		} else {
508			// 3) indicate alias and export name as is
509			// (this requires an extra "@" but this is a rare case)
510			p.string("@")
511		}
512	}
513
514	p.string(name)
515	if name != "" && !ast.IsExported(name) {
516		p.pkg(f.Pkg(), false)
517	}
518}
519
520func basetypeName(typ types.Type) string {
521	switch typ := deref(typ).(type) {
522	case *types.Basic:
523		return typ.Name()
524	case *types.Named:
525		return typ.Obj().Name()
526	default:
527		return "" // unnamed type
528	}
529}
530
531func (p *exporter) paramList(params *types.Tuple, variadic bool) {
532	// use negative length to indicate unnamed parameters
533	// (look at the first parameter only since either all
534	// names are present or all are absent)
535	n := params.Len()
536	if n > 0 && params.At(0).Name() == "" {
537		n = -n
538	}
539	p.int(n)
540	for i := 0; i < params.Len(); i++ {
541		q := params.At(i)
542		t := q.Type()
543		if variadic && i == params.Len()-1 {
544			t = &dddSlice{t.(*types.Slice).Elem()}
545		}
546		p.typ(t)
547		if n > 0 {
548			name := q.Name()
549			p.string(name)
550			if name != "_" {
551				p.pkg(q.Pkg(), false)
552			}
553		}
554		p.string("") // no compiler-specific info
555	}
556}
557
558func (p *exporter) value(x constant.Value) {
559	if trace {
560		p.tracef("= ")
561	}
562
563	switch x.Kind() {
564	case constant.Bool:
565		tag := falseTag
566		if constant.BoolVal(x) {
567			tag = trueTag
568		}
569		p.tag(tag)
570
571	case constant.Int:
572		if v, exact := constant.Int64Val(x); exact {
573			// common case: x fits into an int64 - use compact encoding
574			p.tag(int64Tag)
575			p.int64(v)
576			return
577		}
578		// uncommon case: large x - use float encoding
579		// (powers of 2 will be encoded efficiently with exponent)
580		p.tag(floatTag)
581		p.float(constant.ToFloat(x))
582
583	case constant.Float:
584		p.tag(floatTag)
585		p.float(x)
586
587	case constant.Complex:
588		p.tag(complexTag)
589		p.float(constant.Real(x))
590		p.float(constant.Imag(x))
591
592	case constant.String:
593		p.tag(stringTag)
594		p.string(constant.StringVal(x))
595
596	case constant.Unknown:
597		// package contains type errors
598		p.tag(unknownTag)
599
600	default:
601		panic(internalErrorf("unexpected value %v (%T)", x, x))
602	}
603}
604
605func (p *exporter) float(x constant.Value) {
606	if x.Kind() != constant.Float {
607		panic(internalErrorf("unexpected constant %v, want float", x))
608	}
609	// extract sign (there is no -0)
610	sign := constant.Sign(x)
611	if sign == 0 {
612		// x == 0
613		p.int(0)
614		return
615	}
616	// x != 0
617
618	var f big.Float
619	if v, exact := constant.Float64Val(x); exact {
620		// float64
621		f.SetFloat64(v)
622	} else if num, denom := constant.Num(x), constant.Denom(x); num.Kind() == constant.Int {
623		// TODO(gri): add big.Rat accessor to constant.Value.
624		r := valueToRat(num)
625		f.SetRat(r.Quo(r, valueToRat(denom)))
626	} else {
627		// Value too large to represent as a fraction => inaccessible.
628		// TODO(gri): add big.Float accessor to constant.Value.
629		f.SetFloat64(math.MaxFloat64) // FIXME
630	}
631
632	// extract exponent such that 0.5 <= m < 1.0
633	var m big.Float
634	exp := f.MantExp(&m)
635
636	// extract mantissa as *big.Int
637	// - set exponent large enough so mant satisfies mant.IsInt()
638	// - get *big.Int from mant
639	m.SetMantExp(&m, int(m.MinPrec()))
640	mant, acc := m.Int(nil)
641	if acc != big.Exact {
642		panic(internalError("internal error"))
643	}
644
645	p.int(sign)
646	p.int(exp)
647	p.string(string(mant.Bytes()))
648}
649
650func valueToRat(x constant.Value) *big.Rat {
651	// Convert little-endian to big-endian.
652	// I can't believe this is necessary.
653	bytes := constant.Bytes(x)
654	for i := 0; i < len(bytes)/2; i++ {
655		bytes[i], bytes[len(bytes)-1-i] = bytes[len(bytes)-1-i], bytes[i]
656	}
657	return new(big.Rat).SetInt(new(big.Int).SetBytes(bytes))
658}
659
660func (p *exporter) bool(b bool) bool {
661	if trace {
662		p.tracef("[")
663		defer p.tracef("= %v] ", b)
664	}
665
666	x := 0
667	if b {
668		x = 1
669	}
670	p.int(x)
671	return b
672}
673
674// ----------------------------------------------------------------------------
675// Low-level encoders
676
677func (p *exporter) index(marker byte, index int) {
678	if index < 0 {
679		panic(internalError("invalid index < 0"))
680	}
681	if debugFormat {
682		p.marker('t')
683	}
684	if trace {
685		p.tracef("%c%d ", marker, index)
686	}
687	p.rawInt64(int64(index))
688}
689
690func (p *exporter) tag(tag int) {
691	if tag >= 0 {
692		panic(internalError("invalid tag >= 0"))
693	}
694	if debugFormat {
695		p.marker('t')
696	}
697	if trace {
698		p.tracef("%s ", tagString[-tag])
699	}
700	p.rawInt64(int64(tag))
701}
702
703func (p *exporter) int(x int) {
704	p.int64(int64(x))
705}
706
707func (p *exporter) int64(x int64) {
708	if debugFormat {
709		p.marker('i')
710	}
711	if trace {
712		p.tracef("%d ", x)
713	}
714	p.rawInt64(x)
715}
716
717func (p *exporter) string(s string) {
718	if debugFormat {
719		p.marker('s')
720	}
721	if trace {
722		p.tracef("%q ", s)
723	}
724	// if we saw the string before, write its index (>= 0)
725	// (the empty string is mapped to 0)
726	if i, ok := p.strIndex[s]; ok {
727		p.rawInt64(int64(i))
728		return
729	}
730	// otherwise, remember string and write its negative length and bytes
731	p.strIndex[s] = len(p.strIndex)
732	p.rawInt64(-int64(len(s)))
733	for i := 0; i < len(s); i++ {
734		p.rawByte(s[i])
735	}
736}
737
738// marker emits a marker byte and position information which makes
739// it easy for a reader to detect if it is "out of sync". Used for
740// debugFormat format only.
741func (p *exporter) marker(m byte) {
742	p.rawByte(m)
743	// Enable this for help tracking down the location
744	// of an incorrect marker when running in debugFormat.
745	if false && trace {
746		p.tracef("#%d ", p.written)
747	}
748	p.rawInt64(int64(p.written))
749}
750
751// rawInt64 should only be used by low-level encoders.
752func (p *exporter) rawInt64(x int64) {
753	var tmp [binary.MaxVarintLen64]byte
754	n := binary.PutVarint(tmp[:], x)
755	for i := 0; i < n; i++ {
756		p.rawByte(tmp[i])
757	}
758}
759
760// rawStringln should only be used to emit the initial version string.
761func (p *exporter) rawStringln(s string) {
762	for i := 0; i < len(s); i++ {
763		p.rawByte(s[i])
764	}
765	p.rawByte('\n')
766}
767
768// rawByte is the bottleneck interface to write to p.out.
769// rawByte escapes b as follows (any encoding does that
770// hides '$'):
771//
772//	'$'  => '|' 'S'
773//	'|'  => '|' '|'
774//
775// Necessary so other tools can find the end of the
776// export data by searching for "$$".
777// rawByte should only be used by low-level encoders.
778func (p *exporter) rawByte(b byte) {
779	switch b {
780	case '$':
781		// write '$' as '|' 'S'
782		b = 'S'
783		fallthrough
784	case '|':
785		// write '|' as '|' '|'
786		p.out.WriteByte('|')
787		p.written++
788	}
789	p.out.WriteByte(b)
790	p.written++
791}
792
793// tracef is like fmt.Printf but it rewrites the format string
794// to take care of indentation.
795func (p *exporter) tracef(format string, args ...interface{}) {
796	if strings.ContainsAny(format, "<>\n") {
797		var buf bytes.Buffer
798		for i := 0; i < len(format); i++ {
799			// no need to deal with runes
800			ch := format[i]
801			switch ch {
802			case '>':
803				p.indent++
804				continue
805			case '<':
806				p.indent--
807				continue
808			}
809			buf.WriteByte(ch)
810			if ch == '\n' {
811				for j := p.indent; j > 0; j-- {
812					buf.WriteString(".  ")
813				}
814			}
815		}
816		format = buf.String()
817	}
818	fmt.Printf(format, args...)
819}
820
821// Debugging support.
822// (tagString is only used when tracing is enabled)
823var tagString = [...]string{
824	// Packages
825	-packageTag: "package",
826
827	// Types
828	-namedTag:     "named type",
829	-arrayTag:     "array",
830	-sliceTag:     "slice",
831	-dddTag:       "ddd",
832	-structTag:    "struct",
833	-pointerTag:   "pointer",
834	-signatureTag: "signature",
835	-interfaceTag: "interface",
836	-mapTag:       "map",
837	-chanTag:      "chan",
838
839	// Values
840	-falseTag:    "false",
841	-trueTag:     "true",
842	-int64Tag:    "int64",
843	-floatTag:    "float",
844	-fractionTag: "fraction",
845	-complexTag:  "complex",
846	-stringTag:   "string",
847	-unknownTag:  "unknown",
848
849	// Type aliases
850	-aliasTag: "alias",
851}
852