1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Binary package export.
6// This file was derived from $GOROOT/src/cmd/compile/internal/gc/bexport.go;
7// see that file for specification of the format.
8
9package gcimporter
10
11import (
12	"bytes"
13	"encoding/binary"
14	"fmt"
15	"go/ast"
16	"go/constant"
17	"go/token"
18	"go/types"
19	"log"
20	"math"
21	"math/big"
22	"sort"
23	"strings"
24)
25
26// If debugFormat is set, each integer and string value is preceded by a marker
27// and position information in the encoding. This mechanism permits an importer
28// to recognize immediately when it is out of sync. The importer recognizes this
29// mode automatically (i.e., it can import export data produced with debugging
30// support even if debugFormat is not set at the time of import). This mode will
31// lead to massively larger export data (by a factor of 2 to 3) and should only
32// be enabled during development and debugging.
33//
34// NOTE: This flag is the first flag to enable if importing dies because of
35// (suspected) format errors, and whenever a change is made to the format.
36const debugFormat = false // default: false
37
38// If trace is set, debugging output is printed to std out.
39const trace = false // default: false
40
41// Current export format version. Increase with each format change.
42// 4: type name objects support type aliases, uses aliasTag
43// 3: Go1.8 encoding (same as version 2, aliasTag defined but never used)
44// 2: removed unused bool in ODCL export (compiler only)
45// 1: header format change (more regular), export package for _ struct fields
46// 0: Go1.7 encoding
47const exportVersion = 4
48
49// trackAllTypes enables cycle tracking for all types, not just named
50// types. The existing compiler invariants assume that unnamed types
51// that are not completely set up are not used, or else there are spurious
52// errors.
53// If disabled, only named types are tracked, possibly leading to slightly
54// less efficient encoding in rare cases. It also prevents the export of
55// some corner-case type declarations (but those are not handled correctly
56// with with the textual export format either).
57// TODO(gri) enable and remove once issues caused by it are fixed
58const trackAllTypes = false
59
60type exporter struct {
61	fset *token.FileSet
62	out  bytes.Buffer
63
64	// object -> index maps, indexed in order of serialization
65	strIndex map[string]int
66	pkgIndex map[*types.Package]int
67	typIndex map[types.Type]int
68
69	// position encoding
70	posInfoFormat bool
71	prevFile      string
72	prevLine      int
73
74	// debugging support
75	written int // bytes written
76	indent  int // for trace
77}
78
79// BExportData returns binary export data for pkg.
80// If no file set is provided, position info will be missing.
81func BExportData(fset *token.FileSet, pkg *types.Package) []byte {
82	p := exporter{
83		fset:          fset,
84		strIndex:      map[string]int{"": 0}, // empty string is mapped to 0
85		pkgIndex:      make(map[*types.Package]int),
86		typIndex:      make(map[types.Type]int),
87		posInfoFormat: true, // TODO(gri) might become a flag, eventually
88	}
89
90	// write version info
91	// The version string must start with "version %d" where %d is the version
92	// number. Additional debugging information may follow after a blank; that
93	// text is ignored by the importer.
94	p.rawStringln(fmt.Sprintf("version %d", exportVersion))
95	var debug string
96	if debugFormat {
97		debug = "debug"
98	}
99	p.rawStringln(debug) // cannot use p.bool since it's affected by debugFormat; also want to see this clearly
100	p.bool(trackAllTypes)
101	p.bool(p.posInfoFormat)
102
103	// --- generic export data ---
104
105	// populate type map with predeclared "known" types
106	for index, typ := range predeclared {
107		p.typIndex[typ] = index
108	}
109	if len(p.typIndex) != len(predeclared) {
110		log.Fatalf("gcimporter: duplicate entries in type map?")
111	}
112
113	// write package data
114	p.pkg(pkg, true)
115	if trace {
116		p.tracef("\n")
117	}
118
119	// write objects
120	objcount := 0
121	scope := pkg.Scope()
122	for _, name := range scope.Names() {
123		if !ast.IsExported(name) {
124			continue
125		}
126		if trace {
127			p.tracef("\n")
128		}
129		p.obj(scope.Lookup(name))
130		objcount++
131	}
132
133	// indicate end of list
134	if trace {
135		p.tracef("\n")
136	}
137	p.tag(endTag)
138
139	// for self-verification only (redundant)
140	p.int(objcount)
141
142	if trace {
143		p.tracef("\n")
144	}
145
146	// --- end of export data ---
147
148	return p.out.Bytes()
149}
150
151func (p *exporter) pkg(pkg *types.Package, emptypath bool) {
152	if pkg == nil {
153		log.Fatalf("gcimporter: unexpected nil pkg")
154	}
155
156	// if we saw the package before, write its index (>= 0)
157	if i, ok := p.pkgIndex[pkg]; ok {
158		p.index('P', i)
159		return
160	}
161
162	// otherwise, remember the package, write the package tag (< 0) and package data
163	if trace {
164		p.tracef("P%d = { ", len(p.pkgIndex))
165		defer p.tracef("} ")
166	}
167	p.pkgIndex[pkg] = len(p.pkgIndex)
168
169	p.tag(packageTag)
170	p.string(pkg.Name())
171	if emptypath {
172		p.string("")
173	} else {
174		p.string(pkg.Path())
175	}
176}
177
178func (p *exporter) obj(obj types.Object) {
179	switch obj := obj.(type) {
180	case *types.Const:
181		p.tag(constTag)
182		p.pos(obj)
183		p.qualifiedName(obj)
184		p.typ(obj.Type())
185		p.value(obj.Val())
186
187	case *types.TypeName:
188		if isAlias(obj) {
189			p.tag(aliasTag)
190			p.pos(obj)
191			p.qualifiedName(obj)
192		} else {
193			p.tag(typeTag)
194		}
195		p.typ(obj.Type())
196
197	case *types.Var:
198		p.tag(varTag)
199		p.pos(obj)
200		p.qualifiedName(obj)
201		p.typ(obj.Type())
202
203	case *types.Func:
204		p.tag(funcTag)
205		p.pos(obj)
206		p.qualifiedName(obj)
207		sig := obj.Type().(*types.Signature)
208		p.paramList(sig.Params(), sig.Variadic())
209		p.paramList(sig.Results(), false)
210
211	default:
212		log.Fatalf("gcimporter: unexpected object %v (%T)", obj, obj)
213	}
214}
215
216func (p *exporter) pos(obj types.Object) {
217	if !p.posInfoFormat {
218		return
219	}
220
221	file, line := p.fileLine(obj)
222	if file == p.prevFile {
223		// common case: write line delta
224		// delta == 0 means different file or no line change
225		delta := line - p.prevLine
226		p.int(delta)
227		if delta == 0 {
228			p.int(-1) // -1 means no file change
229		}
230	} else {
231		// different file
232		p.int(0)
233		// Encode filename as length of common prefix with previous
234		// filename, followed by (possibly empty) suffix. Filenames
235		// frequently share path prefixes, so this can save a lot
236		// of space and make export data size less dependent on file
237		// path length. The suffix is unlikely to be empty because
238		// file names tend to end in ".go".
239		n := commonPrefixLen(p.prevFile, file)
240		p.int(n)           // n >= 0
241		p.string(file[n:]) // write suffix only
242		p.prevFile = file
243		p.int(line)
244	}
245	p.prevLine = line
246}
247
248func (p *exporter) fileLine(obj types.Object) (file string, line int) {
249	if p.fset != nil {
250		pos := p.fset.Position(obj.Pos())
251		file = pos.Filename
252		line = pos.Line
253	}
254	return
255}
256
257func commonPrefixLen(a, b string) int {
258	if len(a) > len(b) {
259		a, b = b, a
260	}
261	// len(a) <= len(b)
262	i := 0
263	for i < len(a) && a[i] == b[i] {
264		i++
265	}
266	return i
267}
268
269func (p *exporter) qualifiedName(obj types.Object) {
270	p.string(obj.Name())
271	p.pkg(obj.Pkg(), false)
272}
273
274func (p *exporter) typ(t types.Type) {
275	if t == nil {
276		log.Fatalf("gcimporter: nil type")
277	}
278
279	// Possible optimization: Anonymous pointer types *T where
280	// T is a named type are common. We could canonicalize all
281	// such types *T to a single type PT = *T. This would lead
282	// to at most one *T entry in typIndex, and all future *T's
283	// would be encoded as the respective index directly. Would
284	// save 1 byte (pointerTag) per *T and reduce the typIndex
285	// size (at the cost of a canonicalization map). We can do
286	// this later, without encoding format change.
287
288	// if we saw the type before, write its index (>= 0)
289	if i, ok := p.typIndex[t]; ok {
290		p.index('T', i)
291		return
292	}
293
294	// otherwise, remember the type, write the type tag (< 0) and type data
295	if trackAllTypes {
296		if trace {
297			p.tracef("T%d = {>\n", len(p.typIndex))
298			defer p.tracef("<\n} ")
299		}
300		p.typIndex[t] = len(p.typIndex)
301	}
302
303	switch t := t.(type) {
304	case *types.Named:
305		if !trackAllTypes {
306			// if we don't track all types, track named types now
307			p.typIndex[t] = len(p.typIndex)
308		}
309
310		p.tag(namedTag)
311		p.pos(t.Obj())
312		p.qualifiedName(t.Obj())
313		p.typ(t.Underlying())
314		if !types.IsInterface(t) {
315			p.assocMethods(t)
316		}
317
318	case *types.Array:
319		p.tag(arrayTag)
320		p.int64(t.Len())
321		p.typ(t.Elem())
322
323	case *types.Slice:
324		p.tag(sliceTag)
325		p.typ(t.Elem())
326
327	case *dddSlice:
328		p.tag(dddTag)
329		p.typ(t.elem)
330
331	case *types.Struct:
332		p.tag(structTag)
333		p.fieldList(t)
334
335	case *types.Pointer:
336		p.tag(pointerTag)
337		p.typ(t.Elem())
338
339	case *types.Signature:
340		p.tag(signatureTag)
341		p.paramList(t.Params(), t.Variadic())
342		p.paramList(t.Results(), false)
343
344	case *types.Interface:
345		p.tag(interfaceTag)
346		p.iface(t)
347
348	case *types.Map:
349		p.tag(mapTag)
350		p.typ(t.Key())
351		p.typ(t.Elem())
352
353	case *types.Chan:
354		p.tag(chanTag)
355		p.int(int(3 - t.Dir())) // hack
356		p.typ(t.Elem())
357
358	default:
359		log.Fatalf("gcimporter: unexpected type %T: %s", t, t)
360	}
361}
362
363func (p *exporter) assocMethods(named *types.Named) {
364	// Sort methods (for determinism).
365	var methods []*types.Func
366	for i := 0; i < named.NumMethods(); i++ {
367		methods = append(methods, named.Method(i))
368	}
369	sort.Sort(methodsByName(methods))
370
371	p.int(len(methods))
372
373	if trace && methods != nil {
374		p.tracef("associated methods {>\n")
375	}
376
377	for i, m := range methods {
378		if trace && i > 0 {
379			p.tracef("\n")
380		}
381
382		p.pos(m)
383		name := m.Name()
384		p.string(name)
385		if !exported(name) {
386			p.pkg(m.Pkg(), false)
387		}
388
389		sig := m.Type().(*types.Signature)
390		p.paramList(types.NewTuple(sig.Recv()), false)
391		p.paramList(sig.Params(), sig.Variadic())
392		p.paramList(sig.Results(), false)
393		p.int(0) // dummy value for go:nointerface pragma - ignored by importer
394	}
395
396	if trace && methods != nil {
397		p.tracef("<\n} ")
398	}
399}
400
401type methodsByName []*types.Func
402
403func (x methodsByName) Len() int           { return len(x) }
404func (x methodsByName) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
405func (x methodsByName) Less(i, j int) bool { return x[i].Name() < x[j].Name() }
406
407func (p *exporter) fieldList(t *types.Struct) {
408	if trace && t.NumFields() > 0 {
409		p.tracef("fields {>\n")
410		defer p.tracef("<\n} ")
411	}
412
413	p.int(t.NumFields())
414	for i := 0; i < t.NumFields(); i++ {
415		if trace && i > 0 {
416			p.tracef("\n")
417		}
418		p.field(t.Field(i))
419		p.string(t.Tag(i))
420	}
421}
422
423func (p *exporter) field(f *types.Var) {
424	if !f.IsField() {
425		log.Fatalf("gcimporter: field expected")
426	}
427
428	p.pos(f)
429	p.fieldName(f)
430	p.typ(f.Type())
431}
432
433func (p *exporter) iface(t *types.Interface) {
434	// TODO(gri): enable importer to load embedded interfaces,
435	// then emit Embeddeds and ExplicitMethods separately here.
436	p.int(0)
437
438	n := t.NumMethods()
439	if trace && n > 0 {
440		p.tracef("methods {>\n")
441		defer p.tracef("<\n} ")
442	}
443	p.int(n)
444	for i := 0; i < n; i++ {
445		if trace && i > 0 {
446			p.tracef("\n")
447		}
448		p.method(t.Method(i))
449	}
450}
451
452func (p *exporter) method(m *types.Func) {
453	sig := m.Type().(*types.Signature)
454	if sig.Recv() == nil {
455		log.Fatalf("gcimporter: method expected")
456	}
457
458	p.pos(m)
459	p.string(m.Name())
460	if m.Name() != "_" && !ast.IsExported(m.Name()) {
461		p.pkg(m.Pkg(), false)
462	}
463
464	// interface method; no need to encode receiver.
465	p.paramList(sig.Params(), sig.Variadic())
466	p.paramList(sig.Results(), false)
467}
468
469func (p *exporter) fieldName(f *types.Var) {
470	name := f.Name()
471
472	if f.Anonymous() {
473		// anonymous field - we distinguish between 3 cases:
474		// 1) field name matches base type name and is exported
475		// 2) field name matches base type name and is not exported
476		// 3) field name doesn't match base type name (alias name)
477		bname := basetypeName(f.Type())
478		if name == bname {
479			if ast.IsExported(name) {
480				name = "" // 1) we don't need to know the field name or package
481			} else {
482				name = "?" // 2) use unexported name "?" to force package export
483			}
484		} else {
485			// 3) indicate alias and export name as is
486			// (this requires an extra "@" but this is a rare case)
487			p.string("@")
488		}
489	}
490
491	p.string(name)
492	if name != "" && !ast.IsExported(name) {
493		p.pkg(f.Pkg(), false)
494	}
495}
496
497func basetypeName(typ types.Type) string {
498	switch typ := deref(typ).(type) {
499	case *types.Basic:
500		return typ.Name()
501	case *types.Named:
502		return typ.Obj().Name()
503	default:
504		return "" // unnamed type
505	}
506}
507
508func (p *exporter) paramList(params *types.Tuple, variadic bool) {
509	// use negative length to indicate unnamed parameters
510	// (look at the first parameter only since either all
511	// names are present or all are absent)
512	n := params.Len()
513	if n > 0 && params.At(0).Name() == "" {
514		n = -n
515	}
516	p.int(n)
517	for i := 0; i < params.Len(); i++ {
518		q := params.At(i)
519		t := q.Type()
520		if variadic && i == params.Len()-1 {
521			t = &dddSlice{t.(*types.Slice).Elem()}
522		}
523		p.typ(t)
524		if n > 0 {
525			name := q.Name()
526			p.string(name)
527			if name != "_" {
528				p.pkg(q.Pkg(), false)
529			}
530		}
531		p.string("") // no compiler-specific info
532	}
533}
534
535func (p *exporter) value(x constant.Value) {
536	if trace {
537		p.tracef("= ")
538	}
539
540	switch x.Kind() {
541	case constant.Bool:
542		tag := falseTag
543		if constant.BoolVal(x) {
544			tag = trueTag
545		}
546		p.tag(tag)
547
548	case constant.Int:
549		if v, exact := constant.Int64Val(x); exact {
550			// common case: x fits into an int64 - use compact encoding
551			p.tag(int64Tag)
552			p.int64(v)
553			return
554		}
555		// uncommon case: large x - use float encoding
556		// (powers of 2 will be encoded efficiently with exponent)
557		p.tag(floatTag)
558		p.float(constant.ToFloat(x))
559
560	case constant.Float:
561		p.tag(floatTag)
562		p.float(x)
563
564	case constant.Complex:
565		p.tag(complexTag)
566		p.float(constant.Real(x))
567		p.float(constant.Imag(x))
568
569	case constant.String:
570		p.tag(stringTag)
571		p.string(constant.StringVal(x))
572
573	case constant.Unknown:
574		// package contains type errors
575		p.tag(unknownTag)
576
577	default:
578		log.Fatalf("gcimporter: unexpected value %v (%T)", x, x)
579	}
580}
581
582func (p *exporter) float(x constant.Value) {
583	if x.Kind() != constant.Float {
584		log.Fatalf("gcimporter: unexpected constant %v, want float", x)
585	}
586	// extract sign (there is no -0)
587	sign := constant.Sign(x)
588	if sign == 0 {
589		// x == 0
590		p.int(0)
591		return
592	}
593	// x != 0
594
595	var f big.Float
596	if v, exact := constant.Float64Val(x); exact {
597		// float64
598		f.SetFloat64(v)
599	} else if num, denom := constant.Num(x), constant.Denom(x); num.Kind() == constant.Int {
600		// TODO(gri): add big.Rat accessor to constant.Value.
601		r := valueToRat(num)
602		f.SetRat(r.Quo(r, valueToRat(denom)))
603	} else {
604		// Value too large to represent as a fraction => inaccessible.
605		// TODO(gri): add big.Float accessor to constant.Value.
606		f.SetFloat64(math.MaxFloat64) // FIXME
607	}
608
609	// extract exponent such that 0.5 <= m < 1.0
610	var m big.Float
611	exp := f.MantExp(&m)
612
613	// extract mantissa as *big.Int
614	// - set exponent large enough so mant satisfies mant.IsInt()
615	// - get *big.Int from mant
616	m.SetMantExp(&m, int(m.MinPrec()))
617	mant, acc := m.Int(nil)
618	if acc != big.Exact {
619		log.Fatalf("gcimporter: internal error")
620	}
621
622	p.int(sign)
623	p.int(exp)
624	p.string(string(mant.Bytes()))
625}
626
627func valueToRat(x constant.Value) *big.Rat {
628	// Convert little-endian to big-endian.
629	// I can't believe this is necessary.
630	bytes := constant.Bytes(x)
631	for i := 0; i < len(bytes)/2; i++ {
632		bytes[i], bytes[len(bytes)-1-i] = bytes[len(bytes)-1-i], bytes[i]
633	}
634	return new(big.Rat).SetInt(new(big.Int).SetBytes(bytes))
635}
636
637func (p *exporter) bool(b bool) bool {
638	if trace {
639		p.tracef("[")
640		defer p.tracef("= %v] ", b)
641	}
642
643	x := 0
644	if b {
645		x = 1
646	}
647	p.int(x)
648	return b
649}
650
651// ----------------------------------------------------------------------------
652// Low-level encoders
653
654func (p *exporter) index(marker byte, index int) {
655	if index < 0 {
656		log.Fatalf("gcimporter: invalid index < 0")
657	}
658	if debugFormat {
659		p.marker('t')
660	}
661	if trace {
662		p.tracef("%c%d ", marker, index)
663	}
664	p.rawInt64(int64(index))
665}
666
667func (p *exporter) tag(tag int) {
668	if tag >= 0 {
669		log.Fatalf("gcimporter: invalid tag >= 0")
670	}
671	if debugFormat {
672		p.marker('t')
673	}
674	if trace {
675		p.tracef("%s ", tagString[-tag])
676	}
677	p.rawInt64(int64(tag))
678}
679
680func (p *exporter) int(x int) {
681	p.int64(int64(x))
682}
683
684func (p *exporter) int64(x int64) {
685	if debugFormat {
686		p.marker('i')
687	}
688	if trace {
689		p.tracef("%d ", x)
690	}
691	p.rawInt64(x)
692}
693
694func (p *exporter) string(s string) {
695	if debugFormat {
696		p.marker('s')
697	}
698	if trace {
699		p.tracef("%q ", s)
700	}
701	// if we saw the string before, write its index (>= 0)
702	// (the empty string is mapped to 0)
703	if i, ok := p.strIndex[s]; ok {
704		p.rawInt64(int64(i))
705		return
706	}
707	// otherwise, remember string and write its negative length and bytes
708	p.strIndex[s] = len(p.strIndex)
709	p.rawInt64(-int64(len(s)))
710	for i := 0; i < len(s); i++ {
711		p.rawByte(s[i])
712	}
713}
714
715// marker emits a marker byte and position information which makes
716// it easy for a reader to detect if it is "out of sync". Used for
717// debugFormat format only.
718func (p *exporter) marker(m byte) {
719	p.rawByte(m)
720	// Enable this for help tracking down the location
721	// of an incorrect marker when running in debugFormat.
722	if false && trace {
723		p.tracef("#%d ", p.written)
724	}
725	p.rawInt64(int64(p.written))
726}
727
728// rawInt64 should only be used by low-level encoders.
729func (p *exporter) rawInt64(x int64) {
730	var tmp [binary.MaxVarintLen64]byte
731	n := binary.PutVarint(tmp[:], x)
732	for i := 0; i < n; i++ {
733		p.rawByte(tmp[i])
734	}
735}
736
737// rawStringln should only be used to emit the initial version string.
738func (p *exporter) rawStringln(s string) {
739	for i := 0; i < len(s); i++ {
740		p.rawByte(s[i])
741	}
742	p.rawByte('\n')
743}
744
745// rawByte is the bottleneck interface to write to p.out.
746// rawByte escapes b as follows (any encoding does that
747// hides '$'):
748//
749//	'$'  => '|' 'S'
750//	'|'  => '|' '|'
751//
752// Necessary so other tools can find the end of the
753// export data by searching for "$$".
754// rawByte should only be used by low-level encoders.
755func (p *exporter) rawByte(b byte) {
756	switch b {
757	case '$':
758		// write '$' as '|' 'S'
759		b = 'S'
760		fallthrough
761	case '|':
762		// write '|' as '|' '|'
763		p.out.WriteByte('|')
764		p.written++
765	}
766	p.out.WriteByte(b)
767	p.written++
768}
769
770// tracef is like fmt.Printf but it rewrites the format string
771// to take care of indentation.
772func (p *exporter) tracef(format string, args ...interface{}) {
773	if strings.ContainsAny(format, "<>\n") {
774		var buf bytes.Buffer
775		for i := 0; i < len(format); i++ {
776			// no need to deal with runes
777			ch := format[i]
778			switch ch {
779			case '>':
780				p.indent++
781				continue
782			case '<':
783				p.indent--
784				continue
785			}
786			buf.WriteByte(ch)
787			if ch == '\n' {
788				for j := p.indent; j > 0; j-- {
789					buf.WriteString(".  ")
790				}
791			}
792		}
793		format = buf.String()
794	}
795	fmt.Printf(format, args...)
796}
797
798// Debugging support.
799// (tagString is only used when tracing is enabled)
800var tagString = [...]string{
801	// Packages
802	-packageTag: "package",
803
804	// Types
805	-namedTag:     "named type",
806	-arrayTag:     "array",
807	-sliceTag:     "slice",
808	-dddTag:       "ddd",
809	-structTag:    "struct",
810	-pointerTag:   "pointer",
811	-signatureTag: "signature",
812	-interfaceTag: "interface",
813	-mapTag:       "map",
814	-chanTag:      "chan",
815
816	// Values
817	-falseTag:    "false",
818	-trueTag:     "true",
819	-int64Tag:    "int64",
820	-floatTag:    "float",
821	-fractionTag: "fraction",
822	-complexTag:  "complex",
823	-stringTag:   "string",
824	-unknownTag:  "unknown",
825
826	// Type aliases
827	-aliasTag: "alias",
828}
829