1package transform
2
3// This file has some compiler support for run-time reflection using the reflect
4// package. In particular, it encodes type information in type codes in such a
5// way that the reflect package can decode the type from this information.
6// Where needed, it also adds some side tables for looking up more information
7// about a type, when that information cannot be stored directly in the type
8// code.
9//
10// Go has 26 different type kinds.
11//
12// Type kinds are subdivided in basic types (see the list of basicTypes below)
13// that are mostly numeric literals and non-basic (or "complex") types that are
14// more difficult to encode. These non-basic types come in two forms:
15//   * Prefix types (pointer, slice, interface, channel): these just add
16//     something to an existing type. For example, a pointer like *int just adds
17//     the fact that it's a pointer to an existing type (int).
18//     These are encoded efficiently by adding a prefix to a type code.
19//   * Types with multiple fields (struct, array, func, map). All of these have
20//     multiple fields contained within. Most obviously structs can contain many
21//     types as fields. Also arrays contain not just the element type but also
22//     the length parameter which can be any arbitrary number and thus may not
23//     fit in a type code.
24//     These types are encoded using side tables.
25//
26// This distinction is also important for how named types are encoded. At the
27// moment, named basic type just get a unique number assigned while named
28// non-basic types have their underlying type stored in a sidetable.
29
30import (
31	"encoding/binary"
32	"go/ast"
33	"math/big"
34	"strings"
35
36	"tinygo.org/x/go-llvm"
37)
38
39// A list of basic types and their numbers. This list should be kept in sync
40// with the list of Kind constants of type.go in the reflect package.
41var basicTypes = map[string]int64{
42	"bool":       1,
43	"int":        2,
44	"int8":       3,
45	"int16":      4,
46	"int32":      5,
47	"int64":      6,
48	"uint":       7,
49	"uint8":      8,
50	"uint16":     9,
51	"uint32":     10,
52	"uint64":     11,
53	"uintptr":    12,
54	"float32":    13,
55	"float64":    14,
56	"complex64":  15,
57	"complex128": 16,
58	"string":     17,
59	"unsafeptr":  18,
60}
61
62// A list of non-basic types. Adding 19 to this number will give the Kind as
63// used in src/reflect/types.go, and it must be kept in sync with that list.
64var nonBasicTypes = map[string]int64{
65	"chan":      0,
66	"interface": 1,
67	"pointer":   2,
68	"slice":     3,
69	"array":     4,
70	"func":      5,
71	"map":       6,
72	"struct":    7,
73}
74
75// typeCodeAssignmentState keeps some global state around for type code
76// assignments, used to assign one unique type code to each Go type.
77type typeCodeAssignmentState struct {
78	// An integer that's incremented each time it's used to give unique IDs to
79	// type codes that are not yet fully supported otherwise by the reflect
80	// package (or are simply unused in the compiled program).
81	fallbackIndex int
82
83	// This is the length of an uintptr. Only used occasionally to know whether
84	// a given number can be encoded as a varint.
85	uintptrLen int
86
87	// Map of named types to their type code. It is important that named types
88	// get unique IDs for each type.
89	namedBasicTypes    map[string]int
90	namedNonBasicTypes map[string]int
91
92	// Map of array types to their type code.
93	arrayTypes               map[string]int
94	arrayTypesSidetable      []byte
95	needsArrayTypesSidetable bool
96
97	// Map of struct types to their type code.
98	structTypes               map[string]int
99	structTypesSidetable      []byte
100	needsStructNamesSidetable bool
101
102	// Map of struct names and tags to their name string.
103	structNames               map[string]int
104	structNamesSidetable      []byte
105	needsStructTypesSidetable bool
106
107	// This byte array is stored in reflect.namedNonBasicTypesSidetable and is
108	// used at runtime to get details about a named non-basic type.
109	// Entries are varints (see makeVarint below and readVarint in
110	// reflect/sidetables.go for the encoding): one varint per entry. The
111	// integers in namedNonBasicTypes are indices into this array. Because these
112	// are varints, most type codes are really small (just one byte).
113	//
114	// Note that this byte buffer is not created when it is not needed
115	// (reflect.namedNonBasicTypesSidetable has no uses), see
116	// needsNamedTypesSidetable.
117	namedNonBasicTypesSidetable []uint64
118
119	// This indicates whether namedNonBasicTypesSidetable needs to be created at
120	// all. If it is false, namedNonBasicTypesSidetable will contain simple
121	// monotonically increasing numbers.
122	needsNamedNonBasicTypesSidetable bool
123}
124
125// assignTypeCodes is used to assign a type code to each type in the program
126// that is ever stored in an interface. It tries to use the smallest possible
127// numbers to make the code that works with interfaces as small as possible.
128func assignTypeCodes(mod llvm.Module, typeSlice typeInfoSlice) {
129	// if reflect were not used, we could skip generating the sidetable
130	// this does not help in practice, and is difficult to do correctly
131
132	// Assign typecodes the way the reflect package expects.
133	state := typeCodeAssignmentState{
134		fallbackIndex:                    1,
135		uintptrLen:                       llvm.NewTargetData(mod.DataLayout()).PointerSize() * 8,
136		namedBasicTypes:                  make(map[string]int),
137		namedNonBasicTypes:               make(map[string]int),
138		arrayTypes:                       make(map[string]int),
139		structTypes:                      make(map[string]int),
140		structNames:                      make(map[string]int),
141		needsNamedNonBasicTypesSidetable: len(getUses(mod.NamedGlobal("reflect.namedNonBasicTypesSidetable"))) != 0,
142		needsStructTypesSidetable:        len(getUses(mod.NamedGlobal("reflect.structTypesSidetable"))) != 0,
143		needsStructNamesSidetable:        len(getUses(mod.NamedGlobal("reflect.structNamesSidetable"))) != 0,
144		needsArrayTypesSidetable:         len(getUses(mod.NamedGlobal("reflect.arrayTypesSidetable"))) != 0,
145	}
146	for _, t := range typeSlice {
147		num := state.getTypeCodeNum(t.typecode)
148		if num.BitLen() > state.uintptrLen || !num.IsUint64() {
149			// TODO: support this in some way, using a side table for example.
150			// That's less efficient but better than not working at all.
151			// Particularly important on systems with 16-bit pointers (e.g.
152			// AVR).
153			panic("compiler: could not store type code number inside interface type code")
154		}
155		t.num = num.Uint64()
156	}
157
158	// Only create this sidetable when it is necessary.
159	if state.needsNamedNonBasicTypesSidetable {
160		global := replaceGlobalIntWithArray(mod, "reflect.namedNonBasicTypesSidetable", state.namedNonBasicTypesSidetable)
161		global.SetLinkage(llvm.InternalLinkage)
162		global.SetUnnamedAddr(true)
163		global.SetGlobalConstant(true)
164	}
165	if state.needsArrayTypesSidetable {
166		global := replaceGlobalIntWithArray(mod, "reflect.arrayTypesSidetable", state.arrayTypesSidetable)
167		global.SetLinkage(llvm.InternalLinkage)
168		global.SetUnnamedAddr(true)
169		global.SetGlobalConstant(true)
170	}
171	if state.needsStructTypesSidetable {
172		global := replaceGlobalIntWithArray(mod, "reflect.structTypesSidetable", state.structTypesSidetable)
173		global.SetLinkage(llvm.InternalLinkage)
174		global.SetUnnamedAddr(true)
175		global.SetGlobalConstant(true)
176	}
177	if state.needsStructNamesSidetable {
178		global := replaceGlobalIntWithArray(mod, "reflect.structNamesSidetable", state.structNamesSidetable)
179		global.SetLinkage(llvm.InternalLinkage)
180		global.SetUnnamedAddr(true)
181		global.SetGlobalConstant(true)
182	}
183}
184
185// getTypeCodeNum returns the typecode for a given type as expected by the
186// reflect package. Also see getTypeCodeName, which serializes types to a string
187// based on a types.Type value for this function.
188func (state *typeCodeAssignmentState) getTypeCodeNum(typecode llvm.Value) *big.Int {
189	// Note: see src/reflect/type.go for bit allocations.
190	class, value := getClassAndValueFromTypeCode(typecode)
191	name := ""
192	if class == "named" {
193		name = value
194		typecode = llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
195		class, value = getClassAndValueFromTypeCode(typecode)
196	}
197	if class == "basic" {
198		// Basic types follow the following bit pattern:
199		//    ...xxxxx0
200		// where xxxxx is allocated for the 18 possible basic types and all the
201		// upper bits are used to indicate the named type.
202		num, ok := basicTypes[value]
203		if !ok {
204			panic("invalid basic type: " + value)
205		}
206		if name != "" {
207			// This type is named, set the upper bits to the name ID.
208			num |= int64(state.getBasicNamedTypeNum(name)) << 5
209		}
210		return big.NewInt(num << 1)
211	} else {
212		// Non-baisc types use the following bit pattern:
213		//    ...nxxx1
214		// where xxx indicates the non-basic type. The upper bits contain
215		// whatever the type contains. Types that wrap a single other type
216		// (channel, interface, pointer, slice) just contain the bits of the
217		// wrapped type. Other types (like struct) need more fields and thus
218		// cannot be encoded as a simple prefix.
219		var classNumber int64
220		if n, ok := nonBasicTypes[class]; ok {
221			classNumber = n
222		} else {
223			panic("unknown type kind: " + class)
224		}
225		var num *big.Int
226		lowBits := (classNumber << 1) + 1 // the 5 low bits of the typecode
227		if name == "" {
228			num = state.getNonBasicTypeCode(class, typecode)
229		} else {
230			// We must return a named type here. But first check whether it
231			// has already been defined.
232			if index, ok := state.namedNonBasicTypes[name]; ok {
233				num := big.NewInt(int64(index))
234				num.Lsh(num, 5).Or(num, big.NewInt((classNumber<<1)+1+(1<<4)))
235				return num
236			}
237			lowBits |= 1 << 4 // set the 'n' bit (see above)
238			if !state.needsNamedNonBasicTypesSidetable {
239				// Use simple small integers in this case, to make these numbers
240				// smaller.
241				index := len(state.namedNonBasicTypes) + 1
242				state.namedNonBasicTypes[name] = index
243				num = big.NewInt(int64(index))
244			} else {
245				// We need to store full type information.
246				// First allocate a number in the named non-basic type
247				// sidetable.
248				index := len(state.namedNonBasicTypesSidetable)
249				state.namedNonBasicTypesSidetable = append(state.namedNonBasicTypesSidetable, 0)
250				state.namedNonBasicTypes[name] = index
251				// Get the typecode of the underlying type (which could be the
252				// element type in the case of pointers, for example).
253				num = state.getNonBasicTypeCode(class, typecode)
254				if num.BitLen() > state.uintptrLen || !num.IsUint64() {
255					panic("cannot store value in sidetable")
256				}
257				// Now update the side table with the number we just
258				// determined. We need this multi-step approach to avoid stack
259				// overflow due to adding types recursively in the case of
260				// linked lists (a pointer which points to a struct that
261				// contains that same pointer).
262				state.namedNonBasicTypesSidetable[index] = num.Uint64()
263				num = big.NewInt(int64(index))
264			}
265		}
266		// Concatenate the 'num' and 'lowBits' bitstrings.
267		num.Lsh(num, 5).Or(num, big.NewInt(lowBits))
268		return num
269	}
270}
271
272// getNonBasicTypeCode is used by getTypeCodeNum. It returns the upper bits of
273// the type code used there in the type code.
274func (state *typeCodeAssignmentState) getNonBasicTypeCode(class string, typecode llvm.Value) *big.Int {
275	switch class {
276	case "chan", "pointer", "slice":
277		// Prefix-style type kinds. The upper bits contain the element type.
278		sub := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
279		return state.getTypeCodeNum(sub)
280	case "array":
281		// An array is basically a pair of (typecode, length) stored in a
282		// sidetable.
283		return big.NewInt(int64(state.getArrayTypeNum(typecode)))
284	case "struct":
285		// More complicated type kind. The upper bits contain the index to the
286		// struct type in the struct types sidetable.
287		return big.NewInt(int64(state.getStructTypeNum(typecode)))
288	default:
289		// Type has not yet been implemented, so fall back by using a unique
290		// number.
291		num := big.NewInt(int64(state.fallbackIndex))
292		state.fallbackIndex++
293		return num
294	}
295}
296
297// getClassAndValueFromTypeCode takes a typecode (a llvm.Value of type
298// runtime.typecodeID), looks at the name, and extracts the typecode class and
299// value from it. For example, for a typecode with the following name:
300//     reflect/types.type:pointer:named:reflect.ValueError
301// It extracts:
302//     class = "pointer"
303//     value = "named:reflect.ValueError"
304func getClassAndValueFromTypeCode(typecode llvm.Value) (class, value string) {
305	typecodeName := typecode.Name()
306	const prefix = "reflect/types.type:"
307	if !strings.HasPrefix(typecodeName, prefix) {
308		panic("unexpected typecode name: " + typecodeName)
309	}
310	id := typecodeName[len(prefix):]
311	class = id[:strings.IndexByte(id, ':')]
312	value = id[len(class)+1:]
313	return
314}
315
316// getBasicNamedTypeNum returns an appropriate (unique) number for the given
317// named type. If the name already has a number that number is returned, else a
318// new number is returned. The number is always non-zero.
319func (state *typeCodeAssignmentState) getBasicNamedTypeNum(name string) int {
320	if num, ok := state.namedBasicTypes[name]; ok {
321		return num
322	}
323	num := len(state.namedBasicTypes) + 1
324	state.namedBasicTypes[name] = num
325	return num
326}
327
328// getArrayTypeNum returns the array type number, which is an index into the
329// reflect.arrayTypesSidetable or a unique number for this type if this table is
330// not used.
331func (state *typeCodeAssignmentState) getArrayTypeNum(typecode llvm.Value) int {
332	name := typecode.Name()
333	if num, ok := state.arrayTypes[name]; ok {
334		// This array type already has an entry in the sidetable. Don't store
335		// it twice.
336		return num
337	}
338
339	if !state.needsArrayTypesSidetable {
340		// We don't need array sidetables, so we can just assign monotonically
341		// increasing numbers to each array type.
342		num := len(state.arrayTypes)
343		state.arrayTypes[name] = num
344		return num
345	}
346
347	elemTypeCode := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0})
348	elemTypeNum := state.getTypeCodeNum(elemTypeCode)
349	if elemTypeNum.BitLen() > state.uintptrLen || !elemTypeNum.IsUint64() {
350		// TODO: make this a regular error
351		panic("array element type has a type code that is too big")
352	}
353
354	// The array side table is a sequence of {element type, array length}.
355	arrayLength := llvm.ConstExtractValue(typecode.Initializer(), []uint32{1}).ZExtValue()
356	buf := makeVarint(elemTypeNum.Uint64())
357	buf = append(buf, makeVarint(arrayLength)...)
358
359	index := len(state.arrayTypesSidetable)
360	state.arrayTypes[name] = index
361	state.arrayTypesSidetable = append(state.arrayTypesSidetable, buf...)
362	return index
363}
364
365// getStructTypeNum returns the struct type number, which is an index into
366// reflect.structTypesSidetable or an unique number for every struct if this
367// sidetable is not needed in the to-be-compiled program.
368func (state *typeCodeAssignmentState) getStructTypeNum(typecode llvm.Value) int {
369	name := typecode.Name()
370	if num, ok := state.structTypes[name]; ok {
371		// This struct already has an assigned type code.
372		return num
373	}
374
375	if !state.needsStructTypesSidetable {
376		// We don't need struct sidetables, so we can just assign monotonically
377		// increasing numbers to each struct type.
378		num := len(state.structTypes)
379		state.structTypes[name] = num
380		return num
381	}
382
383	// Get the fields this struct type contains.
384	// The struct number will be the start index of
385	structTypeGlobal := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}).Operand(0).Initializer()
386	numFields := structTypeGlobal.Type().ArrayLength()
387
388	// The first data that is stored in the struct sidetable is the number of
389	// fields this struct contains. This is usually just a single byte because
390	// most structs don't contain that many fields, but make it a varint just
391	// to be sure.
392	buf := makeVarint(uint64(numFields))
393
394	// Iterate over every field in the struct.
395	// Every field is stored sequentially in the struct sidetable. Fields can
396	// be retrieved from this list of fields at runtime by iterating over all
397	// of them until the right field has been found.
398	// Perhaps adding some index would speed things up, but it would also make
399	// the sidetable bigger.
400	for i := 0; i < numFields; i++ {
401		// Collect some information about this field.
402		field := llvm.ConstExtractValue(structTypeGlobal, []uint32{uint32(i)})
403
404		nameGlobal := llvm.ConstExtractValue(field, []uint32{1})
405		if nameGlobal == llvm.ConstPointerNull(nameGlobal.Type()) {
406			panic("compiler: no name for this struct field")
407		}
408		fieldNameBytes := getGlobalBytes(nameGlobal.Operand(0))
409		fieldNameNumber := state.getStructNameNumber(fieldNameBytes)
410
411		// See whether this struct field has an associated tag, and if so,
412		// store that tag in the tags sidetable.
413		tagGlobal := llvm.ConstExtractValue(field, []uint32{2})
414		hasTag := false
415		tagNumber := 0
416		if tagGlobal != llvm.ConstPointerNull(tagGlobal.Type()) {
417			hasTag = true
418			tagBytes := getGlobalBytes(tagGlobal.Operand(0))
419			tagNumber = state.getStructNameNumber(tagBytes)
420		}
421
422		// The 'embedded' or 'anonymous' flag for this field.
423		embedded := llvm.ConstExtractValue(field, []uint32{3}).ZExtValue() != 0
424
425		// The first byte in the struct types sidetable is a flags byte with
426		// two bits in it.
427		flagsByte := byte(0)
428		if embedded {
429			flagsByte |= 1
430		}
431		if hasTag {
432			flagsByte |= 2
433		}
434		if ast.IsExported(string(fieldNameBytes)) {
435			flagsByte |= 4
436		}
437		buf = append(buf, flagsByte)
438
439		// Get the type number and add it to the buffer.
440		// All fields have a type, so include it directly here.
441		typeNum := state.getTypeCodeNum(llvm.ConstExtractValue(field, []uint32{0}))
442		if typeNum.BitLen() > state.uintptrLen || !typeNum.IsUint64() {
443			// TODO: make this a regular error
444			panic("struct field has a type code that is too big")
445		}
446		buf = append(buf, makeVarint(typeNum.Uint64())...)
447
448		// Add the name.
449		buf = append(buf, makeVarint(uint64(fieldNameNumber))...)
450
451		// Add the tag, if there is one.
452		if hasTag {
453			buf = append(buf, makeVarint(uint64(tagNumber))...)
454		}
455	}
456
457	num := len(state.structTypesSidetable)
458	state.structTypes[name] = num
459	state.structTypesSidetable = append(state.structTypesSidetable, buf...)
460	return num
461}
462
463// getStructNameNumber stores this string (name or tag) onto the struct names
464// sidetable. The format is a varint of the length of the struct, followed by
465// the raw bytes of the name. Multiple identical strings are stored under the
466// same name for space efficiency.
467func (state *typeCodeAssignmentState) getStructNameNumber(nameBytes []byte) int {
468	name := string(nameBytes)
469	if n, ok := state.structNames[name]; ok {
470		// This name was used before, re-use it now (for space efficiency).
471		return n
472	}
473	// This name is not yet in the names sidetable. Add it now.
474	n := len(state.structNamesSidetable)
475	state.structNames[name] = n
476	state.structNamesSidetable = append(state.structNamesSidetable, makeVarint(uint64(len(nameBytes)))...)
477	state.structNamesSidetable = append(state.structNamesSidetable, nameBytes...)
478	return n
479}
480
481// makeVarint is a small helper function that returns the bytes of the number in
482// varint encoding.
483func makeVarint(n uint64) []byte {
484	buf := make([]byte, binary.MaxVarintLen64)
485	return buf[:binary.PutUvarint(buf, n)]
486}
487