1package transform 2 3// This file has some compiler support for run-time reflection using the reflect 4// package. In particular, it encodes type information in type codes in such a 5// way that the reflect package can decode the type from this information. 6// Where needed, it also adds some side tables for looking up more information 7// about a type, when that information cannot be stored directly in the type 8// code. 9// 10// Go has 26 different type kinds. 11// 12// Type kinds are subdivided in basic types (see the list of basicTypes below) 13// that are mostly numeric literals and non-basic (or "complex") types that are 14// more difficult to encode. These non-basic types come in two forms: 15// * Prefix types (pointer, slice, interface, channel): these just add 16// something to an existing type. For example, a pointer like *int just adds 17// the fact that it's a pointer to an existing type (int). 18// These are encoded efficiently by adding a prefix to a type code. 19// * Types with multiple fields (struct, array, func, map). All of these have 20// multiple fields contained within. Most obviously structs can contain many 21// types as fields. Also arrays contain not just the element type but also 22// the length parameter which can be any arbitrary number and thus may not 23// fit in a type code. 24// These types are encoded using side tables. 25// 26// This distinction is also important for how named types are encoded. At the 27// moment, named basic type just get a unique number assigned while named 28// non-basic types have their underlying type stored in a sidetable. 29 30import ( 31 "encoding/binary" 32 "go/ast" 33 "math/big" 34 "strings" 35 36 "tinygo.org/x/go-llvm" 37) 38 39// A list of basic types and their numbers. This list should be kept in sync 40// with the list of Kind constants of type.go in the reflect package. 41var basicTypes = map[string]int64{ 42 "bool": 1, 43 "int": 2, 44 "int8": 3, 45 "int16": 4, 46 "int32": 5, 47 "int64": 6, 48 "uint": 7, 49 "uint8": 8, 50 "uint16": 9, 51 "uint32": 10, 52 "uint64": 11, 53 "uintptr": 12, 54 "float32": 13, 55 "float64": 14, 56 "complex64": 15, 57 "complex128": 16, 58 "string": 17, 59 "unsafeptr": 18, 60} 61 62// A list of non-basic types. Adding 19 to this number will give the Kind as 63// used in src/reflect/types.go, and it must be kept in sync with that list. 64var nonBasicTypes = map[string]int64{ 65 "chan": 0, 66 "interface": 1, 67 "pointer": 2, 68 "slice": 3, 69 "array": 4, 70 "func": 5, 71 "map": 6, 72 "struct": 7, 73} 74 75// typeCodeAssignmentState keeps some global state around for type code 76// assignments, used to assign one unique type code to each Go type. 77type typeCodeAssignmentState struct { 78 // An integer that's incremented each time it's used to give unique IDs to 79 // type codes that are not yet fully supported otherwise by the reflect 80 // package (or are simply unused in the compiled program). 81 fallbackIndex int 82 83 // This is the length of an uintptr. Only used occasionally to know whether 84 // a given number can be encoded as a varint. 85 uintptrLen int 86 87 // Map of named types to their type code. It is important that named types 88 // get unique IDs for each type. 89 namedBasicTypes map[string]int 90 namedNonBasicTypes map[string]int 91 92 // Map of array types to their type code. 93 arrayTypes map[string]int 94 arrayTypesSidetable []byte 95 needsArrayTypesSidetable bool 96 97 // Map of struct types to their type code. 98 structTypes map[string]int 99 structTypesSidetable []byte 100 needsStructNamesSidetable bool 101 102 // Map of struct names and tags to their name string. 103 structNames map[string]int 104 structNamesSidetable []byte 105 needsStructTypesSidetable bool 106 107 // This byte array is stored in reflect.namedNonBasicTypesSidetable and is 108 // used at runtime to get details about a named non-basic type. 109 // Entries are varints (see makeVarint below and readVarint in 110 // reflect/sidetables.go for the encoding): one varint per entry. The 111 // integers in namedNonBasicTypes are indices into this array. Because these 112 // are varints, most type codes are really small (just one byte). 113 // 114 // Note that this byte buffer is not created when it is not needed 115 // (reflect.namedNonBasicTypesSidetable has no uses), see 116 // needsNamedTypesSidetable. 117 namedNonBasicTypesSidetable []uint64 118 119 // This indicates whether namedNonBasicTypesSidetable needs to be created at 120 // all. If it is false, namedNonBasicTypesSidetable will contain simple 121 // monotonically increasing numbers. 122 needsNamedNonBasicTypesSidetable bool 123} 124 125// assignTypeCodes is used to assign a type code to each type in the program 126// that is ever stored in an interface. It tries to use the smallest possible 127// numbers to make the code that works with interfaces as small as possible. 128func assignTypeCodes(mod llvm.Module, typeSlice typeInfoSlice) { 129 // if reflect were not used, we could skip generating the sidetable 130 // this does not help in practice, and is difficult to do correctly 131 132 // Assign typecodes the way the reflect package expects. 133 state := typeCodeAssignmentState{ 134 fallbackIndex: 1, 135 uintptrLen: llvm.NewTargetData(mod.DataLayout()).PointerSize() * 8, 136 namedBasicTypes: make(map[string]int), 137 namedNonBasicTypes: make(map[string]int), 138 arrayTypes: make(map[string]int), 139 structTypes: make(map[string]int), 140 structNames: make(map[string]int), 141 needsNamedNonBasicTypesSidetable: len(getUses(mod.NamedGlobal("reflect.namedNonBasicTypesSidetable"))) != 0, 142 needsStructTypesSidetable: len(getUses(mod.NamedGlobal("reflect.structTypesSidetable"))) != 0, 143 needsStructNamesSidetable: len(getUses(mod.NamedGlobal("reflect.structNamesSidetable"))) != 0, 144 needsArrayTypesSidetable: len(getUses(mod.NamedGlobal("reflect.arrayTypesSidetable"))) != 0, 145 } 146 for _, t := range typeSlice { 147 num := state.getTypeCodeNum(t.typecode) 148 if num.BitLen() > state.uintptrLen || !num.IsUint64() { 149 // TODO: support this in some way, using a side table for example. 150 // That's less efficient but better than not working at all. 151 // Particularly important on systems with 16-bit pointers (e.g. 152 // AVR). 153 panic("compiler: could not store type code number inside interface type code") 154 } 155 t.num = num.Uint64() 156 } 157 158 // Only create this sidetable when it is necessary. 159 if state.needsNamedNonBasicTypesSidetable { 160 global := replaceGlobalIntWithArray(mod, "reflect.namedNonBasicTypesSidetable", state.namedNonBasicTypesSidetable) 161 global.SetLinkage(llvm.InternalLinkage) 162 global.SetUnnamedAddr(true) 163 global.SetGlobalConstant(true) 164 } 165 if state.needsArrayTypesSidetable { 166 global := replaceGlobalIntWithArray(mod, "reflect.arrayTypesSidetable", state.arrayTypesSidetable) 167 global.SetLinkage(llvm.InternalLinkage) 168 global.SetUnnamedAddr(true) 169 global.SetGlobalConstant(true) 170 } 171 if state.needsStructTypesSidetable { 172 global := replaceGlobalIntWithArray(mod, "reflect.structTypesSidetable", state.structTypesSidetable) 173 global.SetLinkage(llvm.InternalLinkage) 174 global.SetUnnamedAddr(true) 175 global.SetGlobalConstant(true) 176 } 177 if state.needsStructNamesSidetable { 178 global := replaceGlobalIntWithArray(mod, "reflect.structNamesSidetable", state.structNamesSidetable) 179 global.SetLinkage(llvm.InternalLinkage) 180 global.SetUnnamedAddr(true) 181 global.SetGlobalConstant(true) 182 } 183} 184 185// getTypeCodeNum returns the typecode for a given type as expected by the 186// reflect package. Also see getTypeCodeName, which serializes types to a string 187// based on a types.Type value for this function. 188func (state *typeCodeAssignmentState) getTypeCodeNum(typecode llvm.Value) *big.Int { 189 // Note: see src/reflect/type.go for bit allocations. 190 class, value := getClassAndValueFromTypeCode(typecode) 191 name := "" 192 if class == "named" { 193 name = value 194 typecode = llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}) 195 class, value = getClassAndValueFromTypeCode(typecode) 196 } 197 if class == "basic" { 198 // Basic types follow the following bit pattern: 199 // ...xxxxx0 200 // where xxxxx is allocated for the 18 possible basic types and all the 201 // upper bits are used to indicate the named type. 202 num, ok := basicTypes[value] 203 if !ok { 204 panic("invalid basic type: " + value) 205 } 206 if name != "" { 207 // This type is named, set the upper bits to the name ID. 208 num |= int64(state.getBasicNamedTypeNum(name)) << 5 209 } 210 return big.NewInt(num << 1) 211 } else { 212 // Non-baisc types use the following bit pattern: 213 // ...nxxx1 214 // where xxx indicates the non-basic type. The upper bits contain 215 // whatever the type contains. Types that wrap a single other type 216 // (channel, interface, pointer, slice) just contain the bits of the 217 // wrapped type. Other types (like struct) need more fields and thus 218 // cannot be encoded as a simple prefix. 219 var classNumber int64 220 if n, ok := nonBasicTypes[class]; ok { 221 classNumber = n 222 } else { 223 panic("unknown type kind: " + class) 224 } 225 var num *big.Int 226 lowBits := (classNumber << 1) + 1 // the 5 low bits of the typecode 227 if name == "" { 228 num = state.getNonBasicTypeCode(class, typecode) 229 } else { 230 // We must return a named type here. But first check whether it 231 // has already been defined. 232 if index, ok := state.namedNonBasicTypes[name]; ok { 233 num := big.NewInt(int64(index)) 234 num.Lsh(num, 5).Or(num, big.NewInt((classNumber<<1)+1+(1<<4))) 235 return num 236 } 237 lowBits |= 1 << 4 // set the 'n' bit (see above) 238 if !state.needsNamedNonBasicTypesSidetable { 239 // Use simple small integers in this case, to make these numbers 240 // smaller. 241 index := len(state.namedNonBasicTypes) + 1 242 state.namedNonBasicTypes[name] = index 243 num = big.NewInt(int64(index)) 244 } else { 245 // We need to store full type information. 246 // First allocate a number in the named non-basic type 247 // sidetable. 248 index := len(state.namedNonBasicTypesSidetable) 249 state.namedNonBasicTypesSidetable = append(state.namedNonBasicTypesSidetable, 0) 250 state.namedNonBasicTypes[name] = index 251 // Get the typecode of the underlying type (which could be the 252 // element type in the case of pointers, for example). 253 num = state.getNonBasicTypeCode(class, typecode) 254 if num.BitLen() > state.uintptrLen || !num.IsUint64() { 255 panic("cannot store value in sidetable") 256 } 257 // Now update the side table with the number we just 258 // determined. We need this multi-step approach to avoid stack 259 // overflow due to adding types recursively in the case of 260 // linked lists (a pointer which points to a struct that 261 // contains that same pointer). 262 state.namedNonBasicTypesSidetable[index] = num.Uint64() 263 num = big.NewInt(int64(index)) 264 } 265 } 266 // Concatenate the 'num' and 'lowBits' bitstrings. 267 num.Lsh(num, 5).Or(num, big.NewInt(lowBits)) 268 return num 269 } 270} 271 272// getNonBasicTypeCode is used by getTypeCodeNum. It returns the upper bits of 273// the type code used there in the type code. 274func (state *typeCodeAssignmentState) getNonBasicTypeCode(class string, typecode llvm.Value) *big.Int { 275 switch class { 276 case "chan", "pointer", "slice": 277 // Prefix-style type kinds. The upper bits contain the element type. 278 sub := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}) 279 return state.getTypeCodeNum(sub) 280 case "array": 281 // An array is basically a pair of (typecode, length) stored in a 282 // sidetable. 283 return big.NewInt(int64(state.getArrayTypeNum(typecode))) 284 case "struct": 285 // More complicated type kind. The upper bits contain the index to the 286 // struct type in the struct types sidetable. 287 return big.NewInt(int64(state.getStructTypeNum(typecode))) 288 default: 289 // Type has not yet been implemented, so fall back by using a unique 290 // number. 291 num := big.NewInt(int64(state.fallbackIndex)) 292 state.fallbackIndex++ 293 return num 294 } 295} 296 297// getClassAndValueFromTypeCode takes a typecode (a llvm.Value of type 298// runtime.typecodeID), looks at the name, and extracts the typecode class and 299// value from it. For example, for a typecode with the following name: 300// reflect/types.type:pointer:named:reflect.ValueError 301// It extracts: 302// class = "pointer" 303// value = "named:reflect.ValueError" 304func getClassAndValueFromTypeCode(typecode llvm.Value) (class, value string) { 305 typecodeName := typecode.Name() 306 const prefix = "reflect/types.type:" 307 if !strings.HasPrefix(typecodeName, prefix) { 308 panic("unexpected typecode name: " + typecodeName) 309 } 310 id := typecodeName[len(prefix):] 311 class = id[:strings.IndexByte(id, ':')] 312 value = id[len(class)+1:] 313 return 314} 315 316// getBasicNamedTypeNum returns an appropriate (unique) number for the given 317// named type. If the name already has a number that number is returned, else a 318// new number is returned. The number is always non-zero. 319func (state *typeCodeAssignmentState) getBasicNamedTypeNum(name string) int { 320 if num, ok := state.namedBasicTypes[name]; ok { 321 return num 322 } 323 num := len(state.namedBasicTypes) + 1 324 state.namedBasicTypes[name] = num 325 return num 326} 327 328// getArrayTypeNum returns the array type number, which is an index into the 329// reflect.arrayTypesSidetable or a unique number for this type if this table is 330// not used. 331func (state *typeCodeAssignmentState) getArrayTypeNum(typecode llvm.Value) int { 332 name := typecode.Name() 333 if num, ok := state.arrayTypes[name]; ok { 334 // This array type already has an entry in the sidetable. Don't store 335 // it twice. 336 return num 337 } 338 339 if !state.needsArrayTypesSidetable { 340 // We don't need array sidetables, so we can just assign monotonically 341 // increasing numbers to each array type. 342 num := len(state.arrayTypes) 343 state.arrayTypes[name] = num 344 return num 345 } 346 347 elemTypeCode := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}) 348 elemTypeNum := state.getTypeCodeNum(elemTypeCode) 349 if elemTypeNum.BitLen() > state.uintptrLen || !elemTypeNum.IsUint64() { 350 // TODO: make this a regular error 351 panic("array element type has a type code that is too big") 352 } 353 354 // The array side table is a sequence of {element type, array length}. 355 arrayLength := llvm.ConstExtractValue(typecode.Initializer(), []uint32{1}).ZExtValue() 356 buf := makeVarint(elemTypeNum.Uint64()) 357 buf = append(buf, makeVarint(arrayLength)...) 358 359 index := len(state.arrayTypesSidetable) 360 state.arrayTypes[name] = index 361 state.arrayTypesSidetable = append(state.arrayTypesSidetable, buf...) 362 return index 363} 364 365// getStructTypeNum returns the struct type number, which is an index into 366// reflect.structTypesSidetable or an unique number for every struct if this 367// sidetable is not needed in the to-be-compiled program. 368func (state *typeCodeAssignmentState) getStructTypeNum(typecode llvm.Value) int { 369 name := typecode.Name() 370 if num, ok := state.structTypes[name]; ok { 371 // This struct already has an assigned type code. 372 return num 373 } 374 375 if !state.needsStructTypesSidetable { 376 // We don't need struct sidetables, so we can just assign monotonically 377 // increasing numbers to each struct type. 378 num := len(state.structTypes) 379 state.structTypes[name] = num 380 return num 381 } 382 383 // Get the fields this struct type contains. 384 // The struct number will be the start index of 385 structTypeGlobal := llvm.ConstExtractValue(typecode.Initializer(), []uint32{0}).Operand(0).Initializer() 386 numFields := structTypeGlobal.Type().ArrayLength() 387 388 // The first data that is stored in the struct sidetable is the number of 389 // fields this struct contains. This is usually just a single byte because 390 // most structs don't contain that many fields, but make it a varint just 391 // to be sure. 392 buf := makeVarint(uint64(numFields)) 393 394 // Iterate over every field in the struct. 395 // Every field is stored sequentially in the struct sidetable. Fields can 396 // be retrieved from this list of fields at runtime by iterating over all 397 // of them until the right field has been found. 398 // Perhaps adding some index would speed things up, but it would also make 399 // the sidetable bigger. 400 for i := 0; i < numFields; i++ { 401 // Collect some information about this field. 402 field := llvm.ConstExtractValue(structTypeGlobal, []uint32{uint32(i)}) 403 404 nameGlobal := llvm.ConstExtractValue(field, []uint32{1}) 405 if nameGlobal == llvm.ConstPointerNull(nameGlobal.Type()) { 406 panic("compiler: no name for this struct field") 407 } 408 fieldNameBytes := getGlobalBytes(nameGlobal.Operand(0)) 409 fieldNameNumber := state.getStructNameNumber(fieldNameBytes) 410 411 // See whether this struct field has an associated tag, and if so, 412 // store that tag in the tags sidetable. 413 tagGlobal := llvm.ConstExtractValue(field, []uint32{2}) 414 hasTag := false 415 tagNumber := 0 416 if tagGlobal != llvm.ConstPointerNull(tagGlobal.Type()) { 417 hasTag = true 418 tagBytes := getGlobalBytes(tagGlobal.Operand(0)) 419 tagNumber = state.getStructNameNumber(tagBytes) 420 } 421 422 // The 'embedded' or 'anonymous' flag for this field. 423 embedded := llvm.ConstExtractValue(field, []uint32{3}).ZExtValue() != 0 424 425 // The first byte in the struct types sidetable is a flags byte with 426 // two bits in it. 427 flagsByte := byte(0) 428 if embedded { 429 flagsByte |= 1 430 } 431 if hasTag { 432 flagsByte |= 2 433 } 434 if ast.IsExported(string(fieldNameBytes)) { 435 flagsByte |= 4 436 } 437 buf = append(buf, flagsByte) 438 439 // Get the type number and add it to the buffer. 440 // All fields have a type, so include it directly here. 441 typeNum := state.getTypeCodeNum(llvm.ConstExtractValue(field, []uint32{0})) 442 if typeNum.BitLen() > state.uintptrLen || !typeNum.IsUint64() { 443 // TODO: make this a regular error 444 panic("struct field has a type code that is too big") 445 } 446 buf = append(buf, makeVarint(typeNum.Uint64())...) 447 448 // Add the name. 449 buf = append(buf, makeVarint(uint64(fieldNameNumber))...) 450 451 // Add the tag, if there is one. 452 if hasTag { 453 buf = append(buf, makeVarint(uint64(tagNumber))...) 454 } 455 } 456 457 num := len(state.structTypesSidetable) 458 state.structTypes[name] = num 459 state.structTypesSidetable = append(state.structTypesSidetable, buf...) 460 return num 461} 462 463// getStructNameNumber stores this string (name or tag) onto the struct names 464// sidetable. The format is a varint of the length of the struct, followed by 465// the raw bytes of the name. Multiple identical strings are stored under the 466// same name for space efficiency. 467func (state *typeCodeAssignmentState) getStructNameNumber(nameBytes []byte) int { 468 name := string(nameBytes) 469 if n, ok := state.structNames[name]; ok { 470 // This name was used before, re-use it now (for space efficiency). 471 return n 472 } 473 // This name is not yet in the names sidetable. Add it now. 474 n := len(state.structNamesSidetable) 475 state.structNames[name] = n 476 state.structNamesSidetable = append(state.structNamesSidetable, makeVarint(uint64(len(nameBytes)))...) 477 state.structNamesSidetable = append(state.structNamesSidetable, nameBytes...) 478 return n 479} 480 481// makeVarint is a small helper function that returns the bytes of the number in 482// varint encoding. 483func makeVarint(n uint64) []byte { 484 buf := make([]byte, binary.MaxVarintLen64) 485 return buf[:binary.PutUvarint(buf, n)] 486} 487