1// Copyright 2020 Brad Fitzpatrick. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package intern lets you make smaller comparable values by boxing 6// a larger comparable value (such as a 16 byte string header) down 7// into a globally unique 8 byte pointer. 8// 9// The globally unique pointers are garbage collected with weak 10// references and finalizers. This package hides that. 11// 12// The GitHub repo is https://github.com/go4org/intern 13package intern // import "go4.org/intern" 14 15import ( 16 "os" 17 "runtime" 18 "strconv" 19 "sync" 20 "unsafe" 21 22 _ "go4.org/unsafe/assume-no-moving-gc" 23) 24 25// A Value pointer is the handle to an underlying comparable value. 26// See func Get for how Value pointers may be used. 27type Value struct { 28 _ [0]func() // prevent people from accidentally using value type as comparable 29 cmpVal interface{} 30 // resurrected is guarded by mu (for all instances of Value). 31 // It is set true whenever v is synthesized from a uintptr. 32 resurrected bool 33} 34 35// Get returns the comparable value passed to the Get func 36// that returned v. 37func (v *Value) Get() interface{} { return v.cmpVal } 38 39// key is a key in our global value map. 40// It contains type-specialized fields to avoid allocations 41// when converting common types to empty interfaces. 42type key struct { 43 s string 44 cmpVal interface{} 45 // isString reports whether key contains a string. 46 // Without it, the zero value of key is ambiguous. 47 isString bool 48} 49 50// keyFor returns a key to use with cmpVal. 51func keyFor(cmpVal interface{}) key { 52 if s, ok := cmpVal.(string); ok { 53 return key{s: s, isString: true} 54 } 55 return key{cmpVal: cmpVal} 56} 57 58// Value returns a *Value built from k. 59func (k key) Value() *Value { 60 if k.isString { 61 return &Value{cmpVal: k.s} 62 } 63 return &Value{cmpVal: k.cmpVal} 64} 65 66var ( 67 // mu guards valMap, a weakref map of *Value by underlying value. 68 // It also guards the resurrected field of all *Values. 69 mu sync.Mutex 70 valMap = map[key]uintptr{} // to uintptr(*Value) 71 valSafe = safeMap() // non-nil in safe+leaky mode 72) 73 74// safeMap returns a non-nil map if we're in safe-but-leaky mode, 75// as controlled by GO4_INTERN_SAFE_BUT_LEAKY. 76func safeMap() map[key]*Value { 77 if v, _ := strconv.ParseBool(os.Getenv("GO4_INTERN_SAFE_BUT_LEAKY")); v { 78 return map[key]*Value{} 79 } 80 return nil 81} 82 83// Get returns a pointer representing the comparable value cmpVal. 84// 85// The returned pointer will be the same for Get(v) and Get(v2) 86// if and only if v == v2, and can be used as a map key. 87func Get(cmpVal interface{}) *Value { 88 return get(keyFor(cmpVal)) 89} 90 91// GetByString is identical to Get, except that it is specialized for strings. 92// This avoids an allocation from putting a string into an interface{} 93// to pass as an argument to Get. 94func GetByString(s string) *Value { 95 return get(key{s: s, isString: true}) 96} 97 98// We play unsafe games that violate Go's rules (and assume a non-moving 99// collector). So we quiet Go here. 100// See the comment below Get for more implementation details. 101//go:nocheckptr 102func get(k key) *Value { 103 mu.Lock() 104 defer mu.Unlock() 105 106 var v *Value 107 if valSafe != nil { 108 v = valSafe[k] 109 } else if addr, ok := valMap[k]; ok { 110 v = (*Value)((unsafe.Pointer)(addr)) 111 v.resurrected = true 112 } 113 if v != nil { 114 return v 115 } 116 v = k.Value() 117 if valSafe != nil { 118 valSafe[k] = v 119 } else { 120 // SetFinalizer before uintptr conversion (theoretical concern; 121 // see https://github.com/go4org/intern/issues/13) 122 runtime.SetFinalizer(v, finalize) 123 valMap[k] = uintptr(unsafe.Pointer(v)) 124 } 125 return v 126} 127 128func finalize(v *Value) { 129 mu.Lock() 130 defer mu.Unlock() 131 if v.resurrected { 132 // We lost the race. Somebody resurrected it while we 133 // were about to finalize it. Try again next round. 134 v.resurrected = false 135 runtime.SetFinalizer(v, finalize) 136 return 137 } 138 delete(valMap, keyFor(v.cmpVal)) 139} 140 141// Interning is simple if you don't require that unused values be 142// garbage collectable. But we do require that; we don't want to be 143// DOS vector. We do this by using a uintptr to hide the pointer from 144// the garbage collector, and using a finalizer to eliminate the 145// pointer when no other code is using it. 146// 147// The obvious implementation of this is to use a 148// map[interface{}]uintptr-of-*interface{}, and set up a finalizer to 149// delete from the map. Unfortunately, this is racy. Because pointers 150// are being created in violation of Go's unsafety rules, it's 151// possible to create a pointer to a value concurrently with the GC 152// concluding that the value can be collected. There are other races 153// that break the equality invariant as well, but the use-after-free 154// will cause a runtime crash. 155// 156// To make this work, the finalizer needs to know that no references 157// have been unsafely created since the finalizer was set up. To do 158// this, values carry a "resurrected" sentinel, which gets set 159// whenever a pointer is unsafely created. If the finalizer encounters 160// the sentinel, it clears the sentinel and delays collection for one 161// additional GC cycle, by re-installing itself as finalizer. This 162// ensures that the unsafely created pointer is visible to the GC, and 163// will correctly prevent collection. 164// 165// This technique does mean that interned values that get reused take 166// at least 3 GC cycles to fully collect (1 to clear the sentinel, 1 167// to clean up the unsafe map, 1 to be actually deleted). 168// 169// @ianlancetaylor commented in 170// https://github.com/golang/go/issues/41303#issuecomment-717401656 171// that it is possible to implement weak references in terms of 172// finalizers without unsafe. Unfortunately, the approach he outlined 173// does not work here, for two reasons. First, there is no way to 174// construct a strong pointer out of a weak pointer; our map stores 175// weak pointers, but we must return strong pointers to callers. 176// Second, and more fundamentally, we must return not just _a_ strong 177// pointer to callers, but _the same_ strong pointer to callers. In 178// order to return _the same_ strong pointer to callers, we must track 179// it, which is exactly what we cannot do with strong pointers. 180// 181// See https://github.com/inetaf/netaddr/issues/53 for more 182// discussion, and https://github.com/go4org/intern/issues/2 for an 183// illustration of the subtleties at play. 184