1// Copyright 2020 Brad Fitzpatrick. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package intern lets you make smaller comparable values by boxing
6// a larger comparable value (such as a 16 byte string header) down
7// into a globally unique 8 byte pointer.
8//
9// The globally unique pointers are garbage collected with weak
10// references and finalizers. This package hides that.
11//
12// The GitHub repo is https://github.com/go4org/intern
13package intern // import "go4.org/intern"
14
15import (
16	"os"
17	"runtime"
18	"strconv"
19	"sync"
20	"unsafe"
21
22	_ "go4.org/unsafe/assume-no-moving-gc"
23)
24
25// A Value pointer is the handle to an underlying comparable value.
26// See func Get for how Value pointers may be used.
27type Value struct {
28	_      [0]func() // prevent people from accidentally using value type as comparable
29	cmpVal interface{}
30	// resurrected is guarded by mu (for all instances of Value).
31	// It is set true whenever v is synthesized from a uintptr.
32	resurrected bool
33}
34
35// Get returns the comparable value passed to the Get func
36// that returned v.
37func (v *Value) Get() interface{} { return v.cmpVal }
38
39// key is a key in our global value map.
40// It contains type-specialized fields to avoid allocations
41// when converting common types to empty interfaces.
42type key struct {
43	s      string
44	cmpVal interface{}
45	// isString reports whether key contains a string.
46	// Without it, the zero value of key is ambiguous.
47	isString bool
48}
49
50// keyFor returns a key to use with cmpVal.
51func keyFor(cmpVal interface{}) key {
52	if s, ok := cmpVal.(string); ok {
53		return key{s: s, isString: true}
54	}
55	return key{cmpVal: cmpVal}
56}
57
58// Value returns a *Value built from k.
59func (k key) Value() *Value {
60	if k.isString {
61		return &Value{cmpVal: k.s}
62	}
63	return &Value{cmpVal: k.cmpVal}
64}
65
66var (
67	// mu guards valMap, a weakref map of *Value by underlying value.
68	// It also guards the resurrected field of all *Values.
69	mu      sync.Mutex
70	valMap  = map[key]uintptr{} // to uintptr(*Value)
71	valSafe = safeMap()         // non-nil in safe+leaky mode
72)
73
74// safeMap returns a non-nil map if we're in safe-but-leaky mode,
75// as controlled by GO4_INTERN_SAFE_BUT_LEAKY.
76func safeMap() map[key]*Value {
77	if v, _ := strconv.ParseBool(os.Getenv("GO4_INTERN_SAFE_BUT_LEAKY")); v {
78		return map[key]*Value{}
79	}
80	return nil
81}
82
83// Get returns a pointer representing the comparable value cmpVal.
84//
85// The returned pointer will be the same for Get(v) and Get(v2)
86// if and only if v == v2, and can be used as a map key.
87func Get(cmpVal interface{}) *Value {
88	return get(keyFor(cmpVal))
89}
90
91// GetByString is identical to Get, except that it is specialized for strings.
92// This avoids an allocation from putting a string into an interface{}
93// to pass as an argument to Get.
94func GetByString(s string) *Value {
95	return get(key{s: s, isString: true})
96}
97
98// We play unsafe games that violate Go's rules (and assume a non-moving
99// collector). So we quiet Go here.
100// See the comment below Get for more implementation details.
101//go:nocheckptr
102func get(k key) *Value {
103	mu.Lock()
104	defer mu.Unlock()
105
106	var v *Value
107	if valSafe != nil {
108		v = valSafe[k]
109	} else if addr, ok := valMap[k]; ok {
110		v = (*Value)((unsafe.Pointer)(addr))
111		v.resurrected = true
112	}
113	if v != nil {
114		return v
115	}
116	v = k.Value()
117	if valSafe != nil {
118		valSafe[k] = v
119	} else {
120		// SetFinalizer before uintptr conversion (theoretical concern;
121		// see https://github.com/go4org/intern/issues/13)
122		runtime.SetFinalizer(v, finalize)
123		valMap[k] = uintptr(unsafe.Pointer(v))
124	}
125	return v
126}
127
128func finalize(v *Value) {
129	mu.Lock()
130	defer mu.Unlock()
131	if v.resurrected {
132		// We lost the race. Somebody resurrected it while we
133		// were about to finalize it. Try again next round.
134		v.resurrected = false
135		runtime.SetFinalizer(v, finalize)
136		return
137	}
138	delete(valMap, keyFor(v.cmpVal))
139}
140
141// Interning is simple if you don't require that unused values be
142// garbage collectable. But we do require that; we don't want to be
143// DOS vector. We do this by using a uintptr to hide the pointer from
144// the garbage collector, and using a finalizer to eliminate the
145// pointer when no other code is using it.
146//
147// The obvious implementation of this is to use a
148// map[interface{}]uintptr-of-*interface{}, and set up a finalizer to
149// delete from the map. Unfortunately, this is racy. Because pointers
150// are being created in violation of Go's unsafety rules, it's
151// possible to create a pointer to a value concurrently with the GC
152// concluding that the value can be collected. There are other races
153// that break the equality invariant as well, but the use-after-free
154// will cause a runtime crash.
155//
156// To make this work, the finalizer needs to know that no references
157// have been unsafely created since the finalizer was set up. To do
158// this, values carry a "resurrected" sentinel, which gets set
159// whenever a pointer is unsafely created. If the finalizer encounters
160// the sentinel, it clears the sentinel and delays collection for one
161// additional GC cycle, by re-installing itself as finalizer. This
162// ensures that the unsafely created pointer is visible to the GC, and
163// will correctly prevent collection.
164//
165// This technique does mean that interned values that get reused take
166// at least 3 GC cycles to fully collect (1 to clear the sentinel, 1
167// to clean up the unsafe map, 1 to be actually deleted).
168//
169// @ianlancetaylor commented in
170// https://github.com/golang/go/issues/41303#issuecomment-717401656
171// that it is possible to implement weak references in terms of
172// finalizers without unsafe. Unfortunately, the approach he outlined
173// does not work here, for two reasons. First, there is no way to
174// construct a strong pointer out of a weak pointer; our map stores
175// weak pointers, but we must return strong pointers to callers.
176// Second, and more fundamentally, we must return not just _a_ strong
177// pointer to callers, but _the same_ strong pointer to callers. In
178// order to return _the same_ strong pointer to callers, we must track
179// it, which is exactly what we cannot do with strong pointers.
180//
181// See https://github.com/inetaf/netaddr/issues/53 for more
182// discussion, and https://github.com/go4org/intern/issues/2 for an
183// illustration of the subtleties at play.
184