1// Fully persistent data structures. A persistent data structure is a data
2// structure that always preserves the previous version of itself when
3// it is modified. Such data structures are effectively immutable,
4// as their operations do not update the structure in-place, but instead
5// always yield a new structure.
6//
7// Persistent
8// data structures typically share structure among themselves.  This allows
9// operations to avoid copying the entire data structure.
10package ps
11
12import (
13	"bytes"
14	"fmt"
15)
16
17// Any is a shorthand for Go's verbose interface{} type.
18type Any interface{}
19
20// A Map associates unique keys (type string) with values (type Any).
21type Map interface {
22	// IsNil returns true if the Map is empty
23	IsNil() bool
24
25	// Set returns a new map in which key and value are associated.
26	// If the key didn't exist before, it's created; otherwise, the
27	// associated value is changed.
28	// This operation is O(log N) in the number of keys.
29	Set(key string, value Any) Map
30
31	// Delete returns a new map with the association for key, if any, removed.
32	// This operation is O(log N) in the number of keys.
33	Delete(key string) Map
34
35	// Lookup returns the value associated with a key, if any.  If the key
36	// exists, the second return value is true; otherwise, false.
37	// This operation is O(log N) in the number of keys.
38	Lookup(key string) (Any, bool)
39
40	// Size returns the number of key value pairs in the map.
41	// This takes O(1) time.
42	Size() int
43
44	// ForEach executes a callback on each key value pair in the map.
45	ForEach(f func(key string, val Any))
46
47	// Keys returns a slice with all keys in this map.
48	// This operation is O(N) in the number of keys.
49	Keys() []string
50
51	String() string
52}
53
54// Immutable (i.e. persistent) associative array
55const childCount = 8
56const shiftSize = 3
57
58type tree struct {
59	count    int
60	hash     uint64 // hash of the key (used for tree balancing)
61	key      string
62	value    Any
63	children [childCount]*tree
64}
65
66var nilMap = &tree{}
67
68// Recursively set nilMap's subtrees to point at itself.
69// This eliminates all nil pointers in the map structure.
70// All map nodes are created by cloning this structure so
71// they avoid the problem too.
72func init() {
73	for i := range nilMap.children {
74		nilMap.children[i] = nilMap
75	}
76}
77
78// NewMap allocates a new, persistent map from strings to values of
79// any type.
80// This is currently implemented as a path-copying binary tree.
81func NewMap() Map {
82	return nilMap
83}
84
85func (self *tree) IsNil() bool {
86	return self == nilMap
87}
88
89// clone returns an exact duplicate of a tree node
90func (self *tree) clone() *tree {
91	var m tree
92	m = *self
93	return &m
94}
95
96// constants for FNV-1a hash algorithm
97const (
98	offset64 uint64 = 14695981039346656037
99	prime64  uint64 = 1099511628211
100)
101
102// hashKey returns a hash code for a given string
103func hashKey(key string) uint64 {
104	hash := offset64
105	for _, codepoint := range key {
106		hash ^= uint64(codepoint)
107		hash *= prime64
108	}
109	return hash
110}
111
112// Set returns a new map similar to this one but with key and value
113// associated.  If the key didn't exist, it's created; otherwise, the
114// associated value is changed.
115func (self *tree) Set(key string, value Any) Map {
116	hash := hashKey(key)
117	return setLowLevel(self, hash, hash, key, value)
118}
119
120func setLowLevel(self *tree, partialHash, hash uint64, key string, value Any) *tree {
121	if self.IsNil() { // an empty tree is easy
122		m := self.clone()
123		m.count = 1
124		m.hash = hash
125		m.key = key
126		m.value = value
127		return m
128	}
129
130	if hash != self.hash {
131		m := self.clone()
132		i := partialHash % childCount
133		m.children[i] = setLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value)
134		recalculateCount(m)
135		return m
136	}
137
138	// replacing a key's previous value
139	m := self.clone()
140	m.value = value
141	return m
142}
143
144// modifies a map by recalculating its key count based on the counts
145// of its subtrees
146func recalculateCount(m *tree) {
147	count := 0
148	for _, t := range m.children {
149		count += t.Size()
150	}
151	m.count = count + 1 // add one to count ourself
152}
153
154func (m *tree) Delete(key string) Map {
155	hash := hashKey(key)
156	newMap, _ := deleteLowLevel(m, hash, hash)
157	return newMap
158}
159
160func deleteLowLevel(self *tree, partialHash, hash uint64) (*tree, bool) {
161	// empty trees are easy
162	if self.IsNil() {
163		return self, false
164	}
165
166	if hash != self.hash {
167		i := partialHash % childCount
168		child, found := deleteLowLevel(self.children[i], partialHash>>shiftSize, hash)
169		if !found {
170			return self, false
171		}
172		newMap := self.clone()
173		newMap.children[i] = child
174		recalculateCount(newMap)
175		return newMap, true // ? this wasn't in the original code
176	}
177
178	// we must delete our own node
179	if self.isLeaf() { // we have no children
180		return nilMap, true
181	}
182	/*
183	   if self.subtreeCount() == 1 { // only one subtree
184	       for _, t := range self.children {
185	           if t != nilMap {
186	               return t, true
187	           }
188	       }
189	       panic("Tree with 1 subtree actually had no subtrees")
190	   }
191	*/
192
193	// find a node to replace us
194	i := -1
195	size := -1
196	for j, t := range self.children {
197		if t.Size() > size {
198			i = j
199			size = t.Size()
200		}
201	}
202
203	// make chosen leaf smaller
204	replacement, child := self.children[i].deleteLeftmost()
205	newMap := replacement.clone()
206	for j := range self.children {
207		if j == i {
208			newMap.children[j] = child
209		} else {
210			newMap.children[j] = self.children[j]
211		}
212	}
213	recalculateCount(newMap)
214	return newMap, true
215}
216
217// delete the leftmost node in a tree returning the node that
218// was deleted and the tree left over after its deletion
219func (m *tree) deleteLeftmost() (*tree, *tree) {
220	if m.isLeaf() {
221		return m, nilMap
222	}
223
224	for i, t := range m.children {
225		if t != nilMap {
226			deleted, child := t.deleteLeftmost()
227			newMap := m.clone()
228			newMap.children[i] = child
229			recalculateCount(newMap)
230			return deleted, newMap
231		}
232	}
233	panic("Tree isn't a leaf but also had no children. How does that happen?")
234}
235
236// isLeaf returns true if this is a leaf node
237func (m *tree) isLeaf() bool {
238	return m.Size() == 1
239}
240
241// returns the number of child subtrees we have
242func (m *tree) subtreeCount() int {
243	count := 0
244	for _, t := range m.children {
245		if t != nilMap {
246			count++
247		}
248	}
249	return count
250}
251
252func (m *tree) Lookup(key string) (Any, bool) {
253	hash := hashKey(key)
254	return lookupLowLevel(m, hash, hash)
255}
256
257func lookupLowLevel(self *tree, partialHash, hash uint64) (Any, bool) {
258	if self.IsNil() { // an empty tree is easy
259		return nil, false
260	}
261
262	if hash != self.hash {
263		i := partialHash % childCount
264		return lookupLowLevel(self.children[i], partialHash>>shiftSize, hash)
265	}
266
267	// we found it
268	return self.value, true
269}
270
271func (m *tree) Size() int {
272	return m.count
273}
274
275func (m *tree) ForEach(f func(key string, val Any)) {
276	if m.IsNil() {
277		return
278	}
279
280	// ourself
281	f(m.key, m.value)
282
283	// children
284	for _, t := range m.children {
285		if t != nilMap {
286			t.ForEach(f)
287		}
288	}
289}
290
291func (m *tree) Keys() []string {
292	keys := make([]string, m.Size())
293	i := 0
294	m.ForEach(func(k string, v Any) {
295		keys[i] = k
296		i++
297	})
298	return keys
299}
300
301// make it easier to display maps for debugging
302func (m *tree) String() string {
303	keys := m.Keys()
304	buf := bytes.NewBufferString("{")
305	for _, key := range keys {
306		val, _ := m.Lookup(key)
307		fmt.Fprintf(buf, "%s: %s, ", key, val)
308	}
309	fmt.Fprintf(buf, "}\n")
310	return buf.String()
311}
312