1// Fully persistent data structures. A persistent data structure is a data 2// structure that always preserves the previous version of itself when 3// it is modified. Such data structures are effectively immutable, 4// as their operations do not update the structure in-place, but instead 5// always yield a new structure. 6// 7// Persistent 8// data structures typically share structure among themselves. This allows 9// operations to avoid copying the entire data structure. 10package ps 11 12import ( 13 "bytes" 14 "fmt" 15) 16 17// Any is a shorthand for Go's verbose interface{} type. 18type Any interface{} 19 20// A Map associates unique keys (type string) with values (type Any). 21type Map interface { 22 // IsNil returns true if the Map is empty 23 IsNil() bool 24 25 // Set returns a new map in which key and value are associated. 26 // If the key didn't exist before, it's created; otherwise, the 27 // associated value is changed. 28 // This operation is O(log N) in the number of keys. 29 Set(key string, value Any) Map 30 31 // Delete returns a new map with the association for key, if any, removed. 32 // This operation is O(log N) in the number of keys. 33 Delete(key string) Map 34 35 // Lookup returns the value associated with a key, if any. If the key 36 // exists, the second return value is true; otherwise, false. 37 // This operation is O(log N) in the number of keys. 38 Lookup(key string) (Any, bool) 39 40 // Size returns the number of key value pairs in the map. 41 // This takes O(1) time. 42 Size() int 43 44 // ForEach executes a callback on each key value pair in the map. 45 ForEach(f func(key string, val Any)) 46 47 // Keys returns a slice with all keys in this map. 48 // This operation is O(N) in the number of keys. 49 Keys() []string 50 51 String() string 52} 53 54// Immutable (i.e. persistent) associative array 55const childCount = 8 56const shiftSize = 3 57 58type tree struct { 59 count int 60 hash uint64 // hash of the key (used for tree balancing) 61 key string 62 value Any 63 children [childCount]*tree 64} 65 66var nilMap = &tree{} 67 68// Recursively set nilMap's subtrees to point at itself. 69// This eliminates all nil pointers in the map structure. 70// All map nodes are created by cloning this structure so 71// they avoid the problem too. 72func init() { 73 for i := range nilMap.children { 74 nilMap.children[i] = nilMap 75 } 76} 77 78// NewMap allocates a new, persistent map from strings to values of 79// any type. 80// This is currently implemented as a path-copying binary tree. 81func NewMap() Map { 82 return nilMap 83} 84 85func (self *tree) IsNil() bool { 86 return self == nilMap 87} 88 89// clone returns an exact duplicate of a tree node 90func (self *tree) clone() *tree { 91 var m tree 92 m = *self 93 return &m 94} 95 96// constants for FNV-1a hash algorithm 97const ( 98 offset64 uint64 = 14695981039346656037 99 prime64 uint64 = 1099511628211 100) 101 102// hashKey returns a hash code for a given string 103func hashKey(key string) uint64 { 104 hash := offset64 105 for _, codepoint := range key { 106 hash ^= uint64(codepoint) 107 hash *= prime64 108 } 109 return hash 110} 111 112// Set returns a new map similar to this one but with key and value 113// associated. If the key didn't exist, it's created; otherwise, the 114// associated value is changed. 115func (self *tree) Set(key string, value Any) Map { 116 hash := hashKey(key) 117 return setLowLevel(self, hash, hash, key, value) 118} 119 120func setLowLevel(self *tree, partialHash, hash uint64, key string, value Any) *tree { 121 if self.IsNil() { // an empty tree is easy 122 m := self.clone() 123 m.count = 1 124 m.hash = hash 125 m.key = key 126 m.value = value 127 return m 128 } 129 130 if hash != self.hash { 131 m := self.clone() 132 i := partialHash % childCount 133 m.children[i] = setLowLevel(self.children[i], partialHash>>shiftSize, hash, key, value) 134 recalculateCount(m) 135 return m 136 } 137 138 // replacing a key's previous value 139 m := self.clone() 140 m.value = value 141 return m 142} 143 144// modifies a map by recalculating its key count based on the counts 145// of its subtrees 146func recalculateCount(m *tree) { 147 count := 0 148 for _, t := range m.children { 149 count += t.Size() 150 } 151 m.count = count + 1 // add one to count ourself 152} 153 154func (m *tree) Delete(key string) Map { 155 hash := hashKey(key) 156 newMap, _ := deleteLowLevel(m, hash, hash) 157 return newMap 158} 159 160func deleteLowLevel(self *tree, partialHash, hash uint64) (*tree, bool) { 161 // empty trees are easy 162 if self.IsNil() { 163 return self, false 164 } 165 166 if hash != self.hash { 167 i := partialHash % childCount 168 child, found := deleteLowLevel(self.children[i], partialHash>>shiftSize, hash) 169 if !found { 170 return self, false 171 } 172 newMap := self.clone() 173 newMap.children[i] = child 174 recalculateCount(newMap) 175 return newMap, true // ? this wasn't in the original code 176 } 177 178 // we must delete our own node 179 if self.isLeaf() { // we have no children 180 return nilMap, true 181 } 182 /* 183 if self.subtreeCount() == 1 { // only one subtree 184 for _, t := range self.children { 185 if t != nilMap { 186 return t, true 187 } 188 } 189 panic("Tree with 1 subtree actually had no subtrees") 190 } 191 */ 192 193 // find a node to replace us 194 i := -1 195 size := -1 196 for j, t := range self.children { 197 if t.Size() > size { 198 i = j 199 size = t.Size() 200 } 201 } 202 203 // make chosen leaf smaller 204 replacement, child := self.children[i].deleteLeftmost() 205 newMap := replacement.clone() 206 for j := range self.children { 207 if j == i { 208 newMap.children[j] = child 209 } else { 210 newMap.children[j] = self.children[j] 211 } 212 } 213 recalculateCount(newMap) 214 return newMap, true 215} 216 217// delete the leftmost node in a tree returning the node that 218// was deleted and the tree left over after its deletion 219func (m *tree) deleteLeftmost() (*tree, *tree) { 220 if m.isLeaf() { 221 return m, nilMap 222 } 223 224 for i, t := range m.children { 225 if t != nilMap { 226 deleted, child := t.deleteLeftmost() 227 newMap := m.clone() 228 newMap.children[i] = child 229 recalculateCount(newMap) 230 return deleted, newMap 231 } 232 } 233 panic("Tree isn't a leaf but also had no children. How does that happen?") 234} 235 236// isLeaf returns true if this is a leaf node 237func (m *tree) isLeaf() bool { 238 return m.Size() == 1 239} 240 241// returns the number of child subtrees we have 242func (m *tree) subtreeCount() int { 243 count := 0 244 for _, t := range m.children { 245 if t != nilMap { 246 count++ 247 } 248 } 249 return count 250} 251 252func (m *tree) Lookup(key string) (Any, bool) { 253 hash := hashKey(key) 254 return lookupLowLevel(m, hash, hash) 255} 256 257func lookupLowLevel(self *tree, partialHash, hash uint64) (Any, bool) { 258 if self.IsNil() { // an empty tree is easy 259 return nil, false 260 } 261 262 if hash != self.hash { 263 i := partialHash % childCount 264 return lookupLowLevel(self.children[i], partialHash>>shiftSize, hash) 265 } 266 267 // we found it 268 return self.value, true 269} 270 271func (m *tree) Size() int { 272 return m.count 273} 274 275func (m *tree) ForEach(f func(key string, val Any)) { 276 if m.IsNil() { 277 return 278 } 279 280 // ourself 281 f(m.key, m.value) 282 283 // children 284 for _, t := range m.children { 285 if t != nilMap { 286 t.ForEach(f) 287 } 288 } 289} 290 291func (m *tree) Keys() []string { 292 keys := make([]string, m.Size()) 293 i := 0 294 m.ForEach(func(k string, v Any) { 295 keys[i] = k 296 i++ 297 }) 298 return keys 299} 300 301// make it easier to display maps for debugging 302func (m *tree) String() string { 303 keys := m.Keys() 304 buf := bytes.NewBufferString("{") 305 for _, key := range keys { 306 val, _ := m.Lookup(key) 307 fmt.Fprintf(buf, "%s: %s, ", key, val) 308 } 309 fmt.Fprintf(buf, "}\n") 310 return buf.String() 311} 312