1// Copyright 2020 The go-ethereum Authors 2// This file is part of the go-ethereum library. 3// 4// The go-ethereum library is free software: you can redistribute it and/or modify 5// it under the terms of the GNU Lesser General Public License as published by 6// the Free Software Foundation, either version 3 of the License, or 7// (at your option) any later version. 8// 9// The go-ethereum library is distributed in the hope that it will be useful, 10// but WITHOUT ANY WARRANTY; without even the implied warranty of 11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12// GNU Lesser General Public License for more details. 13// 14// You should have received a copy of the GNU Lesser General Public License 15// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17package pruner 18 19import ( 20 "bytes" 21 "encoding/binary" 22 "errors" 23 "fmt" 24 "math" 25 "os" 26 "path/filepath" 27 "strings" 28 "time" 29 30 "github.com/ethereum/go-ethereum/common" 31 "github.com/ethereum/go-ethereum/core/rawdb" 32 "github.com/ethereum/go-ethereum/core/state/snapshot" 33 "github.com/ethereum/go-ethereum/core/types" 34 "github.com/ethereum/go-ethereum/crypto" 35 "github.com/ethereum/go-ethereum/ethdb" 36 "github.com/ethereum/go-ethereum/log" 37 "github.com/ethereum/go-ethereum/rlp" 38 "github.com/ethereum/go-ethereum/trie" 39) 40 41const ( 42 // stateBloomFilePrefix is the filename prefix of state bloom filter. 43 stateBloomFilePrefix = "statebloom" 44 45 // stateBloomFilePrefix is the filename suffix of state bloom filter. 46 stateBloomFileSuffix = "bf.gz" 47 48 // stateBloomFileTempSuffix is the filename suffix of state bloom filter 49 // while it is being written out to detect write aborts. 50 stateBloomFileTempSuffix = ".tmp" 51 52 // rangeCompactionThreshold is the minimal deleted entry number for 53 // triggering range compaction. It's a quite arbitrary number but just 54 // to avoid triggering range compaction because of small deletion. 55 rangeCompactionThreshold = 100000 56) 57 58var ( 59 // emptyRoot is the known root hash of an empty trie. 60 emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") 61 62 // emptyCode is the known hash of the empty EVM bytecode. 63 emptyCode = crypto.Keccak256(nil) 64) 65 66// Pruner is an offline tool to prune the stale state with the 67// help of the snapshot. The workflow of pruner is very simple: 68// 69// - iterate the snapshot, reconstruct the relevant state 70// - iterate the database, delete all other state entries which 71// don't belong to the target state and the genesis state 72// 73// It can take several hours(around 2 hours for mainnet) to finish 74// the whole pruning work. It's recommended to run this offline tool 75// periodically in order to release the disk usage and improve the 76// disk read performance to some extent. 77type Pruner struct { 78 db ethdb.Database 79 stateBloom *stateBloom 80 datadir string 81 trieCachePath string 82 headHeader *types.Header 83 snaptree *snapshot.Tree 84} 85 86// NewPruner creates the pruner instance. 87func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint64) (*Pruner, error) { 88 headBlock := rawdb.ReadHeadBlock(db) 89 if headBlock == nil { 90 return nil, errors.New("Failed to load head block") 91 } 92 snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, false) 93 if err != nil { 94 return nil, err // The relevant snapshot(s) might not exist 95 } 96 // Sanitize the bloom filter size if it's too small. 97 if bloomSize < 256 { 98 log.Warn("Sanitizing bloomfilter size", "provided(MB)", bloomSize, "updated(MB)", 256) 99 bloomSize = 256 100 } 101 stateBloom, err := newStateBloomWithSize(bloomSize) 102 if err != nil { 103 return nil, err 104 } 105 return &Pruner{ 106 db: db, 107 stateBloom: stateBloom, 108 datadir: datadir, 109 trieCachePath: trieCachePath, 110 headHeader: headBlock.Header(), 111 snaptree: snaptree, 112 }, nil 113} 114 115func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, middleStateRoots map[common.Hash]struct{}, start time.Time) error { 116 // Delete all stale trie nodes in the disk. With the help of state bloom 117 // the trie nodes(and codes) belong to the active state will be filtered 118 // out. A very small part of stale tries will also be filtered because of 119 // the false-positive rate of bloom filter. But the assumption is held here 120 // that the false-positive is low enough(~0.05%). The probablity of the 121 // dangling node is the state root is super low. So the dangling nodes in 122 // theory will never ever be visited again. 123 var ( 124 count int 125 size common.StorageSize 126 pstart = time.Now() 127 logged = time.Now() 128 batch = maindb.NewBatch() 129 iter = maindb.NewIterator(nil, nil) 130 ) 131 for iter.Next() { 132 key := iter.Key() 133 134 // All state entries don't belong to specific state and genesis are deleted here 135 // - trie node 136 // - legacy contract code 137 // - new-scheme contract code 138 isCode, codeKey := rawdb.IsCodeKey(key) 139 if len(key) == common.HashLength || isCode { 140 checkKey := key 141 if isCode { 142 checkKey = codeKey 143 } 144 if _, exist := middleStateRoots[common.BytesToHash(checkKey)]; exist { 145 log.Debug("Forcibly delete the middle state roots", "hash", common.BytesToHash(checkKey)) 146 } else { 147 if ok, err := stateBloom.Contain(checkKey); err != nil { 148 return err 149 } else if ok { 150 continue 151 } 152 } 153 count += 1 154 size += common.StorageSize(len(key) + len(iter.Value())) 155 batch.Delete(key) 156 157 var eta time.Duration // Realistically will never remain uninited 158 if done := binary.BigEndian.Uint64(key[:8]); done > 0 { 159 var ( 160 left = math.MaxUint64 - binary.BigEndian.Uint64(key[:8]) 161 speed = done/uint64(time.Since(pstart)/time.Millisecond+1) + 1 // +1s to avoid division by zero 162 ) 163 eta = time.Duration(left/speed) * time.Millisecond 164 } 165 if time.Since(logged) > 8*time.Second { 166 log.Info("Pruning state data", "nodes", count, "size", size, 167 "elapsed", common.PrettyDuration(time.Since(pstart)), "eta", common.PrettyDuration(eta)) 168 logged = time.Now() 169 } 170 // Recreate the iterator after every batch commit in order 171 // to allow the underlying compactor to delete the entries. 172 if batch.ValueSize() >= ethdb.IdealBatchSize { 173 batch.Write() 174 batch.Reset() 175 176 iter.Release() 177 iter = maindb.NewIterator(nil, key) 178 } 179 } 180 } 181 if batch.ValueSize() > 0 { 182 batch.Write() 183 batch.Reset() 184 } 185 iter.Release() 186 log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart))) 187 188 // Pruning is done, now drop the "useless" layers from the snapshot. 189 // Firstly, flushing the target layer into the disk. After that all 190 // diff layers below the target will all be merged into the disk. 191 if err := snaptree.Cap(root, 0); err != nil { 192 return err 193 } 194 // Secondly, flushing the snapshot journal into the disk. All diff 195 // layers upon are dropped silently. Eventually the entire snapshot 196 // tree is converted into a single disk layer with the pruning target 197 // as the root. 198 if _, err := snaptree.Journal(root); err != nil { 199 return err 200 } 201 // Delete the state bloom, it marks the entire pruning procedure is 202 // finished. If any crashes or manual exit happens before this, 203 // `RecoverPruning` will pick it up in the next restarts to redo all 204 // the things. 205 os.RemoveAll(bloomPath) 206 207 // Start compactions, will remove the deleted data from the disk immediately. 208 // Note for small pruning, the compaction is skipped. 209 if count >= rangeCompactionThreshold { 210 cstart := time.Now() 211 for b := 0x00; b <= 0xf0; b += 0x10 { 212 var ( 213 start = []byte{byte(b)} 214 end = []byte{byte(b + 0x10)} 215 ) 216 if b == 0xf0 { 217 end = nil 218 } 219 log.Info("Compacting database", "range", fmt.Sprintf("%#x-%#x", start, end), "elapsed", common.PrettyDuration(time.Since(cstart))) 220 if err := maindb.Compact(start, end); err != nil { 221 log.Error("Database compaction failed", "error", err) 222 return err 223 } 224 } 225 log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart))) 226 } 227 log.Info("State pruning successful", "pruned", size, "elapsed", common.PrettyDuration(time.Since(start))) 228 return nil 229} 230 231// Prune deletes all historical state nodes except the nodes belong to the 232// specified state version. If user doesn't specify the state version, use 233// the bottom-most snapshot diff layer as the target. 234func (p *Pruner) Prune(root common.Hash) error { 235 // If the state bloom filter is already committed previously, 236 // reuse it for pruning instead of generating a new one. It's 237 // mandatory because a part of state may already be deleted, 238 // the recovery procedure is necessary. 239 _, stateBloomRoot, err := findBloomFilter(p.datadir) 240 if err != nil { 241 return err 242 } 243 if stateBloomRoot != (common.Hash{}) { 244 return RecoverPruning(p.datadir, p.db, p.trieCachePath) 245 } 246 // If the target state root is not specified, use the HEAD-127 as the 247 // target. The reason for picking it is: 248 // - in most of the normal cases, the related state is available 249 // - the probability of this layer being reorg is very low 250 var layers []snapshot.Snapshot 251 if root == (common.Hash{}) { 252 // Retrieve all snapshot layers from the current HEAD. 253 // In theory there are 128 difflayers + 1 disk layer present, 254 // so 128 diff layers are expected to be returned. 255 layers = p.snaptree.Snapshots(p.headHeader.Root, 128, true) 256 if len(layers) != 128 { 257 // Reject if the accumulated diff layers are less than 128. It 258 // means in most of normal cases, there is no associated state 259 // with bottom-most diff layer. 260 return fmt.Errorf("snapshot not old enough yet: need %d more blocks", 128-len(layers)) 261 } 262 // Use the bottom-most diff layer as the target 263 root = layers[len(layers)-1].Root() 264 } 265 // Ensure the root is really present. The weak assumption 266 // is the presence of root can indicate the presence of the 267 // entire trie. 268 if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 { 269 // The special case is for clique based networks(rinkeby, goerli 270 // and some other private networks), it's possible that two 271 // consecutive blocks will have same root. In this case snapshot 272 // difflayer won't be created. So HEAD-127 may not paired with 273 // head-127 layer. Instead the paired layer is higher than the 274 // bottom-most diff layer. Try to find the bottom-most snapshot 275 // layer with state available. 276 // 277 // Note HEAD and HEAD-1 is ignored. Usually there is the associated 278 // state available, but we don't want to use the topmost state 279 // as the pruning target. 280 var found bool 281 for i := len(layers) - 2; i >= 2; i-- { 282 if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 { 283 root = layers[i].Root() 284 found = true 285 log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i) 286 break 287 } 288 } 289 if !found { 290 if len(layers) > 0 { 291 return errors.New("no snapshot paired state") 292 } 293 return fmt.Errorf("associated state[%x] is not present", root) 294 } 295 } else { 296 if len(layers) > 0 { 297 log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.headHeader.Number.Uint64()-127) 298 } else { 299 log.Info("Selecting user-specified state as the pruning target", "root", root) 300 } 301 } 302 // Before start the pruning, delete the clean trie cache first. 303 // It's necessary otherwise in the next restart we will hit the 304 // deleted state root in the "clean cache" so that the incomplete 305 // state is picked for usage. 306 deleteCleanTrieCache(p.trieCachePath) 307 308 // All the state roots of the middle layer should be forcibly pruned, 309 // otherwise the dangling state will be left. 310 middleRoots := make(map[common.Hash]struct{}) 311 for _, layer := range layers { 312 if layer.Root() == root { 313 break 314 } 315 middleRoots[layer.Root()] = struct{}{} 316 } 317 // Traverse the target state, re-construct the whole state trie and 318 // commit to the given bloom filter. 319 start := time.Now() 320 if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil { 321 return err 322 } 323 // Traverse the genesis, put all genesis state entries into the 324 // bloom filter too. 325 if err := extractGenesis(p.db, p.stateBloom); err != nil { 326 return err 327 } 328 filterName := bloomFilterName(p.datadir, root) 329 330 log.Info("Writing state bloom to disk", "name", filterName) 331 if err := p.stateBloom.Commit(filterName, filterName+stateBloomFileTempSuffix); err != nil { 332 return err 333 } 334 log.Info("State bloom filter committed", "name", filterName) 335 return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start) 336} 337 338// RecoverPruning will resume the pruning procedure during the system restart. 339// This function is used in this case: user tries to prune state data, but the 340// system was interrupted midway because of crash or manual-kill. In this case 341// if the bloom filter for filtering active state is already constructed, the 342// pruning can be resumed. What's more if the bloom filter is constructed, the 343// pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left 344// in the disk. 345func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) error { 346 stateBloomPath, stateBloomRoot, err := findBloomFilter(datadir) 347 if err != nil { 348 return err 349 } 350 if stateBloomPath == "" { 351 return nil // nothing to recover 352 } 353 headBlock := rawdb.ReadHeadBlock(db) 354 if headBlock == nil { 355 return errors.New("Failed to load head block") 356 } 357 // Initialize the snapshot tree in recovery mode to handle this special case: 358 // - Users run the `prune-state` command multiple times 359 // - Neither these `prune-state` running is finished(e.g. interrupted manually) 360 // - The state bloom filter is already generated, a part of state is deleted, 361 // so that resuming the pruning here is mandatory 362 // - The state HEAD is rewound already because of multiple incomplete `prune-state` 363 // In this case, even the state HEAD is not exactly matched with snapshot, it 364 // still feasible to recover the pruning correctly. 365 snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, true) 366 if err != nil { 367 return err // The relevant snapshot(s) might not exist 368 } 369 stateBloom, err := NewStateBloomFromDisk(stateBloomPath) 370 if err != nil { 371 return err 372 } 373 log.Info("Loaded state bloom filter", "path", stateBloomPath) 374 375 // Before start the pruning, delete the clean trie cache first. 376 // It's necessary otherwise in the next restart we will hit the 377 // deleted state root in the "clean cache" so that the incomplete 378 // state is picked for usage. 379 deleteCleanTrieCache(trieCachePath) 380 381 // All the state roots of the middle layers should be forcibly pruned, 382 // otherwise the dangling state will be left. 383 var ( 384 found bool 385 layers = snaptree.Snapshots(headBlock.Root(), 128, true) 386 middleRoots = make(map[common.Hash]struct{}) 387 ) 388 for _, layer := range layers { 389 if layer.Root() == stateBloomRoot { 390 found = true 391 break 392 } 393 middleRoots[layer.Root()] = struct{}{} 394 } 395 if !found { 396 log.Error("Pruning target state is not existent") 397 return errors.New("non-existent target state") 398 } 399 return prune(snaptree, stateBloomRoot, db, stateBloom, stateBloomPath, middleRoots, time.Now()) 400} 401 402// extractGenesis loads the genesis state and commits all the state entries 403// into the given bloomfilter. 404func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error { 405 genesisHash := rawdb.ReadCanonicalHash(db, 0) 406 if genesisHash == (common.Hash{}) { 407 return errors.New("missing genesis hash") 408 } 409 genesis := rawdb.ReadBlock(db, genesisHash, 0) 410 if genesis == nil { 411 return errors.New("missing genesis block") 412 } 413 t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db)) 414 if err != nil { 415 return err 416 } 417 accIter := t.NodeIterator(nil) 418 for accIter.Next(true) { 419 hash := accIter.Hash() 420 421 // Embedded nodes don't have hash. 422 if hash != (common.Hash{}) { 423 stateBloom.Put(hash.Bytes(), nil) 424 } 425 // If it's a leaf node, yes we are touching an account, 426 // dig into the storage trie further. 427 if accIter.Leaf() { 428 var acc types.StateAccount 429 if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil { 430 return err 431 } 432 if acc.Root != emptyRoot { 433 storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(db)) 434 if err != nil { 435 return err 436 } 437 storageIter := storageTrie.NodeIterator(nil) 438 for storageIter.Next(true) { 439 hash := storageIter.Hash() 440 if hash != (common.Hash{}) { 441 stateBloom.Put(hash.Bytes(), nil) 442 } 443 } 444 if storageIter.Error() != nil { 445 return storageIter.Error() 446 } 447 } 448 if !bytes.Equal(acc.CodeHash, emptyCode) { 449 stateBloom.Put(acc.CodeHash, nil) 450 } 451 } 452 } 453 return accIter.Error() 454} 455 456func bloomFilterName(datadir string, hash common.Hash) string { 457 return filepath.Join(datadir, fmt.Sprintf("%s.%s.%s", stateBloomFilePrefix, hash.Hex(), stateBloomFileSuffix)) 458} 459 460func isBloomFilter(filename string) (bool, common.Hash) { 461 filename = filepath.Base(filename) 462 if strings.HasPrefix(filename, stateBloomFilePrefix) && strings.HasSuffix(filename, stateBloomFileSuffix) { 463 return true, common.HexToHash(filename[len(stateBloomFilePrefix)+1 : len(filename)-len(stateBloomFileSuffix)-1]) 464 } 465 return false, common.Hash{} 466} 467 468func findBloomFilter(datadir string) (string, common.Hash, error) { 469 var ( 470 stateBloomPath string 471 stateBloomRoot common.Hash 472 ) 473 if err := filepath.Walk(datadir, func(path string, info os.FileInfo, err error) error { 474 if info != nil && !info.IsDir() { 475 ok, root := isBloomFilter(path) 476 if ok { 477 stateBloomPath = path 478 stateBloomRoot = root 479 } 480 } 481 return nil 482 }); err != nil { 483 return "", common.Hash{}, err 484 } 485 return stateBloomPath, stateBloomRoot, nil 486} 487 488const warningLog = ` 489 490WARNING! 491 492The clean trie cache is not found. Please delete it by yourself after the 493pruning. Remember don't start the Geth without deleting the clean trie cache 494otherwise the entire database may be damaged! 495 496Check the command description "geth snapshot prune-state --help" for more details. 497` 498 499func deleteCleanTrieCache(path string) { 500 if _, err := os.Stat(path); os.IsNotExist(err) { 501 log.Warn(warningLog) 502 return 503 } 504 os.RemoveAll(path) 505 log.Info("Deleted trie clean cache", "path", path) 506} 507