1// Copyright 2020 The go-ethereum Authors
2// This file is part of the go-ethereum library.
3//
4// The go-ethereum library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Lesser General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// The go-ethereum library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Lesser General Public License for more details.
13//
14// You should have received a copy of the GNU Lesser General Public License
15// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16
17package pruner
18
19import (
20	"bytes"
21	"encoding/binary"
22	"errors"
23	"fmt"
24	"math"
25	"os"
26	"path/filepath"
27	"strings"
28	"time"
29
30	"github.com/ethereum/go-ethereum/common"
31	"github.com/ethereum/go-ethereum/core/rawdb"
32	"github.com/ethereum/go-ethereum/core/state/snapshot"
33	"github.com/ethereum/go-ethereum/core/types"
34	"github.com/ethereum/go-ethereum/crypto"
35	"github.com/ethereum/go-ethereum/ethdb"
36	"github.com/ethereum/go-ethereum/log"
37	"github.com/ethereum/go-ethereum/rlp"
38	"github.com/ethereum/go-ethereum/trie"
39)
40
41const (
42	// stateBloomFilePrefix is the filename prefix of state bloom filter.
43	stateBloomFilePrefix = "statebloom"
44
45	// stateBloomFilePrefix is the filename suffix of state bloom filter.
46	stateBloomFileSuffix = "bf.gz"
47
48	// stateBloomFileTempSuffix is the filename suffix of state bloom filter
49	// while it is being written out to detect write aborts.
50	stateBloomFileTempSuffix = ".tmp"
51
52	// rangeCompactionThreshold is the minimal deleted entry number for
53	// triggering range compaction. It's a quite arbitrary number but just
54	// to avoid triggering range compaction because of small deletion.
55	rangeCompactionThreshold = 100000
56)
57
58var (
59	// emptyRoot is the known root hash of an empty trie.
60	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
61
62	// emptyCode is the known hash of the empty EVM bytecode.
63	emptyCode = crypto.Keccak256(nil)
64)
65
66// Pruner is an offline tool to prune the stale state with the
67// help of the snapshot. The workflow of pruner is very simple:
68//
69// - iterate the snapshot, reconstruct the relevant state
70// - iterate the database, delete all other state entries which
71//   don't belong to the target state and the genesis state
72//
73// It can take several hours(around 2 hours for mainnet) to finish
74// the whole pruning work. It's recommended to run this offline tool
75// periodically in order to release the disk usage and improve the
76// disk read performance to some extent.
77type Pruner struct {
78	db            ethdb.Database
79	stateBloom    *stateBloom
80	datadir       string
81	trieCachePath string
82	headHeader    *types.Header
83	snaptree      *snapshot.Tree
84}
85
86// NewPruner creates the pruner instance.
87func NewPruner(db ethdb.Database, datadir, trieCachePath string, bloomSize uint64) (*Pruner, error) {
88	headBlock := rawdb.ReadHeadBlock(db)
89	if headBlock == nil {
90		return nil, errors.New("Failed to load head block")
91	}
92	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, false)
93	if err != nil {
94		return nil, err // The relevant snapshot(s) might not exist
95	}
96	// Sanitize the bloom filter size if it's too small.
97	if bloomSize < 256 {
98		log.Warn("Sanitizing bloomfilter size", "provided(MB)", bloomSize, "updated(MB)", 256)
99		bloomSize = 256
100	}
101	stateBloom, err := newStateBloomWithSize(bloomSize)
102	if err != nil {
103		return nil, err
104	}
105	return &Pruner{
106		db:            db,
107		stateBloom:    stateBloom,
108		datadir:       datadir,
109		trieCachePath: trieCachePath,
110		headHeader:    headBlock.Header(),
111		snaptree:      snaptree,
112	}, nil
113}
114
115func prune(snaptree *snapshot.Tree, root common.Hash, maindb ethdb.Database, stateBloom *stateBloom, bloomPath string, middleStateRoots map[common.Hash]struct{}, start time.Time) error {
116	// Delete all stale trie nodes in the disk. With the help of state bloom
117	// the trie nodes(and codes) belong to the active state will be filtered
118	// out. A very small part of stale tries will also be filtered because of
119	// the false-positive rate of bloom filter. But the assumption is held here
120	// that the false-positive is low enough(~0.05%). The probablity of the
121	// dangling node is the state root is super low. So the dangling nodes in
122	// theory will never ever be visited again.
123	var (
124		count  int
125		size   common.StorageSize
126		pstart = time.Now()
127		logged = time.Now()
128		batch  = maindb.NewBatch()
129		iter   = maindb.NewIterator(nil, nil)
130	)
131	for iter.Next() {
132		key := iter.Key()
133
134		// All state entries don't belong to specific state and genesis are deleted here
135		// - trie node
136		// - legacy contract code
137		// - new-scheme contract code
138		isCode, codeKey := rawdb.IsCodeKey(key)
139		if len(key) == common.HashLength || isCode {
140			checkKey := key
141			if isCode {
142				checkKey = codeKey
143			}
144			if _, exist := middleStateRoots[common.BytesToHash(checkKey)]; exist {
145				log.Debug("Forcibly delete the middle state roots", "hash", common.BytesToHash(checkKey))
146			} else {
147				if ok, err := stateBloom.Contain(checkKey); err != nil {
148					return err
149				} else if ok {
150					continue
151				}
152			}
153			count += 1
154			size += common.StorageSize(len(key) + len(iter.Value()))
155			batch.Delete(key)
156
157			var eta time.Duration // Realistically will never remain uninited
158			if done := binary.BigEndian.Uint64(key[:8]); done > 0 {
159				var (
160					left  = math.MaxUint64 - binary.BigEndian.Uint64(key[:8])
161					speed = done/uint64(time.Since(pstart)/time.Millisecond+1) + 1 // +1s to avoid division by zero
162				)
163				eta = time.Duration(left/speed) * time.Millisecond
164			}
165			if time.Since(logged) > 8*time.Second {
166				log.Info("Pruning state data", "nodes", count, "size", size,
167					"elapsed", common.PrettyDuration(time.Since(pstart)), "eta", common.PrettyDuration(eta))
168				logged = time.Now()
169			}
170			// Recreate the iterator after every batch commit in order
171			// to allow the underlying compactor to delete the entries.
172			if batch.ValueSize() >= ethdb.IdealBatchSize {
173				batch.Write()
174				batch.Reset()
175
176				iter.Release()
177				iter = maindb.NewIterator(nil, key)
178			}
179		}
180	}
181	if batch.ValueSize() > 0 {
182		batch.Write()
183		batch.Reset()
184	}
185	iter.Release()
186	log.Info("Pruned state data", "nodes", count, "size", size, "elapsed", common.PrettyDuration(time.Since(pstart)))
187
188	// Pruning is done, now drop the "useless" layers from the snapshot.
189	// Firstly, flushing the target layer into the disk. After that all
190	// diff layers below the target will all be merged into the disk.
191	if err := snaptree.Cap(root, 0); err != nil {
192		return err
193	}
194	// Secondly, flushing the snapshot journal into the disk. All diff
195	// layers upon are dropped silently. Eventually the entire snapshot
196	// tree is converted into a single disk layer with the pruning target
197	// as the root.
198	if _, err := snaptree.Journal(root); err != nil {
199		return err
200	}
201	// Delete the state bloom, it marks the entire pruning procedure is
202	// finished. If any crashes or manual exit happens before this,
203	// `RecoverPruning` will pick it up in the next restarts to redo all
204	// the things.
205	os.RemoveAll(bloomPath)
206
207	// Start compactions, will remove the deleted data from the disk immediately.
208	// Note for small pruning, the compaction is skipped.
209	if count >= rangeCompactionThreshold {
210		cstart := time.Now()
211		for b := 0x00; b <= 0xf0; b += 0x10 {
212			var (
213				start = []byte{byte(b)}
214				end   = []byte{byte(b + 0x10)}
215			)
216			if b == 0xf0 {
217				end = nil
218			}
219			log.Info("Compacting database", "range", fmt.Sprintf("%#x-%#x", start, end), "elapsed", common.PrettyDuration(time.Since(cstart)))
220			if err := maindb.Compact(start, end); err != nil {
221				log.Error("Database compaction failed", "error", err)
222				return err
223			}
224		}
225		log.Info("Database compaction finished", "elapsed", common.PrettyDuration(time.Since(cstart)))
226	}
227	log.Info("State pruning successful", "pruned", size, "elapsed", common.PrettyDuration(time.Since(start)))
228	return nil
229}
230
231// Prune deletes all historical state nodes except the nodes belong to the
232// specified state version. If user doesn't specify the state version, use
233// the bottom-most snapshot diff layer as the target.
234func (p *Pruner) Prune(root common.Hash) error {
235	// If the state bloom filter is already committed previously,
236	// reuse it for pruning instead of generating a new one. It's
237	// mandatory because a part of state may already be deleted,
238	// the recovery procedure is necessary.
239	_, stateBloomRoot, err := findBloomFilter(p.datadir)
240	if err != nil {
241		return err
242	}
243	if stateBloomRoot != (common.Hash{}) {
244		return RecoverPruning(p.datadir, p.db, p.trieCachePath)
245	}
246	// If the target state root is not specified, use the HEAD-127 as the
247	// target. The reason for picking it is:
248	// - in most of the normal cases, the related state is available
249	// - the probability of this layer being reorg is very low
250	var layers []snapshot.Snapshot
251	if root == (common.Hash{}) {
252		// Retrieve all snapshot layers from the current HEAD.
253		// In theory there are 128 difflayers + 1 disk layer present,
254		// so 128 diff layers are expected to be returned.
255		layers = p.snaptree.Snapshots(p.headHeader.Root, 128, true)
256		if len(layers) != 128 {
257			// Reject if the accumulated diff layers are less than 128. It
258			// means in most of normal cases, there is no associated state
259			// with bottom-most diff layer.
260			return fmt.Errorf("snapshot not old enough yet: need %d more blocks", 128-len(layers))
261		}
262		// Use the bottom-most diff layer as the target
263		root = layers[len(layers)-1].Root()
264	}
265	// Ensure the root is really present. The weak assumption
266	// is the presence of root can indicate the presence of the
267	// entire trie.
268	if blob := rawdb.ReadTrieNode(p.db, root); len(blob) == 0 {
269		// The special case is for clique based networks(rinkeby, goerli
270		// and some other private networks), it's possible that two
271		// consecutive blocks will have same root. In this case snapshot
272		// difflayer won't be created. So HEAD-127 may not paired with
273		// head-127 layer. Instead the paired layer is higher than the
274		// bottom-most diff layer. Try to find the bottom-most snapshot
275		// layer with state available.
276		//
277		// Note HEAD and HEAD-1 is ignored. Usually there is the associated
278		// state available, but we don't want to use the topmost state
279		// as the pruning target.
280		var found bool
281		for i := len(layers) - 2; i >= 2; i-- {
282			if blob := rawdb.ReadTrieNode(p.db, layers[i].Root()); len(blob) != 0 {
283				root = layers[i].Root()
284				found = true
285				log.Info("Selecting middle-layer as the pruning target", "root", root, "depth", i)
286				break
287			}
288		}
289		if !found {
290			if len(layers) > 0 {
291				return errors.New("no snapshot paired state")
292			}
293			return fmt.Errorf("associated state[%x] is not present", root)
294		}
295	} else {
296		if len(layers) > 0 {
297			log.Info("Selecting bottom-most difflayer as the pruning target", "root", root, "height", p.headHeader.Number.Uint64()-127)
298		} else {
299			log.Info("Selecting user-specified state as the pruning target", "root", root)
300		}
301	}
302	// Before start the pruning, delete the clean trie cache first.
303	// It's necessary otherwise in the next restart we will hit the
304	// deleted state root in the "clean cache" so that the incomplete
305	// state is picked for usage.
306	deleteCleanTrieCache(p.trieCachePath)
307
308	// All the state roots of the middle layer should be forcibly pruned,
309	// otherwise the dangling state will be left.
310	middleRoots := make(map[common.Hash]struct{})
311	for _, layer := range layers {
312		if layer.Root() == root {
313			break
314		}
315		middleRoots[layer.Root()] = struct{}{}
316	}
317	// Traverse the target state, re-construct the whole state trie and
318	// commit to the given bloom filter.
319	start := time.Now()
320	if err := snapshot.GenerateTrie(p.snaptree, root, p.db, p.stateBloom); err != nil {
321		return err
322	}
323	// Traverse the genesis, put all genesis state entries into the
324	// bloom filter too.
325	if err := extractGenesis(p.db, p.stateBloom); err != nil {
326		return err
327	}
328	filterName := bloomFilterName(p.datadir, root)
329
330	log.Info("Writing state bloom to disk", "name", filterName)
331	if err := p.stateBloom.Commit(filterName, filterName+stateBloomFileTempSuffix); err != nil {
332		return err
333	}
334	log.Info("State bloom filter committed", "name", filterName)
335	return prune(p.snaptree, root, p.db, p.stateBloom, filterName, middleRoots, start)
336}
337
338// RecoverPruning will resume the pruning procedure during the system restart.
339// This function is used in this case: user tries to prune state data, but the
340// system was interrupted midway because of crash or manual-kill. In this case
341// if the bloom filter for filtering active state is already constructed, the
342// pruning can be resumed. What's more if the bloom filter is constructed, the
343// pruning **has to be resumed**. Otherwise a lot of dangling nodes may be left
344// in the disk.
345func RecoverPruning(datadir string, db ethdb.Database, trieCachePath string) error {
346	stateBloomPath, stateBloomRoot, err := findBloomFilter(datadir)
347	if err != nil {
348		return err
349	}
350	if stateBloomPath == "" {
351		return nil // nothing to recover
352	}
353	headBlock := rawdb.ReadHeadBlock(db)
354	if headBlock == nil {
355		return errors.New("Failed to load head block")
356	}
357	// Initialize the snapshot tree in recovery mode to handle this special case:
358	// - Users run the `prune-state` command multiple times
359	// - Neither these `prune-state` running is finished(e.g. interrupted manually)
360	// - The state bloom filter is already generated, a part of state is deleted,
361	//   so that resuming the pruning here is mandatory
362	// - The state HEAD is rewound already because of multiple incomplete `prune-state`
363	// In this case, even the state HEAD is not exactly matched with snapshot, it
364	// still feasible to recover the pruning correctly.
365	snaptree, err := snapshot.New(db, trie.NewDatabase(db), 256, headBlock.Root(), false, false, true)
366	if err != nil {
367		return err // The relevant snapshot(s) might not exist
368	}
369	stateBloom, err := NewStateBloomFromDisk(stateBloomPath)
370	if err != nil {
371		return err
372	}
373	log.Info("Loaded state bloom filter", "path", stateBloomPath)
374
375	// Before start the pruning, delete the clean trie cache first.
376	// It's necessary otherwise in the next restart we will hit the
377	// deleted state root in the "clean cache" so that the incomplete
378	// state is picked for usage.
379	deleteCleanTrieCache(trieCachePath)
380
381	// All the state roots of the middle layers should be forcibly pruned,
382	// otherwise the dangling state will be left.
383	var (
384		found       bool
385		layers      = snaptree.Snapshots(headBlock.Root(), 128, true)
386		middleRoots = make(map[common.Hash]struct{})
387	)
388	for _, layer := range layers {
389		if layer.Root() == stateBloomRoot {
390			found = true
391			break
392		}
393		middleRoots[layer.Root()] = struct{}{}
394	}
395	if !found {
396		log.Error("Pruning target state is not existent")
397		return errors.New("non-existent target state")
398	}
399	return prune(snaptree, stateBloomRoot, db, stateBloom, stateBloomPath, middleRoots, time.Now())
400}
401
402// extractGenesis loads the genesis state and commits all the state entries
403// into the given bloomfilter.
404func extractGenesis(db ethdb.Database, stateBloom *stateBloom) error {
405	genesisHash := rawdb.ReadCanonicalHash(db, 0)
406	if genesisHash == (common.Hash{}) {
407		return errors.New("missing genesis hash")
408	}
409	genesis := rawdb.ReadBlock(db, genesisHash, 0)
410	if genesis == nil {
411		return errors.New("missing genesis block")
412	}
413	t, err := trie.NewSecure(genesis.Root(), trie.NewDatabase(db))
414	if err != nil {
415		return err
416	}
417	accIter := t.NodeIterator(nil)
418	for accIter.Next(true) {
419		hash := accIter.Hash()
420
421		// Embedded nodes don't have hash.
422		if hash != (common.Hash{}) {
423			stateBloom.Put(hash.Bytes(), nil)
424		}
425		// If it's a leaf node, yes we are touching an account,
426		// dig into the storage trie further.
427		if accIter.Leaf() {
428			var acc types.StateAccount
429			if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil {
430				return err
431			}
432			if acc.Root != emptyRoot {
433				storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(db))
434				if err != nil {
435					return err
436				}
437				storageIter := storageTrie.NodeIterator(nil)
438				for storageIter.Next(true) {
439					hash := storageIter.Hash()
440					if hash != (common.Hash{}) {
441						stateBloom.Put(hash.Bytes(), nil)
442					}
443				}
444				if storageIter.Error() != nil {
445					return storageIter.Error()
446				}
447			}
448			if !bytes.Equal(acc.CodeHash, emptyCode) {
449				stateBloom.Put(acc.CodeHash, nil)
450			}
451		}
452	}
453	return accIter.Error()
454}
455
456func bloomFilterName(datadir string, hash common.Hash) string {
457	return filepath.Join(datadir, fmt.Sprintf("%s.%s.%s", stateBloomFilePrefix, hash.Hex(), stateBloomFileSuffix))
458}
459
460func isBloomFilter(filename string) (bool, common.Hash) {
461	filename = filepath.Base(filename)
462	if strings.HasPrefix(filename, stateBloomFilePrefix) && strings.HasSuffix(filename, stateBloomFileSuffix) {
463		return true, common.HexToHash(filename[len(stateBloomFilePrefix)+1 : len(filename)-len(stateBloomFileSuffix)-1])
464	}
465	return false, common.Hash{}
466}
467
468func findBloomFilter(datadir string) (string, common.Hash, error) {
469	var (
470		stateBloomPath string
471		stateBloomRoot common.Hash
472	)
473	if err := filepath.Walk(datadir, func(path string, info os.FileInfo, err error) error {
474		if info != nil && !info.IsDir() {
475			ok, root := isBloomFilter(path)
476			if ok {
477				stateBloomPath = path
478				stateBloomRoot = root
479			}
480		}
481		return nil
482	}); err != nil {
483		return "", common.Hash{}, err
484	}
485	return stateBloomPath, stateBloomRoot, nil
486}
487
488const warningLog = `
489
490WARNING!
491
492The clean trie cache is not found. Please delete it by yourself after the
493pruning. Remember don't start the Geth without deleting the clean trie cache
494otherwise the entire database may be damaged!
495
496Check the command description "geth snapshot prune-state --help" for more details.
497`
498
499func deleteCleanTrieCache(path string) {
500	if _, err := os.Stat(path); os.IsNotExist(err) {
501		log.Warn(warningLog)
502		return
503	}
504	os.RemoveAll(path)
505	log.Info("Deleted trie clean cache", "path", path)
506}
507