1// Copyright 2019 The go-ethereum Authors
2// This file is part of the go-ethereum library.
3//
4// The go-ethereum library is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Lesser General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// The go-ethereum library is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Lesser General Public License for more details.
13//
14// You should have received a copy of the GNU Lesser General Public License
15// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16
17package trie
18
19import (
20	"errors"
21	"fmt"
22	"sync"
23
24	"github.com/ethereum/go-ethereum/common"
25	"github.com/ethereum/go-ethereum/crypto"
26	"golang.org/x/crypto/sha3"
27)
28
// leafChanSize is the buffer size of leafCh. It's a pretty arbitrary number,
// chosen to allow some parallelism between the commit walk and the commit
// loop without incurring too much memory overhead.
const leafChanSize = 200
32
// leaf represents a trie leaf value. Instances are produced by store and
// passed over leafCh to commitLoop, which inserts them into the database
// and fires the leaf callback.
type leaf struct {
	size int         // size of the rlp data (estimate)
	hash common.Hash // hash of rlp data
	node node        // the node to commit
}
39
// committer is a type used for the trie Commit operation. A committer has some
// internal preallocated temp space, and also a callback that is invoked when
// leaves are committed. The leaves are passed through the `leafCh`, to allow
// some level of parallelism.
// By 'some level' of parallelism, it's still the case that all leaves will be
// processed sequentially - onleaf will never be called in parallel or out of order.
type committer struct {
	tmp sliceBuffer        // preallocated scratch space (cap 550; not used in this file — presumably shared with hashing code, verify)
	sha crypto.KeccakState // reusable keccak hasher state, used by makeHashNode

	onleaf LeafCallback // invoked by commitLoop for every committed leaf; may be nil
	leafCh chan *leaf   // channel feeding commitLoop; nil disables channel-based reporting
}
53
// committerPool is a global sync.Pool from which committers (with their
// preallocated temp space) are recycled across Commit operations.
var committerPool = sync.Pool{
	New: func() interface{} {
		return &committer{
			tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
			sha: sha3.NewLegacyKeccak256().(crypto.KeccakState),
		}
	},
}
63
64// newCommitter creates a new committer or picks one from the pool.
65func newCommitter() *committer {
66	return committerPool.Get().(*committer)
67}
68
69func returnCommitterToPool(h *committer) {
70	h.onleaf = nil
71	h.leafCh = nil
72	committerPool.Put(h)
73}
74
75// Commit collapses a node down into a hash node and inserts it into the database
76func (c *committer) Commit(n node, db *Database) (hashNode, int, error) {
77	if db == nil {
78		return nil, 0, errors.New("no db provided")
79	}
80	h, committed, err := c.commit(n, db)
81	if err != nil {
82		return nil, 0, err
83	}
84	return h.(hashNode), committed, nil
85}
86
87// commit collapses a node down into a hash node and inserts it into the database
88func (c *committer) commit(n node, db *Database) (node, int, error) {
89	// if this path is clean, use available cached data
90	hash, dirty := n.cache()
91	if hash != nil && !dirty {
92		return hash, 0, nil
93	}
94	// Commit children, then parent, and remove remove the dirty flag.
95	switch cn := n.(type) {
96	case *shortNode:
97		// Commit child
98		collapsed := cn.copy()
99
100		// If the child is fullNode, recursively commit,
101		// otherwise it can only be hashNode or valueNode.
102		var childCommitted int
103		if _, ok := cn.Val.(*fullNode); ok {
104			childV, committed, err := c.commit(cn.Val, db)
105			if err != nil {
106				return nil, 0, err
107			}
108			collapsed.Val, childCommitted = childV, committed
109		}
110		// The key needs to be copied, since we're delivering it to database
111		collapsed.Key = hexToCompact(cn.Key)
112		hashedNode := c.store(collapsed, db)
113		if hn, ok := hashedNode.(hashNode); ok {
114			return hn, childCommitted + 1, nil
115		}
116		return collapsed, childCommitted, nil
117	case *fullNode:
118		hashedKids, childCommitted, err := c.commitChildren(cn, db)
119		if err != nil {
120			return nil, 0, err
121		}
122		collapsed := cn.copy()
123		collapsed.Children = hashedKids
124
125		hashedNode := c.store(collapsed, db)
126		if hn, ok := hashedNode.(hashNode); ok {
127			return hn, childCommitted + 1, nil
128		}
129		return collapsed, childCommitted, nil
130	case hashNode:
131		return cn, 0, nil
132	default:
133		// nil, valuenode shouldn't be committed
134		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
135	}
136}
137
138// commitChildren commits the children of the given fullnode
139func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, int, error) {
140	var (
141		committed int
142		children  [17]node
143	)
144	for i := 0; i < 16; i++ {
145		child := n.Children[i]
146		if child == nil {
147			continue
148		}
149		// If it's the hashed child, save the hash value directly.
150		// Note: it's impossible that the child in range [0, 15]
151		// is a valueNode.
152		if hn, ok := child.(hashNode); ok {
153			children[i] = hn
154			continue
155		}
156		// Commit the child recursively and store the "hashed" value.
157		// Note the returned node can be some embedded nodes, so it's
158		// possible the type is not hashNode.
159		hashed, childCommitted, err := c.commit(child, db)
160		if err != nil {
161			return children, 0, err
162		}
163		children[i] = hashed
164		committed += childCommitted
165	}
166	// For the 17th child, it's possible the type is valuenode.
167	if n.Children[16] != nil {
168		children[16] = n.Children[16]
169	}
170	return children, committed, nil
171}
172
173// store hashes the node n and if we have a storage layer specified, it writes
174// the key/value pair to it and tracks any node->child references as well as any
175// node->external trie references.
176func (c *committer) store(n node, db *Database) node {
177	// Larger nodes are replaced by their hash and stored in the database.
178	var (
179		hash, _ = n.cache()
180		size    int
181	)
182	if hash == nil {
183		// This was not generated - must be a small node stored in the parent.
184		// In theory, we should apply the leafCall here if it's not nil(embedded
185		// node usually contains value). But small value(less than 32bytes) is
186		// not our target.
187		return n
188	} else {
189		// We have the hash already, estimate the RLP encoding-size of the node.
190		// The size is used for mem tracking, does not need to be exact
191		size = estimateSize(n)
192	}
193	// If we're using channel-based leaf-reporting, send to channel.
194	// The leaf channel will be active only when there an active leaf-callback
195	if c.leafCh != nil {
196		c.leafCh <- &leaf{
197			size: size,
198			hash: common.BytesToHash(hash),
199			node: n,
200		}
201	} else if db != nil {
202		// No leaf-callback used, but there's still a database. Do serial
203		// insertion
204		db.lock.Lock()
205		db.insert(common.BytesToHash(hash), size, n)
206		db.lock.Unlock()
207	}
208	return hash
209}
210
211// commitLoop does the actual insert + leaf callback for nodes.
212func (c *committer) commitLoop(db *Database) {
213	for item := range c.leafCh {
214		var (
215			hash = item.hash
216			size = item.size
217			n    = item.node
218		)
219		// We are pooling the trie nodes into an intermediate memory cache
220		db.lock.Lock()
221		db.insert(hash, size, n)
222		db.lock.Unlock()
223
224		if c.onleaf != nil {
225			switch n := n.(type) {
226			case *shortNode:
227				if child, ok := n.Val.(valueNode); ok {
228					c.onleaf(nil, nil, child, hash)
229				}
230			case *fullNode:
231				// For children in range [0, 15], it's impossible
232				// to contain valueNode. Only check the 17th child.
233				if n.Children[16] != nil {
234					c.onleaf(nil, nil, n.Children[16].(valueNode), hash)
235				}
236			}
237		}
238	}
239}
240
241func (c *committer) makeHashNode(data []byte) hashNode {
242	n := make(hashNode, c.sha.Size())
243	c.sha.Reset()
244	c.sha.Write(data)
245	c.sha.Read(n)
246	return n
247}
248
249// estimateSize estimates the size of an rlp-encoded node, without actually
250// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
251// with 1000 leafs, the only errors above 1% are on small shortnodes, where this
252// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
253func estimateSize(n node) int {
254	switch n := n.(type) {
255	case *shortNode:
256		// A short node contains a compacted key, and a value.
257		return 3 + len(n.Key) + estimateSize(n.Val)
258	case *fullNode:
259		// A full node contains up to 16 hashes (some nils), and a key
260		s := 3
261		for i := 0; i < 16; i++ {
262			if child := n.Children[i]; child != nil {
263				s += estimateSize(child)
264			} else {
265				s++
266			}
267		}
268		return s
269	case valueNode:
270		return 1 + len(n)
271	case hashNode:
272		return 1 + len(n)
273	default:
274		panic(fmt.Sprintf("node type %T", n))
275	}
276}
277