1// Copyright (c) 2017 The btcsuite developers
2// Use of this source code is governed by an ISC
3// license that can be found in the LICENSE file.
4
5package blockchain
6
7import (
8	"bytes"
9	"container/list"
10	"errors"
11	"fmt"
12	"time"
13
14	"github.com/btcsuite/btcd/chaincfg/chainhash"
15	"github.com/btcsuite/btcd/database"
16	"github.com/btcsuite/btcd/wire"
17)
18
19const (
20	// blockHdrOffset defines the offsets into a v1 block index row for the
21	// block header.
22	//
23	// The serialized block index row format is:
24	//   <blocklocation><blockheader>
25	blockHdrOffset = 12
26)
27
28// errInterruptRequested indicates that an operation was cancelled due
29// to a user-requested interrupt.
30var errInterruptRequested = errors.New("interrupt requested")
31
32// interruptRequested returns true when the provided channel has been closed.
33// This simplifies early shutdown slightly since the caller can just use an if
34// statement instead of a select.
35func interruptRequested(interrupted <-chan struct{}) bool {
36	select {
37	case <-interrupted:
38		return true
39	default:
40	}
41
42	return false
43}
44
45// blockChainContext represents a particular block's placement in the block
46// chain. This is used by the block index migration to track block metadata that
47// will be written to disk.
48type blockChainContext struct {
49	parent    *chainhash.Hash
50	children  []*chainhash.Hash
51	height    int32
52	mainChain bool
53}
54
55// migrateBlockIndex migrates all block entries from the v1 block index bucket
56// to the v2 bucket. The v1 bucket stores all block entries keyed by block hash,
57// whereas the v2 bucket stores the exact same values, but keyed instead by
58// block height + hash.
59func migrateBlockIndex(db database.DB) error {
60	// Hardcoded bucket names so updates to the global values do not affect
61	// old upgrades.
62	v1BucketName := []byte("ffldb-blockidx")
63	v2BucketName := []byte("blockheaderidx")
64
65	err := db.Update(func(dbTx database.Tx) error {
66		v1BlockIdxBucket := dbTx.Metadata().Bucket(v1BucketName)
67		if v1BlockIdxBucket == nil {
68			return fmt.Errorf("Bucket %s does not exist", v1BucketName)
69		}
70
71		log.Info("Re-indexing block information in the database. This might take a while...")
72
73		v2BlockIdxBucket, err :=
74			dbTx.Metadata().CreateBucketIfNotExists(v2BucketName)
75		if err != nil {
76			return err
77		}
78
79		// Get tip of the main chain.
80		serializedData := dbTx.Metadata().Get(chainStateKeyName)
81		state, err := deserializeBestChainState(serializedData)
82		if err != nil {
83			return err
84		}
85		tip := &state.hash
86
87		// Scan the old block index bucket and construct a mapping of each block
88		// to parent block and all child blocks.
89		blocksMap, err := readBlockTree(v1BlockIdxBucket)
90		if err != nil {
91			return err
92		}
93
94		// Use the block graph to calculate the height of each block.
95		err = determineBlockHeights(blocksMap)
96		if err != nil {
97			return err
98		}
99
100		// Find blocks on the main chain with the block graph and current tip.
101		determineMainChainBlocks(blocksMap, tip)
102
103		// Now that we have heights for all blocks, scan the old block index
104		// bucket and insert all rows into the new one.
105		return v1BlockIdxBucket.ForEach(func(hashBytes, blockRow []byte) error {
106			endOffset := blockHdrOffset + blockHdrSize
107			headerBytes := blockRow[blockHdrOffset:endOffset:endOffset]
108
109			var hash chainhash.Hash
110			copy(hash[:], hashBytes[0:chainhash.HashSize])
111			chainContext := blocksMap[hash]
112
113			if chainContext.height == -1 {
114				return fmt.Errorf("Unable to calculate chain height for "+
115					"stored block %s", hash)
116			}
117
118			// Mark blocks as valid if they are part of the main chain.
119			status := statusDataStored
120			if chainContext.mainChain {
121				status |= statusValid
122			}
123
124			// Write header to v2 bucket
125			value := make([]byte, blockHdrSize+1)
126			copy(value[0:blockHdrSize], headerBytes)
127			value[blockHdrSize] = byte(status)
128
129			key := blockIndexKey(&hash, uint32(chainContext.height))
130			err := v2BlockIdxBucket.Put(key, value)
131			if err != nil {
132				return err
133			}
134
135			// Delete header from v1 bucket
136			truncatedRow := blockRow[0:blockHdrOffset:blockHdrOffset]
137			return v1BlockIdxBucket.Put(hashBytes, truncatedRow)
138		})
139	})
140	if err != nil {
141		return err
142	}
143
144	log.Infof("Block database migration complete")
145	return nil
146}
147
148// readBlockTree reads the old block index bucket and constructs a mapping of
149// each block to its parent block and all child blocks. This mapping represents
150// the full tree of blocks. This function does not populate the height or
151// mainChain fields of the returned blockChainContext values.
152func readBlockTree(v1BlockIdxBucket database.Bucket) (map[chainhash.Hash]*blockChainContext, error) {
153	blocksMap := make(map[chainhash.Hash]*blockChainContext)
154	err := v1BlockIdxBucket.ForEach(func(_, blockRow []byte) error {
155		var header wire.BlockHeader
156		endOffset := blockHdrOffset + blockHdrSize
157		headerBytes := blockRow[blockHdrOffset:endOffset:endOffset]
158		err := header.Deserialize(bytes.NewReader(headerBytes))
159		if err != nil {
160			return err
161		}
162
163		blockHash := header.BlockHash()
164		prevHash := header.PrevBlock
165
166		if blocksMap[blockHash] == nil {
167			blocksMap[blockHash] = &blockChainContext{height: -1}
168		}
169		if blocksMap[prevHash] == nil {
170			blocksMap[prevHash] = &blockChainContext{height: -1}
171		}
172
173		blocksMap[blockHash].parent = &prevHash
174		blocksMap[prevHash].children =
175			append(blocksMap[prevHash].children, &blockHash)
176		return nil
177	})
178	return blocksMap, err
179}
180
181// determineBlockHeights takes a map of block hashes to a slice of child hashes
182// and uses it to compute the height for each block. The function assigns a
183// height of 0 to the genesis hash and explores the tree of blocks
184// breadth-first, assigning a height to every block with a path back to the
185// genesis block. This function modifies the height field on the blocksMap
186// entries.
187func determineBlockHeights(blocksMap map[chainhash.Hash]*blockChainContext) error {
188	queue := list.New()
189
190	// The genesis block is included in blocksMap as a child of the zero hash
191	// because that is the value of the PrevBlock field in the genesis header.
192	preGenesisContext, exists := blocksMap[zeroHash]
193	if !exists || len(preGenesisContext.children) == 0 {
194		return fmt.Errorf("Unable to find genesis block")
195	}
196
197	for _, genesisHash := range preGenesisContext.children {
198		blocksMap[*genesisHash].height = 0
199		queue.PushBack(genesisHash)
200	}
201
202	for e := queue.Front(); e != nil; e = queue.Front() {
203		queue.Remove(e)
204		hash := e.Value.(*chainhash.Hash)
205		height := blocksMap[*hash].height
206
207		// For each block with this one as a parent, assign it a height and
208		// push to queue for future processing.
209		for _, childHash := range blocksMap[*hash].children {
210			blocksMap[*childHash].height = height + 1
211			queue.PushBack(childHash)
212		}
213	}
214
215	return nil
216}
217
218// determineMainChainBlocks traverses the block graph down from the tip to
219// determine which block hashes that are part of the main chain. This function
220// modifies the mainChain field on the blocksMap entries.
221func determineMainChainBlocks(blocksMap map[chainhash.Hash]*blockChainContext, tip *chainhash.Hash) {
222	for nextHash := tip; *nextHash != zeroHash; nextHash = blocksMap[*nextHash].parent {
223		blocksMap[*nextHash].mainChain = true
224	}
225}
226
227// deserializeUtxoEntryV0 decodes a utxo entry from the passed serialized byte
228// slice according to the legacy version 0 format into a map of utxos keyed by
229// the output index within the transaction.  The map is necessary because the
230// previous format encoded all unspent outputs for a transaction using a single
231// entry, whereas the new format encodes each unspent output individually.
232//
233// The legacy format is as follows:
234//
235//   <version><height><header code><unspentness bitmap>[<compressed txouts>,...]
236//
237//   Field                Type     Size
238//   version              VLQ      variable
239//   block height         VLQ      variable
240//   header code          VLQ      variable
241//   unspentness bitmap   []byte   variable
242//   compressed txouts
243//     compressed amount  VLQ      variable
244//     compressed script  []byte   variable
245//
246// The serialized header code format is:
247//   bit 0 - containing transaction is a coinbase
248//   bit 1 - output zero is unspent
249//   bit 2 - output one is unspent
250//   bits 3-x - number of bytes in unspentness bitmap.  When both bits 1 and 2
251//     are unset, it encodes N-1 since there must be at least one unspent
252//     output.
253//
254// The rationale for the header code scheme is as follows:
255//   - Transactions which only pay to a single output and a change output are
256//     extremely common, thus an extra byte for the unspentness bitmap can be
257//     avoided for them by encoding those two outputs in the low order bits.
258//   - Given it is encoded as a VLQ which can encode values up to 127 with a
259//     single byte, that leaves 4 bits to represent the number of bytes in the
260//     unspentness bitmap while still only consuming a single byte for the
261//     header code.  In other words, an unspentness bitmap with up to 120
262//     transaction outputs can be encoded with a single-byte header code.
263//     This covers the vast majority of transactions.
264//   - Encoding N-1 bytes when both bits 1 and 2 are unset allows an additional
265//     8 outpoints to be encoded before causing the header code to require an
266//     additional byte.
267//
268// Example 1:
269// From tx in main blockchain:
270// Blk 1, 0e3e2357e806b6cdb1f70b54c3a3a17b6714ee1f0e68bebb44a74b1efd512098
271//
272//    010103320496b538e853519c726a2c91e61ec11600ae1390813a627c66fb8be7947be63c52
273//    <><><><------------------------------------------------------------------>
274//     | | \--------\                               |
275//     | height     |                      compressed txout 0
276//  version    header code
277//
278//  - version: 1
279//  - height: 1
280//  - header code: 0x03 (coinbase, output zero unspent, 0 bytes of unspentness)
281//  - unspentness: Nothing since it is zero bytes
282//  - compressed txout 0:
283//    - 0x32: VLQ-encoded compressed amount for 5000000000 (50 BTC)
284//    - 0x04: special script type pay-to-pubkey
285//    - 0x96...52: x-coordinate of the pubkey
286//
287// Example 2:
288// From tx in main blockchain:
289// Blk 113931, 4a16969aa4764dd7507fc1de7f0baa4850a246de90c45e59a3207f9a26b5036f
290//
291//    0185f90b0a011200e2ccd6ec7c6e2e581349c77e067385fa8236bf8a800900b8025be1b3efc63b0ad48e7f9f10e87544528d58
292//    <><----><><><------------------------------------------><-------------------------------------------->
293//     |    |  | \-------------------\            |                            |
294//  version |  \--------\       unspentness       |                    compressed txout 2
295//        height     header code          compressed txout 0
296//
297//  - version: 1
298//  - height: 113931
299//  - header code: 0x0a (output zero unspent, 1 byte in unspentness bitmap)
300//  - unspentness: [0x01] (bit 0 is set, so output 0+2 = 2 is unspent)
301//    NOTE: It's +2 since the first two outputs are encoded in the header code
302//  - compressed txout 0:
303//    - 0x12: VLQ-encoded compressed amount for 20000000 (0.2 BTC)
304//    - 0x00: special script type pay-to-pubkey-hash
305//    - 0xe2...8a: pubkey hash
306//  - compressed txout 2:
307//    - 0x8009: VLQ-encoded compressed amount for 15000000 (0.15 BTC)
308//    - 0x00: special script type pay-to-pubkey-hash
309//    - 0xb8...58: pubkey hash
310//
311// Example 3:
312// From tx in main blockchain:
313// Blk 338156, 1b02d1c8cfef60a189017b9a420c682cf4a0028175f2f563209e4ff61c8c3620
314//
315//    0193d06c100000108ba5b9e763011dd46a006572d820e448e12d2bbb38640bc718e6
316//    <><----><><----><-------------------------------------------------->
317//     |    |  |   \-----------------\            |
318//  version |  \--------\       unspentness       |
319//        height     header code          compressed txout 22
320//
321//  - version: 1
322//  - height: 338156
323//  - header code: 0x10 (2+1 = 3 bytes in unspentness bitmap)
324//    NOTE: It's +1 since neither bit 1 nor 2 are set, so N-1 is encoded.
325//  - unspentness: [0x00 0x00 0x10] (bit 20 is set, so output 20+2 = 22 is unspent)
326//    NOTE: It's +2 since the first two outputs are encoded in the header code
327//  - compressed txout 22:
328//    - 0x8ba5b9e763: VLQ-encoded compressed amount for 366875659 (3.66875659 BTC)
329//    - 0x01: special script type pay-to-script-hash
330//    - 0x1d...e6: script hash
331func deserializeUtxoEntryV0(serialized []byte) (map[uint32]*UtxoEntry, error) {
332	// Deserialize the version.
333	//
334	// NOTE: Ignore version since it is no longer used in the new format.
335	_, bytesRead := deserializeVLQ(serialized)
336	offset := bytesRead
337	if offset >= len(serialized) {
338		return nil, errDeserialize("unexpected end of data after version")
339	}
340
341	// Deserialize the block height.
342	blockHeight, bytesRead := deserializeVLQ(serialized[offset:])
343	offset += bytesRead
344	if offset >= len(serialized) {
345		return nil, errDeserialize("unexpected end of data after height")
346	}
347
348	// Deserialize the header code.
349	code, bytesRead := deserializeVLQ(serialized[offset:])
350	offset += bytesRead
351	if offset >= len(serialized) {
352		return nil, errDeserialize("unexpected end of data after header")
353	}
354
355	// Decode the header code.
356	//
357	// Bit 0 indicates whether the containing transaction is a coinbase.
358	// Bit 1 indicates output 0 is unspent.
359	// Bit 2 indicates output 1 is unspent.
360	// Bits 3-x encodes the number of non-zero unspentness bitmap bytes that
361	// follow.  When both output 0 and 1 are spent, it encodes N-1.
362	isCoinBase := code&0x01 != 0
363	output0Unspent := code&0x02 != 0
364	output1Unspent := code&0x04 != 0
365	numBitmapBytes := code >> 3
366	if !output0Unspent && !output1Unspent {
367		numBitmapBytes++
368	}
369
370	// Ensure there are enough bytes left to deserialize the unspentness
371	// bitmap.
372	if uint64(len(serialized[offset:])) < numBitmapBytes {
373		return nil, errDeserialize("unexpected end of data for " +
374			"unspentness bitmap")
375	}
376
377	// Add sparse output for unspent outputs 0 and 1 as needed based on the
378	// details provided by the header code.
379	var outputIndexes []uint32
380	if output0Unspent {
381		outputIndexes = append(outputIndexes, 0)
382	}
383	if output1Unspent {
384		outputIndexes = append(outputIndexes, 1)
385	}
386
387	// Decode the unspentness bitmap adding a sparse output for each unspent
388	// output.
389	for i := uint32(0); i < uint32(numBitmapBytes); i++ {
390		unspentBits := serialized[offset]
391		for j := uint32(0); j < 8; j++ {
392			if unspentBits&0x01 != 0 {
393				// The first 2 outputs are encoded via the
394				// header code, so adjust the output number
395				// accordingly.
396				outputNum := 2 + i*8 + j
397				outputIndexes = append(outputIndexes, outputNum)
398			}
399			unspentBits >>= 1
400		}
401		offset++
402	}
403
404	// Map to hold all of the converted outputs.
405	entries := make(map[uint32]*UtxoEntry)
406
407	// All entries will need to potentially be marked as a coinbase.
408	var packedFlags txoFlags
409	if isCoinBase {
410		packedFlags |= tfCoinBase
411	}
412
413	// Decode and add all of the utxos.
414	for i, outputIndex := range outputIndexes {
415		// Decode the next utxo.
416		amount, pkScript, bytesRead, err := decodeCompressedTxOut(
417			serialized[offset:])
418		if err != nil {
419			return nil, errDeserialize(fmt.Sprintf("unable to "+
420				"decode utxo at index %d: %v", i, err))
421		}
422		offset += bytesRead
423
424		// Create a new utxo entry with the details deserialized above.
425		entries[outputIndex] = &UtxoEntry{
426			amount:      int64(amount),
427			pkScript:    pkScript,
428			blockHeight: int32(blockHeight),
429			packedFlags: packedFlags,
430		}
431	}
432
433	return entries, nil
434}
435
436// upgradeUtxoSetToV2 migrates the utxo set entries from version 1 to 2 in
437// batches.  It is guaranteed to updated if this returns without failure.
438func upgradeUtxoSetToV2(db database.DB, interrupt <-chan struct{}) error {
439	// Hardcoded bucket names so updates to the global values do not affect
440	// old upgrades.
441	var (
442		v1BucketName = []byte("utxoset")
443		v2BucketName = []byte("utxosetv2")
444	)
445
446	log.Infof("Upgrading utxo set to v2.  This will take a while...")
447	start := time.Now()
448
449	// Create the new utxo set bucket as needed.
450	err := db.Update(func(dbTx database.Tx) error {
451		_, err := dbTx.Metadata().CreateBucketIfNotExists(v2BucketName)
452		return err
453	})
454	if err != nil {
455		return err
456	}
457
458	// doBatch contains the primary logic for upgrading the utxo set from
459	// version 1 to 2 in batches.  This is done because the utxo set can be
460	// huge and thus attempting to migrate in a single database transaction
461	// would result in massive memory usage and could potentially crash on
462	// many systems due to ulimits.
463	//
464	// It returns the number of utxos processed.
465	const maxUtxos = 200000
466	doBatch := func(dbTx database.Tx) (uint32, error) {
467		v1Bucket := dbTx.Metadata().Bucket(v1BucketName)
468		v2Bucket := dbTx.Metadata().Bucket(v2BucketName)
469		v1Cursor := v1Bucket.Cursor()
470
471		// Migrate utxos so long as the max number of utxos for this
472		// batch has not been exceeded.
473		var numUtxos uint32
474		for ok := v1Cursor.First(); ok && numUtxos < maxUtxos; ok =
475			v1Cursor.Next() {
476
477			// Old key was the transaction hash.
478			oldKey := v1Cursor.Key()
479			var txHash chainhash.Hash
480			copy(txHash[:], oldKey)
481
482			// Deserialize the old entry which included all utxos
483			// for the given transaction.
484			utxos, err := deserializeUtxoEntryV0(v1Cursor.Value())
485			if err != nil {
486				return 0, err
487			}
488
489			// Add an entry for each utxo into the new bucket using
490			// the new format.
491			for txOutIdx, utxo := range utxos {
492				reserialized, err := serializeUtxoEntry(utxo)
493				if err != nil {
494					return 0, err
495				}
496
497				key := outpointKey(wire.OutPoint{
498					Hash:  txHash,
499					Index: txOutIdx,
500				})
501				err = v2Bucket.Put(*key, reserialized)
502				// NOTE: The key is intentionally not recycled
503				// here since the database interface contract
504				// prohibits modifications.  It will be garbage
505				// collected normally when the database is done
506				// with it.
507				if err != nil {
508					return 0, err
509				}
510			}
511
512			// Remove old entry.
513			err = v1Bucket.Delete(oldKey)
514			if err != nil {
515				return 0, err
516			}
517
518			numUtxos += uint32(len(utxos))
519
520			if interruptRequested(interrupt) {
521				// No error here so the database transaction
522				// is not cancelled and therefore outstanding
523				// work is written to disk.
524				break
525			}
526		}
527
528		return numUtxos, nil
529	}
530
531	// Migrate all entries in batches for the reasons mentioned above.
532	var totalUtxos uint64
533	for {
534		var numUtxos uint32
535		err := db.Update(func(dbTx database.Tx) error {
536			var err error
537			numUtxos, err = doBatch(dbTx)
538			return err
539		})
540		if err != nil {
541			return err
542		}
543
544		if interruptRequested(interrupt) {
545			return errInterruptRequested
546		}
547
548		if numUtxos == 0 {
549			break
550		}
551
552		totalUtxos += uint64(numUtxos)
553		log.Infof("Migrated %d utxos (%d total)", numUtxos, totalUtxos)
554	}
555
556	// Remove the old bucket and update the utxo set version once it has
557	// been fully migrated.
558	err = db.Update(func(dbTx database.Tx) error {
559		err := dbTx.Metadata().DeleteBucket(v1BucketName)
560		if err != nil {
561			return err
562		}
563
564		return dbPutVersion(dbTx, utxoSetVersionKeyName, 2)
565	})
566	if err != nil {
567		return err
568	}
569
570	seconds := int64(time.Since(start) / time.Second)
571	log.Infof("Done upgrading utxo set.  Total utxos: %d in %d seconds",
572		totalUtxos, seconds)
573	return nil
574}
575
576// maybeUpgradeDbBuckets checks the database version of the buckets used by this
577// package and performs any needed upgrades to bring them to the latest version.
578//
579// All buckets used by this package are guaranteed to be the latest version if
580// this function returns without error.
581func (b *BlockChain) maybeUpgradeDbBuckets(interrupt <-chan struct{}) error {
582	// Load or create bucket versions as needed.
583	var utxoSetVersion uint32
584	err := b.db.Update(func(dbTx database.Tx) error {
585		// Load the utxo set version from the database or create it and
586		// initialize it to version 1 if it doesn't exist.
587		var err error
588		utxoSetVersion, err = dbFetchOrCreateVersion(dbTx,
589			utxoSetVersionKeyName, 1)
590		return err
591	})
592	if err != nil {
593		return err
594	}
595
596	// Update the utxo set to v2 if needed.
597	if utxoSetVersion < 2 {
598		if err := upgradeUtxoSetToV2(b.db, interrupt); err != nil {
599			return err
600		}
601	}
602
603	return nil
604}
605