1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package tlog
6
7import (
8	"fmt"
9	"strconv"
10	"strings"
11)
12
13// A Tile is a description of a transparency log tile.
14// A tile of height H at level L offset N lists W consecutive hashes
15// at level H*L of the tree starting at offset N*(2**H).
16// A complete tile lists 2**H hashes; a partial tile lists fewer.
17// Note that a tile represents the entire subtree of height H
18// with those hashes as the leaves. The levels above H*L
19// can be reconstructed by hashing the leaves.
20//
21// Each Tile can be encoded as a “tile coordinate path”
22// of the form tile/H/L/NNN[.p/W].
23// The .p/W suffix is present only for partial tiles, meaning W < 2**H.
24// The NNN element is an encoding of N into 3-digit path elements.
25// All but the last path element begins with an "x".
26// For example,
27// Tile{H: 3, L: 4, N: 1234067, W: 1}'s path
28// is tile/3/4/x001/x234/067.p/1, and
29// Tile{H: 3, L: 4, N: 1234067, W: 8}'s path
30// is tile/3/4/x001/x234/067.
31// See Tile's Path method and the ParseTilePath function.
32//
33// The special level L=-1 holds raw record data instead of hashes.
34// In this case, the level encodes into a tile path as the path element
35// "data" instead of "-1".
36//
37// See also https://golang.org/design/25530-sumdb#checksum-database
38// and https://research.swtch.com/tlog#tiling_a_log.
39type Tile struct {
40	H int   // height of tile (1 ≤ H ≤ 30)
41	L int   // level in tiling (-1 ≤ L ≤ 63)
42	N int64 // number within level (0 ≤ N, unbounded)
43	W int   // width of tile (1 ≤ W ≤ 2**H; 2**H is complete tile)
44}
45
46// TileForIndex returns the tile of fixed height h ≥ 1
47// and least width storing the given hash storage index.
48//
49// If h ≤ 0, TileForIndex panics.
50func TileForIndex(h int, index int64) Tile {
51	if h <= 0 {
52		panic(fmt.Sprintf("TileForIndex: invalid height %d", h))
53	}
54	t, _, _ := tileForIndex(h, index)
55	return t
56}
57
58// tileForIndex returns the tile of height h ≥ 1
59// storing the given hash index, which can be
60// reconstructed using tileHash(data[start:end]).
61func tileForIndex(h int, index int64) (t Tile, start, end int) {
62	level, n := SplitStoredHashIndex(index)
63	t.H = h
64	t.L = level / h
65	level -= t.L * h // now level within tile
66	t.N = n << uint(level) >> uint(t.H)
67	n -= t.N << uint(t.H) >> uint(level) // now n within tile at level
68	t.W = int((n + 1) << uint(level))
69	return t, int(n<<uint(level)) * HashSize, int((n+1)<<uint(level)) * HashSize
70}
71
72// HashFromTile returns the hash at the given storage index,
73// provided that t == TileForIndex(t.H, index) or a wider version,
74// and data is t's tile data (of length at least t.W*HashSize).
75func HashFromTile(t Tile, data []byte, index int64) (Hash, error) {
76	if t.H < 1 || t.H > 30 || t.L < 0 || t.L >= 64 || t.W < 1 || t.W > 1<<uint(t.H) {
77		return Hash{}, fmt.Errorf("invalid tile %v", t.Path())
78	}
79	if len(data) < t.W*HashSize {
80		return Hash{}, fmt.Errorf("data len %d too short for tile %v", len(data), t.Path())
81	}
82	t1, start, end := tileForIndex(t.H, index)
83	if t.L != t1.L || t.N != t1.N || t.W < t1.W {
84		return Hash{}, fmt.Errorf("index %v is in %v not %v", index, t1.Path(), t.Path())
85	}
86	return tileHash(data[start:end]), nil
87}
88
89// tileHash computes the subtree hash corresponding to the (2^K)-1 hashes in data.
90func tileHash(data []byte) Hash {
91	if len(data) == 0 {
92		panic("bad math in tileHash")
93	}
94	if len(data) == HashSize {
95		var h Hash
96		copy(h[:], data)
97		return h
98	}
99	n := len(data) / 2
100	return NodeHash(tileHash(data[:n]), tileHash(data[n:]))
101}
102
103// NewTiles returns the coordinates of the tiles of height h ≥ 1
104// that must be published when publishing from a tree of
105// size newTreeSize to replace a tree of size oldTreeSize.
106// (No tiles need to be published for a tree of size zero.)
107//
108// If h ≤ 0, TileForIndex panics.
109func NewTiles(h int, oldTreeSize, newTreeSize int64) []Tile {
110	if h <= 0 {
111		panic(fmt.Sprintf("NewTiles: invalid height %d", h))
112	}
113	H := uint(h)
114	var tiles []Tile
115	for level := uint(0); newTreeSize>>(H*level) > 0; level++ {
116		oldN := oldTreeSize >> (H * level)
117		newN := newTreeSize >> (H * level)
118		for n := oldN >> H; n < newN>>H; n++ {
119			tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: 1 << H})
120		}
121		n := newN >> H
122		maxW := int(newN - n<<H)
123		minW := 1
124		if oldN > n<<H {
125			minW = int(oldN - n<<H)
126		}
127		for w := minW; w <= maxW; w++ {
128			tiles = append(tiles, Tile{H: h, L: int(level), N: n, W: w})
129		}
130	}
131	return tiles
132}
133
134// ReadTileData reads the hashes for tile t from r
135// and returns the corresponding tile data.
136func ReadTileData(t Tile, r HashReader) ([]byte, error) {
137	size := t.W
138	if size == 0 {
139		size = 1 << uint(t.H)
140	}
141	start := t.N << uint(t.H)
142	indexes := make([]int64, size)
143	for i := 0; i < size; i++ {
144		indexes[i] = StoredHashIndex(t.H*t.L, start+int64(i))
145	}
146
147	hashes, err := r.ReadHashes(indexes)
148	if err != nil {
149		return nil, err
150	}
151	if len(hashes) != len(indexes) {
152		return nil, fmt.Errorf("tlog: ReadHashes(%d indexes) = %d hashes", len(indexes), len(hashes))
153	}
154
155	tile := make([]byte, size*HashSize)
156	for i := 0; i < size; i++ {
157		copy(tile[i*HashSize:], hashes[i][:])
158	}
159	return tile, nil
160}
161
162// To limit the size of any particular directory listing,
163// we encode the (possibly very large) number N
164// by encoding three digits at a time.
165// For example, 123456789 encodes as x123/x456/789.
166// Each directory has at most 1000 each xNNN, NNN, and NNN.p children,
167// so there are at most 3000 entries in any one directory.
168const pathBase = 1000
169
170// Path returns a tile coordinate path describing t.
171func (t Tile) Path() string {
172	n := t.N
173	nStr := fmt.Sprintf("%03d", n%pathBase)
174	for n >= pathBase {
175		n /= pathBase
176		nStr = fmt.Sprintf("x%03d/%s", n%pathBase, nStr)
177	}
178	pStr := ""
179	if t.W != 1<<uint(t.H) {
180		pStr = fmt.Sprintf(".p/%d", t.W)
181	}
182	var L string
183	if t.L == -1 {
184		L = "data"
185	} else {
186		L = fmt.Sprintf("%d", t.L)
187	}
188	return fmt.Sprintf("tile/%d/%s/%s%s", t.H, L, nStr, pStr)
189}
190
191// ParseTilePath parses a tile coordinate path.
192func ParseTilePath(path string) (Tile, error) {
193	f := strings.Split(path, "/")
194	if len(f) < 4 || f[0] != "tile" {
195		return Tile{}, &badPathError{path}
196	}
197	h, err1 := strconv.Atoi(f[1])
198	isData := false
199	if f[2] == "data" {
200		isData = true
201		f[2] = "0"
202	}
203	l, err2 := strconv.Atoi(f[2])
204	if err1 != nil || err2 != nil || h < 1 || l < 0 || h > 30 {
205		return Tile{}, &badPathError{path}
206	}
207	w := 1 << uint(h)
208	if dotP := f[len(f)-2]; strings.HasSuffix(dotP, ".p") {
209		ww, err := strconv.Atoi(f[len(f)-1])
210		if err != nil || ww <= 0 || ww >= w {
211			return Tile{}, &badPathError{path}
212		}
213		w = ww
214		f[len(f)-2] = dotP[:len(dotP)-len(".p")]
215		f = f[:len(f)-1]
216	}
217	f = f[3:]
218	n := int64(0)
219	for _, s := range f {
220		nn, err := strconv.Atoi(strings.TrimPrefix(s, "x"))
221		if err != nil || nn < 0 || nn >= pathBase {
222			return Tile{}, &badPathError{path}
223		}
224		n = n*pathBase + int64(nn)
225	}
226	if isData {
227		l = -1
228	}
229	t := Tile{H: h, L: l, N: n, W: w}
230	if path != t.Path() {
231		return Tile{}, &badPathError{path}
232	}
233	return t, nil
234}
235
236type badPathError struct {
237	path string
238}
239
240func (e *badPathError) Error() string {
241	return fmt.Sprintf("malformed tile path %q", e.path)
242}
243
244// A TileReader reads tiles from a go.sum database log.
245type TileReader interface {
246	// Height returns the height of the available tiles.
247	Height() int
248
249	// ReadTiles returns the data for each requested tile.
250	// If ReadTiles returns err == nil, it must also return
251	// a data record for each tile (len(data) == len(tiles))
252	// and each data record must be the correct length
253	// (len(data[i]) == tiles[i].W*HashSize).
254	//
255	// An implementation of ReadTiles typically reads
256	// them from an on-disk cache or else from a remote
257	// tile server. Tile data downloaded from a server should
258	// be considered suspect and not saved into a persistent
259	// on-disk cache before returning from ReadTiles.
260	// When the client confirms the validity of the tile data,
261	// it will call SaveTiles to signal that they can be safely
262	// written to persistent storage.
263	// See also https://research.swtch.com/tlog#authenticating_tiles.
264	ReadTiles(tiles []Tile) (data [][]byte, err error)
265
266	// SaveTiles informs the TileReader that the tile data
267	// returned by ReadTiles has been confirmed as valid
268	// and can be saved in persistent storage (on disk).
269	SaveTiles(tiles []Tile, data [][]byte)
270}
271
272// TileHashReader returns a HashReader that satisfies requests
273// by loading tiles of the given tree.
274//
275// The returned HashReader checks that loaded tiles are
276// valid for the given tree. Therefore, any hashes returned
277// by the HashReader are already proven to be in the tree.
278func TileHashReader(tree Tree, tr TileReader) HashReader {
279	return &tileHashReader{tree: tree, tr: tr}
280}
281
282type tileHashReader struct {
283	tree Tree
284	tr   TileReader
285}
286
287// tileParent returns t's k'th tile parent in the tiles for a tree of size n.
288// If there is no such parent, tileParent returns Tile{}.
289func tileParent(t Tile, k int, n int64) Tile {
290	t.L += k
291	t.N >>= uint(k * t.H)
292	t.W = 1 << uint(t.H)
293	if max := n >> uint(t.L*t.H); t.N<<uint(t.H)+int64(t.W) >= max {
294		if t.N<<uint(t.H) >= max {
295			return Tile{}
296		}
297		t.W = int(max - t.N<<uint(t.H))
298	}
299	return t
300}
301
302func (r *tileHashReader) ReadHashes(indexes []int64) ([]Hash, error) {
303	h := r.tr.Height()
304
305	tileOrder := make(map[Tile]int) // tileOrder[tileKey(tiles[i])] = i
306	var tiles []Tile
307
308	// Plan to fetch tiles necessary to recompute tree hash.
309	// If it matches, those tiles are authenticated.
310	stx := subTreeIndex(0, r.tree.N, nil)
311	stxTileOrder := make([]int, len(stx))
312	for i, x := range stx {
313		tile, _, _ := tileForIndex(h, x)
314		tile = tileParent(tile, 0, r.tree.N)
315		if j, ok := tileOrder[tile]; ok {
316			stxTileOrder[i] = j
317			continue
318		}
319		stxTileOrder[i] = len(tiles)
320		tileOrder[tile] = len(tiles)
321		tiles = append(tiles, tile)
322	}
323
324	// Plan to fetch tiles containing the indexes,
325	// along with any parent tiles needed
326	// for authentication. For most calls,
327	// the parents are being fetched anyway.
328	indexTileOrder := make([]int, len(indexes))
329	for i, x := range indexes {
330		if x >= StoredHashIndex(0, r.tree.N) {
331			return nil, fmt.Errorf("indexes not in tree")
332		}
333
334		tile, _, _ := tileForIndex(h, x)
335
336		// Walk up parent tiles until we find one we've requested.
337		// That one will be authenticated.
338		k := 0
339		for ; ; k++ {
340			p := tileParent(tile, k, r.tree.N)
341			if j, ok := tileOrder[p]; ok {
342				if k == 0 {
343					indexTileOrder[i] = j
344				}
345				break
346			}
347		}
348
349		// Walk back down recording child tiles after parents.
350		// This loop ends by revisiting the tile for this index
351		// (tileParent(tile, 0, r.tree.N)) unless k == 0, in which
352		// case the previous loop did it.
353		for k--; k >= 0; k-- {
354			p := tileParent(tile, k, r.tree.N)
355			if p.W != 1<<uint(p.H) {
356				// Only full tiles have parents.
357				// This tile has a parent, so it must be full.
358				return nil, fmt.Errorf("bad math in tileHashReader: %d %d %v", r.tree.N, x, p)
359			}
360			tileOrder[p] = len(tiles)
361			if k == 0 {
362				indexTileOrder[i] = len(tiles)
363			}
364			tiles = append(tiles, p)
365		}
366	}
367
368	// Fetch all the tile data.
369	data, err := r.tr.ReadTiles(tiles)
370	if err != nil {
371		return nil, err
372	}
373	if len(data) != len(tiles) {
374		return nil, fmt.Errorf("TileReader returned bad result slice (len=%d, want %d)", len(data), len(tiles))
375	}
376	for i, tile := range tiles {
377		if len(data[i]) != tile.W*HashSize {
378			return nil, fmt.Errorf("TileReader returned bad result slice (%v len=%d, want %d)", tile.Path(), len(data[i]), tile.W*HashSize)
379		}
380	}
381
382	// Authenticate the initial tiles against the tree hash.
383	// They are arranged so that parents are authenticated before children.
384	// First the tiles needed for the tree hash.
385	th, err := HashFromTile(tiles[stxTileOrder[len(stx)-1]], data[stxTileOrder[len(stx)-1]], stx[len(stx)-1])
386	if err != nil {
387		return nil, err
388	}
389	for i := len(stx) - 2; i >= 0; i-- {
390		h, err := HashFromTile(tiles[stxTileOrder[i]], data[stxTileOrder[i]], stx[i])
391		if err != nil {
392			return nil, err
393		}
394		th = NodeHash(h, th)
395	}
396	if th != r.tree.Hash {
397		// The tiles do not support the tree hash.
398		// We know at least one is wrong, but not which one.
399		return nil, fmt.Errorf("downloaded inconsistent tile")
400	}
401
402	// Authenticate full tiles against their parents.
403	for i := len(stx); i < len(tiles); i++ {
404		tile := tiles[i]
405		p := tileParent(tile, 1, r.tree.N)
406		j, ok := tileOrder[p]
407		if !ok {
408			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost parent of %v", r.tree.N, indexes, tile)
409		}
410		h, err := HashFromTile(p, data[j], StoredHashIndex(p.L*p.H, tile.N))
411		if err != nil {
412			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash of %v: %v", r.tree.N, indexes, tile, err)
413		}
414		if h != tileHash(data[i]) {
415			return nil, fmt.Errorf("downloaded inconsistent tile")
416		}
417	}
418
419	// Now we have all the tiles needed for the requested hashes,
420	// and we've authenticated the full tile set against the trusted tree hash.
421	r.tr.SaveTiles(tiles, data)
422
423	// Pull out the requested hashes.
424	hashes := make([]Hash, len(indexes))
425	for i, x := range indexes {
426		j := indexTileOrder[i]
427		h, err := HashFromTile(tiles[j], data[j], x)
428		if err != nil {
429			return nil, fmt.Errorf("bad math in tileHashReader %d %v: lost hash %v: %v", r.tree.N, indexes, x, err)
430		}
431		hashes[i] = h
432	}
433
434	return hashes, nil
435}
436