1/*
2 * tuple.go
3 *
4 * This source file is part of the FoundationDB open source project
5 *
6 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *     http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21// FoundationDB Go Tuple Layer
22
23// Package tuple provides a layer for encoding and decoding multi-element tuples
24// into keys usable by FoundationDB. The encoded key maintains the same sort
25// order as the original tuple: sorted first by the first element, then by the
26// second element, etc. This makes the tuple layer ideal for building a variety
27// of higher-level data models.
28//
29// For general guidance on tuple usage, see the Tuple section of Data Modeling
30// (https://apple.github.io/foundationdb/data-modeling.html#tuples).
31//
32// FoundationDB tuples can currently encode byte and unicode strings, integers,
33// large integers, floats, doubles, booleans, UUIDs, tuples, and NULL values.
34// In Go these are represented as []byte (or fdb.KeyConvertible), string, int64
35// (or int, uint, uint64), *big.Int (or big.Int), float32, float64, bool,
36// UUID, Tuple, and nil.
37package tuple
38
39import (
40	"bytes"
41	"encoding/binary"
42	"errors"
43	"fmt"
44	"math"
45	"math/big"
46
47	"github.com/apple/foundationdb/bindings/go/src/fdb"
48)
49
50// A TupleElement is one of the types that may be encoded in FoundationDB
51// tuples. Although the Go compiler cannot enforce this, it is a programming
52// error to use an unsupported types as a TupleElement (and will typically
53// result in a runtime panic).
54//
55// The valid types for TupleElement are []byte (or fdb.KeyConvertible), string,
56// int64 (or int, uint, uint64), *big.Int (or big.Int), float, double, bool,
57// UUID, Tuple, and nil.
58type TupleElement interface{}
59
60// Tuple is a slice of objects that can be encoded as FoundationDB tuples. If
61// any of the TupleElements are of unsupported types, a runtime panic will occur
62// when the Tuple is packed.
63//
64// Given a Tuple T containing objects only of these types, then T will be
65// identical to the Tuple returned by unpacking the byte slice obtained by
66// packing T (modulo type normalization to []byte, uint64, and int64).
67type Tuple []TupleElement
68
69// UUID wraps a basic byte array as a UUID. We do not provide any special
70// methods for accessing or generating the UUID, but as Go does not provide
71// a built-in UUID type, this simple wrapper allows for other libraries
72// to write the output of their UUID type as a 16-byte array into
73// an instance of this type.
74type UUID [16]byte
75
76// Versionstamp is struct for a FoundationDB verionstamp. Versionstamps are
77// 12 bytes long composed of a 10 byte transaction version and a 2 byte user
78// version. The transaction version is filled in at commit time and the user
79// version is provided by the application to order results within a transaction.
80type Versionstamp struct {
81	TransactionVersion [10]byte
82	UserVersion        uint16
83}
84
85var incompleteTransactionVersion = [10]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}
86
87const versionstampLength = 12
88
89// IncompleteVersionstamp is the constructor you should use to make
90// an incomplete versionstamp to use in a tuple.
91func IncompleteVersionstamp(userVersion uint16) Versionstamp {
92	return Versionstamp{
93		TransactionVersion: incompleteTransactionVersion,
94		UserVersion:        userVersion,
95	}
96}
97
98// Bytes converts a Versionstamp struct to a byte slice for encoding in a tuple.
99func (v Versionstamp) Bytes() []byte {
100	var scratch [versionstampLength]byte
101
102	copy(scratch[:], v.TransactionVersion[:])
103
104	binary.BigEndian.PutUint16(scratch[10:], v.UserVersion)
105
106	return scratch[:]
107}
108
109// Type codes: These prefix the different elements in a packed Tuple
110// to indicate what type they are.
111const nilCode = 0x00
112const bytesCode = 0x01
113const stringCode = 0x02
114const nestedCode = 0x05
115const intZeroCode = 0x14
116const posIntEnd = 0x1d
117const negIntStart = 0x0b
118const floatCode = 0x20
119const doubleCode = 0x21
120const falseCode = 0x26
121const trueCode = 0x27
122const uuidCode = 0x30
123const versionstampCode = 0x33
124
125var sizeLimits = []uint64{
126	1<<(0*8) - 1,
127	1<<(1*8) - 1,
128	1<<(2*8) - 1,
129	1<<(3*8) - 1,
130	1<<(4*8) - 1,
131	1<<(5*8) - 1,
132	1<<(6*8) - 1,
133	1<<(7*8) - 1,
134	1<<(8*8) - 1,
135}
136
137var minInt64BigInt = big.NewInt(math.MinInt64)
138
139func bisectLeft(u uint64) int {
140	var n int
141	for sizeLimits[n] < u {
142		n++
143	}
144	return n
145}
146
147func adjustFloatBytes(b []byte, encode bool) {
148	if (encode && b[0]&0x80 != 0x00) || (!encode && b[0]&0x80 == 0x00) {
149		// Negative numbers: flip all of the bytes.
150		for i := 0; i < len(b); i++ {
151			b[i] = b[i] ^ 0xff
152		}
153	} else {
154		// Positive number: flip just the sign bit.
155		b[0] = b[0] ^ 0x80
156	}
157}
158
159type packer struct {
160	versionstampPos int32
161	buf             []byte
162}
163
164func newPacker() *packer {
165	return &packer{
166		versionstampPos: -1,
167		buf:             make([]byte, 0, 64),
168	}
169}
170
171func (p *packer) putByte(b byte) {
172	p.buf = append(p.buf, b)
173}
174
175func (p *packer) putBytes(b []byte) {
176	p.buf = append(p.buf, b...)
177}
178
179func (p *packer) putBytesNil(b []byte, i int) {
180	for i >= 0 {
181		p.putBytes(b[:i+1])
182		p.putByte(0xFF)
183		b = b[i+1:]
184		i = bytes.IndexByte(b, 0x00)
185	}
186	p.putBytes(b)
187}
188
189func (p *packer) encodeBytes(code byte, b []byte) {
190	p.putByte(code)
191	if i := bytes.IndexByte(b, 0x00); i >= 0 {
192		p.putBytesNil(b, i)
193	} else {
194		p.putBytes(b)
195	}
196	p.putByte(0x00)
197}
198
199func (p *packer) encodeUint(i uint64) {
200	if i == 0 {
201		p.putByte(intZeroCode)
202		return
203	}
204
205	n := bisectLeft(i)
206	var scratch [8]byte
207
208	p.putByte(byte(intZeroCode + n))
209	binary.BigEndian.PutUint64(scratch[:], i)
210
211	p.putBytes(scratch[8-n:])
212}
213
214func (p *packer) encodeInt(i int64) {
215	if i >= 0 {
216		p.encodeUint(uint64(i))
217		return
218	}
219
220	n := bisectLeft(uint64(-i))
221	var scratch [8]byte
222
223	p.putByte(byte(intZeroCode - n))
224	offsetEncoded := int64(sizeLimits[n]) + i
225	binary.BigEndian.PutUint64(scratch[:], uint64(offsetEncoded))
226
227	p.putBytes(scratch[8-n:])
228}
229
230func (p *packer) encodeBigInt(i *big.Int) {
231	length := len(i.Bytes())
232	if length > 0xff {
233		panic(fmt.Sprintf("Integer magnitude is too large (more than 255 bytes)"))
234	}
235
236	if i.Sign() >= 0 {
237		intBytes := i.Bytes()
238		if length > 8 {
239			p.putByte(byte(posIntEnd))
240			p.putByte(byte(len(intBytes)))
241		} else {
242			p.putByte(byte(intZeroCode + length))
243		}
244
245		p.putBytes(intBytes)
246	} else {
247		add := new(big.Int).Lsh(big.NewInt(1), uint(length*8))
248		add.Sub(add, big.NewInt(1))
249		transformed := new(big.Int)
250		transformed.Add(i, add)
251
252		intBytes := transformed.Bytes()
253		if length > 8 {
254			p.putByte(byte(negIntStart))
255			p.putByte(byte(length ^ 0xff))
256		} else {
257			p.putByte(byte(intZeroCode - length))
258		}
259
260		// For large negative numbers whose absolute value begins with 0xff bytes,
261		// the transformed bytes may begin with 0x00 bytes. However, intBytes
262		// will only contain the non-zero suffix, so this loop is needed to make
263		// the value written be the correct length.
264		for i := len(intBytes); i < length; i++ {
265			p.putByte(0x00)
266		}
267
268		p.putBytes(intBytes)
269	}
270}
271
272func (p *packer) encodeFloat(f float32) {
273	var scratch [4]byte
274	binary.BigEndian.PutUint32(scratch[:], math.Float32bits(f))
275	adjustFloatBytes(scratch[:], true)
276
277	p.putByte(floatCode)
278	p.putBytes(scratch[:])
279}
280
281func (p *packer) encodeDouble(d float64) {
282	var scratch [8]byte
283	binary.BigEndian.PutUint64(scratch[:], math.Float64bits(d))
284	adjustFloatBytes(scratch[:], true)
285
286	p.putByte(doubleCode)
287	p.putBytes(scratch[:])
288}
289
290func (p *packer) encodeUUID(u UUID) {
291	p.putByte(uuidCode)
292	p.putBytes(u[:])
293}
294
295func (p *packer) encodeVersionstamp(v Versionstamp) {
296	p.putByte(versionstampCode)
297
298	isIncomplete := v.TransactionVersion == incompleteTransactionVersion
299	if isIncomplete {
300		if p.versionstampPos != -1 {
301			panic(fmt.Sprintf("Tuple can only contain one incomplete versionstamp"))
302		}
303
304		p.versionstampPos = int32(len(p.buf))
305	}
306
307	p.putBytes(v.Bytes())
308}
309
310func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) {
311	if nested {
312		p.putByte(nestedCode)
313	}
314
315	for i, e := range t {
316		switch e := e.(type) {
317		case Tuple:
318			p.encodeTuple(e, true, versionstamps)
319		case nil:
320			p.putByte(nilCode)
321			if nested {
322				p.putByte(0xff)
323			}
324		case int:
325			p.encodeInt(int64(e))
326		case int64:
327			p.encodeInt(e)
328		case uint:
329			p.encodeUint(uint64(e))
330		case uint64:
331			p.encodeUint(e)
332		case *big.Int:
333			p.encodeBigInt(e)
334		case big.Int:
335			p.encodeBigInt(&e)
336		case []byte:
337			p.encodeBytes(bytesCode, e)
338		case fdb.KeyConvertible:
339			p.encodeBytes(bytesCode, []byte(e.FDBKey()))
340		case string:
341			p.encodeBytes(stringCode, []byte(e))
342		case float32:
343			p.encodeFloat(e)
344		case float64:
345			p.encodeDouble(e)
346		case bool:
347			if e {
348				p.putByte(trueCode)
349			} else {
350				p.putByte(falseCode)
351			}
352		case UUID:
353			p.encodeUUID(e)
354		case Versionstamp:
355			if versionstamps == false && e.TransactionVersion == incompleteTransactionVersion {
356				panic(fmt.Sprintf("Incomplete Versionstamp included in vanilla tuple pack"))
357			}
358
359			p.encodeVersionstamp(e)
360		default:
361			panic(fmt.Sprintf("unencodable element at index %d (%v, type %T)", i, t[i], t[i]))
362		}
363	}
364
365	if nested {
366		p.putByte(0x00)
367	}
368}
369
370// Pack returns a new byte slice encoding the provided tuple. Pack will panic if
371// the tuple contains an element of any type other than []byte,
372// fdb.KeyConvertible, string, int64, int, uint64, uint, *big.Int, big.Int, float32,
373// float64, bool, tuple.UUID, tuple.Versionstamp, nil, or a Tuple with elements of
374// valid types. It will also panic if an integer is specified with a value outside
375// the range [-2**2040+1, 2**2040-1]
376//
377// Tuple satisfies the fdb.KeyConvertible interface, so it is not necessary to
378// call Pack when using a Tuple with a FoundationDB API function that requires a
379// key.
380//
381// This method will panic if it contains an incomplete Versionstamp. Use
382// PackWithVersionstamp instead.
383//
384func (t Tuple) Pack() []byte {
385	p := newPacker()
386	p.encodeTuple(t, false, false)
387	return p.buf
388}
389
390// PackWithVersionstamp packs the specified tuple into a key for versionstamp
391// operations. See Pack for more information. This function will return an error
392// if you attempt to pack a tuple with more than one versionstamp. This function will
393// return an error if you attempt to pack a tuple with a versionstamp position larger
394// than an uint16 if the API version is less than 520.
395func (t Tuple) PackWithVersionstamp(prefix []byte) ([]byte, error) {
396	hasVersionstamp, err := t.HasIncompleteVersionstamp()
397	if err != nil {
398		return nil, err
399	}
400
401	apiVersion, err := fdb.GetAPIVersion()
402	if err != nil {
403		return nil, err
404	}
405
406	if hasVersionstamp == false {
407		return nil, errors.New("No incomplete versionstamp included in tuple pack with versionstamp")
408	}
409
410	p := newPacker()
411
412	if prefix != nil {
413		p.putBytes(prefix)
414	}
415
416	p.encodeTuple(t, false, true)
417
418	if hasVersionstamp {
419		var scratch [4]byte
420		var offsetIndex int
421		if apiVersion < 520 {
422			if p.versionstampPos > math.MaxUint16 {
423				return nil, errors.New("Versionstamp position too large")
424			}
425
426			offsetIndex = 2
427			binary.LittleEndian.PutUint16(scratch[:], uint16(p.versionstampPos))
428		} else {
429			offsetIndex = 4
430			binary.LittleEndian.PutUint32(scratch[:], uint32(p.versionstampPos))
431		}
432
433		p.putBytes(scratch[0:offsetIndex])
434	}
435
436	return p.buf, nil
437}
438
439// HasIncompleteVersionstamp determines if there is at least one incomplete
440// versionstamp in a tuple. This function will return an error this tuple has
441// more than one versionstamp.
442func (t Tuple) HasIncompleteVersionstamp() (bool, error) {
443	incompleteCount := t.countIncompleteVersionstamps()
444
445	var err error
446	if incompleteCount > 1 {
447		err = errors.New("Tuple can only contain one incomplete versionstamp")
448	}
449
450	return incompleteCount >= 1, err
451}
452
453func (t Tuple) countIncompleteVersionstamps() int {
454	incompleteCount := 0
455
456	for _, el := range t {
457		switch e := el.(type) {
458		case Versionstamp:
459			if e.TransactionVersion == incompleteTransactionVersion {
460				incompleteCount++
461			}
462		case Tuple:
463			incompleteCount += e.countIncompleteVersionstamps()
464		}
465	}
466
467	return incompleteCount
468}
469
470func findTerminator(b []byte) int {
471	bp := b
472	var length int
473
474	for {
475		idx := bytes.IndexByte(bp, 0x00)
476		length += idx
477		if idx+1 == len(bp) || bp[idx+1] != 0xFF {
478			break
479		}
480		length += 2
481		bp = bp[idx+2:]
482	}
483
484	return length
485}
486
487func decodeBytes(b []byte) ([]byte, int) {
488	idx := findTerminator(b[1:])
489	return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2
490}
491
492func decodeString(b []byte) (string, int) {
493	bp, idx := decodeBytes(b)
494	return string(bp), idx
495}
496
497func decodeInt(b []byte) (interface{}, int) {
498	if b[0] == intZeroCode {
499		return int64(0), 1
500	}
501
502	var neg bool
503
504	n := int(b[0]) - intZeroCode
505	if n < 0 {
506		n = -n
507		neg = true
508	}
509
510	bp := make([]byte, 8)
511	copy(bp[8-n:], b[1:n+1])
512
513	var ret int64
514	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)
515
516	if neg {
517		return ret - int64(sizeLimits[n]), n + 1
518	}
519
520	if ret > 0 {
521		return ret, n + 1
522	}
523
524	// The encoded value claimed to be positive yet when put in an int64
525	// produced a negative value. This means that the number must be a positive
526	// 64-bit value that uses the most significant bit. This can be fit in a
527	// uint64, so return that. Note that this is the *only* time we return
528	// a uint64.
529	return uint64(ret), n + 1
530}
531
532func decodeBigInt(b []byte) (interface{}, int) {
533	val := new(big.Int)
534	offset := 1
535	var length int
536
537	if b[0] == negIntStart || b[0] == posIntEnd {
538		length = int(b[1])
539		if b[0] == negIntStart {
540			length ^= 0xff
541		}
542
543		offset += 1
544	} else {
545		// Must be a negative 8 byte integer
546		length = 8
547	}
548
549	val.SetBytes(b[offset : length+offset])
550
551	if b[0] < intZeroCode {
552		sub := new(big.Int).Lsh(big.NewInt(1), uint(length)*8)
553		sub.Sub(sub, big.NewInt(1))
554		val.Sub(val, sub)
555	}
556
557	// This is the only value that fits in an int64 or uint64 that is decoded with this function
558	if val.Cmp(minInt64BigInt) == 0 {
559		return val.Int64(), length + offset
560	}
561
562	return val, length + offset
563}
564
565func decodeFloat(b []byte) (float32, int) {
566	bp := make([]byte, 4)
567	copy(bp, b[1:])
568	adjustFloatBytes(bp, false)
569	var ret float32
570	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)
571	return ret, 5
572}
573
574func decodeDouble(b []byte) (float64, int) {
575	bp := make([]byte, 8)
576	copy(bp, b[1:])
577	adjustFloatBytes(bp, false)
578	var ret float64
579	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)
580	return ret, 9
581}
582
583func decodeUUID(b []byte) (UUID, int) {
584	var u UUID
585	copy(u[:], b[1:])
586	return u, 17
587}
588
589func decodeVersionstamp(b []byte) (Versionstamp, int) {
590	var transactionVersion [10]byte
591	var userVersion uint16
592
593	copy(transactionVersion[:], b[1:11])
594
595	userVersion = binary.BigEndian.Uint16(b[11:])
596
597	return Versionstamp{
598		TransactionVersion: transactionVersion,
599		UserVersion:        userVersion,
600	}, versionstampLength + 1
601}
602
603func decodeTuple(b []byte, nested bool) (Tuple, int, error) {
604	var t Tuple
605
606	var i int
607
608	for i < len(b) {
609		var el interface{}
610		var off int
611
612		switch {
613		case b[i] == nilCode:
614			if !nested {
615				el = nil
616				off = 1
617			} else if i+1 < len(b) && b[i+1] == 0xff {
618				el = nil
619				off = 2
620			} else {
621				return t, i + 1, nil
622			}
623		case b[i] == bytesCode:
624			el, off = decodeBytes(b[i:])
625		case b[i] == stringCode:
626			el, off = decodeString(b[i:])
627		case negIntStart+1 < b[i] && b[i] < posIntEnd:
628			el, off = decodeInt(b[i:])
629		case negIntStart+1 == b[i] && (b[i+1]&0x80 != 0):
630			el, off = decodeInt(b[i:])
631		case negIntStart <= b[i] && b[i] <= posIntEnd:
632			el, off = decodeBigInt(b[i:])
633		case b[i] == floatCode:
634			if i+5 > len(b) {
635				return nil, i, fmt.Errorf("insufficient bytes to decode float starting at position %d of byte array for tuple", i)
636			}
637			el, off = decodeFloat(b[i:])
638		case b[i] == doubleCode:
639			if i+9 > len(b) {
640				return nil, i, fmt.Errorf("insufficient bytes to decode double starting at position %d of byte array for tuple", i)
641			}
642			el, off = decodeDouble(b[i:])
643		case b[i] == trueCode:
644			el = true
645			off = 1
646		case b[i] == falseCode:
647			el = false
648			off = 1
649		case b[i] == uuidCode:
650			if i+17 > len(b) {
651				return nil, i, fmt.Errorf("insufficient bytes to decode UUID starting at position %d of byte array for tuple", i)
652			}
653			el, off = decodeUUID(b[i:])
654		case b[i] == versionstampCode:
655			if i+versionstampLength+1 > len(b) {
656				return nil, i, fmt.Errorf("insufficient bytes to decode Versionstamp starting at position %d of byte array for tuple", i)
657			}
658			el, off = decodeVersionstamp(b[i:])
659		case b[i] == nestedCode:
660			var err error
661			el, off, err = decodeTuple(b[i+1:], true)
662			if err != nil {
663				return nil, i, err
664			}
665			off++
666		default:
667			return nil, i, fmt.Errorf("unable to decode tuple element with unknown typecode %02x", b[i])
668		}
669
670		t = append(t, el)
671		i += off
672	}
673
674	return t, i, nil
675}
676
677// Unpack returns the tuple encoded by the provided byte slice, or an error if
678// the key does not correctly encode a FoundationDB tuple.
679func Unpack(b []byte) (Tuple, error) {
680	t, _, err := decodeTuple(b, false)
681	return t, err
682}
683
684// FDBKey returns the packed representation of a Tuple, and allows Tuple to
685// satisfy the fdb.KeyConvertible interface. FDBKey will panic in the same
686// circumstances as Pack.
687func (t Tuple) FDBKey() fdb.Key {
688	return t.Pack()
689}
690
691// FDBRangeKeys allows Tuple to satisfy the fdb.ExactRange interface. The range
692// represents all keys that encode tuples strictly starting with a Tuple (that
693// is, all tuples of greater length than the Tuple of which the Tuple is a
694// prefix).
695func (t Tuple) FDBRangeKeys() (fdb.KeyConvertible, fdb.KeyConvertible) {
696	p := t.Pack()
697	return fdb.Key(concat(p, 0x00)), fdb.Key(concat(p, 0xFF))
698}
699
700// FDBRangeKeySelectors allows Tuple to satisfy the fdb.Range interface. The
701// range represents all keys that encode tuples strictly starting with a Tuple
702// (that is, all tuples of greater length than the Tuple of which the Tuple is a
703// prefix).
704func (t Tuple) FDBRangeKeySelectors() (fdb.Selectable, fdb.Selectable) {
705	b, e := t.FDBRangeKeys()
706	return fdb.FirstGreaterOrEqual(b), fdb.FirstGreaterOrEqual(e)
707}
708
709func concat(a []byte, b ...byte) []byte {
710	r := make([]byte, len(a)+len(b))
711	copy(r, a)
712	copy(r[len(a):], b)
713	return r
714}
715