1/* 2 * tuple.go 3 * 4 * This source file is part of the FoundationDB open source project 5 * 6 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21// FoundationDB Go Tuple Layer 22 23// Package tuple provides a layer for encoding and decoding multi-element tuples 24// into keys usable by FoundationDB. The encoded key maintains the same sort 25// order as the original tuple: sorted first by the first element, then by the 26// second element, etc. This makes the tuple layer ideal for building a variety 27// of higher-level data models. 28// 29// For general guidance on tuple usage, see the Tuple section of Data Modeling 30// (https://apple.github.io/foundationdb/data-modeling.html#tuples). 31// 32// FoundationDB tuples can currently encode byte and unicode strings, integers, 33// large integers, floats, doubles, booleans, UUIDs, tuples, and NULL values. 34// In Go these are represented as []byte (or fdb.KeyConvertible), string, int64 35// (or int, uint, uint64), *big.Int (or big.Int), float32, float64, bool, 36// UUID, Tuple, and nil. 37package tuple 38 39import ( 40 "bytes" 41 "encoding/binary" 42 "errors" 43 "fmt" 44 "math" 45 "math/big" 46 47 "github.com/apple/foundationdb/bindings/go/src/fdb" 48) 49 50// A TupleElement is one of the types that may be encoded in FoundationDB 51// tuples. Although the Go compiler cannot enforce this, it is a programming 52// error to use an unsupported types as a TupleElement (and will typically 53// result in a runtime panic). 54// 55// The valid types for TupleElement are []byte (or fdb.KeyConvertible), string, 56// int64 (or int, uint, uint64), *big.Int (or big.Int), float, double, bool, 57// UUID, Tuple, and nil. 58type TupleElement interface{} 59 60// Tuple is a slice of objects that can be encoded as FoundationDB tuples. If 61// any of the TupleElements are of unsupported types, a runtime panic will occur 62// when the Tuple is packed. 63// 64// Given a Tuple T containing objects only of these types, then T will be 65// identical to the Tuple returned by unpacking the byte slice obtained by 66// packing T (modulo type normalization to []byte, uint64, and int64). 67type Tuple []TupleElement 68 69// UUID wraps a basic byte array as a UUID. We do not provide any special 70// methods for accessing or generating the UUID, but as Go does not provide 71// a built-in UUID type, this simple wrapper allows for other libraries 72// to write the output of their UUID type as a 16-byte array into 73// an instance of this type. 74type UUID [16]byte 75 76// Versionstamp is struct for a FoundationDB verionstamp. Versionstamps are 77// 12 bytes long composed of a 10 byte transaction version and a 2 byte user 78// version. The transaction version is filled in at commit time and the user 79// version is provided by the application to order results within a transaction. 80type Versionstamp struct { 81 TransactionVersion [10]byte 82 UserVersion uint16 83} 84 85var incompleteTransactionVersion = [10]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} 86 87const versionstampLength = 12 88 89// IncompleteVersionstamp is the constructor you should use to make 90// an incomplete versionstamp to use in a tuple. 91func IncompleteVersionstamp(userVersion uint16) Versionstamp { 92 return Versionstamp{ 93 TransactionVersion: incompleteTransactionVersion, 94 UserVersion: userVersion, 95 } 96} 97 98// Bytes converts a Versionstamp struct to a byte slice for encoding in a tuple. 99func (v Versionstamp) Bytes() []byte { 100 var scratch [versionstampLength]byte 101 102 copy(scratch[:], v.TransactionVersion[:]) 103 104 binary.BigEndian.PutUint16(scratch[10:], v.UserVersion) 105 106 return scratch[:] 107} 108 109// Type codes: These prefix the different elements in a packed Tuple 110// to indicate what type they are. 111const nilCode = 0x00 112const bytesCode = 0x01 113const stringCode = 0x02 114const nestedCode = 0x05 115const intZeroCode = 0x14 116const posIntEnd = 0x1d 117const negIntStart = 0x0b 118const floatCode = 0x20 119const doubleCode = 0x21 120const falseCode = 0x26 121const trueCode = 0x27 122const uuidCode = 0x30 123const versionstampCode = 0x33 124 125var sizeLimits = []uint64{ 126 1<<(0*8) - 1, 127 1<<(1*8) - 1, 128 1<<(2*8) - 1, 129 1<<(3*8) - 1, 130 1<<(4*8) - 1, 131 1<<(5*8) - 1, 132 1<<(6*8) - 1, 133 1<<(7*8) - 1, 134 1<<(8*8) - 1, 135} 136 137var minInt64BigInt = big.NewInt(math.MinInt64) 138 139func bisectLeft(u uint64) int { 140 var n int 141 for sizeLimits[n] < u { 142 n++ 143 } 144 return n 145} 146 147func adjustFloatBytes(b []byte, encode bool) { 148 if (encode && b[0]&0x80 != 0x00) || (!encode && b[0]&0x80 == 0x00) { 149 // Negative numbers: flip all of the bytes. 150 for i := 0; i < len(b); i++ { 151 b[i] = b[i] ^ 0xff 152 } 153 } else { 154 // Positive number: flip just the sign bit. 155 b[0] = b[0] ^ 0x80 156 } 157} 158 159type packer struct { 160 versionstampPos int32 161 buf []byte 162} 163 164func newPacker() *packer { 165 return &packer{ 166 versionstampPos: -1, 167 buf: make([]byte, 0, 64), 168 } 169} 170 171func (p *packer) putByte(b byte) { 172 p.buf = append(p.buf, b) 173} 174 175func (p *packer) putBytes(b []byte) { 176 p.buf = append(p.buf, b...) 177} 178 179func (p *packer) putBytesNil(b []byte, i int) { 180 for i >= 0 { 181 p.putBytes(b[:i+1]) 182 p.putByte(0xFF) 183 b = b[i+1:] 184 i = bytes.IndexByte(b, 0x00) 185 } 186 p.putBytes(b) 187} 188 189func (p *packer) encodeBytes(code byte, b []byte) { 190 p.putByte(code) 191 if i := bytes.IndexByte(b, 0x00); i >= 0 { 192 p.putBytesNil(b, i) 193 } else { 194 p.putBytes(b) 195 } 196 p.putByte(0x00) 197} 198 199func (p *packer) encodeUint(i uint64) { 200 if i == 0 { 201 p.putByte(intZeroCode) 202 return 203 } 204 205 n := bisectLeft(i) 206 var scratch [8]byte 207 208 p.putByte(byte(intZeroCode + n)) 209 binary.BigEndian.PutUint64(scratch[:], i) 210 211 p.putBytes(scratch[8-n:]) 212} 213 214func (p *packer) encodeInt(i int64) { 215 if i >= 0 { 216 p.encodeUint(uint64(i)) 217 return 218 } 219 220 n := bisectLeft(uint64(-i)) 221 var scratch [8]byte 222 223 p.putByte(byte(intZeroCode - n)) 224 offsetEncoded := int64(sizeLimits[n]) + i 225 binary.BigEndian.PutUint64(scratch[:], uint64(offsetEncoded)) 226 227 p.putBytes(scratch[8-n:]) 228} 229 230func (p *packer) encodeBigInt(i *big.Int) { 231 length := len(i.Bytes()) 232 if length > 0xff { 233 panic(fmt.Sprintf("Integer magnitude is too large (more than 255 bytes)")) 234 } 235 236 if i.Sign() >= 0 { 237 intBytes := i.Bytes() 238 if length > 8 { 239 p.putByte(byte(posIntEnd)) 240 p.putByte(byte(len(intBytes))) 241 } else { 242 p.putByte(byte(intZeroCode + length)) 243 } 244 245 p.putBytes(intBytes) 246 } else { 247 add := new(big.Int).Lsh(big.NewInt(1), uint(length*8)) 248 add.Sub(add, big.NewInt(1)) 249 transformed := new(big.Int) 250 transformed.Add(i, add) 251 252 intBytes := transformed.Bytes() 253 if length > 8 { 254 p.putByte(byte(negIntStart)) 255 p.putByte(byte(length ^ 0xff)) 256 } else { 257 p.putByte(byte(intZeroCode - length)) 258 } 259 260 // For large negative numbers whose absolute value begins with 0xff bytes, 261 // the transformed bytes may begin with 0x00 bytes. However, intBytes 262 // will only contain the non-zero suffix, so this loop is needed to make 263 // the value written be the correct length. 264 for i := len(intBytes); i < length; i++ { 265 p.putByte(0x00) 266 } 267 268 p.putBytes(intBytes) 269 } 270} 271 272func (p *packer) encodeFloat(f float32) { 273 var scratch [4]byte 274 binary.BigEndian.PutUint32(scratch[:], math.Float32bits(f)) 275 adjustFloatBytes(scratch[:], true) 276 277 p.putByte(floatCode) 278 p.putBytes(scratch[:]) 279} 280 281func (p *packer) encodeDouble(d float64) { 282 var scratch [8]byte 283 binary.BigEndian.PutUint64(scratch[:], math.Float64bits(d)) 284 adjustFloatBytes(scratch[:], true) 285 286 p.putByte(doubleCode) 287 p.putBytes(scratch[:]) 288} 289 290func (p *packer) encodeUUID(u UUID) { 291 p.putByte(uuidCode) 292 p.putBytes(u[:]) 293} 294 295func (p *packer) encodeVersionstamp(v Versionstamp) { 296 p.putByte(versionstampCode) 297 298 isIncomplete := v.TransactionVersion == incompleteTransactionVersion 299 if isIncomplete { 300 if p.versionstampPos != -1 { 301 panic(fmt.Sprintf("Tuple can only contain one incomplete versionstamp")) 302 } 303 304 p.versionstampPos = int32(len(p.buf)) 305 } 306 307 p.putBytes(v.Bytes()) 308} 309 310func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) { 311 if nested { 312 p.putByte(nestedCode) 313 } 314 315 for i, e := range t { 316 switch e := e.(type) { 317 case Tuple: 318 p.encodeTuple(e, true, versionstamps) 319 case nil: 320 p.putByte(nilCode) 321 if nested { 322 p.putByte(0xff) 323 } 324 case int: 325 p.encodeInt(int64(e)) 326 case int64: 327 p.encodeInt(e) 328 case uint: 329 p.encodeUint(uint64(e)) 330 case uint64: 331 p.encodeUint(e) 332 case *big.Int: 333 p.encodeBigInt(e) 334 case big.Int: 335 p.encodeBigInt(&e) 336 case []byte: 337 p.encodeBytes(bytesCode, e) 338 case fdb.KeyConvertible: 339 p.encodeBytes(bytesCode, []byte(e.FDBKey())) 340 case string: 341 p.encodeBytes(stringCode, []byte(e)) 342 case float32: 343 p.encodeFloat(e) 344 case float64: 345 p.encodeDouble(e) 346 case bool: 347 if e { 348 p.putByte(trueCode) 349 } else { 350 p.putByte(falseCode) 351 } 352 case UUID: 353 p.encodeUUID(e) 354 case Versionstamp: 355 if versionstamps == false && e.TransactionVersion == incompleteTransactionVersion { 356 panic(fmt.Sprintf("Incomplete Versionstamp included in vanilla tuple pack")) 357 } 358 359 p.encodeVersionstamp(e) 360 default: 361 panic(fmt.Sprintf("unencodable element at index %d (%v, type %T)", i, t[i], t[i])) 362 } 363 } 364 365 if nested { 366 p.putByte(0x00) 367 } 368} 369 370// Pack returns a new byte slice encoding the provided tuple. Pack will panic if 371// the tuple contains an element of any type other than []byte, 372// fdb.KeyConvertible, string, int64, int, uint64, uint, *big.Int, big.Int, float32, 373// float64, bool, tuple.UUID, tuple.Versionstamp, nil, or a Tuple with elements of 374// valid types. It will also panic if an integer is specified with a value outside 375// the range [-2**2040+1, 2**2040-1] 376// 377// Tuple satisfies the fdb.KeyConvertible interface, so it is not necessary to 378// call Pack when using a Tuple with a FoundationDB API function that requires a 379// key. 380// 381// This method will panic if it contains an incomplete Versionstamp. Use 382// PackWithVersionstamp instead. 383// 384func (t Tuple) Pack() []byte { 385 p := newPacker() 386 p.encodeTuple(t, false, false) 387 return p.buf 388} 389 390// PackWithVersionstamp packs the specified tuple into a key for versionstamp 391// operations. See Pack for more information. This function will return an error 392// if you attempt to pack a tuple with more than one versionstamp. This function will 393// return an error if you attempt to pack a tuple with a versionstamp position larger 394// than an uint16 if the API version is less than 520. 395func (t Tuple) PackWithVersionstamp(prefix []byte) ([]byte, error) { 396 hasVersionstamp, err := t.HasIncompleteVersionstamp() 397 if err != nil { 398 return nil, err 399 } 400 401 apiVersion, err := fdb.GetAPIVersion() 402 if err != nil { 403 return nil, err 404 } 405 406 if hasVersionstamp == false { 407 return nil, errors.New("No incomplete versionstamp included in tuple pack with versionstamp") 408 } 409 410 p := newPacker() 411 412 if prefix != nil { 413 p.putBytes(prefix) 414 } 415 416 p.encodeTuple(t, false, true) 417 418 if hasVersionstamp { 419 var scratch [4]byte 420 var offsetIndex int 421 if apiVersion < 520 { 422 if p.versionstampPos > math.MaxUint16 { 423 return nil, errors.New("Versionstamp position too large") 424 } 425 426 offsetIndex = 2 427 binary.LittleEndian.PutUint16(scratch[:], uint16(p.versionstampPos)) 428 } else { 429 offsetIndex = 4 430 binary.LittleEndian.PutUint32(scratch[:], uint32(p.versionstampPos)) 431 } 432 433 p.putBytes(scratch[0:offsetIndex]) 434 } 435 436 return p.buf, nil 437} 438 439// HasIncompleteVersionstamp determines if there is at least one incomplete 440// versionstamp in a tuple. This function will return an error this tuple has 441// more than one versionstamp. 442func (t Tuple) HasIncompleteVersionstamp() (bool, error) { 443 incompleteCount := t.countIncompleteVersionstamps() 444 445 var err error 446 if incompleteCount > 1 { 447 err = errors.New("Tuple can only contain one incomplete versionstamp") 448 } 449 450 return incompleteCount >= 1, err 451} 452 453func (t Tuple) countIncompleteVersionstamps() int { 454 incompleteCount := 0 455 456 for _, el := range t { 457 switch e := el.(type) { 458 case Versionstamp: 459 if e.TransactionVersion == incompleteTransactionVersion { 460 incompleteCount++ 461 } 462 case Tuple: 463 incompleteCount += e.countIncompleteVersionstamps() 464 } 465 } 466 467 return incompleteCount 468} 469 470func findTerminator(b []byte) int { 471 bp := b 472 var length int 473 474 for { 475 idx := bytes.IndexByte(bp, 0x00) 476 length += idx 477 if idx+1 == len(bp) || bp[idx+1] != 0xFF { 478 break 479 } 480 length += 2 481 bp = bp[idx+2:] 482 } 483 484 return length 485} 486 487func decodeBytes(b []byte) ([]byte, int) { 488 idx := findTerminator(b[1:]) 489 return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2 490} 491 492func decodeString(b []byte) (string, int) { 493 bp, idx := decodeBytes(b) 494 return string(bp), idx 495} 496 497func decodeInt(b []byte) (interface{}, int) { 498 if b[0] == intZeroCode { 499 return int64(0), 1 500 } 501 502 var neg bool 503 504 n := int(b[0]) - intZeroCode 505 if n < 0 { 506 n = -n 507 neg = true 508 } 509 510 bp := make([]byte, 8) 511 copy(bp[8-n:], b[1:n+1]) 512 513 var ret int64 514 binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) 515 516 if neg { 517 return ret - int64(sizeLimits[n]), n + 1 518 } 519 520 if ret > 0 { 521 return ret, n + 1 522 } 523 524 // The encoded value claimed to be positive yet when put in an int64 525 // produced a negative value. This means that the number must be a positive 526 // 64-bit value that uses the most significant bit. This can be fit in a 527 // uint64, so return that. Note that this is the *only* time we return 528 // a uint64. 529 return uint64(ret), n + 1 530} 531 532func decodeBigInt(b []byte) (interface{}, int) { 533 val := new(big.Int) 534 offset := 1 535 var length int 536 537 if b[0] == negIntStart || b[0] == posIntEnd { 538 length = int(b[1]) 539 if b[0] == negIntStart { 540 length ^= 0xff 541 } 542 543 offset += 1 544 } else { 545 // Must be a negative 8 byte integer 546 length = 8 547 } 548 549 val.SetBytes(b[offset : length+offset]) 550 551 if b[0] < intZeroCode { 552 sub := new(big.Int).Lsh(big.NewInt(1), uint(length)*8) 553 sub.Sub(sub, big.NewInt(1)) 554 val.Sub(val, sub) 555 } 556 557 // This is the only value that fits in an int64 or uint64 that is decoded with this function 558 if val.Cmp(minInt64BigInt) == 0 { 559 return val.Int64(), length + offset 560 } 561 562 return val, length + offset 563} 564 565func decodeFloat(b []byte) (float32, int) { 566 bp := make([]byte, 4) 567 copy(bp, b[1:]) 568 adjustFloatBytes(bp, false) 569 var ret float32 570 binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) 571 return ret, 5 572} 573 574func decodeDouble(b []byte) (float64, int) { 575 bp := make([]byte, 8) 576 copy(bp, b[1:]) 577 adjustFloatBytes(bp, false) 578 var ret float64 579 binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) 580 return ret, 9 581} 582 583func decodeUUID(b []byte) (UUID, int) { 584 var u UUID 585 copy(u[:], b[1:]) 586 return u, 17 587} 588 589func decodeVersionstamp(b []byte) (Versionstamp, int) { 590 var transactionVersion [10]byte 591 var userVersion uint16 592 593 copy(transactionVersion[:], b[1:11]) 594 595 userVersion = binary.BigEndian.Uint16(b[11:]) 596 597 return Versionstamp{ 598 TransactionVersion: transactionVersion, 599 UserVersion: userVersion, 600 }, versionstampLength + 1 601} 602 603func decodeTuple(b []byte, nested bool) (Tuple, int, error) { 604 var t Tuple 605 606 var i int 607 608 for i < len(b) { 609 var el interface{} 610 var off int 611 612 switch { 613 case b[i] == nilCode: 614 if !nested { 615 el = nil 616 off = 1 617 } else if i+1 < len(b) && b[i+1] == 0xff { 618 el = nil 619 off = 2 620 } else { 621 return t, i + 1, nil 622 } 623 case b[i] == bytesCode: 624 el, off = decodeBytes(b[i:]) 625 case b[i] == stringCode: 626 el, off = decodeString(b[i:]) 627 case negIntStart+1 < b[i] && b[i] < posIntEnd: 628 el, off = decodeInt(b[i:]) 629 case negIntStart+1 == b[i] && (b[i+1]&0x80 != 0): 630 el, off = decodeInt(b[i:]) 631 case negIntStart <= b[i] && b[i] <= posIntEnd: 632 el, off = decodeBigInt(b[i:]) 633 case b[i] == floatCode: 634 if i+5 > len(b) { 635 return nil, i, fmt.Errorf("insufficient bytes to decode float starting at position %d of byte array for tuple", i) 636 } 637 el, off = decodeFloat(b[i:]) 638 case b[i] == doubleCode: 639 if i+9 > len(b) { 640 return nil, i, fmt.Errorf("insufficient bytes to decode double starting at position %d of byte array for tuple", i) 641 } 642 el, off = decodeDouble(b[i:]) 643 case b[i] == trueCode: 644 el = true 645 off = 1 646 case b[i] == falseCode: 647 el = false 648 off = 1 649 case b[i] == uuidCode: 650 if i+17 > len(b) { 651 return nil, i, fmt.Errorf("insufficient bytes to decode UUID starting at position %d of byte array for tuple", i) 652 } 653 el, off = decodeUUID(b[i:]) 654 case b[i] == versionstampCode: 655 if i+versionstampLength+1 > len(b) { 656 return nil, i, fmt.Errorf("insufficient bytes to decode Versionstamp starting at position %d of byte array for tuple", i) 657 } 658 el, off = decodeVersionstamp(b[i:]) 659 case b[i] == nestedCode: 660 var err error 661 el, off, err = decodeTuple(b[i+1:], true) 662 if err != nil { 663 return nil, i, err 664 } 665 off++ 666 default: 667 return nil, i, fmt.Errorf("unable to decode tuple element with unknown typecode %02x", b[i]) 668 } 669 670 t = append(t, el) 671 i += off 672 } 673 674 return t, i, nil 675} 676 677// Unpack returns the tuple encoded by the provided byte slice, or an error if 678// the key does not correctly encode a FoundationDB tuple. 679func Unpack(b []byte) (Tuple, error) { 680 t, _, err := decodeTuple(b, false) 681 return t, err 682} 683 684// FDBKey returns the packed representation of a Tuple, and allows Tuple to 685// satisfy the fdb.KeyConvertible interface. FDBKey will panic in the same 686// circumstances as Pack. 687func (t Tuple) FDBKey() fdb.Key { 688 return t.Pack() 689} 690 691// FDBRangeKeys allows Tuple to satisfy the fdb.ExactRange interface. The range 692// represents all keys that encode tuples strictly starting with a Tuple (that 693// is, all tuples of greater length than the Tuple of which the Tuple is a 694// prefix). 695func (t Tuple) FDBRangeKeys() (fdb.KeyConvertible, fdb.KeyConvertible) { 696 p := t.Pack() 697 return fdb.Key(concat(p, 0x00)), fdb.Key(concat(p, 0xFF)) 698} 699 700// FDBRangeKeySelectors allows Tuple to satisfy the fdb.Range interface. The 701// range represents all keys that encode tuples strictly starting with a Tuple 702// (that is, all tuples of greater length than the Tuple of which the Tuple is a 703// prefix). 704func (t Tuple) FDBRangeKeySelectors() (fdb.Selectable, fdb.Selectable) { 705 b, e := t.FDBRangeKeys() 706 return fdb.FirstGreaterOrEqual(b), fdb.FirstGreaterOrEqual(e) 707} 708 709func concat(a []byte, b ...byte) []byte { 710 r := make([]byte, len(a)+len(b)) 711 copy(r, a) 712 copy(r[len(a):], b) 713 return r 714} 715