1// Copyright (c) 2014 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package upsidedown 16 17import ( 18 "bytes" 19 "encoding/binary" 20 "fmt" 21 "io" 22 "math" 23 "reflect" 24 25 "github.com/blevesearch/bleve/size" 26 "github.com/golang/protobuf/proto" 27) 28 29var reflectStaticSizeTermFrequencyRow int 30var reflectStaticSizeTermVector int 31 32func init() { 33 var tfr TermFrequencyRow 34 reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size()) 35 var tv TermVector 36 reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size()) 37} 38 39const ByteSeparator byte = 0xff 40 41type UpsideDownCouchRowStream chan UpsideDownCouchRow 42 43type UpsideDownCouchRow interface { 44 KeySize() int 45 KeyTo([]byte) (int, error) 46 Key() []byte 47 Value() []byte 48 ValueSize() int 49 ValueTo([]byte) (int, error) 50} 51 52func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) { 53 if len(key) > 0 { 54 switch key[0] { 55 case 'v': 56 return NewVersionRowKV(key, value) 57 case 'f': 58 return NewFieldRowKV(key, value) 59 case 'd': 60 return NewDictionaryRowKV(key, value) 61 case 't': 62 return NewTermFrequencyRowKV(key, value) 63 case 'b': 64 return NewBackIndexRowKV(key, value) 65 case 's': 66 return NewStoredRowKV(key, value) 67 case 'i': 68 return NewInternalRowKV(key, value) 69 } 70 return nil, fmt.Errorf("Unknown field type '%s'", string(key[0])) 71 } 72 return nil, fmt.Errorf("Invalid empty key") 73} 74 75// VERSION 76 77type VersionRow struct { 78 version uint8 79} 80 81func (v *VersionRow) Key() []byte { 82 return []byte{'v'} 83} 84 85func (v *VersionRow) KeySize() int { 86 return 1 87} 88 89func (v *VersionRow) KeyTo(buf []byte) (int, error) { 90 buf[0] = 'v' 91 return 1, nil 92} 93 94func (v *VersionRow) Value() []byte { 95 return []byte{byte(v.version)} 96} 97 98func (v *VersionRow) ValueSize() int { 99 return 1 100} 101 102func (v *VersionRow) ValueTo(buf []byte) (int, error) { 103 buf[0] = v.version 104 return 1, nil 105} 106 107func (v *VersionRow) String() string { 108 return fmt.Sprintf("Version: %d", v.version) 109} 110 111func NewVersionRow(version uint8) *VersionRow { 112 return &VersionRow{ 113 version: version, 114 } 115} 116 117func NewVersionRowKV(key, value []byte) (*VersionRow, error) { 118 rv := VersionRow{} 119 buf := bytes.NewBuffer(value) 120 err := binary.Read(buf, binary.LittleEndian, &rv.version) 121 if err != nil { 122 return nil, err 123 } 124 return &rv, nil 125} 126 127// INTERNAL STORAGE 128 129type InternalRow struct { 130 key []byte 131 val []byte 132} 133 134func (i *InternalRow) Key() []byte { 135 buf := make([]byte, i.KeySize()) 136 size, _ := i.KeyTo(buf) 137 return buf[:size] 138} 139 140func (i *InternalRow) KeySize() int { 141 return len(i.key) + 1 142} 143 144func (i *InternalRow) KeyTo(buf []byte) (int, error) { 145 buf[0] = 'i' 146 actual := copy(buf[1:], i.key) 147 return 1 + actual, nil 148} 149 150func (i *InternalRow) Value() []byte { 151 return i.val 152} 153 154func (i *InternalRow) ValueSize() int { 155 return len(i.val) 156} 157 158func (i *InternalRow) ValueTo(buf []byte) (int, error) { 159 actual := copy(buf, i.val) 160 return actual, nil 161} 162 163func (i *InternalRow) String() string { 164 return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val) 165} 166 167func NewInternalRow(key, val []byte) *InternalRow { 168 return &InternalRow{ 169 key: key, 170 val: val, 171 } 172} 173 174func NewInternalRowKV(key, value []byte) (*InternalRow, error) { 175 rv := InternalRow{} 176 rv.key = key[1:] 177 rv.val = value 178 return &rv, nil 179} 180 181// FIELD definition 182 183type FieldRow struct { 184 index uint16 185 name string 186} 187 188func (f *FieldRow) Key() []byte { 189 buf := make([]byte, f.KeySize()) 190 size, _ := f.KeyTo(buf) 191 return buf[:size] 192} 193 194func (f *FieldRow) KeySize() int { 195 return 3 196} 197 198func (f *FieldRow) KeyTo(buf []byte) (int, error) { 199 buf[0] = 'f' 200 binary.LittleEndian.PutUint16(buf[1:3], f.index) 201 return 3, nil 202} 203 204func (f *FieldRow) Value() []byte { 205 return append([]byte(f.name), ByteSeparator) 206} 207 208func (f *FieldRow) ValueSize() int { 209 return len(f.name) + 1 210} 211 212func (f *FieldRow) ValueTo(buf []byte) (int, error) { 213 size := copy(buf, f.name) 214 buf[size] = ByteSeparator 215 return size + 1, nil 216} 217 218func (f *FieldRow) String() string { 219 return fmt.Sprintf("Field: %d Name: %s", f.index, f.name) 220} 221 222func NewFieldRow(index uint16, name string) *FieldRow { 223 return &FieldRow{ 224 index: index, 225 name: name, 226 } 227} 228 229func NewFieldRowKV(key, value []byte) (*FieldRow, error) { 230 rv := FieldRow{} 231 232 buf := bytes.NewBuffer(key) 233 _, err := buf.ReadByte() // type 234 if err != nil { 235 return nil, err 236 } 237 err = binary.Read(buf, binary.LittleEndian, &rv.index) 238 if err != nil { 239 return nil, err 240 } 241 242 buf = bytes.NewBuffer(value) 243 rv.name, err = buf.ReadString(ByteSeparator) 244 if err != nil { 245 return nil, err 246 } 247 rv.name = rv.name[:len(rv.name)-1] // trim off separator byte 248 249 return &rv, nil 250} 251 252// DICTIONARY 253 254const DictionaryRowMaxValueSize = binary.MaxVarintLen64 255 256type DictionaryRow struct { 257 term []byte 258 count uint64 259 field uint16 260} 261 262func (dr *DictionaryRow) Key() []byte { 263 buf := make([]byte, dr.KeySize()) 264 size, _ := dr.KeyTo(buf) 265 return buf[:size] 266} 267 268func (dr *DictionaryRow) KeySize() int { 269 return dictionaryRowKeySize(dr.term) 270} 271 272func dictionaryRowKeySize(term []byte) int { 273 return len(term) + 3 274} 275 276func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) { 277 return dictionaryRowKeyTo(buf, dr.field, dr.term), nil 278} 279 280func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int { 281 buf[0] = 'd' 282 binary.LittleEndian.PutUint16(buf[1:3], field) 283 size := copy(buf[3:], term) 284 return size + 3 285} 286 287func (dr *DictionaryRow) Value() []byte { 288 buf := make([]byte, dr.ValueSize()) 289 size, _ := dr.ValueTo(buf) 290 return buf[:size] 291} 292 293func (dr *DictionaryRow) ValueSize() int { 294 return DictionaryRowMaxValueSize 295} 296 297func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) { 298 used := binary.PutUvarint(buf, dr.count) 299 return used, nil 300} 301 302func (dr *DictionaryRow) String() string { 303 return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count) 304} 305 306func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow { 307 return &DictionaryRow{ 308 term: term, 309 field: field, 310 count: count, 311 } 312} 313 314func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) { 315 rv, err := NewDictionaryRowK(key) 316 if err != nil { 317 return nil, err 318 } 319 320 err = rv.parseDictionaryV(value) 321 if err != nil { 322 return nil, err 323 } 324 return rv, nil 325 326} 327 328func NewDictionaryRowK(key []byte) (*DictionaryRow, error) { 329 rv := &DictionaryRow{} 330 err := rv.parseDictionaryK(key) 331 if err != nil { 332 return nil, err 333 } 334 return rv, nil 335} 336 337func (dr *DictionaryRow) parseDictionaryK(key []byte) error { 338 dr.field = binary.LittleEndian.Uint16(key[1:3]) 339 if dr.term != nil { 340 dr.term = dr.term[:0] 341 } 342 dr.term = append(dr.term, key[3:]...) 343 return nil 344} 345 346func (dr *DictionaryRow) parseDictionaryV(value []byte) error { 347 count, err := dictionaryRowParseV(value) 348 if err != nil { 349 return err 350 } 351 dr.count = count 352 return nil 353} 354 355func dictionaryRowParseV(value []byte) (uint64, error) { 356 count, nread := binary.Uvarint(value) 357 if nread <= 0 { 358 return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) 359 } 360 return count, nil 361} 362 363// TERM FIELD FREQUENCY 364 365type TermVector struct { 366 field uint16 367 arrayPositions []uint64 368 pos uint64 369 start uint64 370 end uint64 371} 372 373func (tv *TermVector) Size() int { 374 return reflectStaticSizeTermVector + size.SizeOfPtr + 375 len(tv.arrayPositions)*size.SizeOfUint64 376} 377 378func (tv *TermVector) String() string { 379 return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions) 380} 381 382type TermFrequencyRow struct { 383 term []byte 384 doc []byte 385 freq uint64 386 vectors []*TermVector 387 norm float32 388 field uint16 389} 390 391func (tfr *TermFrequencyRow) Size() int { 392 sizeInBytes := reflectStaticSizeTermFrequencyRow + 393 len(tfr.term) + 394 len(tfr.doc) 395 396 for _, entry := range tfr.vectors { 397 sizeInBytes += entry.Size() 398 } 399 400 return sizeInBytes 401} 402 403func (tfr *TermFrequencyRow) Term() []byte { 404 return tfr.term 405} 406 407func (tfr *TermFrequencyRow) Freq() uint64 { 408 return tfr.freq 409} 410 411func (tfr *TermFrequencyRow) ScanPrefixForField() []byte { 412 buf := make([]byte, 3) 413 buf[0] = 't' 414 binary.LittleEndian.PutUint16(buf[1:3], tfr.field) 415 return buf 416} 417 418func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte { 419 buf := make([]byte, 3+len(tfr.term)) 420 buf[0] = 't' 421 binary.LittleEndian.PutUint16(buf[1:3], tfr.field) 422 copy(buf[3:], tfr.term) 423 return buf 424} 425 426func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte { 427 buf := make([]byte, 3+len(tfr.term)+1) 428 buf[0] = 't' 429 binary.LittleEndian.PutUint16(buf[1:3], tfr.field) 430 termLen := copy(buf[3:], tfr.term) 431 buf[3+termLen] = ByteSeparator 432 return buf 433} 434 435func (tfr *TermFrequencyRow) Key() []byte { 436 buf := make([]byte, tfr.KeySize()) 437 size, _ := tfr.KeyTo(buf) 438 return buf[:size] 439} 440 441func (tfr *TermFrequencyRow) KeySize() int { 442 return termFrequencyRowKeySize(tfr.term, tfr.doc) 443} 444 445func termFrequencyRowKeySize(term, doc []byte) int { 446 return 3 + len(term) + 1 + len(doc) 447} 448 449func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) { 450 return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil 451} 452 453func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int { 454 buf[0] = 't' 455 binary.LittleEndian.PutUint16(buf[1:3], field) 456 termLen := copy(buf[3:], term) 457 buf[3+termLen] = ByteSeparator 458 docLen := copy(buf[3+termLen+1:], doc) 459 return 3 + termLen + 1 + docLen 460} 461 462func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) { 463 keySize := tfr.KeySize() 464 if cap(buf) < keySize { 465 buf = make([]byte, keySize) 466 } 467 actualSize, err := tfr.KeyTo(buf[0:keySize]) 468 return buf[0:actualSize], err 469} 470 471func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { 472 dr := NewDictionaryRow(tfr.term, tfr.field, 0) 473 return dr.Key() 474} 475 476func (tfr *TermFrequencyRow) DictionaryRowKeySize() int { 477 dr := NewDictionaryRow(tfr.term, tfr.field, 0) 478 return dr.KeySize() 479} 480 481func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) { 482 dr := NewDictionaryRow(tfr.term, tfr.field, 0) 483 return dr.KeyTo(buf) 484} 485 486func (tfr *TermFrequencyRow) Value() []byte { 487 buf := make([]byte, tfr.ValueSize()) 488 size, _ := tfr.ValueTo(buf) 489 return buf[:size] 490} 491 492func (tfr *TermFrequencyRow) ValueSize() int { 493 bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64 494 for _, vector := range tfr.vectors { 495 bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64 496 } 497 return bufLen 498} 499 500func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) { 501 used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq) 502 503 normuint32 := math.Float32bits(tfr.norm) 504 newbuf := buf[used : used+binary.MaxVarintLen64] 505 used += binary.PutUvarint(newbuf, uint64(normuint32)) 506 507 for _, vector := range tfr.vectors { 508 used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field)) 509 used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos) 510 used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start) 511 used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end) 512 used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions))) 513 for _, arrayPosition := range vector.arrayPositions { 514 used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition) 515 } 516 } 517 return used, nil 518} 519 520func (tfr *TermFrequencyRow) String() string { 521 return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors) 522} 523 524func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { 525 tfr.term = term 526 tfr.field = field 527 tfr.doc = docID 528 tfr.freq = freq 529 tfr.norm = norm 530 return tfr 531} 532 533func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { 534 return &TermFrequencyRow{ 535 term: term, 536 field: field, 537 doc: docID, 538 freq: freq, 539 norm: norm, 540 } 541} 542 543func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow { 544 return &TermFrequencyRow{ 545 term: term, 546 field: field, 547 doc: docID, 548 freq: freq, 549 norm: norm, 550 vectors: vectors, 551 } 552} 553 554func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) { 555 rv := &TermFrequencyRow{} 556 err := rv.parseK(key) 557 if err != nil { 558 return nil, err 559 } 560 return rv, nil 561} 562 563func (tfr *TermFrequencyRow) parseK(key []byte) error { 564 keyLen := len(key) 565 if keyLen < 3 { 566 return fmt.Errorf("invalid term frequency key, no valid field") 567 } 568 tfr.field = binary.LittleEndian.Uint16(key[1:3]) 569 570 termEndPos := bytes.IndexByte(key[3:], ByteSeparator) 571 if termEndPos < 0 { 572 return fmt.Errorf("invalid term frequency key, no byte separator terminating term") 573 } 574 tfr.term = key[3 : 3+termEndPos] 575 576 docLen := keyLen - (3 + termEndPos + 1) 577 if docLen < 1 { 578 return fmt.Errorf("invalid term frequency key, empty docid") 579 } 580 tfr.doc = key[3+termEndPos+1:] 581 582 return nil 583} 584 585func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error { 586 tfr.doc = key[3+len(term)+1:] 587 if len(tfr.doc) == 0 { 588 return fmt.Errorf("invalid term frequency key, empty docid") 589 } 590 591 return nil 592} 593 594func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error { 595 var bytesRead int 596 tfr.freq, bytesRead = binary.Uvarint(value) 597 if bytesRead <= 0 { 598 return fmt.Errorf("invalid term frequency value, invalid frequency") 599 } 600 currOffset := bytesRead 601 602 var norm uint64 603 norm, bytesRead = binary.Uvarint(value[currOffset:]) 604 if bytesRead <= 0 { 605 return fmt.Errorf("invalid term frequency value, no norm") 606 } 607 currOffset += bytesRead 608 609 tfr.norm = math.Float32frombits(uint32(norm)) 610 611 tfr.vectors = nil 612 if !includeTermVectors { 613 return nil 614 } 615 616 var field uint64 617 field, bytesRead = binary.Uvarint(value[currOffset:]) 618 for bytesRead > 0 { 619 currOffset += bytesRead 620 tv := TermVector{} 621 tv.field = uint16(field) 622 // at this point we expect at least one term vector 623 if tfr.vectors == nil { 624 tfr.vectors = make([]*TermVector, 0) 625 } 626 627 tv.pos, bytesRead = binary.Uvarint(value[currOffset:]) 628 if bytesRead <= 0 { 629 return fmt.Errorf("invalid term frequency value, vector contains no position") 630 } 631 currOffset += bytesRead 632 633 tv.start, bytesRead = binary.Uvarint(value[currOffset:]) 634 if bytesRead <= 0 { 635 return fmt.Errorf("invalid term frequency value, vector contains no start") 636 } 637 currOffset += bytesRead 638 639 tv.end, bytesRead = binary.Uvarint(value[currOffset:]) 640 if bytesRead <= 0 { 641 return fmt.Errorf("invalid term frequency value, vector contains no end") 642 } 643 currOffset += bytesRead 644 645 var arrayPositionsLen uint64 = 0 646 arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:]) 647 if bytesRead <= 0 { 648 return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen") 649 } 650 currOffset += bytesRead 651 652 if arrayPositionsLen > 0 { 653 tv.arrayPositions = make([]uint64, arrayPositionsLen) 654 for i := 0; uint64(i) < arrayPositionsLen; i++ { 655 tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:]) 656 if bytesRead <= 0 { 657 return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i) 658 } 659 currOffset += bytesRead 660 } 661 } 662 663 tfr.vectors = append(tfr.vectors, &tv) 664 // try to read next record (may not exist) 665 field, bytesRead = binary.Uvarint(value[currOffset:]) 666 } 667 if len(value[currOffset:]) > 0 && bytesRead <= 0 { 668 return fmt.Errorf("invalid term frequency value, vector field invalid") 669 } 670 671 return nil 672} 673 674func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { 675 rv, err := NewTermFrequencyRowK(key) 676 if err != nil { 677 return nil, err 678 } 679 680 err = rv.parseV(value, true) 681 if err != nil { 682 return nil, err 683 } 684 return rv, nil 685 686} 687 688type BackIndexRow struct { 689 doc []byte 690 termsEntries []*BackIndexTermsEntry 691 storedEntries []*BackIndexStoreEntry 692} 693 694func (br *BackIndexRow) AllTermKeys() [][]byte { 695 if br == nil { 696 return nil 697 } 698 rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely 699 for _, termsEntry := range br.termsEntries { 700 for i := range termsEntry.Terms { 701 termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0) 702 rv = append(rv, termRow.Key()) 703 } 704 } 705 return rv 706} 707 708func (br *BackIndexRow) AllStoredKeys() [][]byte { 709 if br == nil { 710 return nil 711 } 712 rv := make([][]byte, len(br.storedEntries)) 713 for i, storedEntry := range br.storedEntries { 714 storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{}) 715 rv[i] = storedRow.Key() 716 } 717 return rv 718} 719 720func (br *BackIndexRow) Key() []byte { 721 buf := make([]byte, br.KeySize()) 722 size, _ := br.KeyTo(buf) 723 return buf[:size] 724} 725 726func (br *BackIndexRow) KeySize() int { 727 return len(br.doc) + 1 728} 729 730func (br *BackIndexRow) KeyTo(buf []byte) (int, error) { 731 buf[0] = 'b' 732 used := copy(buf[1:], br.doc) 733 return used + 1, nil 734} 735 736func (br *BackIndexRow) Value() []byte { 737 buf := make([]byte, br.ValueSize()) 738 size, _ := br.ValueTo(buf) 739 return buf[:size] 740} 741 742func (br *BackIndexRow) ValueSize() int { 743 birv := &BackIndexRowValue{ 744 TermsEntries: br.termsEntries, 745 StoredEntries: br.storedEntries, 746 } 747 return birv.Size() 748} 749 750func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { 751 birv := &BackIndexRowValue{ 752 TermsEntries: br.termsEntries, 753 StoredEntries: br.storedEntries, 754 } 755 return birv.MarshalTo(buf) 756} 757 758func (br *BackIndexRow) String() string { 759 return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries) 760} 761 762func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { 763 return &BackIndexRow{ 764 doc: docID, 765 termsEntries: entries, 766 storedEntries: storedFields, 767 } 768} 769 770func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) { 771 rv := BackIndexRow{} 772 773 buf := bytes.NewBuffer(key) 774 _, err := buf.ReadByte() // type 775 if err != nil { 776 return nil, err 777 } 778 779 rv.doc, err = buf.ReadBytes(ByteSeparator) 780 if err == io.EOF && len(rv.doc) < 1 { 781 err = fmt.Errorf("invalid doc length 0 - % x", key) 782 } 783 if err != nil && err != io.EOF { 784 return nil, err 785 } else if err == nil { 786 rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte 787 } 788 789 var birv BackIndexRowValue 790 err = proto.Unmarshal(value, &birv) 791 if err != nil { 792 return nil, err 793 } 794 rv.termsEntries = birv.TermsEntries 795 rv.storedEntries = birv.StoredEntries 796 797 return &rv, nil 798} 799 800// STORED 801 802type StoredRow struct { 803 doc []byte 804 field uint16 805 arrayPositions []uint64 806 typ byte 807 value []byte 808} 809 810func (s *StoredRow) Key() []byte { 811 buf := make([]byte, s.KeySize()) 812 size, _ := s.KeyTo(buf) 813 return buf[0:size] 814} 815 816func (s *StoredRow) KeySize() int { 817 return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions)) 818} 819 820func (s *StoredRow) KeyTo(buf []byte) (int, error) { 821 docLen := len(s.doc) 822 buf[0] = 's' 823 copy(buf[1:], s.doc) 824 buf[1+docLen] = ByteSeparator 825 binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field) 826 bytesUsed := 1 + docLen + 1 + 2 827 for _, arrayPosition := range s.arrayPositions { 828 varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition) 829 bytesUsed += varbytes 830 } 831 return bytesUsed, nil 832} 833 834func (s *StoredRow) Value() []byte { 835 buf := make([]byte, s.ValueSize()) 836 size, _ := s.ValueTo(buf) 837 return buf[:size] 838} 839 840func (s *StoredRow) ValueSize() int { 841 return len(s.value) + 1 842} 843 844func (s *StoredRow) ValueTo(buf []byte) (int, error) { 845 buf[0] = s.typ 846 used := copy(buf[1:], s.value) 847 return used + 1, nil 848} 849 850func (s *StoredRow) String() string { 851 return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value) 852} 853 854func (s *StoredRow) ScanPrefixForDoc() []byte { 855 docLen := len(s.doc) 856 buf := make([]byte, 1+docLen+1) 857 buf[0] = 's' 858 copy(buf[1:], s.doc) 859 buf[1+docLen] = ByteSeparator 860 return buf 861} 862 863func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow { 864 return &StoredRow{ 865 doc: docID, 866 field: field, 867 arrayPositions: arrayPositions, 868 typ: typ, 869 value: value, 870 } 871} 872 873func NewStoredRowK(key []byte) (*StoredRow, error) { 874 rv := StoredRow{} 875 876 buf := bytes.NewBuffer(key) 877 _, err := buf.ReadByte() // type 878 if err != nil { 879 return nil, err 880 } 881 882 rv.doc, err = buf.ReadBytes(ByteSeparator) 883 if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator 884 err = fmt.Errorf("invalid doc length 0") 885 return nil, err 886 } 887 888 rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte 889 890 err = binary.Read(buf, binary.LittleEndian, &rv.field) 891 if err != nil { 892 return nil, err 893 } 894 895 rv.arrayPositions = make([]uint64, 0) 896 nextArrayPos, err := binary.ReadUvarint(buf) 897 for err == nil { 898 rv.arrayPositions = append(rv.arrayPositions, nextArrayPos) 899 nextArrayPos, err = binary.ReadUvarint(buf) 900 } 901 return &rv, nil 902} 903 904func NewStoredRowKV(key, value []byte) (*StoredRow, error) { 905 rv, err := NewStoredRowK(key) 906 if err != nil { 907 return nil, err 908 } 909 rv.typ = value[0] 910 rv.value = value[1:] 911 return rv, nil 912} 913 914type backIndexFieldTermVisitor func(field uint32, term []byte) 915 916// visitBackIndexRow is designed to process a protobuf encoded 917// value, without creating unnecessary garbage. Instead values are passed 918// to a callback, inspected first, and only copied if necessary. 919// Due to the fact that this borrows from generated code, it must be marnually 920// updated if the protobuf definition changes. 921// 922// This code originates from: 923// func (m *BackIndexRowValue) Unmarshal(data []byte) error 924// the sections which create garbage or parse unintersting sections 925// have been commented out. This was done by design to allow for easier 926// merging in the future if that original function is regenerated 927func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error { 928 l := len(data) 929 iNdEx := 0 930 for iNdEx < l { 931 var wire uint64 932 for shift := uint(0); ; shift += 7 { 933 if iNdEx >= l { 934 return io.ErrUnexpectedEOF 935 } 936 b := data[iNdEx] 937 iNdEx++ 938 wire |= (uint64(b) & 0x7F) << shift 939 if b < 0x80 { 940 break 941 } 942 } 943 fieldNum := int32(wire >> 3) 944 wireType := int(wire & 0x7) 945 switch fieldNum { 946 case 1: 947 if wireType != 2 { 948 return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType) 949 } 950 var msglen int 951 for shift := uint(0); ; shift += 7 { 952 if iNdEx >= l { 953 return io.ErrUnexpectedEOF 954 } 955 b := data[iNdEx] 956 iNdEx++ 957 msglen |= (int(b) & 0x7F) << shift 958 if b < 0x80 { 959 break 960 } 961 } 962 postIndex := iNdEx + msglen 963 if msglen < 0 { 964 return ErrInvalidLengthUpsidedown 965 } 966 if postIndex > l { 967 return io.ErrUnexpectedEOF 968 } 969 // dont parse term entries 970 // m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{}) 971 // if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { 972 // return err 973 // } 974 // instead, inspect them 975 if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil { 976 return err 977 } 978 iNdEx = postIndex 979 case 2: 980 if wireType != 2 { 981 return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType) 982 } 983 var msglen int 984 for shift := uint(0); ; shift += 7 { 985 if iNdEx >= l { 986 return io.ErrUnexpectedEOF 987 } 988 b := data[iNdEx] 989 iNdEx++ 990 msglen |= (int(b) & 0x7F) << shift 991 if b < 0x80 { 992 break 993 } 994 } 995 postIndex := iNdEx + msglen 996 if msglen < 0 { 997 return ErrInvalidLengthUpsidedown 998 } 999 if postIndex > l { 1000 return io.ErrUnexpectedEOF 1001 } 1002 // don't parse stored entries 1003 // m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{}) 1004 // if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { 1005 // return err 1006 // } 1007 iNdEx = postIndex 1008 default: 1009 var sizeOfWire int 1010 for { 1011 sizeOfWire++ 1012 wire >>= 7 1013 if wire == 0 { 1014 break 1015 } 1016 } 1017 iNdEx -= sizeOfWire 1018 skippy, err := skipUpsidedown(data[iNdEx:]) 1019 if err != nil { 1020 return err 1021 } 1022 if skippy < 0 { 1023 return ErrInvalidLengthUpsidedown 1024 } 1025 if (iNdEx + skippy) > l { 1026 return io.ErrUnexpectedEOF 1027 } 1028 // don't track unrecognized data 1029 //m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) 1030 iNdEx += skippy 1031 } 1032 } 1033 1034 return nil 1035} 1036 1037// visitBackIndexRowFieldTerms is designed to process a protobuf encoded 1038// sub-value within the BackIndexRowValue, without creating unnecessary garbage. 1039// Instead values are passed to a callback, inspected first, and only copied if 1040// necessary. Due to the fact that this borrows from generated code, it must 1041// be marnually updated if the protobuf definition changes. 1042// 1043// This code originates from: 1044// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error { 1045// the sections which create garbage or parse uninteresting sections 1046// have been commented out. This was done by design to allow for easier 1047// merging in the future if that original function is regenerated 1048func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error { 1049 var theField uint32 1050 1051 var hasFields [1]uint64 1052 l := len(data) 1053 iNdEx := 0 1054 for iNdEx < l { 1055 var wire uint64 1056 for shift := uint(0); ; shift += 7 { 1057 if iNdEx >= l { 1058 return io.ErrUnexpectedEOF 1059 } 1060 b := data[iNdEx] 1061 iNdEx++ 1062 wire |= (uint64(b) & 0x7F) << shift 1063 if b < 0x80 { 1064 break 1065 } 1066 } 1067 fieldNum := int32(wire >> 3) 1068 wireType := int(wire & 0x7) 1069 switch fieldNum { 1070 case 1: 1071 if wireType != 0 { 1072 return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) 1073 } 1074 var v uint32 1075 for shift := uint(0); ; shift += 7 { 1076 if iNdEx >= l { 1077 return io.ErrUnexpectedEOF 1078 } 1079 b := data[iNdEx] 1080 iNdEx++ 1081 v |= (uint32(b) & 0x7F) << shift 1082 if b < 0x80 { 1083 break 1084 } 1085 } 1086 // m.Field = &v 1087 theField = v 1088 hasFields[0] |= uint64(0x00000001) 1089 case 2: 1090 if wireType != 2 { 1091 return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType) 1092 } 1093 var stringLen uint64 1094 for shift := uint(0); ; shift += 7 { 1095 if iNdEx >= l { 1096 return io.ErrUnexpectedEOF 1097 } 1098 b := data[iNdEx] 1099 iNdEx++ 1100 stringLen |= (uint64(b) & 0x7F) << shift 1101 if b < 0x80 { 1102 break 1103 } 1104 } 1105 postIndex := iNdEx + int(stringLen) 1106 if postIndex > l { 1107 return io.ErrUnexpectedEOF 1108 } 1109 //m.Terms = append(m.Terms, string(data[iNdEx:postIndex])) 1110 callback(theField, data[iNdEx:postIndex]) 1111 iNdEx = postIndex 1112 default: 1113 var sizeOfWire int 1114 for { 1115 sizeOfWire++ 1116 wire >>= 7 1117 if wire == 0 { 1118 break 1119 } 1120 } 1121 iNdEx -= sizeOfWire 1122 skippy, err := skipUpsidedown(data[iNdEx:]) 1123 if err != nil { 1124 return err 1125 } 1126 if skippy < 0 { 1127 return ErrInvalidLengthUpsidedown 1128 } 1129 if (iNdEx + skippy) > l { 1130 return io.ErrUnexpectedEOF 1131 } 1132 //m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...) 1133 iNdEx += skippy 1134 } 1135 } 1136 // if hasFields[0]&uint64(0x00000001) == 0 { 1137 // return new(github_com_golang_protobuf_proto.RequiredNotSetError) 1138 // } 1139 1140 return nil 1141} 1142