1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package upsidedown
16
17import (
18	"bytes"
19	"encoding/binary"
20	"fmt"
21	"io"
22	"math"
23	"reflect"
24
25	"github.com/blevesearch/bleve/size"
26	"github.com/golang/protobuf/proto"
27)
28
29var reflectStaticSizeTermFrequencyRow int
30var reflectStaticSizeTermVector int
31
32func init() {
33	var tfr TermFrequencyRow
34	reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
35	var tv TermVector
36	reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
37}
38
// ByteSeparator is the byte (0xff) written after variable-length components
// such as field names, terms and document ids in encoded keys and values.
const ByteSeparator byte = 0xff

type (
	// UpsideDownCouchRowStream is a channel carrying UpsideDownCouchRow values.
	UpsideDownCouchRowStream chan UpsideDownCouchRow

	// UpsideDownCouchRow is the common interface of the row types in this
	// file.  Each row can report a buffer size for its key and value, encode
	// them into a caller-supplied buffer, or return freshly allocated
	// encodings.
	UpsideDownCouchRow interface {
		// Key returns the encoded key.
		Key() []byte
		// KeySize returns a buffer size large enough for KeyTo.
		KeySize() int
		// KeyTo encodes the key into the buffer and returns the bytes used.
		KeyTo([]byte) (int, error)
		// Value returns the encoded value.
		Value() []byte
		// ValueSize returns a buffer size large enough for ValueTo.
		ValueSize() int
		// ValueTo encodes the value into the buffer and returns the bytes used.
		ValueTo([]byte) (int, error)
	}
)
51
52func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
53	if len(key) > 0 {
54		switch key[0] {
55		case 'v':
56			return NewVersionRowKV(key, value)
57		case 'f':
58			return NewFieldRowKV(key, value)
59		case 'd':
60			return NewDictionaryRowKV(key, value)
61		case 't':
62			return NewTermFrequencyRowKV(key, value)
63		case 'b':
64			return NewBackIndexRowKV(key, value)
65		case 's':
66			return NewStoredRowKV(key, value)
67		case 'i':
68			return NewInternalRowKV(key, value)
69		}
70		return nil, fmt.Errorf("Unknown field type '%s'", string(key[0]))
71	}
72	return nil, fmt.Errorf("Invalid empty key")
73}
74
75// VERSION
76
// VersionRow stores a single version byte.  Its key is always the one byte
// 'v' and its value is the version itself.
type VersionRow struct {
	version uint8
}

// Key returns the encoded key, the single byte 'v'.
func (v *VersionRow) Key() []byte {
	return []byte("v")
}

// KeySize returns the length of the encoded key, which is always 1.
func (v *VersionRow) KeySize() int {
	return 1
}

// KeyTo writes the key into buf and returns the number of bytes written.
// buf must have room for at least one byte.
func (v *VersionRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'v'
	return 1, nil
}

// Value returns the encoded value, the single version byte.
func (v *VersionRow) Value() []byte {
	return []byte{v.version}
}

// ValueSize returns the length of the encoded value, which is always 1.
func (v *VersionRow) ValueSize() int {
	return 1
}

// ValueTo writes the version byte into buf and returns the number of bytes
// written.  buf must have room for at least one byte.
func (v *VersionRow) ValueTo(buf []byte) (int, error) {
	buf[0] = v.version
	return 1, nil
}

// String returns a human-readable description of the row.
func (v *VersionRow) String() string {
	return fmt.Sprintf("Version: %d", v.version)
}

// NewVersionRow builds a VersionRow holding the given version.
func NewVersionRow(version uint8) *VersionRow {
	row := new(VersionRow)
	row.version = version
	return row
}

// NewVersionRowKV decodes a VersionRow from its stored form.  Only the value
// is inspected; an error is returned when it holds no version byte.
func NewVersionRowKV(key, value []byte) (*VersionRow, error) {
	row := &VersionRow{}
	if err := binary.Read(bytes.NewBuffer(value), binary.LittleEndian, &row.version); err != nil {
		return nil, err
	}
	return row, nil
}
126
127// INTERNAL STORAGE
128
129type InternalRow struct {
130	key []byte
131	val []byte
132}
133
134func (i *InternalRow) Key() []byte {
135	buf := make([]byte, i.KeySize())
136	size, _ := i.KeyTo(buf)
137	return buf[:size]
138}
139
140func (i *InternalRow) KeySize() int {
141	return len(i.key) + 1
142}
143
144func (i *InternalRow) KeyTo(buf []byte) (int, error) {
145	buf[0] = 'i'
146	actual := copy(buf[1:], i.key)
147	return 1 + actual, nil
148}
149
150func (i *InternalRow) Value() []byte {
151	return i.val
152}
153
154func (i *InternalRow) ValueSize() int {
155	return len(i.val)
156}
157
158func (i *InternalRow) ValueTo(buf []byte) (int, error) {
159	actual := copy(buf, i.val)
160	return actual, nil
161}
162
163func (i *InternalRow) String() string {
164	return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val)
165}
166
167func NewInternalRow(key, val []byte) *InternalRow {
168	return &InternalRow{
169		key: key,
170		val: val,
171	}
172}
173
174func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
175	rv := InternalRow{}
176	rv.key = key[1:]
177	rv.val = value
178	return &rv, nil
179}
180
181// FIELD definition
182
183type FieldRow struct {
184	index uint16
185	name  string
186}
187
188func (f *FieldRow) Key() []byte {
189	buf := make([]byte, f.KeySize())
190	size, _ := f.KeyTo(buf)
191	return buf[:size]
192}
193
194func (f *FieldRow) KeySize() int {
195	return 3
196}
197
198func (f *FieldRow) KeyTo(buf []byte) (int, error) {
199	buf[0] = 'f'
200	binary.LittleEndian.PutUint16(buf[1:3], f.index)
201	return 3, nil
202}
203
204func (f *FieldRow) Value() []byte {
205	return append([]byte(f.name), ByteSeparator)
206}
207
208func (f *FieldRow) ValueSize() int {
209	return len(f.name) + 1
210}
211
212func (f *FieldRow) ValueTo(buf []byte) (int, error) {
213	size := copy(buf, f.name)
214	buf[size] = ByteSeparator
215	return size + 1, nil
216}
217
218func (f *FieldRow) String() string {
219	return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
220}
221
222func NewFieldRow(index uint16, name string) *FieldRow {
223	return &FieldRow{
224		index: index,
225		name:  name,
226	}
227}
228
229func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
230	rv := FieldRow{}
231
232	buf := bytes.NewBuffer(key)
233	_, err := buf.ReadByte() // type
234	if err != nil {
235		return nil, err
236	}
237	err = binary.Read(buf, binary.LittleEndian, &rv.index)
238	if err != nil {
239		return nil, err
240	}
241
242	buf = bytes.NewBuffer(value)
243	rv.name, err = buf.ReadString(ByteSeparator)
244	if err != nil {
245		return nil, err
246	}
247	rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
248
249	return &rv, nil
250}
251
252// DICTIONARY
253
// DictionaryRowMaxValueSize is the largest number of bytes an encoded
// DictionaryRow value (a uvarint count) can occupy.
const DictionaryRowMaxValueSize = binary.MaxVarintLen64

// DictionaryRow records a count for a term within a field.  The key is 'd',
// the little-endian 16-bit field, then the term bytes; the value is the count
// encoded as a uvarint.
type DictionaryRow struct {
	term  []byte
	count uint64
	field uint16
}

// Key returns a newly allocated encoded key.
func (dr *DictionaryRow) Key() []byte {
	buf := make([]byte, dr.KeySize())
	size, _ := dr.KeyTo(buf)
	return buf[:size]
}

// KeySize returns the length of the encoded key.
func (dr *DictionaryRow) KeySize() int {
	return dictionaryRowKeySize(dr.term)
}

// dictionaryRowKeySize returns the encoded key length for the given term:
// one type byte, two field bytes and the term.
func dictionaryRowKeySize(term []byte) int {
	return len(term) + 3
}

// KeyTo writes the encoded key into buf and returns the bytes written.
// buf must hold at least 3 bytes.
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
	return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
}

// dictionaryRowKeyTo encodes a dictionary key for field and term into buf and
// returns the number of bytes written.  buf must hold at least 3 bytes.
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
	buf[0] = 'd'
	binary.LittleEndian.PutUint16(buf[1:3], field)
	size := copy(buf[3:], term)
	return size + 3
}

// Value returns a newly allocated encoded value, trimmed to the bytes
// actually used by the uvarint.
func (dr *DictionaryRow) Value() []byte {
	buf := make([]byte, dr.ValueSize())
	size, _ := dr.ValueTo(buf)
	return buf[:size]
}

// ValueSize returns the maximum encoded value size, not the exact size.
func (dr *DictionaryRow) ValueSize() int {
	return DictionaryRowMaxValueSize
}

// ValueTo writes the count as a uvarint into buf and returns the bytes used.
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {
	used := binary.PutUvarint(buf, dr.count)
	return used, nil
}

// String returns a human-readable description of the row.
func (dr *DictionaryRow) String() string {
	return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
}

// NewDictionaryRow builds a DictionaryRow.  The term slice is retained.
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
	return &DictionaryRow{
		term:  term,
		field: field,
		count: count,
	}
}

// NewDictionaryRowKV decodes a DictionaryRow from its stored key and value.
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
	rv, err := NewDictionaryRowK(key)
	if err != nil {
		return nil, err
	}

	err = rv.parseDictionaryV(value)
	if err != nil {
		return nil, err
	}
	return rv, nil

}

// NewDictionaryRowK decodes only the key portion of a DictionaryRow.
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
	rv := &DictionaryRow{}
	err := rv.parseDictionaryK(key)
	if err != nil {
		return nil, err
	}
	return rv, nil
}

// parseDictionaryK fills field and term from an encoded key.  The term is
// copied, reusing the row's existing term buffer when one is present.  An
// error is returned when the key is too short to contain the type byte and
// the field.
func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
	if len(key) < 3 {
		return fmt.Errorf("invalid dictionary key, no valid field")
	}
	dr.field = binary.LittleEndian.Uint16(key[1:3])
	if dr.term != nil {
		dr.term = dr.term[:0]
	}
	dr.term = append(dr.term, key[3:]...)
	return nil
}

// parseDictionaryV fills count from an encoded value.
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
	count, err := dictionaryRowParseV(value)
	if err != nil {
		return err
	}
	dr.count = count
	return nil
}

// dictionaryRowParseV decodes the uvarint count from an encoded value and
// returns an error when the value is empty or the varint is malformed.
func dictionaryRowParseV(value []byte) (uint64, error) {
	count, nread := binary.Uvarint(value)
	if nread <= 0 {
		return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
	}
	return count, nil
}
362
363// TERM FIELD FREQUENCY
364
365type TermVector struct {
366	field          uint16
367	arrayPositions []uint64
368	pos            uint64
369	start          uint64
370	end            uint64
371}
372
373func (tv *TermVector) Size() int {
374	return reflectStaticSizeTermVector + size.SizeOfPtr +
375		len(tv.arrayPositions)*size.SizeOfUint64
376}
377
378func (tv *TermVector) String() string {
379	return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
380}
381
382type TermFrequencyRow struct {
383	term    []byte
384	doc     []byte
385	freq    uint64
386	vectors []*TermVector
387	norm    float32
388	field   uint16
389}
390
391func (tfr *TermFrequencyRow) Size() int {
392	sizeInBytes := reflectStaticSizeTermFrequencyRow +
393		len(tfr.term) +
394		len(tfr.doc)
395
396	for _, entry := range tfr.vectors {
397		sizeInBytes += entry.Size()
398	}
399
400	return sizeInBytes
401}
402
403func (tfr *TermFrequencyRow) Term() []byte {
404	return tfr.term
405}
406
407func (tfr *TermFrequencyRow) Freq() uint64 {
408	return tfr.freq
409}
410
411func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
412	buf := make([]byte, 3)
413	buf[0] = 't'
414	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
415	return buf
416}
417
418func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
419	buf := make([]byte, 3+len(tfr.term))
420	buf[0] = 't'
421	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
422	copy(buf[3:], tfr.term)
423	return buf
424}
425
426func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
427	buf := make([]byte, 3+len(tfr.term)+1)
428	buf[0] = 't'
429	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
430	termLen := copy(buf[3:], tfr.term)
431	buf[3+termLen] = ByteSeparator
432	return buf
433}
434
435func (tfr *TermFrequencyRow) Key() []byte {
436	buf := make([]byte, tfr.KeySize())
437	size, _ := tfr.KeyTo(buf)
438	return buf[:size]
439}
440
441func (tfr *TermFrequencyRow) KeySize() int {
442	return termFrequencyRowKeySize(tfr.term, tfr.doc)
443}
444
445func termFrequencyRowKeySize(term, doc []byte) int {
446	return 3 + len(term) + 1 + len(doc)
447}
448
449func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
450	return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
451}
452
453func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
454	buf[0] = 't'
455	binary.LittleEndian.PutUint16(buf[1:3], field)
456	termLen := copy(buf[3:], term)
457	buf[3+termLen] = ByteSeparator
458	docLen := copy(buf[3+termLen+1:], doc)
459	return 3 + termLen + 1 + docLen
460}
461
462func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
463	keySize := tfr.KeySize()
464	if cap(buf) < keySize {
465		buf = make([]byte, keySize)
466	}
467	actualSize, err := tfr.KeyTo(buf[0:keySize])
468	return buf[0:actualSize], err
469}
470
471func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
472	dr := NewDictionaryRow(tfr.term, tfr.field, 0)
473	return dr.Key()
474}
475
476func (tfr *TermFrequencyRow) DictionaryRowKeySize() int {
477	dr := NewDictionaryRow(tfr.term, tfr.field, 0)
478	return dr.KeySize()
479}
480
481func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) {
482	dr := NewDictionaryRow(tfr.term, tfr.field, 0)
483	return dr.KeyTo(buf)
484}
485
486func (tfr *TermFrequencyRow) Value() []byte {
487	buf := make([]byte, tfr.ValueSize())
488	size, _ := tfr.ValueTo(buf)
489	return buf[:size]
490}
491
492func (tfr *TermFrequencyRow) ValueSize() int {
493	bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64
494	for _, vector := range tfr.vectors {
495		bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64
496	}
497	return bufLen
498}
499
500func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) {
501	used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq)
502
503	normuint32 := math.Float32bits(tfr.norm)
504	newbuf := buf[used : used+binary.MaxVarintLen64]
505	used += binary.PutUvarint(newbuf, uint64(normuint32))
506
507	for _, vector := range tfr.vectors {
508		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field))
509		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos)
510		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start)
511		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end)
512		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions)))
513		for _, arrayPosition := range vector.arrayPositions {
514			used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition)
515		}
516	}
517	return used, nil
518}
519
520func (tfr *TermFrequencyRow) String() string {
521	return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
522}
523
524func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
525	tfr.term = term
526	tfr.field = field
527	tfr.doc = docID
528	tfr.freq = freq
529	tfr.norm = norm
530	return tfr
531}
532
533func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
534	return &TermFrequencyRow{
535		term:  term,
536		field: field,
537		doc:   docID,
538		freq:  freq,
539		norm:  norm,
540	}
541}
542
543func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
544	return &TermFrequencyRow{
545		term:    term,
546		field:   field,
547		doc:     docID,
548		freq:    freq,
549		norm:    norm,
550		vectors: vectors,
551	}
552}
553
554func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) {
555	rv := &TermFrequencyRow{}
556	err := rv.parseK(key)
557	if err != nil {
558		return nil, err
559	}
560	return rv, nil
561}
562
563func (tfr *TermFrequencyRow) parseK(key []byte) error {
564	keyLen := len(key)
565	if keyLen < 3 {
566		return fmt.Errorf("invalid term frequency key, no valid field")
567	}
568	tfr.field = binary.LittleEndian.Uint16(key[1:3])
569
570	termEndPos := bytes.IndexByte(key[3:], ByteSeparator)
571	if termEndPos < 0 {
572		return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
573	}
574	tfr.term = key[3 : 3+termEndPos]
575
576	docLen := keyLen - (3 + termEndPos + 1)
577	if docLen < 1 {
578		return fmt.Errorf("invalid term frequency key, empty docid")
579	}
580	tfr.doc = key[3+termEndPos+1:]
581
582	return nil
583}
584
585func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
586	tfr.doc = key[3+len(term)+1:]
587	if len(tfr.doc) == 0 {
588		return fmt.Errorf("invalid term frequency key, empty docid")
589	}
590
591	return nil
592}
593
594func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error {
595	var bytesRead int
596	tfr.freq, bytesRead = binary.Uvarint(value)
597	if bytesRead <= 0 {
598		return fmt.Errorf("invalid term frequency value, invalid frequency")
599	}
600	currOffset := bytesRead
601
602	var norm uint64
603	norm, bytesRead = binary.Uvarint(value[currOffset:])
604	if bytesRead <= 0 {
605		return fmt.Errorf("invalid term frequency value, no norm")
606	}
607	currOffset += bytesRead
608
609	tfr.norm = math.Float32frombits(uint32(norm))
610
611	tfr.vectors = nil
612	if !includeTermVectors {
613		return nil
614	}
615
616	var field uint64
617	field, bytesRead = binary.Uvarint(value[currOffset:])
618	for bytesRead > 0 {
619		currOffset += bytesRead
620		tv := TermVector{}
621		tv.field = uint16(field)
622		// at this point we expect at least one term vector
623		if tfr.vectors == nil {
624			tfr.vectors = make([]*TermVector, 0)
625		}
626
627		tv.pos, bytesRead = binary.Uvarint(value[currOffset:])
628		if bytesRead <= 0 {
629			return fmt.Errorf("invalid term frequency value, vector contains no position")
630		}
631		currOffset += bytesRead
632
633		tv.start, bytesRead = binary.Uvarint(value[currOffset:])
634		if bytesRead <= 0 {
635			return fmt.Errorf("invalid term frequency value, vector contains no start")
636		}
637		currOffset += bytesRead
638
639		tv.end, bytesRead = binary.Uvarint(value[currOffset:])
640		if bytesRead <= 0 {
641			return fmt.Errorf("invalid term frequency value, vector contains no end")
642		}
643		currOffset += bytesRead
644
645		var arrayPositionsLen uint64 = 0
646		arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:])
647		if bytesRead <= 0 {
648			return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen")
649		}
650		currOffset += bytesRead
651
652		if arrayPositionsLen > 0 {
653			tv.arrayPositions = make([]uint64, arrayPositionsLen)
654			for i := 0; uint64(i) < arrayPositionsLen; i++ {
655				tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:])
656				if bytesRead <= 0 {
657					return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i)
658				}
659				currOffset += bytesRead
660			}
661		}
662
663		tfr.vectors = append(tfr.vectors, &tv)
664		// try to read next record (may not exist)
665		field, bytesRead = binary.Uvarint(value[currOffset:])
666	}
667	if len(value[currOffset:]) > 0 && bytesRead <= 0 {
668		return fmt.Errorf("invalid term frequency value, vector field invalid")
669	}
670
671	return nil
672}
673
674func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
675	rv, err := NewTermFrequencyRowK(key)
676	if err != nil {
677		return nil, err
678	}
679
680	err = rv.parseV(value, true)
681	if err != nil {
682		return nil, err
683	}
684	return rv, nil
685
686}
687
688type BackIndexRow struct {
689	doc           []byte
690	termsEntries  []*BackIndexTermsEntry
691	storedEntries []*BackIndexStoreEntry
692}
693
694func (br *BackIndexRow) AllTermKeys() [][]byte {
695	if br == nil {
696		return nil
697	}
698	rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely
699	for _, termsEntry := range br.termsEntries {
700		for i := range termsEntry.Terms {
701			termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0)
702			rv = append(rv, termRow.Key())
703		}
704	}
705	return rv
706}
707
708func (br *BackIndexRow) AllStoredKeys() [][]byte {
709	if br == nil {
710		return nil
711	}
712	rv := make([][]byte, len(br.storedEntries))
713	for i, storedEntry := range br.storedEntries {
714		storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
715		rv[i] = storedRow.Key()
716	}
717	return rv
718}
719
720func (br *BackIndexRow) Key() []byte {
721	buf := make([]byte, br.KeySize())
722	size, _ := br.KeyTo(buf)
723	return buf[:size]
724}
725
726func (br *BackIndexRow) KeySize() int {
727	return len(br.doc) + 1
728}
729
730func (br *BackIndexRow) KeyTo(buf []byte) (int, error) {
731	buf[0] = 'b'
732	used := copy(buf[1:], br.doc)
733	return used + 1, nil
734}
735
736func (br *BackIndexRow) Value() []byte {
737	buf := make([]byte, br.ValueSize())
738	size, _ := br.ValueTo(buf)
739	return buf[:size]
740}
741
742func (br *BackIndexRow) ValueSize() int {
743	birv := &BackIndexRowValue{
744		TermsEntries:  br.termsEntries,
745		StoredEntries: br.storedEntries,
746	}
747	return birv.Size()
748}
749
750func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
751	birv := &BackIndexRowValue{
752		TermsEntries:  br.termsEntries,
753		StoredEntries: br.storedEntries,
754	}
755	return birv.MarshalTo(buf)
756}
757
758func (br *BackIndexRow) String() string {
759	return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries)
760}
761
762func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
763	return &BackIndexRow{
764		doc:           docID,
765		termsEntries:  entries,
766		storedEntries: storedFields,
767	}
768}
769
770func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
771	rv := BackIndexRow{}
772
773	buf := bytes.NewBuffer(key)
774	_, err := buf.ReadByte() // type
775	if err != nil {
776		return nil, err
777	}
778
779	rv.doc, err = buf.ReadBytes(ByteSeparator)
780	if err == io.EOF && len(rv.doc) < 1 {
781		err = fmt.Errorf("invalid doc length 0 - % x", key)
782	}
783	if err != nil && err != io.EOF {
784		return nil, err
785	} else if err == nil {
786		rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
787	}
788
789	var birv BackIndexRowValue
790	err = proto.Unmarshal(value, &birv)
791	if err != nil {
792		return nil, err
793	}
794	rv.termsEntries = birv.TermsEntries
795	rv.storedEntries = birv.StoredEntries
796
797	return &rv, nil
798}
799
800// STORED
801
802type StoredRow struct {
803	doc            []byte
804	field          uint16
805	arrayPositions []uint64
806	typ            byte
807	value          []byte
808}
809
810func (s *StoredRow) Key() []byte {
811	buf := make([]byte, s.KeySize())
812	size, _ := s.KeyTo(buf)
813	return buf[0:size]
814}
815
816func (s *StoredRow) KeySize() int {
817	return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions))
818}
819
820func (s *StoredRow) KeyTo(buf []byte) (int, error) {
821	docLen := len(s.doc)
822	buf[0] = 's'
823	copy(buf[1:], s.doc)
824	buf[1+docLen] = ByteSeparator
825	binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
826	bytesUsed := 1 + docLen + 1 + 2
827	for _, arrayPosition := range s.arrayPositions {
828		varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition)
829		bytesUsed += varbytes
830	}
831	return bytesUsed, nil
832}
833
834func (s *StoredRow) Value() []byte {
835	buf := make([]byte, s.ValueSize())
836	size, _ := s.ValueTo(buf)
837	return buf[:size]
838}
839
840func (s *StoredRow) ValueSize() int {
841	return len(s.value) + 1
842}
843
844func (s *StoredRow) ValueTo(buf []byte) (int, error) {
845	buf[0] = s.typ
846	used := copy(buf[1:], s.value)
847	return used + 1, nil
848}
849
850func (s *StoredRow) String() string {
851	return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
852}
853
854func (s *StoredRow) ScanPrefixForDoc() []byte {
855	docLen := len(s.doc)
856	buf := make([]byte, 1+docLen+1)
857	buf[0] = 's'
858	copy(buf[1:], s.doc)
859	buf[1+docLen] = ByteSeparator
860	return buf
861}
862
863func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
864	return &StoredRow{
865		doc:            docID,
866		field:          field,
867		arrayPositions: arrayPositions,
868		typ:            typ,
869		value:          value,
870	}
871}
872
873func NewStoredRowK(key []byte) (*StoredRow, error) {
874	rv := StoredRow{}
875
876	buf := bytes.NewBuffer(key)
877	_, err := buf.ReadByte() // type
878	if err != nil {
879		return nil, err
880	}
881
882	rv.doc, err = buf.ReadBytes(ByteSeparator)
883	if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator
884		err = fmt.Errorf("invalid doc length 0")
885		return nil, err
886	}
887
888	rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
889
890	err = binary.Read(buf, binary.LittleEndian, &rv.field)
891	if err != nil {
892		return nil, err
893	}
894
895	rv.arrayPositions = make([]uint64, 0)
896	nextArrayPos, err := binary.ReadUvarint(buf)
897	for err == nil {
898		rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
899		nextArrayPos, err = binary.ReadUvarint(buf)
900	}
901	return &rv, nil
902}
903
904func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
905	rv, err := NewStoredRowK(key)
906	if err != nil {
907		return nil, err
908	}
909	rv.typ = value[0]
910	rv.value = value[1:]
911	return rv, nil
912}
913
914type backIndexFieldTermVisitor func(field uint32, term []byte)
915
916// visitBackIndexRow is designed to process a protobuf encoded
917// value, without creating unnecessary garbage.  Instead values are passed
918// to a callback, inspected first, and only copied if necessary.
919// Due to the fact that this borrows from generated code, it must be marnually
920// updated if the protobuf definition changes.
921//
922// This code originates from:
923// func (m *BackIndexRowValue) Unmarshal(data []byte) error
924// the sections which create garbage or parse unintersting sections
925// have been commented out.  This was done by design to allow for easier
926// merging in the future if that original function is regenerated
927func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
928	l := len(data)
929	iNdEx := 0
930	for iNdEx < l {
931		var wire uint64
932		for shift := uint(0); ; shift += 7 {
933			if iNdEx >= l {
934				return io.ErrUnexpectedEOF
935			}
936			b := data[iNdEx]
937			iNdEx++
938			wire |= (uint64(b) & 0x7F) << shift
939			if b < 0x80 {
940				break
941			}
942		}
943		fieldNum := int32(wire >> 3)
944		wireType := int(wire & 0x7)
945		switch fieldNum {
946		case 1:
947			if wireType != 2 {
948				return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
949			}
950			var msglen int
951			for shift := uint(0); ; shift += 7 {
952				if iNdEx >= l {
953					return io.ErrUnexpectedEOF
954				}
955				b := data[iNdEx]
956				iNdEx++
957				msglen |= (int(b) & 0x7F) << shift
958				if b < 0x80 {
959					break
960				}
961			}
962			postIndex := iNdEx + msglen
963			if msglen < 0 {
964				return ErrInvalidLengthUpsidedown
965			}
966			if postIndex > l {
967				return io.ErrUnexpectedEOF
968			}
969			// dont parse term entries
970			// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
971			// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
972			// 	return err
973			// }
974			// instead, inspect them
975			if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
976				return err
977			}
978			iNdEx = postIndex
979		case 2:
980			if wireType != 2 {
981				return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
982			}
983			var msglen int
984			for shift := uint(0); ; shift += 7 {
985				if iNdEx >= l {
986					return io.ErrUnexpectedEOF
987				}
988				b := data[iNdEx]
989				iNdEx++
990				msglen |= (int(b) & 0x7F) << shift
991				if b < 0x80 {
992					break
993				}
994			}
995			postIndex := iNdEx + msglen
996			if msglen < 0 {
997				return ErrInvalidLengthUpsidedown
998			}
999			if postIndex > l {
1000				return io.ErrUnexpectedEOF
1001			}
1002			// don't parse stored entries
1003			// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
1004			// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
1005			// 	return err
1006			// }
1007			iNdEx = postIndex
1008		default:
1009			var sizeOfWire int
1010			for {
1011				sizeOfWire++
1012				wire >>= 7
1013				if wire == 0 {
1014					break
1015				}
1016			}
1017			iNdEx -= sizeOfWire
1018			skippy, err := skipUpsidedown(data[iNdEx:])
1019			if err != nil {
1020				return err
1021			}
1022			if skippy < 0 {
1023				return ErrInvalidLengthUpsidedown
1024			}
1025			if (iNdEx + skippy) > l {
1026				return io.ErrUnexpectedEOF
1027			}
1028			// don't track unrecognized data
1029			//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
1030			iNdEx += skippy
1031		}
1032	}
1033
1034	return nil
1035}
1036
1037// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
1038// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
1039// Instead values are passed to a callback, inspected first, and only copied if
1040// necessary.  Due to the fact that this borrows from generated code, it must
1041// be marnually updated if the protobuf definition changes.
1042//
1043// This code originates from:
1044// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
1045// the sections which create garbage or parse uninteresting sections
1046// have been commented out.  This was done by design to allow for easier
1047// merging in the future if that original function is regenerated
1048func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
1049	var theField uint32
1050
1051	var hasFields [1]uint64
1052	l := len(data)
1053	iNdEx := 0
1054	for iNdEx < l {
1055		var wire uint64
1056		for shift := uint(0); ; shift += 7 {
1057			if iNdEx >= l {
1058				return io.ErrUnexpectedEOF
1059			}
1060			b := data[iNdEx]
1061			iNdEx++
1062			wire |= (uint64(b) & 0x7F) << shift
1063			if b < 0x80 {
1064				break
1065			}
1066		}
1067		fieldNum := int32(wire >> 3)
1068		wireType := int(wire & 0x7)
1069		switch fieldNum {
1070		case 1:
1071			if wireType != 0 {
1072				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
1073			}
1074			var v uint32
1075			for shift := uint(0); ; shift += 7 {
1076				if iNdEx >= l {
1077					return io.ErrUnexpectedEOF
1078				}
1079				b := data[iNdEx]
1080				iNdEx++
1081				v |= (uint32(b) & 0x7F) << shift
1082				if b < 0x80 {
1083					break
1084				}
1085			}
1086			// m.Field = &v
1087			theField = v
1088			hasFields[0] |= uint64(0x00000001)
1089		case 2:
1090			if wireType != 2 {
1091				return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
1092			}
1093			var stringLen uint64
1094			for shift := uint(0); ; shift += 7 {
1095				if iNdEx >= l {
1096					return io.ErrUnexpectedEOF
1097				}
1098				b := data[iNdEx]
1099				iNdEx++
1100				stringLen |= (uint64(b) & 0x7F) << shift
1101				if b < 0x80 {
1102					break
1103				}
1104			}
1105			postIndex := iNdEx + int(stringLen)
1106			if postIndex > l {
1107				return io.ErrUnexpectedEOF
1108			}
1109			//m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
1110			callback(theField, data[iNdEx:postIndex])
1111			iNdEx = postIndex
1112		default:
1113			var sizeOfWire int
1114			for {
1115				sizeOfWire++
1116				wire >>= 7
1117				if wire == 0 {
1118					break
1119				}
1120			}
1121			iNdEx -= sizeOfWire
1122			skippy, err := skipUpsidedown(data[iNdEx:])
1123			if err != nil {
1124				return err
1125			}
1126			if skippy < 0 {
1127				return ErrInvalidLengthUpsidedown
1128			}
1129			if (iNdEx + skippy) > l {
1130				return io.ErrUnexpectedEOF
1131			}
1132			//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
1133			iNdEx += skippy
1134		}
1135	}
1136	// if hasFields[0]&uint64(0x00000001) == 0 {
1137	// 	return new(github_com_golang_protobuf_proto.RequiredNotSetError)
1138	// }
1139
1140	return nil
1141}
1142