//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
	"github.com/couchbase/vellum"
	"github.com/golang/snappy"
)

// NewSegmentBufferNumResultsBump, NewSegmentBufferNumResultsFactor and
// NewSegmentBufferAvgBytesPerDocFactor tune the initial size estimate
// of the output buffer used when encoding a new segment.
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional
// checks on fields in a document being added to a new segment; by
// default it does nothing.
// This API is experimental and may be removed at any time.
var ValidateDocFields = func(field document.Field) error {
	return nil
}

// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
// SegmentBase from analysis results.
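//
// An illustrative (hypothetical) call, assuming results holds the
// analysis output for a batch of documents:
//
//	sb, numBytes, err := AnalysisResultsToSegmentBase(results, 1024)
//	if err != nil {
//		return err
//	}
//	// sb is searchable in memory; numBytes is the size of the encoding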
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
	chunkFactor uint32) (*SegmentBase, uint64, error) {
	s := interimPool.Get().(*interim)

	var br bytes.Buffer
	if s.lastNumDocs > 0 {
		// use previous results to initialize the buf with an estimated
		// size, but note that the interim instance comes from a
		// global interimPool, so multiple scorch instances indexing
		// different docs can lead to low quality estimates
		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
			NewSegmentBufferAvgBytesPerDocFactor)
		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
			NewSegmentBufferNumResultsFactor)
		br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
	}

	s.results = results
	s.chunkFactor = chunkFactor
	s.w = NewCountHashWriter(&br)

	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
		err := s.convert()
	if err != nil {
		return nil, uint64(0), err
	}

	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
		s.FieldsMap, s.FieldsInv, uint64(len(results)),
		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)

	if err == nil && s.reset() == nil {
		s.lastNumDocs = len(results)
		s.lastOutSize = len(br.Bytes())
		interimPool.Put(s)
	}

	return sb, uint64(len(br.Bytes())), err
}

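// interimPool recycles interim instances, together with their backing
// slices, across segment builds to reduce allocations; see reset().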
var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}

// interim holds temporary working data used while converting from
// analysis results to a zap-encoded segment
type interim struct {
	results []*index.AnalysisResult

	chunkFactor uint32

	w *CountHashWriter

	// FieldsMap adds 1 to field id to avoid zero value issues
	//  name -> field id + 1
	FieldsMap map[string]uint16

	// FieldsInv is the inverse of FieldsMap
	//  field id -> name
	FieldsInv []string

	// Term dictionaries for each field
	//  field id -> term -> postings list id + 1
	Dicts []map[string]uint64

	// Terms for each field, where terms are sorted ascending
	//  field id -> []term
	DictKeys [][]string

	// Fields whose IncludeDocValues is true
	//  field id -> bool
	IncludeDocValues []bool

	// postings id -> bitmap of docNums
	Postings []*roaring.Bitmap

	// postings id -> freq/norm's, one for each docNum in postings
	FreqNorms        [][]interimFreqNorm
	freqNormsBacking []interimFreqNorm

	// postings id -> locs, one for each freq
	Locs        [][]interimLoc
	locsBacking []interimLoc

	numTermsPerPostingsList []int // key is postings list id
	numLocsPerPostingsList  []int // key is postings list id

	builder    *vellum.Builder
	builderBuf bytes.Buffer

	metaBuf bytes.Buffer

	tmp0 []byte
	tmp1 []byte

	lastNumDocs int
	lastOutSize int
}

func (s *interim) reset() (err error) {
	s.results = nil
	s.chunkFactor = 0
	s.w = nil
	s.FieldsMap = nil
	s.FieldsInv = nil
	for i := range s.Dicts {
		s.Dicts[i] = nil
	}
	s.Dicts = s.Dicts[:0]
	for i := range s.DictKeys {
		s.DictKeys[i] = s.DictKeys[i][:0]
	}
	s.DictKeys = s.DictKeys[:0]
	for i := range s.IncludeDocValues {
		s.IncludeDocValues[i] = false
	}
	s.IncludeDocValues = s.IncludeDocValues[:0]
	for _, idn := range s.Postings {
		idn.Clear()
	}
	s.Postings = s.Postings[:0]
	s.FreqNorms = s.FreqNorms[:0]
	for i := range s.freqNormsBacking {
		s.freqNormsBacking[i] = interimFreqNorm{}
	}
	s.freqNormsBacking = s.freqNormsBacking[:0]
	s.Locs = s.Locs[:0]
	for i := range s.locsBacking {
		s.locsBacking[i] = interimLoc{}
	}
	s.locsBacking = s.locsBacking[:0]
	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
	s.builderBuf.Reset()
	if s.builder != nil {
		err = s.builder.Reset(&s.builderBuf)
	}
	s.metaBuf.Reset()
	s.tmp0 = s.tmp0[:0]
	s.tmp1 = s.tmp1[:0]
	s.lastNumDocs = 0
	s.lastOutSize = 0

	return err
}

func (s *interim) grabBuf(size int) []byte {
	buf := s.tmp0
	if cap(buf) < size {
		buf = make([]byte, size)
		s.tmp0 = buf
	}
	return buf[0:size]
}

type interimStoredField struct {
	vals      [][]byte
	typs      []byte
	arrayposs [][]uint64 // array positions
}

type interimFreqNorm struct {
	freq    uint64
	norm    float32
	numLocs int
}

type interimLoc struct {
	fieldID   uint16
	pos       uint64
	start     uint64
	end       uint64
	arrayposs []uint64
}

func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
	s.FieldsMap = map[string]uint16{}

	s.getOrDefineField("_id") // _id field is fieldID 0

	for _, result := range s.results {
		for _, field := range result.Document.CompositeFields {
			s.getOrDefineField(field.Name())
		}
		for _, field := range result.Document.Fields {
			s.getOrDefineField(field.Name())
		}
	}

	sort.Strings(s.FieldsInv[1:]) // keep _id as first field

	for fieldID, fieldName := range s.FieldsInv {
		s.FieldsMap[fieldName] = uint16(fieldID + 1)
	}

	if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
		s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
	} else {
		s.IncludeDocValues = make([]bool, len(s.FieldsInv))
	}

	s.prepareDicts()

	for _, dict := range s.DictKeys {
		sort.Strings(dict)
	}

	s.processDocuments()

	storedIndexOffset, err := s.writeStoredFields()
	if err != nil {
		return 0, 0, 0, nil, err
	}

	var fdvIndexOffset uint64
	var dictOffsets []uint64

	if len(s.results) > 0 {
		fdvIndexOffset, dictOffsets, err = s.writeDicts()
		if err != nil {
			return 0, 0, 0, nil, err
		}
	} else {
		dictOffsets = make([]uint64, len(s.FieldsInv))
	}

	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
	if err != nil {
		return 0, 0, 0, nil, err
	}

	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
}

func (s *interim) getOrDefineField(fieldName string) int {
	fieldIDPlus1, exists := s.FieldsMap[fieldName]
	if !exists {
		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
		s.FieldsMap[fieldName] = fieldIDPlus1
		s.FieldsInv = append(s.FieldsInv, fieldName)

		s.Dicts = append(s.Dicts, make(map[string]uint64))

		n := len(s.DictKeys)
		if n < cap(s.DictKeys) {
			s.DictKeys = s.DictKeys[:n+1]
			s.DictKeys[n] = s.DictKeys[n][:0]
		} else {
			s.DictKeys = append(s.DictKeys, []string(nil))
		}
	}

	return int(fieldIDPlus1 - 1)
}

// fill Dicts and DictKeys from analysis results
func (s *interim) prepareDicts() {
	var pidNext int

	var totTFs int
	var totLocs int

	visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
		dict := s.Dicts[fieldID]
		dictKeys := s.DictKeys[fieldID]

		for term, tf := range tfs {
			pidPlus1, exists := dict[term]
			if !exists {
				pidNext++
				pidPlus1 = uint64(pidNext)

				dict[term] = pidPlus1
				dictKeys = append(dictKeys, term)

				s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
				s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
			}

			pid := pidPlus1 - 1

			s.numTermsPerPostingsList[pid] += 1
			s.numLocsPerPostingsList[pid] += len(tf.Locations)

			totLocs += len(tf.Locations)
		}

		totTFs += len(tfs)

		s.DictKeys[fieldID] = dictKeys
	}

	for _, result := range s.results {
		// walk each composite field
		for _, field := range result.Document.CompositeFields {
			fieldID := uint16(s.getOrDefineField(field.Name()))
			_, tf := field.Analyze()
			visitField(fieldID, tf)
		}

		// walk each field
		for i, field := range result.Document.Fields {
			fieldID := uint16(s.getOrDefineField(field.Name()))
			tf := result.Analyzed[i]
			visitField(fieldID, tf)
		}
	}

	numPostingsLists := pidNext

	if cap(s.Postings) >= numPostingsLists {
		s.Postings = s.Postings[:numPostingsLists]
	} else {
		postings := make([]*roaring.Bitmap, numPostingsLists)
		copy(postings, s.Postings[:cap(s.Postings)])
		for i := 0; i < numPostingsLists; i++ {
			if postings[i] == nil {
				postings[i] = roaring.New()
			}
		}
		s.Postings = postings
	}

	if cap(s.FreqNorms) >= numPostingsLists {
		s.FreqNorms = s.FreqNorms[:numPostingsLists]
	} else {
		s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
	}

	if cap(s.freqNormsBacking) >= totTFs {
		s.freqNormsBacking = s.freqNormsBacking[:totTFs]
	} else {
		s.freqNormsBacking = make([]interimFreqNorm, totTFs)
	}

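	// parcel the shared backing slice out into per-postings-list
	// sub-slices: each starts at length 0 within its own region of the
	// slab, and later appends fill exactly that region in postings order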
	freqNormsBacking := s.freqNormsBacking
	for pid, numTerms := range s.numTermsPerPostingsList {
		s.FreqNorms[pid] = freqNormsBacking[0:0]
		freqNormsBacking = freqNormsBacking[numTerms:]
	}

	if cap(s.Locs) >= numPostingsLists {
		s.Locs = s.Locs[:numPostingsLists]
	} else {
		s.Locs = make([][]interimLoc, numPostingsLists)
	}

	if cap(s.locsBacking) >= totLocs {
		s.locsBacking = s.locsBacking[:totLocs]
	} else {
		s.locsBacking = make([]interimLoc, totLocs)
	}

	locsBacking := s.locsBacking
	for pid, numLocs := range s.numLocsPerPostingsList {
		s.Locs[pid] = locsBacking[0:0]
		locsBacking = locsBacking[numLocs:]
	}
}

func (s *interim) processDocuments() {
	numFields := len(s.FieldsInv)
	reuseFieldLens := make([]int, numFields)
	reuseFieldTFs := make([]analysis.TokenFrequencies, numFields)

	for docNum, result := range s.results {
		for i := 0; i < numFields; i++ { // clear these for reuse
			reuseFieldLens[i] = 0
			reuseFieldTFs[i] = nil
		}

		s.processDocument(uint64(docNum), result,
			reuseFieldLens, reuseFieldTFs)
	}
}

func (s *interim) processDocument(docNum uint64,
	result *index.AnalysisResult,
	fieldLens []int, fieldTFs []analysis.TokenFrequencies) {
	visitField := func(fieldID uint16, fieldName string,
		ln int, tf analysis.TokenFrequencies) {
		fieldLens[fieldID] += ln

		existingFreqs := fieldTFs[fieldID]
		if existingFreqs != nil {
			existingFreqs.MergeAll(fieldName, tf)
		} else {
			fieldTFs[fieldID] = tf
		}
	}

	// walk each composite field
	for _, field := range result.Document.CompositeFields {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		ln, tf := field.Analyze()
		visitField(fieldID, field.Name(), ln, tf)
	}

	// walk each field
	for i, field := range result.Document.Fields {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		ln := result.Length[i]
		tf := result.Analyzed[i]
		visitField(fieldID, field.Name(), ln, tf)
	}

	// now that it's been rolled up into fieldTFs, walk that
	for fieldID, tfs := range fieldTFs {
		dict := s.Dicts[fieldID]
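		// field length normalization: 1 / sqrt(field length in tokens)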
		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))

		for term, tf := range tfs {
			pid := dict[term] - 1
			bs := s.Postings[pid]
			bs.Add(uint32(docNum))

			s.FreqNorms[pid] = append(s.FreqNorms[pid],
				interimFreqNorm{
					freq:    uint64(tf.Frequency()),
					norm:    norm,
					numLocs: len(tf.Locations),
				})

			if len(tf.Locations) > 0 {
				locs := s.Locs[pid]

				for _, loc := range tf.Locations {
					var locf = uint16(fieldID)
					if loc.Field != "" {
						locf = uint16(s.getOrDefineField(loc.Field))
					}
					var arrayposs []uint64
					if len(loc.ArrayPositions) > 0 {
						arrayposs = loc.ArrayPositions
					}
					locs = append(locs, interimLoc{
						fieldID:   locf,
						pos:       uint64(loc.Position),
						start:     uint64(loc.Start),
						end:       uint64(loc.End),
						arrayposs: arrayposs,
					})
				}

				s.Locs[pid] = locs
			}
		}
	}
}

func (s *interim) writeStoredFields() (
	storedIndexOffset uint64, err error) {
	varBuf := make([]byte, binary.MaxVarintLen64)
	metaEncode := func(val uint64) (int, error) {
		wb := binary.PutUvarint(varBuf, val)
		return s.metaBuf.Write(varBuf[:wb])
	}

	data, compressed := s.tmp0[:0], s.tmp1[:0]
	defer func() { s.tmp0, s.tmp1 = data, compressed }()

	// keyed by docNum
	docStoredOffsets := make([]uint64, len(s.results))

	// keyed by fieldID, for the current doc in the loop
	docStoredFields := map[uint16]interimStoredField{}

	for docNum, result := range s.results {
		for fieldID := range docStoredFields { // reset for next doc
			delete(docStoredFields, fieldID)
		}

		for _, field := range result.Document.Fields {
			fieldID := uint16(s.getOrDefineField(field.Name()))

			opts := field.Options()

			if opts.IsStored() {
				isf := docStoredFields[fieldID]
				isf.vals = append(isf.vals, field.Value())
				isf.typs = append(isf.typs, encodeFieldType(field))
				isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
				docStoredFields[fieldID] = isf
			}

			if opts.IncludeDocValues() {
				s.IncludeDocValues[fieldID] = true
			}

			err := ValidateDocFields(field)
			if err != nil {
				return 0, err
			}
		}

		var curr int

		s.metaBuf.Reset()
		data = data[:0]

		// _id field special case optimizes ExternalID() lookups
		idFieldVal := docStoredFields[uint16(0)].vals[0]
		_, err = metaEncode(uint64(len(idFieldVal)))
		if err != nil {
			return 0, err
		}

		// handle non-"_id" fields
		for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
			isf, exists := docStoredFields[uint16(fieldID)]
			if exists {
				curr, data, err = persistStoredFieldValues(
					fieldID, isf.vals, isf.typs, isf.arrayposs,
					curr, metaEncode, data)
				if err != nil {
					return 0, err
				}
			}
		}

		metaBytes := s.metaBuf.Bytes()

		compressed = snappy.Encode(compressed[:cap(compressed)], data)

		docStoredOffsets[docNum] = uint64(s.w.Count())

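		// each stored-doc record is two uvarints (the meta length, then
		// the data length covering the _id bytes plus the
		// snappy-compressed field values), followed by those sections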
		_, err := writeUvarints(s.w,
			uint64(len(metaBytes)),
			uint64(len(idFieldVal)+len(compressed)))
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(metaBytes)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(idFieldVal)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(compressed)
		if err != nil {
			return 0, err
		}
	}

	storedIndexOffset = uint64(s.w.Count())

	for _, docStoredOffset := range docStoredOffsets {
		err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
		if err != nil {
			return 0, err
		}
	}

	return storedIndexOffset, nil
}

func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
	dictOffsets = make([]uint64, len(s.FieldsInv))

	fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
	fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))

	buf := s.grabBuf(binary.MaxVarintLen64)

	tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)

	var docTermMap [][]byte

	if s.builder == nil {
		s.builder, err = vellum.New(&s.builderBuf, nil)
		if err != nil {
			return 0, nil, err
		}
	}

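	// for each field: write every term's postings list, build the
	// field's vellum FST (term -> postings offset), persist the FST,
	// then persist the field's doc values if requested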
	for fieldID, terms := range s.DictKeys {
		if cap(docTermMap) < len(s.results) {
			docTermMap = make([][]byte, len(s.results))
		} else {
			docTermMap = docTermMap[0:len(s.results)]
			for docNum := range docTermMap { // reset the docTermMap
				docTermMap[docNum] = docTermMap[docNum][:0]
			}
		}

		dict := s.Dicts[fieldID]

		for _, term := range terms { // terms are already sorted
			pid := dict[term] - 1

			postingsBS := s.Postings[pid]

			freqNorms := s.FreqNorms[pid]
			freqNormOffset := 0

			locs := s.Locs[pid]
			locOffset := 0

			postingsItr := postingsBS.Iterator()
			for postingsItr.HasNext() {
				docNum := uint64(postingsItr.Next())

				freqNorm := freqNorms[freqNormOffset]

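				// the freq/norm chunked stream gets, per hit, the freq
				// packed with a has-locations flag (see
				// encodeFreqHasLocs) and the norm's raw float32 bits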
				err = tfEncoder.Add(docNum,
					encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
					uint64(math.Float32bits(freqNorm.norm)))
				if err != nil {
					return 0, nil, err
				}

				if freqNorm.numLocs > 0 {
					numBytesLocs := 0
					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						numBytesLocs += totalUvarintBytes(
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)), loc.arrayposs)
					}

					err = locEncoder.Add(docNum, uint64(numBytesLocs))
					if err != nil {
						return 0, nil, err
					}

					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						err = locEncoder.Add(docNum,
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)))
						if err != nil {
							return 0, nil, err
						}

						err = locEncoder.Add(docNum, loc.arrayposs...)
						if err != nil {
							return 0, nil, err
						}
					}

					locOffset += freqNorm.numLocs
				}

				freqNormOffset++

				docTermMap[docNum] = append(
					append(docTermMap[docNum], term...),
					termSeparator)
			}

			tfEncoder.Close()
			locEncoder.Close()

			postingsOffset, err :=
				writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
			if err != nil {
				return 0, nil, err
			}

			if postingsOffset > uint64(0) {
				err = s.builder.Insert([]byte(term), postingsOffset)
				if err != nil {
					return 0, nil, err
				}
			}

			tfEncoder.Reset()
			locEncoder.Reset()
		}

		err = s.builder.Close()
		if err != nil {
			return 0, nil, err
		}

		// record where this dictionary starts
		dictOffsets[fieldID] = uint64(s.w.Count())

		vellumData := s.builderBuf.Bytes()

		// write out the length of the vellum data
		n := binary.PutUvarint(buf, uint64(len(vellumData)))
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}

		// write this vellum to disk
		_, err = s.w.Write(vellumData)
		if err != nil {
			return 0, nil, err
		}

		// reset vellum for reuse
		s.builderBuf.Reset()

		err = s.builder.Reset(&s.builderBuf)
		if err != nil {
			return 0, nil, err
		}

		// write the field doc values
		if s.IncludeDocValues[fieldID] {
			for docNum, docTerms := range docTermMap {
				if len(docTerms) > 0 {
					err = fdvEncoder.Add(uint64(docNum), docTerms)
					if err != nil {
						return 0, nil, err
					}
				}
			}
			err = fdvEncoder.Close()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsStart[fieldID] = uint64(s.w.Count())

			_, err = fdvEncoder.Write()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsEnd[fieldID] = uint64(s.w.Count())

			fdvEncoder.Reset()
		} else {
			fdvOffsetsStart[fieldID] = fieldNotUninverted
			fdvOffsetsEnd[fieldID] = fieldNotUninverted
		}
	}

	fdvIndexOffset = uint64(s.w.Count())

	for i := 0; i < len(fdvOffsetsStart); i++ {
		n := binary.PutUvarint(buf, fdvOffsetsStart[i])
		_, err := s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
		n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
	}

	return fdvIndexOffset, dictOffsets, nil
}

func encodeFieldType(f document.Field) byte {
	fieldType := byte('x')
	switch f.(type) {
	case *document.TextField:
		fieldType = 't'
	case *document.NumericField:
		fieldType = 'n'
	case *document.DateTimeField:
		fieldType = 'd'
	case *document.BooleanField:
		fieldType = 'b'
	case *document.GeoPointField:
		fieldType = 'g'
	case *document.CompositeField:
		fieldType = 'c'
	}
	return fieldType
}

// returns the total # of bytes needed to encode the given uint64's
// into binary.PutUvarint() encoding
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
	n = numUvarintBytes(a)
	n += numUvarintBytes(b)
	n += numUvarintBytes(c)
	n += numUvarintBytes(d)
	n += numUvarintBytes(e)
	for _, v := range more {
		n += numUvarintBytes(v)
	}
	return n
}

// returns # of bytes needed to encode x in binary.PutUvarint() encoding
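// (e.g., numUvarintBytes(127) == 1, numUvarintBytes(128) == 2)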
func numUvarintBytes(x uint64) (n int) {
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n + 1
}