//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
	"github.com/blevesearch/vellum"
	"github.com/golang/snappy"
)

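// These knobs tune the output buffer pre-allocation performed in
// newWithChunkFactor, which estimates the next segment's size from the
// previous run handled by the pooled interim instance.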
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment; by default it does
// nothing.
// This API is experimental and may be removed at any time.
var ValidateDocFields = func(field index.Field) error {
	return nil
}

var defaultChunkFactor uint32 = 1024

// New creates an in-memory zap-encoded SegmentBase from a set of Documents.
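//
// A rough usage sketch (docs here stands in for a []index.Document produced
// by an upstream analysis step):
//
//	seg, sizeInBytes, err := (&ZapPlugin{}).New(docs)
//	// seg is the in-memory segment; sizeInBytes is the encoded length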
func (z *ZapPlugin) New(results []index.Document) (
	segment.Segment, uint64, error) {
	return z.newWithChunkFactor(results, defaultChunkFactor)
}

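// newWithChunkFactor builds the segment with an explicit chunk factor,
// reusing a pooled interim instance and growing the output buffer from the
// pool's last-run statistics when available.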
func (*ZapPlugin) newWithChunkFactor(results []index.Document,
	chunkFactor uint32) (segment.Segment, uint64, error) {
	s := interimPool.Get().(*interim)

	var br bytes.Buffer
	if s.lastNumDocs > 0 {
		// use previous results to initialize the buf with an estimated
		// size, but note that the interim instance comes from a
		// global interimPool, so multiple scorch instances indexing
		// different docs can lead to low-quality estimates
		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
			NewSegmentBufferAvgBytesPerDocFactor)
		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
			NewSegmentBufferNumResultsFactor)
		br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
	}

	s.results = results
	s.chunkFactor = chunkFactor
	s.w = NewCountHashWriter(&br)

	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
		err := s.convert()
	if err != nil {
		return nil, uint64(0), err
	}

	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
		s.FieldsMap, s.FieldsInv, uint64(len(results)),
		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)

	if err == nil && s.reset() == nil {
		s.lastNumDocs = len(results)
		s.lastOutSize = len(br.Bytes())
		interimPool.Put(s)
	}

	return sb, uint64(len(br.Bytes())), err
}

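// interimPool recycles interim instances across New calls; an instance that
// resets cleanly is returned to the pool carrying its last run's sizing
// stats.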
var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}

// interim holds temporary working data used while converting from
// analysis results to a zap-encoded segment
type interim struct {
	results []index.Document

	chunkFactor uint32

	w *CountHashWriter

	// FieldsMap adds 1 to field id to avoid zero value issues
	//  name -> field id + 1
	FieldsMap map[string]uint16

	// FieldsInv is the inverse of FieldsMap
	//  field id -> name
	FieldsInv []string

	// Term dictionaries for each field
	//  field id -> term -> postings list id + 1
	Dicts []map[string]uint64

	// Terms for each field, where terms are sorted ascending
	//  field id -> []term
	DictKeys [][]string

	// Fields whose IncludeDocValues is true
	//  field id -> bool
	IncludeDocValues []bool

	// postings id -> bitmap of docNums
	Postings []*roaring.Bitmap

	// postings id -> freq/norms, one for each docNum in postings
	FreqNorms        [][]interimFreqNorm
	freqNormsBacking []interimFreqNorm

	// postings id -> locs, one for each freq
	Locs        [][]interimLoc
	locsBacking []interimLoc

	numTermsPerPostingsList []int // key is postings list id
	numLocsPerPostingsList  []int // key is postings list id

	builder    *vellum.Builder
	builderBuf bytes.Buffer

	metaBuf bytes.Buffer

	tmp0 []byte
	tmp1 []byte

	lastNumDocs int
	lastOutSize int
}

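// reset prepares the interim for reuse, truncating slices in place so their
// backing arrays can be recycled; it returns any error from resetting the
// vellum builder.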
func (s *interim) reset() (err error) {
	s.results = nil
	s.chunkFactor = 0
	s.w = nil
	s.FieldsMap = nil
	s.FieldsInv = nil
	for i := range s.Dicts {
		s.Dicts[i] = nil
	}
	s.Dicts = s.Dicts[:0]
	for i := range s.DictKeys {
		s.DictKeys[i] = s.DictKeys[i][:0]
	}
	s.DictKeys = s.DictKeys[:0]
	for i := range s.IncludeDocValues {
		s.IncludeDocValues[i] = false
	}
	s.IncludeDocValues = s.IncludeDocValues[:0]
	for _, idn := range s.Postings {
		idn.Clear()
	}
	s.Postings = s.Postings[:0]
	s.FreqNorms = s.FreqNorms[:0]
	for i := range s.freqNormsBacking {
		s.freqNormsBacking[i] = interimFreqNorm{}
	}
	s.freqNormsBacking = s.freqNormsBacking[:0]
	s.Locs = s.Locs[:0]
	for i := range s.locsBacking {
		s.locsBacking[i] = interimLoc{}
	}
	s.locsBacking = s.locsBacking[:0]
	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
	s.builderBuf.Reset()
	if s.builder != nil {
		err = s.builder.Reset(&s.builderBuf)
	}
	s.metaBuf.Reset()
	s.tmp0 = s.tmp0[:0]
	s.tmp1 = s.tmp1[:0]
	s.lastNumDocs = 0
	s.lastOutSize = 0

	return err
}

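// grabBuf returns a scratch buffer of at least size bytes, reusing tmp0 when
// its capacity allows.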
func (s *interim) grabBuf(size int) []byte {
	buf := s.tmp0
	if cap(buf) < size {
		buf = make([]byte, size)
		s.tmp0 = buf
	}
	return buf[0:size]
}

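// interimStoredField collects the stored values for one field of the current
// document as parallel slices, one entry per value occurrence.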
type interimStoredField struct {
	vals      [][]byte
	typs      []byte
	arrayposs [][]uint64 // array positions
}

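// interimFreqNorm pairs a term's frequency and field-length norm for one
// document, along with how many location entries follow in Locs.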
type interimFreqNorm struct {
	freq    uint64
	norm    float32
	numLocs int
}

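// interimLoc records a single term occurrence: the field it appeared in, its
// token position, byte offsets, and any array positions.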
type interimLoc struct {
	fieldID   uint16
	pos       uint64
	start     uint64
	end       uint64
	arrayposs []uint64
}

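// convert drives the full encoding pass and returns the stored-fields index
// offset, the fields index offset, the field doc-values index offset, and
// the per-field dictionary offsets.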
func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
	s.FieldsMap = map[string]uint16{}

	s.getOrDefineField("_id") // _id field is fieldID 0

	for _, result := range s.results {
		result.VisitComposite(func(field index.CompositeField) {
			s.getOrDefineField(field.Name())
		})
		result.VisitFields(func(field index.Field) {
			s.getOrDefineField(field.Name())
		})
	}

	sort.Strings(s.FieldsInv[1:]) // keep _id as first field

	for fieldID, fieldName := range s.FieldsInv {
		s.FieldsMap[fieldName] = uint16(fieldID + 1)
	}

	if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
		s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
	} else {
		s.IncludeDocValues = make([]bool, len(s.FieldsInv))
	}

	s.prepareDicts()

	for _, dict := range s.DictKeys {
		sort.Strings(dict)
	}

	s.processDocuments()

	storedIndexOffset, err := s.writeStoredFields()
	if err != nil {
		return 0, 0, 0, nil, err
	}

	var fdvIndexOffset uint64
	var dictOffsets []uint64

	if len(s.results) > 0 {
		fdvIndexOffset, dictOffsets, err = s.writeDicts()
		if err != nil {
			return 0, 0, 0, nil, err
		}
	} else {
		dictOffsets = make([]uint64, len(s.FieldsInv))
	}

	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
	if err != nil {
		return 0, 0, 0, nil, err
	}

	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
}

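// getOrDefineField returns the 0-based field id for fieldName, assigning the
// next id and growing the per-field structures on first sight.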
func (s *interim) getOrDefineField(fieldName string) int {
	fieldIDPlus1, exists := s.FieldsMap[fieldName]
	if !exists {
		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
		s.FieldsMap[fieldName] = fieldIDPlus1
		s.FieldsInv = append(s.FieldsInv, fieldName)

		s.Dicts = append(s.Dicts, make(map[string]uint64))

		n := len(s.DictKeys)
		if n < cap(s.DictKeys) {
			s.DictKeys = s.DictKeys[:n+1]
			s.DictKeys[n] = s.DictKeys[n][:0]
		} else {
			s.DictKeys = append(s.DictKeys, []string(nil))
		}
	}

	return int(fieldIDPlus1 - 1)
}

// fill Dicts and DictKeys from analysis results
func (s *interim) prepareDicts() {
	var pidNext int

	var totTFs int
	var totLocs int

	visitField := func(field index.Field) {
		fieldID := uint16(s.getOrDefineField(field.Name()))

		dict := s.Dicts[fieldID]
		dictKeys := s.DictKeys[fieldID]

		tfs := field.AnalyzedTokenFrequencies()
		for term, tf := range tfs {
			pidPlus1, exists := dict[term]
			if !exists {
				pidNext++
				pidPlus1 = uint64(pidNext)

				dict[term] = pidPlus1
				dictKeys = append(dictKeys, term)

				s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
				s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
			}

			pid := pidPlus1 - 1

			s.numTermsPerPostingsList[pid] += 1
			s.numLocsPerPostingsList[pid] += len(tf.Locations)

			totLocs += len(tf.Locations)
		}

		totTFs += len(tfs)

		s.DictKeys[fieldID] = dictKeys
	}

	for _, result := range s.results {
		// walk each composite field
		result.VisitComposite(func(field index.CompositeField) {
			visitField(field)
		})

		// walk each field
		result.VisitFields(visitField)
	}

	numPostingsLists := pidNext

	if cap(s.Postings) >= numPostingsLists {
		s.Postings = s.Postings[:numPostingsLists]
	} else {
		postings := make([]*roaring.Bitmap, numPostingsLists)
		copy(postings, s.Postings[:cap(s.Postings)])
		for i := 0; i < numPostingsLists; i++ {
			if postings[i] == nil {
				postings[i] = roaring.New()
			}
		}
		s.Postings = postings
	}

	if cap(s.FreqNorms) >= numPostingsLists {
		s.FreqNorms = s.FreqNorms[:numPostingsLists]
	} else {
		s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
	}

	if cap(s.freqNormsBacking) >= totTFs {
		s.freqNormsBacking = s.freqNormsBacking[:totTFs]
	} else {
		s.freqNormsBacking = make([]interimFreqNorm, totTFs)
	}

	freqNormsBacking := s.freqNormsBacking
	for pid, numTerms := range s.numTermsPerPostingsList {
		s.FreqNorms[pid] = freqNormsBacking[0:0]
		freqNormsBacking = freqNormsBacking[numTerms:]
	}

	if cap(s.Locs) >= numPostingsLists {
		s.Locs = s.Locs[:numPostingsLists]
	} else {
		s.Locs = make([][]interimLoc, numPostingsLists)
	}

	if cap(s.locsBacking) >= totLocs {
		s.locsBacking = s.locsBacking[:totLocs]
	} else {
		s.locsBacking = make([]interimLoc, totLocs)
	}

	locsBacking := s.locsBacking
	for pid, numLocs := range s.numLocsPerPostingsList {
		s.Locs[pid] = locsBacking[0:0]
		locsBacking = locsBacking[numLocs:]
	}
}

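// processDocuments walks every document, reusing per-field length and token
// frequency scratch slices across iterations.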
func (s *interim) processDocuments() {
	numFields := len(s.FieldsInv)
	reuseFieldLens := make([]int, numFields)
	reuseFieldTFs := make([]index.TokenFrequencies, numFields)

	for docNum, result := range s.results {
		for i := 0; i < numFields; i++ { // clear these for reuse
			reuseFieldLens[i] = 0
			reuseFieldTFs[i] = nil
		}

		s.processDocument(uint64(docNum), result,
			reuseFieldLens, reuseFieldTFs)
	}
}

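// processDocument rolls a single document's composite and regular fields up
// into fieldTFs, then records postings, freq/norm entries, and locations for
// each term.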
func (s *interim) processDocument(docNum uint64,
	result index.Document,
	fieldLens []int, fieldTFs []index.TokenFrequencies) {
	visitField := func(field index.Field) {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		fieldLens[fieldID] += field.AnalyzedLength()

		existingFreqs := fieldTFs[fieldID]
		if existingFreqs != nil {
			existingFreqs.MergeAll(field.Name(), field.AnalyzedTokenFrequencies())
		} else {
			fieldTFs[fieldID] = field.AnalyzedTokenFrequencies()
		}
	}

	// walk each composite field
	result.VisitComposite(func(field index.CompositeField) {
		visitField(field)
	})

	// walk each field
	result.VisitFields(visitField)

	// now that it's been rolled up into fieldTFs, walk that
	for fieldID, tfs := range fieldTFs {
		dict := s.Dicts[fieldID]
		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))

		for term, tf := range tfs {
			pid := dict[term] - 1
			bs := s.Postings[pid]
			bs.Add(uint32(docNum))

			s.FreqNorms[pid] = append(s.FreqNorms[pid],
				interimFreqNorm{
					freq:    uint64(tf.Frequency()),
					norm:    norm,
					numLocs: len(tf.Locations),
				})

			if len(tf.Locations) > 0 {
				locs := s.Locs[pid]

				for _, loc := range tf.Locations {
					var locf = uint16(fieldID)
					if loc.Field != "" {
						locf = uint16(s.getOrDefineField(loc.Field))
					}
					var arrayposs []uint64
					if len(loc.ArrayPositions) > 0 {
						arrayposs = loc.ArrayPositions
					}
					locs = append(locs, interimLoc{
						fieldID:   locf,
						pos:       uint64(loc.Position),
						start:     uint64(loc.Start),
						end:       uint64(loc.End),
						arrayposs: arrayposs,
					})
				}

				s.Locs[pid] = locs
			}
		}
	}
}

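// writeStoredFields encodes each document's stored fields as a uvarint meta
// section followed by the raw _id value and the snappy-compressed remaining
// data, then writes the per-doc offset index and returns where that index
// begins.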
func (s *interim) writeStoredFields() (
	storedIndexOffset uint64, err error) {
	varBuf := make([]byte, binary.MaxVarintLen64)
	metaEncode := func(val uint64) (int, error) {
		wb := binary.PutUvarint(varBuf, val)
		return s.metaBuf.Write(varBuf[:wb])
	}

	data, compressed := s.tmp0[:0], s.tmp1[:0]
	defer func() { s.tmp0, s.tmp1 = data, compressed }()

	// keyed by docNum
	docStoredOffsets := make([]uint64, len(s.results))

	// keyed by fieldID, for the current doc in the loop
	docStoredFields := map[uint16]interimStoredField{}

	for docNum, result := range s.results {
		for fieldID := range docStoredFields { // reset for next doc
			delete(docStoredFields, fieldID)
		}

		var validationErr error
		result.VisitFields(func(field index.Field) {
			fieldID := uint16(s.getOrDefineField(field.Name()))

			if field.Options().IsStored() {
				isf := docStoredFields[fieldID]
				isf.vals = append(isf.vals, field.Value())
				isf.typs = append(isf.typs, field.EncodedFieldType())
				isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
				docStoredFields[fieldID] = isf
			}

			if field.Options().IncludeDocValues() {
				s.IncludeDocValues[fieldID] = true
			}

			err := ValidateDocFields(field)
			if err != nil && validationErr == nil {
				validationErr = err
			}
		})
		if validationErr != nil {
			return 0, validationErr
		}

		var curr int

		s.metaBuf.Reset()
		data = data[:0]

		// _id field special case optimizes ExternalID() lookups
		idFieldVal := docStoredFields[uint16(0)].vals[0]
		_, err = metaEncode(uint64(len(idFieldVal)))
		if err != nil {
			return 0, err
		}

		// handle non-"_id" fields
		for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
			isf, exists := docStoredFields[uint16(fieldID)]
			if exists {
				curr, data, err = persistStoredFieldValues(
					fieldID, isf.vals, isf.typs, isf.arrayposs,
					curr, metaEncode, data)
				if err != nil {
					return 0, err
				}
			}
		}

		metaBytes := s.metaBuf.Bytes()

		compressed = snappy.Encode(compressed[:cap(compressed)], data)

		docStoredOffsets[docNum] = uint64(s.w.Count())

		_, err := writeUvarints(s.w,
			uint64(len(metaBytes)),
			uint64(len(idFieldVal)+len(compressed)))
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(metaBytes)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(idFieldVal)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(compressed)
		if err != nil {
			return 0, err
		}
	}

	storedIndexOffset = uint64(s.w.Count())

	for _, docStoredOffset := range docStoredOffsets {
		err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
		if err != nil {
			return 0, err
		}
	}

	return storedIndexOffset, nil
}

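// writeDicts writes, for each field, the chunked freq/norm and location data
// per term, the vellum FST mapping terms to postings offsets, and the
// field's doc values, returning the doc-values index offset and per-field
// dictionary offsets.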
func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
	dictOffsets = make([]uint64, len(s.FieldsInv))

	fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
	fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))

	buf := s.grabBuf(binary.MaxVarintLen64)

	tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)

	var docTermMap [][]byte

	if s.builder == nil {
		s.builder, err = vellum.New(&s.builderBuf, nil)
		if err != nil {
			return 0, nil, err
		}
	}

	for fieldID, terms := range s.DictKeys {
		if cap(docTermMap) < len(s.results) {
			docTermMap = make([][]byte, len(s.results))
		} else {
			docTermMap = docTermMap[0:len(s.results)]
			for docNum := range docTermMap { // reset the docTermMap
				docTermMap[docNum] = docTermMap[docNum][:0]
			}
		}

		dict := s.Dicts[fieldID]

		for _, term := range terms { // terms are already sorted
			pid := dict[term] - 1

			postingsBS := s.Postings[pid]

			freqNorms := s.FreqNorms[pid]
			freqNormOffset := 0

			locs := s.Locs[pid]
			locOffset := 0

			postingsItr := postingsBS.Iterator()
			for postingsItr.HasNext() {
				docNum := uint64(postingsItr.Next())

				freqNorm := freqNorms[freqNormOffset]

				err = tfEncoder.Add(docNum,
					encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
					uint64(math.Float32bits(freqNorm.norm)))
				if err != nil {
					return 0, nil, err
				}

				if freqNorm.numLocs > 0 {
					numBytesLocs := 0
					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						numBytesLocs += totalUvarintBytes(
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)), loc.arrayposs)
					}

					err = locEncoder.Add(docNum, uint64(numBytesLocs))
					if err != nil {
						return 0, nil, err
					}

					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						err = locEncoder.Add(docNum,
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)))
						if err != nil {
							return 0, nil, err
						}

						err = locEncoder.Add(docNum, loc.arrayposs...)
						if err != nil {
							return 0, nil, err
						}
					}

					locOffset += freqNorm.numLocs
				}

				freqNormOffset++

				docTermMap[docNum] = append(
					append(docTermMap[docNum], term...),
					termSeparator)
			}

			tfEncoder.Close()
			locEncoder.Close()

			postingsOffset, err :=
				writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
			if err != nil {
				return 0, nil, err
			}

			if postingsOffset > uint64(0) {
				err = s.builder.Insert([]byte(term), postingsOffset)
				if err != nil {
					return 0, nil, err
				}
			}

			tfEncoder.Reset()
			locEncoder.Reset()
		}

		err = s.builder.Close()
		if err != nil {
			return 0, nil, err
		}

		// record where this dictionary starts
		dictOffsets[fieldID] = uint64(s.w.Count())

		vellumData := s.builderBuf.Bytes()

		// write out the length of the vellum data
		n := binary.PutUvarint(buf, uint64(len(vellumData)))
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}

		// write this vellum to disk
		_, err = s.w.Write(vellumData)
		if err != nil {
			return 0, nil, err
		}

		// reset vellum for reuse
		s.builderBuf.Reset()

		err = s.builder.Reset(&s.builderBuf)
		if err != nil {
			return 0, nil, err
		}

		// write the field doc values
		if s.IncludeDocValues[fieldID] {
			for docNum, docTerms := range docTermMap {
				if len(docTerms) > 0 {
					err = fdvEncoder.Add(uint64(docNum), docTerms)
					if err != nil {
						return 0, nil, err
					}
				}
			}
			err = fdvEncoder.Close()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsStart[fieldID] = uint64(s.w.Count())

			_, err = fdvEncoder.Write()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsEnd[fieldID] = uint64(s.w.Count())

			fdvEncoder.Reset()
		} else {
			fdvOffsetsStart[fieldID] = fieldNotUninverted
			fdvOffsetsEnd[fieldID] = fieldNotUninverted
		}
	}

	fdvIndexOffset = uint64(s.w.Count())

	for i := 0; i < len(fdvOffsetsStart); i++ {
		n := binary.PutUvarint(buf, fdvOffsetsStart[i])
		_, err := s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
		n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
	}

	return fdvIndexOffset, dictOffsets, nil
}


// returns the total # of bytes needed to encode the given uint64's
// into binary.PutUvarint() encoding
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
	n = numUvarintBytes(a)
	n += numUvarintBytes(b)
	n += numUvarintBytes(c)
	n += numUvarintBytes(d)
	n += numUvarintBytes(e)
	for _, v := range more {
		n += numUvarintBytes(v)
	}
	return n
}

// returns # of bytes needed to encode x in binary.PutUvarint() encoding
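// (e.g. values below 0x80 take 1 byte; 300 needs two 7-bit groups, so 2)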
func numUvarintBytes(x uint64) (n int) {
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n + 1
}
