//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
	"github.com/blevesearch/vellum"
	"github.com/golang/snappy"
)

var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment; by default it does
// nothing.
// This API is experimental and may be removed at any time.
var ValidateDocFields = func(field index.Field) error {
	return nil
}
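// For example, an application could reject fields whose stored value
// exceeds some size limit (a hypothetical policy, shown only as a sketch):
//
//	zap.ValidateDocFields = func(field index.Field) error {
//		if len(field.Value()) > 1<<20 {
//			return fmt.Errorf("stored field %q too large", field.Name())
//		}
//		return nil
//	}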

// New creates an in-memory zap-encoded SegmentBase from a set of Documents
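//
// A minimal usage sketch (assuming docs were already produced by bleve's
// analysis pipeline; the names here are illustrative):
//
//	seg, sizeInBytes, err := (&ZapPlugin{}).New(docs)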
func (z *ZapPlugin) New(results []index.Document) (
	segment.Segment, uint64, error) {
	return z.newWithChunkMode(results, DefaultChunkMode)
}

func (*ZapPlugin) newWithChunkMode(results []index.Document,
	chunkMode uint32) (segment.Segment, uint64, error) {
	s := interimPool.Get().(*interim)

	var br bytes.Buffer
	if s.lastNumDocs > 0 {
		// use previous results to initialize the buf with an estimated
		// size, but note that the interim instance comes from a
		// global interimPool, so multiple scorch instances indexing
		// different docs can lead to low quality estimates
		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
			NewSegmentBufferAvgBytesPerDocFactor)
		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
			NewSegmentBufferNumResultsFactor)
		br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
	}

	s.results = results
	s.chunkMode = chunkMode
	s.w = NewCountHashWriter(&br)

	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
		err := s.convert()
	if err != nil {
		return nil, uint64(0), err
	}

	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkMode,
		s.FieldsMap, s.FieldsInv, uint64(len(results)),
		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)

	if err == nil && s.reset() == nil {
		s.lastNumDocs = len(results)
		s.lastOutSize = len(br.Bytes())
		interimPool.Put(s)
	}

	return sb, uint64(len(br.Bytes())), err
}

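// interimPool recycles interim instances across segment builds so their
// backing slices and buffers can be reused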
var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}

// interim holds temporary working data used while converting from
// analysis results to a zap-encoded segment
type interim struct {
	results []index.Document

	chunkMode uint32

	w *CountHashWriter

	// FieldsMap adds 1 to field id to avoid zero value issues
	//  name -> field id + 1
	FieldsMap map[string]uint16

	// FieldsInv is the inverse of FieldsMap
	//  field id -> name
	FieldsInv []string

	// Term dictionaries for each field
	//  field id -> term -> postings list id + 1
	Dicts []map[string]uint64

	// Terms for each field, where terms are sorted ascending
	//  field id -> []term
	DictKeys [][]string

	// Fields whose IncludeDocValues is true
	//  field id -> bool
	IncludeDocValues []bool

	// postings id -> bitmap of docNums
	Postings []*roaring.Bitmap
	// postings id -> freq/norms, one for each docNum in postings
	FreqNorms        [][]interimFreqNorm
	freqNormsBacking []interimFreqNorm

	// postings id -> locs, one for each freq
	Locs        [][]interimLoc
	locsBacking []interimLoc

	numTermsPerPostingsList []int // key is postings list id
	numLocsPerPostingsList  []int // key is postings list id

	builder    *vellum.Builder
	builderBuf bytes.Buffer

	metaBuf bytes.Buffer

	tmp0 []byte
	tmp1 []byte

	lastNumDocs int
	lastOutSize int
}

// reset clears the interim for reuse, retaining the capacity of its
// backing slices and buffers
func (s *interim) reset() (err error) {
	s.results = nil
	s.chunkMode = 0
	s.w = nil
	s.FieldsMap = nil
	s.FieldsInv = nil
	for i := range s.Dicts {
		s.Dicts[i] = nil
	}
	s.Dicts = s.Dicts[:0]
	for i := range s.DictKeys {
		s.DictKeys[i] = s.DictKeys[i][:0]
	}
	s.DictKeys = s.DictKeys[:0]
	for i := range s.IncludeDocValues {
		s.IncludeDocValues[i] = false
	}
	s.IncludeDocValues = s.IncludeDocValues[:0]
	for _, idn := range s.Postings {
		idn.Clear()
	}
	s.Postings = s.Postings[:0]
	s.FreqNorms = s.FreqNorms[:0]
	for i := range s.freqNormsBacking {
		s.freqNormsBacking[i] = interimFreqNorm{}
	}
	s.freqNormsBacking = s.freqNormsBacking[:0]
	s.Locs = s.Locs[:0]
	for i := range s.locsBacking {
		s.locsBacking[i] = interimLoc{}
	}
	s.locsBacking = s.locsBacking[:0]
	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
	s.builderBuf.Reset()
	if s.builder != nil {
		err = s.builder.Reset(&s.builderBuf)
	}
	s.metaBuf.Reset()
	s.tmp0 = s.tmp0[:0]
	s.tmp1 = s.tmp1[:0]
	s.lastNumDocs = 0
	s.lastOutSize = 0

	return err
}

// grabBuf returns a scratch slice of exactly size bytes, reusing (and
// growing, if needed) the s.tmp0 backing buffer
func (s *interim) grabBuf(size int) []byte {
	buf := s.tmp0
	if cap(buf) < size {
		buf = make([]byte, size)
		s.tmp0 = buf
	}
	return buf[0:size]
}

// interimStoredField accumulates the stored values, encoded types and
// array positions for one field of the current document
type interimStoredField struct {
	vals      [][]byte
	typs      []byte
	arrayposs [][]uint64 // array positions
}

// interimFreqNorm is the per-docNum freq/norm entry of a postings list
type interimFreqNorm struct {
	freq    uint64
	norm    float32
	numLocs int
}

// interimLoc is a single term location: the field, token position and
// byte offsets, plus any array positions
type interimLoc struct {
	fieldID   uint16
	pos       uint64
	start     uint64
	end       uint64
	arrayposs []uint64
}

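// convert drives the conversion: it assigns field ids (keeping _id as
// field 0), builds the per-field term dictionaries, then writes the
// stored-field, dictionary/postings and doc-value sections via s.w,
// returning the offsets needed by InitSegmentBase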
func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
	s.FieldsMap = map[string]uint16{}

	s.getOrDefineField("_id") // _id field is fieldID 0

	for _, result := range s.results {
		result.VisitComposite(func(field index.CompositeField) {
			s.getOrDefineField(field.Name())
		})
		result.VisitFields(func(field index.Field) {
			s.getOrDefineField(field.Name())
		})
	}

	sort.Strings(s.FieldsInv[1:]) // keep _id as first field

	for fieldID, fieldName := range s.FieldsInv {
		s.FieldsMap[fieldName] = uint16(fieldID + 1)
	}

	if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
		s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
	} else {
		s.IncludeDocValues = make([]bool, len(s.FieldsInv))
	}

	s.prepareDicts()

	for _, dict := range s.DictKeys {
		sort.Strings(dict)
	}

	s.processDocuments()

	storedIndexOffset, err := s.writeStoredFields()
	if err != nil {
		return 0, 0, 0, nil, err
	}

	var fdvIndexOffset uint64
	var dictOffsets []uint64

	if len(s.results) > 0 {
		fdvIndexOffset, dictOffsets, err = s.writeDicts()
		if err != nil {
			return 0, 0, 0, nil, err
		}
	} else {
		dictOffsets = make([]uint64, len(s.FieldsInv))
	}

	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
	if err != nil {
		return 0, 0, 0, nil, err
	}

	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
}

// getOrDefineField returns the 0-based field id for fieldName, registering
// the field and its parallel per-field slices on first sight
func (s *interim) getOrDefineField(fieldName string) int {
	fieldIDPlus1, exists := s.FieldsMap[fieldName]
	if !exists {
		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
		s.FieldsMap[fieldName] = fieldIDPlus1
		s.FieldsInv = append(s.FieldsInv, fieldName)

		s.Dicts = append(s.Dicts, make(map[string]uint64))

		n := len(s.DictKeys)
		if n < cap(s.DictKeys) {
			s.DictKeys = s.DictKeys[:n+1]
			s.DictKeys[n] = s.DictKeys[n][:0]
		} else {
			s.DictKeys = append(s.DictKeys, []string(nil))
		}
	}

	return int(fieldIDPlus1 - 1)
}

// fill Dicts and DictKeys from analysis results
func (s *interim) prepareDicts() {
	var pidNext int

	var totTFs int
	var totLocs int

	visitField := func(field index.Field) {
		fieldID := uint16(s.getOrDefineField(field.Name()))

		dict := s.Dicts[fieldID]
		dictKeys := s.DictKeys[fieldID]

		tfs := field.AnalyzedTokenFrequencies()
		for term, tf := range tfs {
			pidPlus1, exists := dict[term]
			if !exists {
				pidNext++
				pidPlus1 = uint64(pidNext)

				dict[term] = pidPlus1
				dictKeys = append(dictKeys, term)

				s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
				s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
			}

			pid := pidPlus1 - 1

			s.numTermsPerPostingsList[pid] += 1
			s.numLocsPerPostingsList[pid] += len(tf.Locations)

			totLocs += len(tf.Locations)
		}

		totTFs += len(tfs)

		s.DictKeys[fieldID] = dictKeys
	}

	for _, result := range s.results {
		// walk each composite field
		result.VisitComposite(func(field index.CompositeField) {
			visitField(field)
		})

		// walk each field
		result.VisitFields(visitField)
	}

	numPostingsLists := pidNext

	if cap(s.Postings) >= numPostingsLists {
		s.Postings = s.Postings[:numPostingsLists]
	} else {
		// grow, carrying over any previously allocated bitmaps
		// (entries beyond the old length but within cap are reused, too)
		postings := make([]*roaring.Bitmap, numPostingsLists)
		copy(postings, s.Postings[:cap(s.Postings)])
		for i := 0; i < numPostingsLists; i++ {
			if postings[i] == nil {
				postings[i] = roaring.New()
			}
		}
		s.Postings = postings
	}

	if cap(s.FreqNorms) >= numPostingsLists {
		s.FreqNorms = s.FreqNorms[:numPostingsLists]
	} else {
		s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
	}

	if cap(s.freqNormsBacking) >= totTFs {
		s.freqNormsBacking = s.freqNormsBacking[:totTFs]
	} else {
		s.freqNormsBacking = make([]interimFreqNorm, totTFs)
	}

	// partition the shared backing array into zero-length sub-slices, one
	// per postings list, so appends in processDocument fill the backing
	// array in place without allocating
	freqNormsBacking := s.freqNormsBacking
	for pid, numTerms := range s.numTermsPerPostingsList {
		s.FreqNorms[pid] = freqNormsBacking[0:0]
		freqNormsBacking = freqNormsBacking[numTerms:]
	}

	if cap(s.Locs) >= numPostingsLists {
		s.Locs = s.Locs[:numPostingsLists]
	} else {
		s.Locs = make([][]interimLoc, numPostingsLists)
	}

	if cap(s.locsBacking) >= totLocs {
		s.locsBacking = s.locsBacking[:totLocs]
	} else {
		s.locsBacking = make([]interimLoc, totLocs)
	}

	// same zero-length partitioning trick for the location backing array
	locsBacking := s.locsBacking
	for pid, numLocs := range s.numLocsPerPostingsList {
		s.Locs[pid] = locsBacking[0:0]
		locsBacking = locsBacking[numLocs:]
	}
}

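// processDocuments walks every document, rolls its token frequencies up
// per field, and appends to the Postings, FreqNorms and Locs prepared by
// prepareDicts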
func (s *interim) processDocuments() {
	numFields := len(s.FieldsInv)
	reuseFieldLens := make([]int, numFields)
	reuseFieldTFs := make([]index.TokenFrequencies, numFields)

	for docNum, result := range s.results {
		for i := 0; i < numFields; i++ { // clear these for reuse
			reuseFieldLens[i] = 0
			reuseFieldTFs[i] = nil
		}

		s.processDocument(uint64(docNum), result,
			reuseFieldLens, reuseFieldTFs)
	}
}

func (s *interim) processDocument(docNum uint64,
	result index.Document,
	fieldLens []int, fieldTFs []index.TokenFrequencies) {
	visitField := func(field index.Field) {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		fieldLens[fieldID] += field.AnalyzedLength()

		existingFreqs := fieldTFs[fieldID]
		if existingFreqs != nil {
			existingFreqs.MergeAll(field.Name(), field.AnalyzedTokenFrequencies())
		} else {
			fieldTFs[fieldID] = field.AnalyzedTokenFrequencies()
		}
	}

	// walk each composite field
	result.VisitComposite(func(field index.CompositeField) {
		visitField(field)
	})

	// walk each field
	result.VisitFields(visitField)

	// now that it's been rolled up into fieldTFs, walk that
	for fieldID, tfs := range fieldTFs {
		dict := s.Dicts[fieldID]
		// the field length is carried verbatim in the bit pattern of the
		// float32 norm; it's recovered later via math.Float32bits
		norm := math.Float32frombits(uint32(fieldLens[fieldID]))

		for term, tf := range tfs {
			pid := dict[term] - 1
			bs := s.Postings[pid]
			bs.Add(uint32(docNum))

			s.FreqNorms[pid] = append(s.FreqNorms[pid],
				interimFreqNorm{
					freq:    uint64(tf.Frequency()),
					norm:    norm,
					numLocs: len(tf.Locations),
				})

			if len(tf.Locations) > 0 {
				locs := s.Locs[pid]

				for _, loc := range tf.Locations {
					var locf = uint16(fieldID)
					if loc.Field != "" {
						locf = uint16(s.getOrDefineField(loc.Field))
					}
					var arrayposs []uint64
					if len(loc.ArrayPositions) > 0 {
						arrayposs = loc.ArrayPositions
					}
					locs = append(locs, interimLoc{
						fieldID:   locf,
						pos:       uint64(loc.Position),
						start:     uint64(loc.Start),
						end:       uint64(loc.End),
						arrayposs: arrayposs,
					})
				}

				s.Locs[pid] = locs
			}
		}
	}
}

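// writeStoredFields writes one record per document (its metadata, the raw
// _id value, and the snappy-compressed field data), followed by an index
// of per-doc offsets, and returns the offset of that index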
func (s *interim) writeStoredFields() (
	storedIndexOffset uint64, err error) {
	varBuf := make([]byte, binary.MaxVarintLen64)
	metaEncode := func(val uint64) (int, error) {
		wb := binary.PutUvarint(varBuf, val)
		return s.metaBuf.Write(varBuf[:wb])
	}

	data, compressed := s.tmp0[:0], s.tmp1[:0]
	defer func() { s.tmp0, s.tmp1 = data, compressed }()

	// keyed by docNum
	docStoredOffsets := make([]uint64, len(s.results))

	// keyed by fieldID, for the current doc in the loop
	docStoredFields := map[uint16]interimStoredField{}

	for docNum, result := range s.results {
		for fieldID := range docStoredFields { // reset for next doc
			delete(docStoredFields, fieldID)
		}

		var validationErr error
		result.VisitFields(func(field index.Field) {
			fieldID := uint16(s.getOrDefineField(field.Name()))

			if field.Options().IsStored() {
				isf := docStoredFields[fieldID]
				isf.vals = append(isf.vals, field.Value())
				isf.typs = append(isf.typs, field.EncodedFieldType())
				isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
				docStoredFields[fieldID] = isf
			}

			if field.Options().IncludeDocValues() {
				s.IncludeDocValues[fieldID] = true
			}

			err := ValidateDocFields(field)
			if err != nil && validationErr == nil {
				validationErr = err
			}
		})
		if validationErr != nil {
			return 0, validationErr
		}

		var curr int

		s.metaBuf.Reset()
		data = data[:0]

		// _id field special case optimizes ExternalID() lookups
		idFieldVal := docStoredFields[uint16(0)].vals[0]
		_, err = metaEncode(uint64(len(idFieldVal)))
		if err != nil {
			return 0, err
		}

		// handle non-"_id" fields
		for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
			isf, exists := docStoredFields[uint16(fieldID)]
			if exists {
				curr, data, err = persistStoredFieldValues(
					fieldID, isf.vals, isf.typs, isf.arrayposs,
					curr, metaEncode, data)
				if err != nil {
					return 0, err
				}
			}
		}

		metaBytes := s.metaBuf.Bytes()

		compressed = snappy.Encode(compressed[:cap(compressed)], data)

		docStoredOffsets[docNum] = uint64(s.w.Count())

		// per-doc record layout: uvarint meta length, uvarint combined
		// length of the raw _id value plus the compressed data, then the
		// meta bytes, the _id value, and the snappy-compressed data
		_, err := writeUvarints(s.w,
			uint64(len(metaBytes)),
			uint64(len(idFieldVal)+len(compressed)))
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(metaBytes)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(idFieldVal)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(compressed)
		if err != nil {
			return 0, err
		}
	}

	storedIndexOffset = uint64(s.w.Count())

	for _, docStoredOffset := range docStoredOffsets {
		err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
		if err != nil {
			return 0, err
		}
	}

	return storedIndexOffset, nil
}

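// writeDicts writes, for each field, the postings lists, the vellum term
// dictionary and the field's doc values, returning the doc-value index
// offset and the per-field dictionary offsets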
func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
	dictOffsets = make([]uint64, len(s.FieldsInv))

	fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
	fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))

	buf := s.grabBuf(binary.MaxVarintLen64)

	// these int coders are initialized with chunk size 1024
	// however this will be reset to the correct chunk size
	// while processing each individual field-term section
	tfEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1))
	locEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1))

	// docTermMap accumulates, per docNum, this field's terms separated by
	// termSeparator, feeding the doc-values section below
	var docTermMap [][]byte

	if s.builder == nil {
		s.builder, err = vellum.New(&s.builderBuf, nil)
		if err != nil {
			return 0, nil, err
		}
	}

	for fieldID, terms := range s.DictKeys {
		if cap(docTermMap) < len(s.results) {
			docTermMap = make([][]byte, len(s.results))
		} else {
			docTermMap = docTermMap[0:len(s.results)]
			for docNum := range docTermMap { // reset the docTermMap
				docTermMap[docNum] = docTermMap[docNum][:0]
			}
		}

		dict := s.Dicts[fieldID]

		for _, term := range terms { // terms are already sorted
			pid := dict[term] - 1

			postingsBS := s.Postings[pid]

			freqNorms := s.FreqNorms[pid]
			freqNormOffset := 0

			locs := s.Locs[pid]
			locOffset := 0

			chunkSize, err := getChunkSize(s.chunkMode, postingsBS.GetCardinality(), uint64(len(s.results)))
			if err != nil {
				return 0, nil, err
			}
			tfEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1))
			locEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1))

			postingsItr := postingsBS.Iterator()
			for postingsItr.HasNext() {
				docNum := uint64(postingsItr.Next())

				freqNorm := freqNorms[freqNormOffset]

				// check if freq/norm is enabled
				if freqNorm.freq > 0 {
					err = tfEncoder.Add(docNum,
						encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
						uint64(math.Float32bits(freqNorm.norm)))
				} else {
					// if disabled, then skip the norm part
					err = tfEncoder.Add(docNum,
						encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0))
				}
				if err != nil {
					return 0, nil, err
				}

				if freqNorm.numLocs > 0 {
					numBytesLocs := 0
					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						numBytesLocs += totalUvarintBytes(
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)), loc.arrayposs)
					}

					err = locEncoder.Add(docNum, uint64(numBytesLocs))
					if err != nil {
						return 0, nil, err
					}

					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						err = locEncoder.Add(docNum,
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)))
						if err != nil {
							return 0, nil, err
						}

						err = locEncoder.Add(docNum, loc.arrayposs...)
						if err != nil {
							return 0, nil, err
						}
					}

					locOffset += freqNorm.numLocs
				}

				freqNormOffset++

				docTermMap[docNum] = append(
					append(docTermMap[docNum], term...),
					termSeparator)
			}

			tfEncoder.Close()
			locEncoder.Close()

			postingsOffset, err :=
				writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
			if err != nil {
				return 0, nil, err
			}

			if postingsOffset > uint64(0) {
				err = s.builder.Insert([]byte(term), postingsOffset)
				if err != nil {
					return 0, nil, err
				}
			}

			tfEncoder.Reset()
			locEncoder.Reset()
		}

		err = s.builder.Close()
		if err != nil {
			return 0, nil, err
		}

		// record where this dictionary starts
		dictOffsets[fieldID] = uint64(s.w.Count())

		vellumData := s.builderBuf.Bytes()

		// write out the length of the vellum data
		n := binary.PutUvarint(buf, uint64(len(vellumData)))
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}

		// write this vellum to disk
		_, err = s.w.Write(vellumData)
		if err != nil {
			return 0, nil, err
		}

		// reset vellum for reuse
		s.builderBuf.Reset()

		err = s.builder.Reset(&s.builderBuf)
		if err != nil {
			return 0, nil, err
		}

		// write the field doc values
		// NOTE: doc values continue to use legacy chunk mode
		chunkSize, err := getChunkSize(LegacyChunkMode, 0, 0)
		if err != nil {
			return 0, nil, err
		}
		fdvEncoder := newChunkedContentCoder(chunkSize, uint64(len(s.results)-1), s.w, false)
		if s.IncludeDocValues[fieldID] {
			for docNum, docTerms := range docTermMap {
				if len(docTerms) > 0 {
					err = fdvEncoder.Add(uint64(docNum), docTerms)
					if err != nil {
						return 0, nil, err
					}
				}
			}
			err = fdvEncoder.Close()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsStart[fieldID] = uint64(s.w.Count())

			_, err = fdvEncoder.Write()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsEnd[fieldID] = uint64(s.w.Count())

			fdvEncoder.Reset()
		} else {
			fdvOffsetsStart[fieldID] = fieldNotUninverted
			fdvOffsetsEnd[fieldID] = fieldNotUninverted
		}
	}

	fdvIndexOffset = uint64(s.w.Count())

	for i := 0; i < len(fdvOffsetsStart); i++ {
		n := binary.PutUvarint(buf, fdvOffsetsStart[i])
		_, err := s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
		n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
	}

	return fdvIndexOffset, dictOffsets, nil
}

// totalUvarintBytes returns the total # of bytes needed to encode the
// given uint64 values in binary.PutUvarint() encoding
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
	n = numUvarintBytes(a)
	n += numUvarintBytes(b)
	n += numUvarintBytes(c)
	n += numUvarintBytes(d)
	n += numUvarintBytes(e)
	for _, v := range more {
		n += numUvarintBytes(v)
	}
	return n
}

// returns # of bytes needed to encode x in binary.PutUvarint() encoding
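// e.g., numUvarintBytes(0x7f) == 1 and numUvarintBytes(0x80) == 2, since
// each uvarint byte carries 7 bits of payload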
func numUvarintBytes(x uint64) (n int) {
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n + 1
}
