//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/index/scorch/segment"
	"github.com/couchbase/vellum"
	"github.com/golang/snappy"
)

var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional
// checks on fields in a document being added to a new segment. By
// default it does nothing.
// This API is experimental and may be removed at any time.
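//
// For example, an application could reject oversized stored values
// (illustrative sketch only; assumes a fmt import at the call site):
//
//	zap.ValidateDocFields = func(f document.Field) error {
//		if len(f.Value()) > 1<<20 {
//			return fmt.Errorf("field %q: value exceeds 1 MiB", f.Name())
//		}
//		return nil
//	}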
var ValidateDocFields = func(field document.Field) error {
	return nil
}

// New produces an in-memory zap-encoded SegmentBase from
// analysis results.
func (z *ZapPlugin) New(results []*index.AnalysisResult) (
	segment.Segment, uint64, error) {
	return z.newWithChunkMode(results, DefaultChunkMode)
}

func (*ZapPlugin) newWithChunkMode(results []*index.AnalysisResult,
	chunkMode uint32) (segment.Segment, uint64, error) {
	s := interimPool.Get().(*interim)

	var br bytes.Buffer
	if s.lastNumDocs > 0 {
		// use previous results to initialize the buf with an estimated
		// size, but note that the interim instance comes from a
		// global interimPool, so multiple scorch instances indexing
		// different docs can lead to low quality estimates
		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
			NewSegmentBufferAvgBytesPerDocFactor)
		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
			NewSegmentBufferNumResultsFactor)
		br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
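		// worked example (illustrative numbers): with lastOutSize = 2 MiB
		// over lastNumDocs = 1000 (~2 KiB/doc) and 900 incoming results,
		// the buffer pre-grows to (900+100) * 2048 ≈ 2 MiB under the
		// default 1.0 factors, avoiding repeated reallocations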
	}

	s.results = results
	s.chunkMode = chunkMode
	s.w = NewCountHashWriter(&br)

	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
		err := s.convert()
	if err != nil {
		return nil, uint64(0), err
	}

	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkMode,
		s.FieldsMap, s.FieldsInv, uint64(len(results)),
		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)

	if err == nil && s.reset() == nil {
		s.lastNumDocs = len(results)
		s.lastOutSize = len(br.Bytes())
		interimPool.Put(s)
	}

	return sb, uint64(len(br.Bytes())), err
}

var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}

// interim holds temporary working data used while converting from
// analysis results to a zap-encoded segment
type interim struct {
	results []*index.AnalysisResult

	chunkMode uint32

	w *CountHashWriter

	// FieldsMap adds 1 to field id to avoid zero value issues
	//  name -> field id + 1
	FieldsMap map[string]uint16

	// FieldsInv is the inverse of FieldsMap
	//  field id -> name
	FieldsInv []string

	// Term dictionaries for each field
	//  field id -> term -> postings list id + 1
	Dicts []map[string]uint64

	// Terms for each field, where terms are sorted ascending
	//  field id -> []term
	DictKeys [][]string

	// Fields whose IncludeDocValues is true
	//  field id -> bool
	IncludeDocValues []bool

	// postings id -> bitmap of docNums
	Postings []*roaring.Bitmap

	// postings id -> freq/norms, one for each docNum in postings
	FreqNorms        [][]interimFreqNorm
	freqNormsBacking []interimFreqNorm

	// postings id -> locs, one for each freq
	Locs        [][]interimLoc
	locsBacking []interimLoc

	numTermsPerPostingsList []int // key is postings list id
	numLocsPerPostingsList  []int // key is postings list id

	builder    *vellum.Builder
	builderBuf bytes.Buffer

	metaBuf bytes.Buffer

	tmp0 []byte
	tmp1 []byte

	lastNumDocs int
	lastOutSize int
}
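
// To illustrate the +1 conventions above (hypothetical values): after
// defining "_id" and "body", FieldsMap is {"_id": 1, "body": 2} and
// FieldsInv is ["_id", "body"], so a zero value from a FieldsMap lookup
// unambiguously means "field not defined"; Dicts applies the same +1
// trick to postings list ids.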

func (s *interim) reset() (err error) {
	s.results = nil
	s.chunkMode = 0
	s.w = nil
	s.FieldsMap = nil
	s.FieldsInv = nil
	for i := range s.Dicts {
		s.Dicts[i] = nil
	}
	s.Dicts = s.Dicts[:0]
	for i := range s.DictKeys {
		s.DictKeys[i] = s.DictKeys[i][:0]
	}
	s.DictKeys = s.DictKeys[:0]
	for i := range s.IncludeDocValues {
		s.IncludeDocValues[i] = false
	}
	s.IncludeDocValues = s.IncludeDocValues[:0]
	for _, idn := range s.Postings {
		idn.Clear()
	}
	s.Postings = s.Postings[:0]
	s.FreqNorms = s.FreqNorms[:0]
	for i := range s.freqNormsBacking {
		s.freqNormsBacking[i] = interimFreqNorm{}
	}
	s.freqNormsBacking = s.freqNormsBacking[:0]
	s.Locs = s.Locs[:0]
	for i := range s.locsBacking {
		s.locsBacking[i] = interimLoc{}
	}
	s.locsBacking = s.locsBacking[:0]
	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
	s.builderBuf.Reset()
	if s.builder != nil {
		err = s.builder.Reset(&s.builderBuf)
	}
	s.metaBuf.Reset()
	s.tmp0 = s.tmp0[:0]
	s.tmp1 = s.tmp1[:0]
	s.lastNumDocs = 0
	s.lastOutSize = 0

	return err
}

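// grabBuf returns a scratch buffer of exactly size bytes, reusing the
// pooled s.tmp0 allocation when its capacity suffices; the returned
// bytes may contain stale data from earlier use, so callers must
// overwrite the region they need.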
func (s *interim) grabBuf(size int) []byte {
	buf := s.tmp0
	if cap(buf) < size {
		buf = make([]byte, size)
		s.tmp0 = buf
	}
	return buf[0:size]
}

type interimStoredField struct {
	vals      [][]byte
	typs      []byte
	arrayposs [][]uint64 // array positions
}

type interimFreqNorm struct {
	freq    uint64
	norm    float32
	numLocs int
}

type interimLoc struct {
	fieldID   uint16
	pos       uint64
	start     uint64
	end       uint64
	arrayposs []uint64
}
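
// For example (hypothetical values), the token "fox" occurring as the
// 3rd term of field "body" (fieldID 2) at byte offsets [16, 19) would
// be recorded as interimLoc{fieldID: 2, pos: 3, start: 16, end: 19};
// arrayposs stays nil unless the value came from an array element.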

func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
	s.FieldsMap = map[string]uint16{}

	s.getOrDefineField("_id") // _id field is fieldID 0

	for _, result := range s.results {
		for _, field := range result.Document.CompositeFields {
			s.getOrDefineField(field.Name())
		}
		for _, field := range result.Document.Fields {
			s.getOrDefineField(field.Name())
		}
	}

	sort.Strings(s.FieldsInv[1:]) // keep _id as first field

	for fieldID, fieldName := range s.FieldsInv {
		s.FieldsMap[fieldName] = uint16(fieldID + 1)
	}

	if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
		s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
	} else {
		s.IncludeDocValues = make([]bool, len(s.FieldsInv))
	}

	s.prepareDicts()

	for _, dict := range s.DictKeys {
		sort.Strings(dict)
	}

	s.processDocuments()

	storedIndexOffset, err := s.writeStoredFields()
	if err != nil {
		return 0, 0, 0, nil, err
	}

	var fdvIndexOffset uint64
	var dictOffsets []uint64

	if len(s.results) > 0 {
		fdvIndexOffset, dictOffsets, err = s.writeDicts()
		if err != nil {
			return 0, 0, 0, nil, err
		}
	} else {
		dictOffsets = make([]uint64, len(s.FieldsInv))
	}

	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
	if err != nil {
		return 0, 0, 0, nil, err
	}

	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
}

func (s *interim) getOrDefineField(fieldName string) int {
	fieldIDPlus1, exists := s.FieldsMap[fieldName]
	if !exists {
		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
		s.FieldsMap[fieldName] = fieldIDPlus1
		s.FieldsInv = append(s.FieldsInv, fieldName)

		s.Dicts = append(s.Dicts, make(map[string]uint64))

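		// reuse the DictKeys slot left over from a previous build when
		// the pooled backing array still has capacity, avoiding a fresh
		// per-field allocation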
		n := len(s.DictKeys)
		if n < cap(s.DictKeys) {
			s.DictKeys = s.DictKeys[:n+1]
			s.DictKeys[n] = s.DictKeys[n][:0]
		} else {
			s.DictKeys = append(s.DictKeys, []string(nil))
		}
	}

	return int(fieldIDPlus1 - 1)
}

// fill Dicts and DictKeys from analysis results
func (s *interim) prepareDicts() {
	var pidNext int

	var totTFs int
	var totLocs int

	visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
		dict := s.Dicts[fieldID]
		dictKeys := s.DictKeys[fieldID]

		for term, tf := range tfs {
			pidPlus1, exists := dict[term]
			if !exists {
				pidNext++
				pidPlus1 = uint64(pidNext)

				dict[term] = pidPlus1
				dictKeys = append(dictKeys, term)

				s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
				s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
			}

			pid := pidPlus1 - 1

			s.numTermsPerPostingsList[pid] += 1
			s.numLocsPerPostingsList[pid] += len(tf.Locations)

			totLocs += len(tf.Locations)
		}

		totTFs += len(tfs)

		s.DictKeys[fieldID] = dictKeys
	}

	for _, result := range s.results {
		// walk each composite field
		for _, field := range result.Document.CompositeFields {
			fieldID := uint16(s.getOrDefineField(field.Name()))
			_, tf := field.Analyze()
			visitField(fieldID, tf)
		}

		// walk each field
		for i, field := range result.Document.Fields {
			fieldID := uint16(s.getOrDefineField(field.Name()))
			tf := result.Analyzed[i]
			visitField(fieldID, tf)
		}
	}

	numPostingsLists := pidNext

	if cap(s.Postings) >= numPostingsLists {
		s.Postings = s.Postings[:numPostingsLists]
	} else {
		postings := make([]*roaring.Bitmap, numPostingsLists)
		copy(postings, s.Postings[:cap(s.Postings)])
		for i := 0; i < numPostingsLists; i++ {
			if postings[i] == nil {
				postings[i] = roaring.New()
			}
		}
		s.Postings = postings
	}

	if cap(s.FreqNorms) >= numPostingsLists {
		s.FreqNorms = s.FreqNorms[:numPostingsLists]
	} else {
		s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
	}

	if cap(s.freqNormsBacking) >= totTFs {
		s.freqNormsBacking = s.freqNormsBacking[:totTFs]
	} else {
		s.freqNormsBacking = make([]interimFreqNorm, totTFs)
	}

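	// carve the shared backing array into per-postings-list slices: each
	// FreqNorms[pid] starts as an empty slice aliasing the start of the
	// remaining backing array, and exactly numTerms appends per pid will
	// fill it in place without further allocation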
	freqNormsBacking := s.freqNormsBacking
	for pid, numTerms := range s.numTermsPerPostingsList {
		s.FreqNorms[pid] = freqNormsBacking[0:0]
		freqNormsBacking = freqNormsBacking[numTerms:]
	}

	if cap(s.Locs) >= numPostingsLists {
		s.Locs = s.Locs[:numPostingsLists]
	} else {
		s.Locs = make([][]interimLoc, numPostingsLists)
	}

	if cap(s.locsBacking) >= totLocs {
		s.locsBacking = s.locsBacking[:totLocs]
	} else {
		s.locsBacking = make([]interimLoc, totLocs)
	}

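	// carve locsBacking the same way as freqNormsBacking above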
	locsBacking := s.locsBacking
	for pid, numLocs := range s.numLocsPerPostingsList {
		s.Locs[pid] = locsBacking[0:0]
		locsBacking = locsBacking[numLocs:]
	}
}

func (s *interim) processDocuments() {
	numFields := len(s.FieldsInv)
	reuseFieldLens := make([]int, numFields)
	reuseFieldTFs := make([]analysis.TokenFrequencies, numFields)

	for docNum, result := range s.results {
		for i := 0; i < numFields; i++ { // clear these for reuse
			reuseFieldLens[i] = 0
			reuseFieldTFs[i] = nil
		}

		s.processDocument(uint64(docNum), result,
			reuseFieldLens, reuseFieldTFs)
	}
}

func (s *interim) processDocument(docNum uint64,
	result *index.AnalysisResult,
	fieldLens []int, fieldTFs []analysis.TokenFrequencies) {
	visitField := func(fieldID uint16, fieldName string,
		ln int, tf analysis.TokenFrequencies) {
		fieldLens[fieldID] += ln

		existingFreqs := fieldTFs[fieldID]
		if existingFreqs != nil {
			existingFreqs.MergeAll(fieldName, tf)
		} else {
			fieldTFs[fieldID] = tf
		}
	}

	// walk each composite field
	for _, field := range result.Document.CompositeFields {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		ln, tf := field.Analyze()
		visitField(fieldID, field.Name(), ln, tf)
	}

	// walk each field
	for i, field := range result.Document.Fields {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		ln := result.Length[i]
		tf := result.Analyzed[i]
		visitField(fieldID, field.Name(), ln, tf)
	}

	// now that it's been rolled up into fieldTFs, walk that
	for fieldID, tfs := range fieldTFs {
		dict := s.Dicts[fieldID]
		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
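		// e.g. a field holding 9 tokens gets norm 1/sqrt(9) ≈ 0.333;
		// shorter fields thus carry larger norms, the usual length
		// normalization for scoring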

		for term, tf := range tfs {
			pid := dict[term] - 1
			bs := s.Postings[pid]
			bs.Add(uint32(docNum))

			s.FreqNorms[pid] = append(s.FreqNorms[pid],
				interimFreqNorm{
					freq:    uint64(tf.Frequency()),
					norm:    norm,
					numLocs: len(tf.Locations),
				})

			if len(tf.Locations) > 0 {
				locs := s.Locs[pid]

				for _, loc := range tf.Locations {
					var locf = uint16(fieldID)
					if loc.Field != "" {
						locf = uint16(s.getOrDefineField(loc.Field))
					}
					var arrayposs []uint64
					if len(loc.ArrayPositions) > 0 {
						arrayposs = loc.ArrayPositions
					}
					locs = append(locs, interimLoc{
						fieldID:   locf,
						pos:       uint64(loc.Position),
						start:     uint64(loc.Start),
						end:       uint64(loc.End),
						arrayposs: arrayposs,
					})
				}

				s.Locs[pid] = locs
			}
		}
	}
}

func (s *interim) writeStoredFields() (
	storedIndexOffset uint64, err error) {
	varBuf := make([]byte, binary.MaxVarintLen64)
	metaEncode := func(val uint64) (int, error) {
		wb := binary.PutUvarint(varBuf, val)
		return s.metaBuf.Write(varBuf[:wb])
	}

	data, compressed := s.tmp0[:0], s.tmp1[:0]
	defer func() { s.tmp0, s.tmp1 = data, compressed }()

	// keyed by docNum
	docStoredOffsets := make([]uint64, len(s.results))

	// keyed by fieldID, for the current doc in the loop
	docStoredFields := map[uint16]interimStoredField{}

	for docNum, result := range s.results {
		for fieldID := range docStoredFields { // reset for next doc
			delete(docStoredFields, fieldID)
		}

		for _, field := range result.Document.Fields {
			fieldID := uint16(s.getOrDefineField(field.Name()))

			opts := field.Options()

			if opts.IsStored() {
				isf := docStoredFields[fieldID]
				isf.vals = append(isf.vals, field.Value())
				isf.typs = append(isf.typs, encodeFieldType(field))
				isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
				docStoredFields[fieldID] = isf
			}

			if opts.IncludeDocValues() {
				s.IncludeDocValues[fieldID] = true
			}

			err := ValidateDocFields(field)
			if err != nil {
				return 0, err
			}
		}

		var curr int

		s.metaBuf.Reset()
		data = data[:0]

		// _id field special case optimizes ExternalID() lookups
		idFieldVal := docStoredFields[uint16(0)].vals[0]
		_, err = metaEncode(uint64(len(idFieldVal)))
		if err != nil {
			return 0, err
		}

		// handle non-"_id" fields
		for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
			isf, exists := docStoredFields[uint16(fieldID)]
			if exists {
				curr, data, err = persistStoredFieldValues(
					fieldID, isf.vals, isf.typs, isf.arrayposs,
					curr, metaEncode, data)
				if err != nil {
					return 0, err
				}
			}
		}

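		// per-document stored section layout, as written below:
		//   uvarint: len(metaBytes)
		//   uvarint: len(idFieldVal) + len(compressed)
		//   metaBytes, then the raw idFieldVal, then snappy-compressed data
		// keeping _id uncompressed lets ExternalID() lookups skip a
		// snappy decode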
		metaBytes := s.metaBuf.Bytes()

		compressed = snappy.Encode(compressed[:cap(compressed)], data)

		docStoredOffsets[docNum] = uint64(s.w.Count())

		_, err := writeUvarints(s.w,
			uint64(len(metaBytes)),
			uint64(len(idFieldVal)+len(compressed)))
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(metaBytes)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(idFieldVal)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(compressed)
		if err != nil {
			return 0, err
		}
	}

	storedIndexOffset = uint64(s.w.Count())
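
	// the stored-field index is one fixed-width big-endian uint64 offset
	// per document, so a doc's stored section can later be located by
	// direct arithmetic on docNum rather than a scan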
	for _, docStoredOffset := range docStoredOffsets {
		err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
		if err != nil {
			return 0, err
		}
	}

	return storedIndexOffset, nil
}

func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
	dictOffsets = make([]uint64, len(s.FieldsInv))

	fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
	fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))

	buf := s.grabBuf(binary.MaxVarintLen64)

	// these int coders are initialized with chunk size 1024;
	// however, they are reset to the correct chunk size
	// while processing each individual field-term section
	tfEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1))
	locEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1))

	var docTermMap [][]byte

	if s.builder == nil {
		s.builder, err = vellum.New(&s.builderBuf, nil)
		if err != nil {
			return 0, nil, err
		}
	}

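	// NOTE: vellum builds an FST and requires keys to be inserted in
	// lexicographic order, which is why each field's terms were sorted
	// up front in convert()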
	for fieldID, terms := range s.DictKeys {
		if cap(docTermMap) < len(s.results) {
			docTermMap = make([][]byte, len(s.results))
		} else {
			docTermMap = docTermMap[0:len(s.results)]
			for docNum := range docTermMap { // reset the docTermMap
				docTermMap[docNum] = docTermMap[docNum][:0]
			}
		}

		dict := s.Dicts[fieldID]

		for _, term := range terms { // terms are already sorted
			pid := dict[term] - 1

			postingsBS := s.Postings[pid]

			freqNorms := s.FreqNorms[pid]
			freqNormOffset := 0

			locs := s.Locs[pid]
			locOffset := 0

			chunkSize, err := getChunkSize(s.chunkMode, postingsBS.GetCardinality(), uint64(len(s.results)))
			if err != nil {
				return 0, nil, err
			}
			tfEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1))
			locEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1))

			postingsItr := postingsBS.Iterator()
			for postingsItr.HasNext() {
				docNum := uint64(postingsItr.Next())

				freqNorm := freqNorms[freqNormOffset]

				err = tfEncoder.Add(docNum,
					encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
					uint64(math.Float32bits(freqNorm.norm)))
				if err != nil {
					return 0, nil, err
				}

				if freqNorm.numLocs > 0 {
					numBytesLocs := 0
					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						numBytesLocs += totalUvarintBytes(
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)), loc.arrayposs)
					}

					err = locEncoder.Add(docNum, uint64(numBytesLocs))
					if err != nil {
						return 0, nil, err
					}

					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						err = locEncoder.Add(docNum,
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)))
						if err != nil {
							return 0, nil, err
						}

						err = locEncoder.Add(docNum, loc.arrayposs...)
						if err != nil {
							return 0, nil, err
						}
					}

					locOffset += freqNorm.numLocs
				}

				freqNormOffset++

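				// accumulate this doc's terms for the field,
				// termSeparator-delimited, to feed the doc-values
				// section written below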
				docTermMap[docNum] = append(
					append(docTermMap[docNum], term...),
					termSeparator)
			}

			tfEncoder.Close()
			locEncoder.Close()

			postingsOffset, err :=
				writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
			if err != nil {
				return 0, nil, err
			}

			if postingsOffset > uint64(0) {
				err = s.builder.Insert([]byte(term), postingsOffset)
				if err != nil {
					return 0, nil, err
				}
			}

			tfEncoder.Reset()
			locEncoder.Reset()
		}

		err = s.builder.Close()
		if err != nil {
			return 0, nil, err
		}

		// record where this dictionary starts
		dictOffsets[fieldID] = uint64(s.w.Count())

		vellumData := s.builderBuf.Bytes()

		// write out the length of the vellum data
		n := binary.PutUvarint(buf, uint64(len(vellumData)))
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}

		// write this vellum to disk
		_, err = s.w.Write(vellumData)
		if err != nil {
			return 0, nil, err
		}

		// reset vellum for reuse
		s.builderBuf.Reset()

		err = s.builder.Reset(&s.builderBuf)
		if err != nil {
			return 0, nil, err
		}

		// write the field doc values
		// NOTE: doc values continue to use legacy chunk mode
		chunkSize, err := getChunkSize(LegacyChunkMode, 0, 0)
		if err != nil {
			return 0, nil, err
		}
		fdvEncoder := newChunkedContentCoder(chunkSize, uint64(len(s.results)-1), s.w, false)
		if s.IncludeDocValues[fieldID] {
			for docNum, docTerms := range docTermMap {
				if len(docTerms) > 0 {
					err = fdvEncoder.Add(uint64(docNum), docTerms)
					if err != nil {
						return 0, nil, err
					}
				}
			}
			err = fdvEncoder.Close()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsStart[fieldID] = uint64(s.w.Count())

			_, err = fdvEncoder.Write()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsEnd[fieldID] = uint64(s.w.Count())

			fdvEncoder.Reset()
		} else {
			fdvOffsetsStart[fieldID] = fieldNotUninverted
			fdvOffsetsEnd[fieldID] = fieldNotUninverted
		}
	}

	fdvIndexOffset = uint64(s.w.Count())

	for i := 0; i < len(fdvOffsetsStart); i++ {
		n := binary.PutUvarint(buf, fdvOffsetsStart[i])
		_, err := s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
		n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
	}

	return fdvIndexOffset, dictOffsets, nil
}

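// encodeFieldType maps a concrete document.Field implementation to the
// single-byte type tag stored alongside each field value; 'x' marks an
// unrecognized field type.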
func encodeFieldType(f document.Field) byte {
	fieldType := byte('x')
	switch f.(type) {
	case *document.TextField:
		fieldType = 't'
	case *document.NumericField:
		fieldType = 'n'
	case *document.DateTimeField:
		fieldType = 'd'
	case *document.BooleanField:
		fieldType = 'b'
	case *document.GeoPointField:
		fieldType = 'g'
	case *document.CompositeField:
		fieldType = 'c'
	}
	return fieldType
}

// returns the total # of bytes needed to encode the given uint64s
// in binary.PutUvarint() encoding
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
	n = numUvarintBytes(a)
	n += numUvarintBytes(b)
	n += numUvarintBytes(c)
	n += numUvarintBytes(d)
	n += numUvarintBytes(e)
	for _, v := range more {
		n += numUvarintBytes(v)
	}
	return n
}

// returns # of bytes needed to encode x in binary.PutUvarint() encoding
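// (e.g. numUvarintBytes(127) == 1 but numUvarintBytes(128) == 2, since
// each uvarint byte carries 7 payload bits)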
func numUvarintBytes(x uint64) (n int) {
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n + 1
}
