//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/index/scorch/segment"
	"github.com/couchbase/vellum"
	"github.com/golang/snappy"
)

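// NewSegmentBufferNumResultsBump, NewSegmentBufferNumResultsFactor and
// NewSegmentBufferAvgBytesPerDocFactor tune how aggressively the output
// buffer is pre-sized from the previous build's statistics; see the
// estimate logic in newWithChunkFactor below.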
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional
// checks on fields in a document being added to a new segment; by
// default it does nothing.
// This API is experimental and may be removed at any time.
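//
// For example, an application could reject oversized field values
// (a hypothetical check, shown for illustration only):
//
//	zap.ValidateDocFields = func(field document.Field) error {
//		if len(field.Value()) > 1<<20 {
//			return fmt.Errorf("field %s: value too large", field.Name())
//		}
//		return nil
//	}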
var ValidateDocFields = func(field document.Field) error {
	return nil
}

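// defaultChunkFactor is the default number of documents per chunk used
// by the chunked postings-detail and doc-value encoders below.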
var defaultChunkFactor uint32 = 1024

// New produces an in-memory zap-encoded SegmentBase from analysis
// results.
func (z *ZapPlugin) New(results []*index.AnalysisResult) (
	segment.Segment, uint64, error) {
	return z.newWithChunkFactor(results, defaultChunkFactor)
}

func (*ZapPlugin) newWithChunkFactor(results []*index.AnalysisResult,
	chunkFactor uint32) (segment.Segment, uint64, error) {
	s := interimPool.Get().(*interim)

	var br bytes.Buffer
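	// Illustrative sizing example (numbers are hypothetical): if the
	// previous build wrote ~1 MB for 1000 docs (~1048 bytes/doc) and
	// 900 results arrive now, the buffer below grows to roughly
	// 1048 * (900 + 100) bytes up front, avoiding repeated
	// reallocation during the write.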
	if s.lastNumDocs > 0 {
		// use the previous build's results to initialize the buf
		// with an estimated size, but note that the interim instance
		// comes from a global interimPool, so multiple scorch
		// instances indexing different docs can lead to low quality
		// estimates
		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) *
			NewSegmentBufferAvgBytesPerDocFactor)
		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) *
			NewSegmentBufferNumResultsFactor)
		br.Grow(estimateAvgBytesPerDoc * estimateNumResults)
	}

	s.results = results
	s.chunkFactor = chunkFactor
	s.w = NewCountHashWriter(&br)

	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets,
		err := s.convert()
	if err != nil {
		return nil, uint64(0), err
	}

	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor,
		s.FieldsMap, s.FieldsInv, uint64(len(results)),
		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets)

	if err == nil && s.reset() == nil {
		s.lastNumDocs = len(results)
		s.lastOutSize = len(br.Bytes())
		interimPool.Put(s)
	}

	return sb, uint64(len(br.Bytes())), err
}

var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}

// interim holds temporary working data used while converting from
// analysis results to a zap-encoded segment
type interim struct {
	results []*index.AnalysisResult

	chunkFactor uint32

	w *CountHashWriter

	// FieldsMap maps field name -> field id + 1; the +1 avoids
	// confusing a valid field id 0 with the map's zero value for a
	// missing name
	FieldsMap map[string]uint16

	// FieldsInv is the inverse of FieldsMap
	//  field id -> name
	FieldsInv []string

	// Term dictionaries for each field
	//  field id -> term -> postings list id + 1
	Dicts []map[string]uint64

	// Terms for each field, where terms are sorted ascending
	//  field id -> []term
	DictKeys [][]string

	// Fields whose IncludeDocValues is true
	//  field id -> bool
	IncludeDocValues []bool

	// postings id -> bitmap of docNums
	Postings []*roaring.Bitmap

	// postings id -> freq/norms, one for each docNum in postings
	FreqNorms        [][]interimFreqNorm
	freqNormsBacking []interimFreqNorm

	// postings id -> locs, one for each freq
	Locs        [][]interimLoc
	locsBacking []interimLoc

	numTermsPerPostingsList []int // key is postings list id
	numLocsPerPostingsList  []int // key is postings list id

	builder    *vellum.Builder
	builderBuf bytes.Buffer

	metaBuf bytes.Buffer

	tmp0 []byte
	tmp1 []byte

	lastNumDocs int
	lastOutSize int
}

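// reset prepares the interim for reuse by the pool: slices are
// truncated to zero length while retaining their backing arrays, so a
// recycled instance can rebuild with few fresh allocations.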
func (s *interim) reset() (err error) {
	s.results = nil
	s.chunkFactor = 0
	s.w = nil
	s.FieldsMap = nil
	s.FieldsInv = nil
	for i := range s.Dicts {
		s.Dicts[i] = nil
	}
	s.Dicts = s.Dicts[:0]
	for i := range s.DictKeys {
		s.DictKeys[i] = s.DictKeys[i][:0]
	}
	s.DictKeys = s.DictKeys[:0]
	for i := range s.IncludeDocValues {
		s.IncludeDocValues[i] = false
	}
	s.IncludeDocValues = s.IncludeDocValues[:0]
	for _, idn := range s.Postings {
		idn.Clear()
	}
	s.Postings = s.Postings[:0]
	s.FreqNorms = s.FreqNorms[:0]
	for i := range s.freqNormsBacking {
		s.freqNormsBacking[i] = interimFreqNorm{}
	}
	s.freqNormsBacking = s.freqNormsBacking[:0]
	s.Locs = s.Locs[:0]
	for i := range s.locsBacking {
		s.locsBacking[i] = interimLoc{}
	}
	s.locsBacking = s.locsBacking[:0]
	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0]
	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0]
	s.builderBuf.Reset()
	if s.builder != nil {
		err = s.builder.Reset(&s.builderBuf)
	}
	s.metaBuf.Reset()
	s.tmp0 = s.tmp0[:0]
	s.tmp1 = s.tmp1[:0]
	s.lastNumDocs = 0
	s.lastOutSize = 0

	return err
}

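// grabBuf returns a scratch buffer of at least size bytes, reusing
// s.tmp0 whenever its capacity already suffices.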
func (s *interim) grabBuf(size int) []byte {
	buf := s.tmp0
	if cap(buf) < size {
		buf = make([]byte, size)
		s.tmp0 = buf
	}
	return buf[0:size]
}

type interimStoredField struct {
	vals      [][]byte
	typs      []byte
	arrayposs [][]uint64 // array positions
}

type interimFreqNorm struct {
	freq    uint64
	norm    float32
	numLocs int
}

type interimLoc struct {
	fieldID   uint16
	pos       uint64
	start     uint64
	end       uint64
	arrayposs []uint64
}

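// convert runs the whole build pipeline: it assigns field ids (keeping
// _id as field 0), fills the per-field term dictionaries, walks every
// document to populate the postings structures, then writes the stored
// fields section, the dictionaries and doc values, and finally the
// fields index, returning the offsets that InitSegmentBase needs.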
func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) {
	s.FieldsMap = map[string]uint16{}

	s.getOrDefineField("_id") // _id field is fieldID 0

	for _, result := range s.results {
		for _, field := range result.Document.CompositeFields {
			s.getOrDefineField(field.Name())
		}
		for _, field := range result.Document.Fields {
			s.getOrDefineField(field.Name())
		}
	}

	sort.Strings(s.FieldsInv[1:]) // keep _id as first field

	for fieldID, fieldName := range s.FieldsInv {
		s.FieldsMap[fieldName] = uint16(fieldID + 1)
	}

	if cap(s.IncludeDocValues) >= len(s.FieldsInv) {
		s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)]
	} else {
		s.IncludeDocValues = make([]bool, len(s.FieldsInv))
	}

	s.prepareDicts()

	for _, dict := range s.DictKeys {
		sort.Strings(dict)
	}

	s.processDocuments()

	storedIndexOffset, err := s.writeStoredFields()
	if err != nil {
		return 0, 0, 0, nil, err
	}

	var fdvIndexOffset uint64
	var dictOffsets []uint64

	if len(s.results) > 0 {
		fdvIndexOffset, dictOffsets, err = s.writeDicts()
		if err != nil {
			return 0, 0, 0, nil, err
		}
	} else {
		dictOffsets = make([]uint64, len(s.FieldsInv))
	}

	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets)
	if err != nil {
		return 0, 0, 0, nil, err
	}

	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil
}

func (s *interim) getOrDefineField(fieldName string) int {
	fieldIDPlus1, exists := s.FieldsMap[fieldName]
	if !exists {
		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1)
		s.FieldsMap[fieldName] = fieldIDPlus1
		s.FieldsInv = append(s.FieldsInv, fieldName)

		s.Dicts = append(s.Dicts, make(map[string]uint64))

		n := len(s.DictKeys)
		if n < cap(s.DictKeys) {
			s.DictKeys = s.DictKeys[:n+1]
			s.DictKeys[n] = s.DictKeys[n][:0]
		} else {
			s.DictKeys = append(s.DictKeys, []string(nil))
		}
	}

	return int(fieldIDPlus1 - 1)
}

// fill Dicts and DictKeys from analysis results
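// It also counts terms and locations per postings list so that the
// freq/norm and location slices can be carved out of single bulk
// allocations below.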
func (s *interim) prepareDicts() {
	var pidNext int

	var totTFs int
	var totLocs int

	visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
		dict := s.Dicts[fieldID]
		dictKeys := s.DictKeys[fieldID]

		for term, tf := range tfs {
			pidPlus1, exists := dict[term]
			if !exists {
				pidNext++
				pidPlus1 = uint64(pidNext)

				dict[term] = pidPlus1
				dictKeys = append(dictKeys, term)

				s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0)
				s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0)
			}

			pid := pidPlus1 - 1

			s.numTermsPerPostingsList[pid] += 1
			s.numLocsPerPostingsList[pid] += len(tf.Locations)

			totLocs += len(tf.Locations)
		}

		totTFs += len(tfs)

		s.DictKeys[fieldID] = dictKeys
	}

	for _, result := range s.results {
		// walk each composite field
		for _, field := range result.Document.CompositeFields {
			fieldID := uint16(s.getOrDefineField(field.Name()))
			_, tf := field.Analyze()
			visitField(fieldID, tf)
		}

		// walk each field
		for i, field := range result.Document.Fields {
			fieldID := uint16(s.getOrDefineField(field.Name()))
			tf := result.Analyzed[i]
			visitField(fieldID, tf)
		}
	}

	numPostingsLists := pidNext

	if cap(s.Postings) >= numPostingsLists {
		s.Postings = s.Postings[:numPostingsLists]
	} else {
		postings := make([]*roaring.Bitmap, numPostingsLists)
		copy(postings, s.Postings[:cap(s.Postings)])
		for i := 0; i < numPostingsLists; i++ {
			if postings[i] == nil {
				postings[i] = roaring.New()
			}
		}
		s.Postings = postings
	}

	if cap(s.FreqNorms) >= numPostingsLists {
		s.FreqNorms = s.FreqNorms[:numPostingsLists]
	} else {
		s.FreqNorms = make([][]interimFreqNorm, numPostingsLists)
	}

	if cap(s.freqNormsBacking) >= totTFs {
		s.freqNormsBacking = s.freqNormsBacking[:totTFs]
	} else {
		s.freqNormsBacking = make([]interimFreqNorm, totTFs)
	}

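	// Carve a zero-length sub-slice out of the shared backing array for
	// each postings list; subsequent appends fill each region in place
	// (exactly numTerms entries per pid, by construction), avoiding a
	// per-list allocation. The same pattern is applied to Locs below.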
	freqNormsBacking := s.freqNormsBacking
	for pid, numTerms := range s.numTermsPerPostingsList {
		s.FreqNorms[pid] = freqNormsBacking[0:0]
		freqNormsBacking = freqNormsBacking[numTerms:]
	}

	if cap(s.Locs) >= numPostingsLists {
		s.Locs = s.Locs[:numPostingsLists]
	} else {
		s.Locs = make([][]interimLoc, numPostingsLists)
	}

	if cap(s.locsBacking) >= totLocs {
		s.locsBacking = s.locsBacking[:totLocs]
	} else {
		s.locsBacking = make([]interimLoc, totLocs)
	}

	locsBacking := s.locsBacking
	for pid, numLocs := range s.numLocsPerPostingsList {
		s.Locs[pid] = locsBacking[0:0]
		locsBacking = locsBacking[numLocs:]
	}
}

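// processDocuments visits each analysis result in turn, reusing the
// per-field length and token-frequency scratch slices across documents.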
func (s *interim) processDocuments() {
	numFields := len(s.FieldsInv)
	reuseFieldLens := make([]int, numFields)
	reuseFieldTFs := make([]analysis.TokenFrequencies, numFields)

	for docNum, result := range s.results {
		for i := 0; i < numFields; i++ { // clear these for reuse
			reuseFieldLens[i] = 0
			reuseFieldTFs[i] = nil
		}

		s.processDocument(uint64(docNum), result,
			reuseFieldLens, reuseFieldTFs)
	}
}

func (s *interim) processDocument(docNum uint64,
	result *index.AnalysisResult,
	fieldLens []int, fieldTFs []analysis.TokenFrequencies) {
	visitField := func(fieldID uint16, fieldName string,
		ln int, tf analysis.TokenFrequencies) {
		fieldLens[fieldID] += ln

		existingFreqs := fieldTFs[fieldID]
		if existingFreqs != nil {
			existingFreqs.MergeAll(fieldName, tf)
		} else {
			fieldTFs[fieldID] = tf
		}
	}

	// walk each composite field
	for _, field := range result.Document.CompositeFields {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		ln, tf := field.Analyze()
		visitField(fieldID, field.Name(), ln, tf)
	}

	// walk each field
	for i, field := range result.Document.Fields {
		fieldID := uint16(s.getOrDefineField(field.Name()))
		ln := result.Length[i]
		tf := result.Analyzed[i]
		visitField(fieldID, field.Name(), ln, tf)
	}

	// now that it's been rolled up into fieldTFs, walk that
	for fieldID, tfs := range fieldTFs {
		dict := s.Dicts[fieldID]
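		// the classic 1/sqrt(fieldLength) length normalization factor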
		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))

		for term, tf := range tfs {
			pid := dict[term] - 1
			bs := s.Postings[pid]
			bs.Add(uint32(docNum))

			s.FreqNorms[pid] = append(s.FreqNorms[pid],
				interimFreqNorm{
					freq:    uint64(tf.Frequency()),
					norm:    norm,
					numLocs: len(tf.Locations),
				})

			if len(tf.Locations) > 0 {
				locs := s.Locs[pid]

				for _, loc := range tf.Locations {
					var locf = uint16(fieldID)
					if loc.Field != "" {
						locf = uint16(s.getOrDefineField(loc.Field))
					}
					var arrayposs []uint64
					if len(loc.ArrayPositions) > 0 {
						arrayposs = loc.ArrayPositions
					}
					locs = append(locs, interimLoc{
						fieldID:   locf,
						pos:       uint64(loc.Position),
						start:     uint64(loc.Start),
						end:       uint64(loc.End),
						arrayposs: arrayposs,
					})
				}

				s.Locs[pid] = locs
			}
		}
	}
}

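// writeStoredFields writes one record per document: a uvarint meta
// length, a uvarint data length (the raw _id value plus the snappy-
// compressed remaining field data), the meta bytes, the _id value, and
// the compressed data. It then writes a big-endian uint64 start offset
// per document and returns the offset of that index.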
func (s *interim) writeStoredFields() (
	storedIndexOffset uint64, err error) {
	varBuf := make([]byte, binary.MaxVarintLen64)
	metaEncode := func(val uint64) (int, error) {
		wb := binary.PutUvarint(varBuf, val)
		return s.metaBuf.Write(varBuf[:wb])
	}

	data, compressed := s.tmp0[:0], s.tmp1[:0]
	defer func() { s.tmp0, s.tmp1 = data, compressed }()

	// keyed by docNum
	docStoredOffsets := make([]uint64, len(s.results))

	// keyed by fieldID, for the current doc in the loop
	docStoredFields := map[uint16]interimStoredField{}

	for docNum, result := range s.results {
		for fieldID := range docStoredFields { // reset for next doc
			delete(docStoredFields, fieldID)
		}

		for _, field := range result.Document.Fields {
			fieldID := uint16(s.getOrDefineField(field.Name()))

			opts := field.Options()

			if opts.IsStored() {
				isf := docStoredFields[fieldID]
				isf.vals = append(isf.vals, field.Value())
				isf.typs = append(isf.typs, encodeFieldType(field))
				isf.arrayposs = append(isf.arrayposs, field.ArrayPositions())
				docStoredFields[fieldID] = isf
			}

			if opts.IncludeDocValues() {
				s.IncludeDocValues[fieldID] = true
			}

			err := ValidateDocFields(field)
			if err != nil {
				return 0, err
			}
		}

		var curr int

		s.metaBuf.Reset()
		data = data[:0]

		// _id field special case optimizes ExternalID() lookups
		idFieldVal := docStoredFields[uint16(0)].vals[0]
		_, err = metaEncode(uint64(len(idFieldVal)))
		if err != nil {
			return 0, err
		}

		// handle non-"_id" fields
		for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ {
			isf, exists := docStoredFields[uint16(fieldID)]
			if exists {
				curr, data, err = persistStoredFieldValues(
					fieldID, isf.vals, isf.typs, isf.arrayposs,
					curr, metaEncode, data)
				if err != nil {
					return 0, err
				}
			}
		}

		metaBytes := s.metaBuf.Bytes()

		compressed = snappy.Encode(compressed[:cap(compressed)], data)

		docStoredOffsets[docNum] = uint64(s.w.Count())

		_, err := writeUvarints(s.w,
			uint64(len(metaBytes)),
			uint64(len(idFieldVal)+len(compressed)))
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(metaBytes)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(idFieldVal)
		if err != nil {
			return 0, err
		}

		_, err = s.w.Write(compressed)
		if err != nil {
			return 0, err
		}
	}

	storedIndexOffset = uint64(s.w.Count())

	for _, docStoredOffset := range docStoredOffsets {
		err = binary.Write(s.w, binary.BigEndian, docStoredOffset)
		if err != nil {
			return 0, err
		}
	}

	return storedIndexOffset, nil
}

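// writeDicts writes, per field: the chunked freq/norm and location
// details for each term's postings list, a vellum FST mapping each
// term to its postings offset (preceded by the FST's uvarint length),
// and, when enabled, the field's chunked doc values. It finishes with
// a uvarint start/end doc-value offset pair per field and returns the
// offset of that trailer along with the per-field dictionary offsets.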
func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) {
	dictOffsets = make([]uint64, len(s.FieldsInv))

	fdvOffsetsStart := make([]uint64, len(s.FieldsInv))
	fdvOffsetsEnd := make([]uint64, len(s.FieldsInv))

	buf := s.grabBuf(binary.MaxVarintLen64)

	tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1))
	fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false)

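	// docTermMap accumulates, per docNum, the current field's terms
	// (separated by termSeparator) for un-inverting into that field's
	// doc values.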
	var docTermMap [][]byte

	if s.builder == nil {
		s.builder, err = vellum.New(&s.builderBuf, nil)
		if err != nil {
			return 0, nil, err
		}
	}

	for fieldID, terms := range s.DictKeys {
		if cap(docTermMap) < len(s.results) {
			docTermMap = make([][]byte, len(s.results))
		} else {
			docTermMap = docTermMap[0:len(s.results)]
			for docNum := range docTermMap { // reset the docTermMap
				docTermMap[docNum] = docTermMap[docNum][:0]
			}
		}

		dict := s.Dicts[fieldID]

		for _, term := range terms { // terms are already sorted
			pid := dict[term] - 1

			postingsBS := s.Postings[pid]

			freqNorms := s.FreqNorms[pid]
			freqNormOffset := 0

			locs := s.Locs[pid]
			locOffset := 0

			postingsItr := postingsBS.Iterator()
			for postingsItr.HasNext() {
				docNum := uint64(postingsItr.Next())

				freqNorm := freqNorms[freqNormOffset]

				err = tfEncoder.Add(docNum,
					encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0),
					uint64(math.Float32bits(freqNorm.norm)))
				if err != nil {
					return 0, nil, err
				}

				if freqNorm.numLocs > 0 {
					numBytesLocs := 0
					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						numBytesLocs += totalUvarintBytes(
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)), loc.arrayposs)
					}

					err = locEncoder.Add(docNum, uint64(numBytesLocs))
					if err != nil {
						return 0, nil, err
					}

					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] {
						err = locEncoder.Add(docNum,
							uint64(loc.fieldID), loc.pos, loc.start, loc.end,
							uint64(len(loc.arrayposs)))
						if err != nil {
							return 0, nil, err
						}

						err = locEncoder.Add(docNum, loc.arrayposs...)
						if err != nil {
							return 0, nil, err
						}
					}

					locOffset += freqNorm.numLocs
				}

				freqNormOffset++

				docTermMap[docNum] = append(
					append(docTermMap[docNum], term...),
					termSeparator)
			}

			tfEncoder.Close()
			locEncoder.Close()

			postingsOffset, err :=
				writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf)
			if err != nil {
				return 0, nil, err
			}

			if postingsOffset > uint64(0) {
				err = s.builder.Insert([]byte(term), postingsOffset)
				if err != nil {
					return 0, nil, err
				}
			}

			tfEncoder.Reset()
			locEncoder.Reset()
		}

		err = s.builder.Close()
		if err != nil {
			return 0, nil, err
		}

		// record where this dictionary starts
		dictOffsets[fieldID] = uint64(s.w.Count())

		vellumData := s.builderBuf.Bytes()

		// write out the length of the vellum data
		n := binary.PutUvarint(buf, uint64(len(vellumData)))
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}

		// write this vellum to disk
		_, err = s.w.Write(vellumData)
		if err != nil {
			return 0, nil, err
		}

		// reset vellum for reuse
		s.builderBuf.Reset()

		err = s.builder.Reset(&s.builderBuf)
		if err != nil {
			return 0, nil, err
		}

		// write the field doc values
		if s.IncludeDocValues[fieldID] {
			for docNum, docTerms := range docTermMap {
				if len(docTerms) > 0 {
					err = fdvEncoder.Add(uint64(docNum), docTerms)
					if err != nil {
						return 0, nil, err
					}
				}
			}
			err = fdvEncoder.Close()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsStart[fieldID] = uint64(s.w.Count())

			_, err = fdvEncoder.Write()
			if err != nil {
				return 0, nil, err
			}

			fdvOffsetsEnd[fieldID] = uint64(s.w.Count())

			fdvEncoder.Reset()
		} else {
			fdvOffsetsStart[fieldID] = fieldNotUninverted
			fdvOffsetsEnd[fieldID] = fieldNotUninverted
		}
	}

	fdvIndexOffset = uint64(s.w.Count())

	for i := 0; i < len(fdvOffsetsStart); i++ {
		n := binary.PutUvarint(buf, fdvOffsetsStart[i])
		_, err := s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
		n = binary.PutUvarint(buf, fdvOffsetsEnd[i])
		_, err = s.w.Write(buf[:n])
		if err != nil {
			return 0, nil, err
		}
	}

	return fdvIndexOffset, dictOffsets, nil
}

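// encodeFieldType maps a document.Field's concrete type to the
// single-byte tag stored alongside its values; 'x' marks an
// unrecognized field type.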
func encodeFieldType(f document.Field) byte {
	fieldType := byte('x')
	switch f.(type) {
	case *document.TextField:
		fieldType = 't'
	case *document.NumericField:
		fieldType = 'n'
	case *document.DateTimeField:
		fieldType = 'd'
	case *document.BooleanField:
		fieldType = 'b'
	case *document.GeoPointField:
		fieldType = 'g'
	case *document.CompositeField:
		fieldType = 'c'
	}
	return fieldType
}

// totalUvarintBytes returns the total number of bytes needed to
// encode the given uint64 values in binary.PutUvarint() encoding
func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) {
	n = numUvarintBytes(a)
	n += numUvarintBytes(b)
	n += numUvarintBytes(c)
	n += numUvarintBytes(d)
	n += numUvarintBytes(e)
	for _, v := range more {
		n += numUvarintBytes(v)
	}
	return n
}

// returns # of bytes needed to encode x in binary.PutUvarint() encoding
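// (values below 0x80 take one byte; each additional 7 significant bits
// adds another byte)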
func numUvarintBytes(x uint64) (n int) {
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n + 1
}