//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/index/scorch/segment"
	"github.com/couchbase/vellum"
	"github.com/golang/snappy"
)

// Tuning knobs for the initial output-buffer size estimate that is made
// when a new segment is built from analysis results and a previous build's
// size is available.
//
// NewSegmentBufferNumResultsBump is added to the number of analysis
// results before the estimate is computed.  The two factors are
// multipliers applied to the estimate; at their default of 1.0 the
// estimate is left unscaled.
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment, by default it does
// nothing.
// This API is experimental and may be removed at any time.
41var ValidateDocFields = func(field document.Field) error { 42 return nil 43} 44 45var defaultChunkFactor uint32 = 1024 46 47// AnalysisResultsToSegmentBase produces an in-memory zap-encoded 48// SegmentBase from analysis results 49func (z *ZapPlugin) New(results []*index.AnalysisResult) ( 50 segment.Segment, uint64, error) { 51 return z.newWithChunkFactor(results, defaultChunkFactor) 52} 53 54func (*ZapPlugin) newWithChunkFactor(results []*index.AnalysisResult, 55 chunkFactor uint32) (segment.Segment, uint64, error) { 56 s := interimPool.Get().(*interim) 57 58 var br bytes.Buffer 59 if s.lastNumDocs > 0 { 60 // use previous results to initialize the buf with an estimate 61 // size, but note that the interim instance comes from a 62 // global interimPool, so multiple scorch instances indexing 63 // different docs can lead to low quality estimates 64 estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * 65 NewSegmentBufferNumResultsFactor) 66 estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * 67 NewSegmentBufferAvgBytesPerDocFactor) 68 br.Grow(estimateAvgBytesPerDoc * estimateNumResults) 69 } 70 71 s.results = results 72 s.chunkFactor = chunkFactor 73 s.w = NewCountHashWriter(&br) 74 75 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, 76 err := s.convert() 77 if err != nil { 78 return nil, uint64(0), err 79 } 80 81 sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor, 82 s.FieldsMap, s.FieldsInv, uint64(len(results)), 83 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) 84 85 if err == nil && s.reset() == nil { 86 s.lastNumDocs = len(results) 87 s.lastOutSize = len(br.Bytes()) 88 interimPool.Put(s) 89 } 90 91 return sb, uint64(len(br.Bytes())), err 92} 93 94var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} 95 96// interim holds temporary working data used while converting from 97// analysis results to a zap-encoded segment 98type interim struct { 99 
results []*index.AnalysisResult 100 101 chunkFactor uint32 102 103 w *CountHashWriter 104 105 // FieldsMap adds 1 to field id to avoid zero value issues 106 // name -> field id + 1 107 FieldsMap map[string]uint16 108 109 // FieldsInv is the inverse of FieldsMap 110 // field id -> name 111 FieldsInv []string 112 113 // Term dictionaries for each field 114 // field id -> term -> postings list id + 1 115 Dicts []map[string]uint64 116 117 // Terms for each field, where terms are sorted ascending 118 // field id -> []term 119 DictKeys [][]string 120 121 // Fields whose IncludeDocValues is true 122 // field id -> bool 123 IncludeDocValues []bool 124 125 // postings id -> bitmap of docNums 126 Postings []*roaring.Bitmap 127 128 // postings id -> freq/norm's, one for each docNum in postings 129 FreqNorms [][]interimFreqNorm 130 freqNormsBacking []interimFreqNorm 131 132 // postings id -> locs, one for each freq 133 Locs [][]interimLoc 134 locsBacking []interimLoc 135 136 numTermsPerPostingsList []int // key is postings list id 137 numLocsPerPostingsList []int // key is postings list id 138 139 builder *vellum.Builder 140 builderBuf bytes.Buffer 141 142 metaBuf bytes.Buffer 143 144 tmp0 []byte 145 tmp1 []byte 146 147 lastNumDocs int 148 lastOutSize int 149} 150 151func (s *interim) reset() (err error) { 152 s.results = nil 153 s.chunkFactor = 0 154 s.w = nil 155 s.FieldsMap = nil 156 s.FieldsInv = nil 157 for i := range s.Dicts { 158 s.Dicts[i] = nil 159 } 160 s.Dicts = s.Dicts[:0] 161 for i := range s.DictKeys { 162 s.DictKeys[i] = s.DictKeys[i][:0] 163 } 164 s.DictKeys = s.DictKeys[:0] 165 for i := range s.IncludeDocValues { 166 s.IncludeDocValues[i] = false 167 } 168 s.IncludeDocValues = s.IncludeDocValues[:0] 169 for _, idn := range s.Postings { 170 idn.Clear() 171 } 172 s.Postings = s.Postings[:0] 173 s.FreqNorms = s.FreqNorms[:0] 174 for i := range s.freqNormsBacking { 175 s.freqNormsBacking[i] = interimFreqNorm{} 176 } 177 s.freqNormsBacking = s.freqNormsBacking[:0] 
178 s.Locs = s.Locs[:0] 179 for i := range s.locsBacking { 180 s.locsBacking[i] = interimLoc{} 181 } 182 s.locsBacking = s.locsBacking[:0] 183 s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] 184 s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] 185 s.builderBuf.Reset() 186 if s.builder != nil { 187 err = s.builder.Reset(&s.builderBuf) 188 } 189 s.metaBuf.Reset() 190 s.tmp0 = s.tmp0[:0] 191 s.tmp1 = s.tmp1[:0] 192 s.lastNumDocs = 0 193 s.lastOutSize = 0 194 195 return err 196} 197 198func (s *interim) grabBuf(size int) []byte { 199 buf := s.tmp0 200 if cap(buf) < size { 201 buf = make([]byte, size) 202 s.tmp0 = buf 203 } 204 return buf[0:size] 205} 206 207type interimStoredField struct { 208 vals [][]byte 209 typs []byte 210 arrayposs [][]uint64 // array positions 211} 212 213type interimFreqNorm struct { 214 freq uint64 215 norm float32 216 numLocs int 217} 218 219type interimLoc struct { 220 fieldID uint16 221 pos uint64 222 start uint64 223 end uint64 224 arrayposs []uint64 225} 226 227func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { 228 s.FieldsMap = map[string]uint16{} 229 230 s.getOrDefineField("_id") // _id field is fieldID 0 231 232 for _, result := range s.results { 233 for _, field := range result.Document.CompositeFields { 234 s.getOrDefineField(field.Name()) 235 } 236 for _, field := range result.Document.Fields { 237 s.getOrDefineField(field.Name()) 238 } 239 } 240 241 sort.Strings(s.FieldsInv[1:]) // keep _id as first field 242 243 for fieldID, fieldName := range s.FieldsInv { 244 s.FieldsMap[fieldName] = uint16(fieldID + 1) 245 } 246 247 if cap(s.IncludeDocValues) >= len(s.FieldsInv) { 248 s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] 249 } else { 250 s.IncludeDocValues = make([]bool, len(s.FieldsInv)) 251 } 252 253 s.prepareDicts() 254 255 for _, dict := range s.DictKeys { 256 sort.Strings(dict) 257 } 258 259 s.processDocuments() 260 261 storedIndexOffset, err := s.writeStoredFields() 262 if err != 
nil { 263 return 0, 0, 0, nil, err 264 } 265 266 var fdvIndexOffset uint64 267 var dictOffsets []uint64 268 269 if len(s.results) > 0 { 270 fdvIndexOffset, dictOffsets, err = s.writeDicts() 271 if err != nil { 272 return 0, 0, 0, nil, err 273 } 274 } else { 275 dictOffsets = make([]uint64, len(s.FieldsInv)) 276 } 277 278 fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) 279 if err != nil { 280 return 0, 0, 0, nil, err 281 } 282 283 return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil 284} 285 286func (s *interim) getOrDefineField(fieldName string) int { 287 fieldIDPlus1, exists := s.FieldsMap[fieldName] 288 if !exists { 289 fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) 290 s.FieldsMap[fieldName] = fieldIDPlus1 291 s.FieldsInv = append(s.FieldsInv, fieldName) 292 293 s.Dicts = append(s.Dicts, make(map[string]uint64)) 294 295 n := len(s.DictKeys) 296 if n < cap(s.DictKeys) { 297 s.DictKeys = s.DictKeys[:n+1] 298 s.DictKeys[n] = s.DictKeys[n][:0] 299 } else { 300 s.DictKeys = append(s.DictKeys, []string(nil)) 301 } 302 } 303 304 return int(fieldIDPlus1 - 1) 305} 306 307// fill Dicts and DictKeys from analysis results 308func (s *interim) prepareDicts() { 309 var pidNext int 310 311 var totTFs int 312 var totLocs int 313 314 visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) { 315 dict := s.Dicts[fieldID] 316 dictKeys := s.DictKeys[fieldID] 317 318 for term, tf := range tfs { 319 pidPlus1, exists := dict[term] 320 if !exists { 321 pidNext++ 322 pidPlus1 = uint64(pidNext) 323 324 dict[term] = pidPlus1 325 dictKeys = append(dictKeys, term) 326 327 s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) 328 s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) 329 } 330 331 pid := pidPlus1 - 1 332 333 s.numTermsPerPostingsList[pid] += 1 334 s.numLocsPerPostingsList[pid] += len(tf.Locations) 335 336 totLocs += len(tf.Locations) 337 } 338 339 totTFs += len(tfs) 340 341 s.DictKeys[fieldID] = 
dictKeys 342 } 343 344 for _, result := range s.results { 345 // walk each composite field 346 for _, field := range result.Document.CompositeFields { 347 fieldID := uint16(s.getOrDefineField(field.Name())) 348 _, tf := field.Analyze() 349 visitField(fieldID, tf) 350 } 351 352 // walk each field 353 for i, field := range result.Document.Fields { 354 fieldID := uint16(s.getOrDefineField(field.Name())) 355 tf := result.Analyzed[i] 356 visitField(fieldID, tf) 357 } 358 } 359 360 numPostingsLists := pidNext 361 362 if cap(s.Postings) >= numPostingsLists { 363 s.Postings = s.Postings[:numPostingsLists] 364 } else { 365 postings := make([]*roaring.Bitmap, numPostingsLists) 366 copy(postings, s.Postings[:cap(s.Postings)]) 367 for i := 0; i < numPostingsLists; i++ { 368 if postings[i] == nil { 369 postings[i] = roaring.New() 370 } 371 } 372 s.Postings = postings 373 } 374 375 if cap(s.FreqNorms) >= numPostingsLists { 376 s.FreqNorms = s.FreqNorms[:numPostingsLists] 377 } else { 378 s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) 379 } 380 381 if cap(s.freqNormsBacking) >= totTFs { 382 s.freqNormsBacking = s.freqNormsBacking[:totTFs] 383 } else { 384 s.freqNormsBacking = make([]interimFreqNorm, totTFs) 385 } 386 387 freqNormsBacking := s.freqNormsBacking 388 for pid, numTerms := range s.numTermsPerPostingsList { 389 s.FreqNorms[pid] = freqNormsBacking[0:0] 390 freqNormsBacking = freqNormsBacking[numTerms:] 391 } 392 393 if cap(s.Locs) >= numPostingsLists { 394 s.Locs = s.Locs[:numPostingsLists] 395 } else { 396 s.Locs = make([][]interimLoc, numPostingsLists) 397 } 398 399 if cap(s.locsBacking) >= totLocs { 400 s.locsBacking = s.locsBacking[:totLocs] 401 } else { 402 s.locsBacking = make([]interimLoc, totLocs) 403 } 404 405 locsBacking := s.locsBacking 406 for pid, numLocs := range s.numLocsPerPostingsList { 407 s.Locs[pid] = locsBacking[0:0] 408 locsBacking = locsBacking[numLocs:] 409 } 410} 411 412func (s *interim) processDocuments() { 413 numFields := 
len(s.FieldsInv) 414 reuseFieldLens := make([]int, numFields) 415 reuseFieldTFs := make([]analysis.TokenFrequencies, numFields) 416 417 for docNum, result := range s.results { 418 for i := 0; i < numFields; i++ { // clear these for reuse 419 reuseFieldLens[i] = 0 420 reuseFieldTFs[i] = nil 421 } 422 423 s.processDocument(uint64(docNum), result, 424 reuseFieldLens, reuseFieldTFs) 425 } 426} 427 428func (s *interim) processDocument(docNum uint64, 429 result *index.AnalysisResult, 430 fieldLens []int, fieldTFs []analysis.TokenFrequencies) { 431 visitField := func(fieldID uint16, fieldName string, 432 ln int, tf analysis.TokenFrequencies) { 433 fieldLens[fieldID] += ln 434 435 existingFreqs := fieldTFs[fieldID] 436 if existingFreqs != nil { 437 existingFreqs.MergeAll(fieldName, tf) 438 } else { 439 fieldTFs[fieldID] = tf 440 } 441 } 442 443 // walk each composite field 444 for _, field := range result.Document.CompositeFields { 445 fieldID := uint16(s.getOrDefineField(field.Name())) 446 ln, tf := field.Analyze() 447 visitField(fieldID, field.Name(), ln, tf) 448 } 449 450 // walk each field 451 for i, field := range result.Document.Fields { 452 fieldID := uint16(s.getOrDefineField(field.Name())) 453 ln := result.Length[i] 454 tf := result.Analyzed[i] 455 visitField(fieldID, field.Name(), ln, tf) 456 } 457 458 // now that it's been rolled up into fieldTFs, walk that 459 for fieldID, tfs := range fieldTFs { 460 dict := s.Dicts[fieldID] 461 norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) 462 463 for term, tf := range tfs { 464 pid := dict[term] - 1 465 bs := s.Postings[pid] 466 bs.Add(uint32(docNum)) 467 468 s.FreqNorms[pid] = append(s.FreqNorms[pid], 469 interimFreqNorm{ 470 freq: uint64(tf.Frequency()), 471 norm: norm, 472 numLocs: len(tf.Locations), 473 }) 474 475 if len(tf.Locations) > 0 { 476 locs := s.Locs[pid] 477 478 for _, loc := range tf.Locations { 479 var locf = uint16(fieldID) 480 if loc.Field != "" { 481 locf = 
uint16(s.getOrDefineField(loc.Field)) 482 } 483 var arrayposs []uint64 484 if len(loc.ArrayPositions) > 0 { 485 arrayposs = loc.ArrayPositions 486 } 487 locs = append(locs, interimLoc{ 488 fieldID: locf, 489 pos: uint64(loc.Position), 490 start: uint64(loc.Start), 491 end: uint64(loc.End), 492 arrayposs: arrayposs, 493 }) 494 } 495 496 s.Locs[pid] = locs 497 } 498 } 499 } 500} 501 502func (s *interim) writeStoredFields() ( 503 storedIndexOffset uint64, err error) { 504 varBuf := make([]byte, binary.MaxVarintLen64) 505 metaEncode := func(val uint64) (int, error) { 506 wb := binary.PutUvarint(varBuf, val) 507 return s.metaBuf.Write(varBuf[:wb]) 508 } 509 510 data, compressed := s.tmp0[:0], s.tmp1[:0] 511 defer func() { s.tmp0, s.tmp1 = data, compressed }() 512 513 // keyed by docNum 514 docStoredOffsets := make([]uint64, len(s.results)) 515 516 // keyed by fieldID, for the current doc in the loop 517 docStoredFields := map[uint16]interimStoredField{} 518 519 for docNum, result := range s.results { 520 for fieldID := range docStoredFields { // reset for next doc 521 delete(docStoredFields, fieldID) 522 } 523 524 for _, field := range result.Document.Fields { 525 fieldID := uint16(s.getOrDefineField(field.Name())) 526 527 opts := field.Options() 528 529 if opts.IsStored() { 530 isf := docStoredFields[fieldID] 531 isf.vals = append(isf.vals, field.Value()) 532 isf.typs = append(isf.typs, encodeFieldType(field)) 533 isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) 534 docStoredFields[fieldID] = isf 535 } 536 537 if opts.IncludeDocValues() { 538 s.IncludeDocValues[fieldID] = true 539 } 540 541 err := ValidateDocFields(field) 542 if err != nil { 543 return 0, err 544 } 545 } 546 547 var curr int 548 549 s.metaBuf.Reset() 550 data = data[:0] 551 552 // _id field special case optimizes ExternalID() lookups 553 idFieldVal := docStoredFields[uint16(0)].vals[0] 554 _, err = metaEncode(uint64(len(idFieldVal))) 555 if err != nil { 556 return 0, err 557 } 558 559 // 
handle non-"_id" fields 560 for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { 561 isf, exists := docStoredFields[uint16(fieldID)] 562 if exists { 563 curr, data, err = persistStoredFieldValues( 564 fieldID, isf.vals, isf.typs, isf.arrayposs, 565 curr, metaEncode, data) 566 if err != nil { 567 return 0, err 568 } 569 } 570 } 571 572 metaBytes := s.metaBuf.Bytes() 573 574 compressed = snappy.Encode(compressed[:cap(compressed)], data) 575 576 docStoredOffsets[docNum] = uint64(s.w.Count()) 577 578 _, err := writeUvarints(s.w, 579 uint64(len(metaBytes)), 580 uint64(len(idFieldVal)+len(compressed))) 581 if err != nil { 582 return 0, err 583 } 584 585 _, err = s.w.Write(metaBytes) 586 if err != nil { 587 return 0, err 588 } 589 590 _, err = s.w.Write(idFieldVal) 591 if err != nil { 592 return 0, err 593 } 594 595 _, err = s.w.Write(compressed) 596 if err != nil { 597 return 0, err 598 } 599 } 600 601 storedIndexOffset = uint64(s.w.Count()) 602 603 for _, docStoredOffset := range docStoredOffsets { 604 err = binary.Write(s.w, binary.BigEndian, docStoredOffset) 605 if err != nil { 606 return 0, err 607 } 608 } 609 610 return storedIndexOffset, nil 611} 612 613func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { 614 dictOffsets = make([]uint64, len(s.FieldsInv)) 615 616 fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) 617 fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) 618 619 buf := s.grabBuf(binary.MaxVarintLen64) 620 621 tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) 622 locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) 623 fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false) 624 625 var docTermMap [][]byte 626 627 if s.builder == nil { 628 s.builder, err = vellum.New(&s.builderBuf, nil) 629 if err != nil { 630 return 0, nil, err 631 } 632 } 633 634 for fieldID, terms := range s.DictKeys { 635 if cap(docTermMap) < 
len(s.results) { 636 docTermMap = make([][]byte, len(s.results)) 637 } else { 638 docTermMap = docTermMap[0:len(s.results)] 639 for docNum := range docTermMap { // reset the docTermMap 640 docTermMap[docNum] = docTermMap[docNum][:0] 641 } 642 } 643 644 dict := s.Dicts[fieldID] 645 646 for _, term := range terms { // terms are already sorted 647 pid := dict[term] - 1 648 649 postingsBS := s.Postings[pid] 650 651 freqNorms := s.FreqNorms[pid] 652 freqNormOffset := 0 653 654 locs := s.Locs[pid] 655 locOffset := 0 656 657 postingsItr := postingsBS.Iterator() 658 for postingsItr.HasNext() { 659 docNum := uint64(postingsItr.Next()) 660 661 freqNorm := freqNorms[freqNormOffset] 662 663 err = tfEncoder.Add(docNum, 664 encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), 665 uint64(math.Float32bits(freqNorm.norm))) 666 if err != nil { 667 return 0, nil, err 668 } 669 670 if freqNorm.numLocs > 0 { 671 numBytesLocs := 0 672 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 673 numBytesLocs += totalUvarintBytes( 674 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 675 uint64(len(loc.arrayposs)), loc.arrayposs) 676 } 677 678 err = locEncoder.Add(docNum, uint64(numBytesLocs)) 679 if err != nil { 680 return 0, nil, err 681 } 682 683 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 684 err = locEncoder.Add(docNum, 685 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 686 uint64(len(loc.arrayposs))) 687 if err != nil { 688 return 0, nil, err 689 } 690 691 err = locEncoder.Add(docNum, loc.arrayposs...) 
692 if err != nil { 693 return 0, nil, err 694 } 695 } 696 697 locOffset += freqNorm.numLocs 698 } 699 700 freqNormOffset++ 701 702 docTermMap[docNum] = append( 703 append(docTermMap[docNum], term...), 704 termSeparator) 705 } 706 707 tfEncoder.Close() 708 locEncoder.Close() 709 710 postingsOffset, err := 711 writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) 712 if err != nil { 713 return 0, nil, err 714 } 715 716 if postingsOffset > uint64(0) { 717 err = s.builder.Insert([]byte(term), postingsOffset) 718 if err != nil { 719 return 0, nil, err 720 } 721 } 722 723 tfEncoder.Reset() 724 locEncoder.Reset() 725 } 726 727 err = s.builder.Close() 728 if err != nil { 729 return 0, nil, err 730 } 731 732 // record where this dictionary starts 733 dictOffsets[fieldID] = uint64(s.w.Count()) 734 735 vellumData := s.builderBuf.Bytes() 736 737 // write out the length of the vellum data 738 n := binary.PutUvarint(buf, uint64(len(vellumData))) 739 _, err = s.w.Write(buf[:n]) 740 if err != nil { 741 return 0, nil, err 742 } 743 744 // write this vellum to disk 745 _, err = s.w.Write(vellumData) 746 if err != nil { 747 return 0, nil, err 748 } 749 750 // reset vellum for reuse 751 s.builderBuf.Reset() 752 753 err = s.builder.Reset(&s.builderBuf) 754 if err != nil { 755 return 0, nil, err 756 } 757 758 // write the field doc values 759 if s.IncludeDocValues[fieldID] { 760 for docNum, docTerms := range docTermMap { 761 if len(docTerms) > 0 { 762 err = fdvEncoder.Add(uint64(docNum), docTerms) 763 if err != nil { 764 return 0, nil, err 765 } 766 } 767 } 768 err = fdvEncoder.Close() 769 if err != nil { 770 return 0, nil, err 771 } 772 773 fdvOffsetsStart[fieldID] = uint64(s.w.Count()) 774 775 _, err = fdvEncoder.Write() 776 if err != nil { 777 return 0, nil, err 778 } 779 780 fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) 781 782 fdvEncoder.Reset() 783 } else { 784 fdvOffsetsStart[fieldID] = fieldNotUninverted 785 fdvOffsetsEnd[fieldID] = fieldNotUninverted 786 } 787 } 788 
789 fdvIndexOffset = uint64(s.w.Count()) 790 791 for i := 0; i < len(fdvOffsetsStart); i++ { 792 n := binary.PutUvarint(buf, fdvOffsetsStart[i]) 793 _, err := s.w.Write(buf[:n]) 794 if err != nil { 795 return 0, nil, err 796 } 797 n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) 798 _, err = s.w.Write(buf[:n]) 799 if err != nil { 800 return 0, nil, err 801 } 802 } 803 804 return fdvIndexOffset, dictOffsets, nil 805} 806 807func encodeFieldType(f document.Field) byte { 808 fieldType := byte('x') 809 switch f.(type) { 810 case *document.TextField: 811 fieldType = 't' 812 case *document.NumericField: 813 fieldType = 'n' 814 case *document.DateTimeField: 815 fieldType = 'd' 816 case *document.BooleanField: 817 fieldType = 'b' 818 case *document.GeoPointField: 819 fieldType = 'g' 820 case *document.CompositeField: 821 fieldType = 'c' 822 } 823 return fieldType 824} 825 826// returns the total # of bytes needed to encode the given uint64's 827// into binary.PutUVarint() encoding 828func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { 829 n = numUvarintBytes(a) 830 n += numUvarintBytes(b) 831 n += numUvarintBytes(c) 832 n += numUvarintBytes(d) 833 n += numUvarintBytes(e) 834 for _, v := range more { 835 n += numUvarintBytes(v) 836 } 837 return n 838} 839 840// returns # of bytes needed to encode x in binary.PutUvarint() encoding 841func numUvarintBytes(x uint64) (n int) { 842 for x >= 0x80 { 843 x >>= 7 844 n++ 845 } 846 return n + 1 847} 848