1// Copyright (c) 2018 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zap 16 17import ( 18 "bytes" 19 "encoding/binary" 20 "math" 21 "sort" 22 "sync" 23 24 "github.com/RoaringBitmap/roaring" 25 "github.com/blevesearch/bleve/analysis" 26 "github.com/blevesearch/bleve/document" 27 "github.com/blevesearch/bleve/index" 28 "github.com/blevesearch/bleve/index/scorch/segment" 29 "github.com/couchbase/vellum" 30 "github.com/golang/snappy" 31) 32 33var NewSegmentBufferNumResultsBump int = 100 34var NewSegmentBufferNumResultsFactor float64 = 1.0 35var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 36 37// ValidateDocFields can be set by applications to perform additional checks 38// on fields in a document being added to a new segment, by default it does 39// nothing. 40// This API is experimental and may be removed at any time. 41var ValidateDocFields = func(field document.Field) error { 42 return nil 43} 44 45// AnalysisResultsToSegmentBase produces an in-memory zap-encoded 46// SegmentBase from analysis results 47func (z *ZapPlugin) New(results []*index.AnalysisResult) ( 48 segment.Segment, uint64, error) { 49 return z.newWithChunkMode(results, DefaultChunkMode) 50} 51 52func (*ZapPlugin) newWithChunkMode(results []*index.AnalysisResult, 53 chunkMode uint32) (segment.Segment, uint64, error) { 54 s := interimPool.Get().(*interim) 55 56 var br bytes.Buffer 57 if s.lastNumDocs > 0 { 58 // use previous results to initialize the buf with an estimate 59 // size, but note that the interim instance comes from a 60 // global interimPool, so multiple scorch instances indexing 61 // different docs can lead to low quality estimates 62 estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * 63 NewSegmentBufferNumResultsFactor) 64 estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * 65 NewSegmentBufferAvgBytesPerDocFactor) 66 br.Grow(estimateAvgBytesPerDoc * estimateNumResults) 67 } 68 69 s.results = results 70 s.chunkMode = chunkMode 71 s.w = NewCountHashWriter(&br) 72 73 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, 74 err := s.convert() 75 if err != nil { 76 return nil, uint64(0), err 77 } 78 79 sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkMode, 80 s.FieldsMap, s.FieldsInv, uint64(len(results)), 81 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) 82 83 if err == nil && s.reset() == nil { 84 s.lastNumDocs = len(results) 85 s.lastOutSize = len(br.Bytes()) 86 interimPool.Put(s) 87 } 88 89 return sb, uint64(len(br.Bytes())), err 90} 91 92var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} 93 94// interim holds temporary working data used while converting from 95// analysis results to a zap-encoded segment 96type interim struct { 97 results []*index.AnalysisResult 98 99 chunkMode uint32 100 101 w *CountHashWriter 102 103 // FieldsMap adds 1 to field id to avoid zero value issues 104 // name -> field id + 1 105 FieldsMap map[string]uint16 106 107 // FieldsInv is the inverse of FieldsMap 108 // field id -> name 109 FieldsInv []string 110 111 // Term dictionaries for each field 112 // field id -> term -> postings list id + 1 113 Dicts []map[string]uint64 114 115 // Terms for each field, where terms are sorted ascending 116 // field id -> []term 117 DictKeys [][]string 118 119 // Fields whose IncludeDocValues is true 120 // field id -> bool 121 IncludeDocValues []bool 122 123 // postings id -> bitmap of docNums 124 Postings []*roaring.Bitmap 125 126 // postings id -> freq/norm's, one for each docNum in postings 127 FreqNorms [][]interimFreqNorm 128 freqNormsBacking []interimFreqNorm 129 130 // postings id -> locs, one for each freq 131 Locs [][]interimLoc 132 locsBacking []interimLoc 133 134 numTermsPerPostingsList []int // key is postings list id 135 numLocsPerPostingsList []int // key is postings list id 136 137 builder *vellum.Builder 138 builderBuf bytes.Buffer 139 140 metaBuf bytes.Buffer 141 142 tmp0 []byte 143 tmp1 []byte 144 145 lastNumDocs int 146 lastOutSize int 147} 148 149func (s *interim) reset() (err error) { 150 s.results = nil 151 s.chunkMode = 0 152 s.w = nil 153 s.FieldsMap = nil 154 s.FieldsInv = nil 155 for i := range s.Dicts { 156 s.Dicts[i] = nil 157 } 158 s.Dicts = s.Dicts[:0] 159 for i := range s.DictKeys { 160 s.DictKeys[i] = s.DictKeys[i][:0] 161 } 162 s.DictKeys = s.DictKeys[:0] 163 for i := range s.IncludeDocValues { 164 s.IncludeDocValues[i] = false 165 } 166 s.IncludeDocValues = s.IncludeDocValues[:0] 167 for _, idn := range s.Postings { 168 idn.Clear() 169 } 170 s.Postings = s.Postings[:0] 171 s.FreqNorms = s.FreqNorms[:0] 172 for i := range s.freqNormsBacking { 173 s.freqNormsBacking[i] = interimFreqNorm{} 174 } 175 s.freqNormsBacking = s.freqNormsBacking[:0] 176 s.Locs = s.Locs[:0] 177 for i := range s.locsBacking { 178 s.locsBacking[i] = interimLoc{} 179 } 180 s.locsBacking = s.locsBacking[:0] 181 s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] 182 s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] 183 s.builderBuf.Reset() 184 if s.builder != nil { 185 err = s.builder.Reset(&s.builderBuf) 186 } 187 s.metaBuf.Reset() 188 s.tmp0 = s.tmp0[:0] 189 s.tmp1 = s.tmp1[:0] 190 s.lastNumDocs = 0 191 s.lastOutSize = 0 192 193 return err 194} 195 196func (s *interim) grabBuf(size int) []byte { 197 buf := s.tmp0 198 if cap(buf) < size { 199 buf = make([]byte, size) 200 s.tmp0 = buf 201 } 202 return buf[0:size] 203} 204 205type interimStoredField struct { 206 vals [][]byte 207 typs []byte 208 arrayposs [][]uint64 // array positions 209} 210 211type interimFreqNorm struct { 212 freq uint64 213 norm float32 214 numLocs int 215} 216 217type interimLoc struct { 218 fieldID uint16 219 pos uint64 220 start uint64 221 end uint64 222 arrayposs []uint64 223} 224 225func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { 226 s.FieldsMap = map[string]uint16{} 227 228 s.getOrDefineField("_id") // _id field is fieldID 0 229 230 for _, result := range s.results { 231 for _, field := range result.Document.CompositeFields { 232 s.getOrDefineField(field.Name()) 233 } 234 for _, field := range result.Document.Fields { 235 s.getOrDefineField(field.Name()) 236 } 237 } 238 239 sort.Strings(s.FieldsInv[1:]) // keep _id as first field 240 241 for fieldID, fieldName := range s.FieldsInv { 242 s.FieldsMap[fieldName] = uint16(fieldID + 1) 243 } 244 245 if cap(s.IncludeDocValues) >= len(s.FieldsInv) { 246 s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] 247 } else { 248 s.IncludeDocValues = make([]bool, len(s.FieldsInv)) 249 } 250 251 s.prepareDicts() 252 253 for _, dict := range s.DictKeys { 254 sort.Strings(dict) 255 } 256 257 s.processDocuments() 258 259 storedIndexOffset, err := s.writeStoredFields() 260 if err != nil { 261 return 0, 0, 0, nil, err 262 } 263 264 var fdvIndexOffset uint64 265 var dictOffsets []uint64 266 267 if len(s.results) > 0 { 268 fdvIndexOffset, dictOffsets, err = s.writeDicts() 269 if err != nil { 270 return 0, 0, 0, nil, err 271 } 272 } else { 273 dictOffsets = make([]uint64, len(s.FieldsInv)) 274 } 275 276 fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) 277 if err != nil { 278 return 0, 0, 0, nil, err 279 } 280 281 return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil 282} 283 284func (s *interim) getOrDefineField(fieldName string) int { 285 fieldIDPlus1, exists := s.FieldsMap[fieldName] 286 if !exists { 287 fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) 288 s.FieldsMap[fieldName] = fieldIDPlus1 289 s.FieldsInv = append(s.FieldsInv, fieldName) 290 291 s.Dicts = append(s.Dicts, make(map[string]uint64)) 292 293 n := len(s.DictKeys) 294 if n < cap(s.DictKeys) { 295 s.DictKeys = s.DictKeys[:n+1] 296 s.DictKeys[n] = s.DictKeys[n][:0] 297 } else { 298 s.DictKeys = append(s.DictKeys, []string(nil)) 299 } 300 } 301 302 return int(fieldIDPlus1 - 1) 303} 304 305// fill Dicts and DictKeys from analysis results 306func (s *interim) prepareDicts() { 307 var pidNext int 308 309 var totTFs int 310 var totLocs int 311 312 visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) { 313 dict := s.Dicts[fieldID] 314 dictKeys := s.DictKeys[fieldID] 315 316 for term, tf := range tfs { 317 pidPlus1, exists := dict[term] 318 if !exists { 319 pidNext++ 320 pidPlus1 = uint64(pidNext) 321 322 dict[term] = pidPlus1 323 dictKeys = append(dictKeys, term) 324 325 s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) 326 s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) 327 } 328 329 pid := pidPlus1 - 1 330 331 s.numTermsPerPostingsList[pid] += 1 332 s.numLocsPerPostingsList[pid] += len(tf.Locations) 333 334 totLocs += len(tf.Locations) 335 } 336 337 totTFs += len(tfs) 338 339 s.DictKeys[fieldID] = dictKeys 340 } 341 342 for _, result := range s.results { 343 // walk each composite field 344 for _, field := range result.Document.CompositeFields { 345 fieldID := uint16(s.getOrDefineField(field.Name())) 346 _, tf := field.Analyze() 347 visitField(fieldID, tf) 348 } 349 350 // walk each field 351 for i, field := range result.Document.Fields { 352 fieldID := uint16(s.getOrDefineField(field.Name())) 353 tf := result.Analyzed[i] 354 visitField(fieldID, tf) 355 } 356 } 357 358 numPostingsLists := pidNext 359 360 if cap(s.Postings) >= numPostingsLists { 361 s.Postings = s.Postings[:numPostingsLists] 362 } else { 363 postings := make([]*roaring.Bitmap, numPostingsLists) 364 copy(postings, s.Postings[:cap(s.Postings)]) 365 for i := 0; i < numPostingsLists; i++ { 366 if postings[i] == nil { 367 postings[i] = roaring.New() 368 } 369 } 370 s.Postings = postings 371 } 372 373 if cap(s.FreqNorms) >= numPostingsLists { 374 s.FreqNorms = s.FreqNorms[:numPostingsLists] 375 } else { 376 s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) 377 } 378 379 if cap(s.freqNormsBacking) >= totTFs { 380 s.freqNormsBacking = s.freqNormsBacking[:totTFs] 381 } else { 382 s.freqNormsBacking = make([]interimFreqNorm, totTFs) 383 } 384 385 freqNormsBacking := s.freqNormsBacking 386 for pid, numTerms := range s.numTermsPerPostingsList { 387 s.FreqNorms[pid] = freqNormsBacking[0:0] 388 freqNormsBacking = freqNormsBacking[numTerms:] 389 } 390 391 if cap(s.Locs) >= numPostingsLists { 392 s.Locs = s.Locs[:numPostingsLists] 393 } else { 394 s.Locs = make([][]interimLoc, numPostingsLists) 395 } 396 397 if cap(s.locsBacking) >= totLocs { 398 s.locsBacking = s.locsBacking[:totLocs] 399 } else { 400 s.locsBacking = make([]interimLoc, totLocs) 401 } 402 403 locsBacking := s.locsBacking 404 for pid, numLocs := range s.numLocsPerPostingsList { 405 s.Locs[pid] = locsBacking[0:0] 406 locsBacking = locsBacking[numLocs:] 407 } 408} 409 410func (s *interim) processDocuments() { 411 numFields := len(s.FieldsInv) 412 reuseFieldLens := make([]int, numFields) 413 reuseFieldTFs := make([]analysis.TokenFrequencies, numFields) 414 415 for docNum, result := range s.results { 416 for i := 0; i < numFields; i++ { // clear these for reuse 417 reuseFieldLens[i] = 0 418 reuseFieldTFs[i] = nil 419 } 420 421 s.processDocument(uint64(docNum), result, 422 reuseFieldLens, reuseFieldTFs) 423 } 424} 425 426func (s *interim) processDocument(docNum uint64, 427 result *index.AnalysisResult, 428 fieldLens []int, fieldTFs []analysis.TokenFrequencies) { 429 visitField := func(fieldID uint16, fieldName string, 430 ln int, tf analysis.TokenFrequencies) { 431 fieldLens[fieldID] += ln 432 433 existingFreqs := fieldTFs[fieldID] 434 if existingFreqs != nil { 435 existingFreqs.MergeAll(fieldName, tf) 436 } else { 437 fieldTFs[fieldID] = tf 438 } 439 } 440 441 // walk each composite field 442 for _, field := range result.Document.CompositeFields { 443 fieldID := uint16(s.getOrDefineField(field.Name())) 444 ln, tf := field.Analyze() 445 visitField(fieldID, field.Name(), ln, tf) 446 } 447 448 // walk each field 449 for i, field := range result.Document.Fields { 450 fieldID := uint16(s.getOrDefineField(field.Name())) 451 ln := result.Length[i] 452 tf := result.Analyzed[i] 453 visitField(fieldID, field.Name(), ln, tf) 454 } 455 456 // now that it's been rolled up into fieldTFs, walk that 457 for fieldID, tfs := range fieldTFs { 458 dict := s.Dicts[fieldID] 459 norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) 460 461 for term, tf := range tfs { 462 pid := dict[term] - 1 463 bs := s.Postings[pid] 464 bs.Add(uint32(docNum)) 465 466 s.FreqNorms[pid] = append(s.FreqNorms[pid], 467 interimFreqNorm{ 468 freq: uint64(tf.Frequency()), 469 norm: norm, 470 numLocs: len(tf.Locations), 471 }) 472 473 if len(tf.Locations) > 0 { 474 locs := s.Locs[pid] 475 476 for _, loc := range tf.Locations { 477 var locf = uint16(fieldID) 478 if loc.Field != "" { 479 locf = uint16(s.getOrDefineField(loc.Field)) 480 } 481 var arrayposs []uint64 482 if len(loc.ArrayPositions) > 0 { 483 arrayposs = loc.ArrayPositions 484 } 485 locs = append(locs, interimLoc{ 486 fieldID: locf, 487 pos: uint64(loc.Position), 488 start: uint64(loc.Start), 489 end: uint64(loc.End), 490 arrayposs: arrayposs, 491 }) 492 } 493 494 s.Locs[pid] = locs 495 } 496 } 497 } 498} 499 500func (s *interim) writeStoredFields() ( 501 storedIndexOffset uint64, err error) { 502 varBuf := make([]byte, binary.MaxVarintLen64) 503 metaEncode := func(val uint64) (int, error) { 504 wb := binary.PutUvarint(varBuf, val) 505 return s.metaBuf.Write(varBuf[:wb]) 506 } 507 508 data, compressed := s.tmp0[:0], s.tmp1[:0] 509 defer func() { s.tmp0, s.tmp1 = data, compressed }() 510 511 // keyed by docNum 512 docStoredOffsets := make([]uint64, len(s.results)) 513 514 // keyed by fieldID, for the current doc in the loop 515 docStoredFields := map[uint16]interimStoredField{} 516 517 for docNum, result := range s.results { 518 for fieldID := range docStoredFields { // reset for next doc 519 delete(docStoredFields, fieldID) 520 } 521 522 for _, field := range result.Document.Fields { 523 fieldID := uint16(s.getOrDefineField(field.Name())) 524 525 opts := field.Options() 526 527 if opts.IsStored() { 528 isf := docStoredFields[fieldID] 529 isf.vals = append(isf.vals, field.Value()) 530 isf.typs = append(isf.typs, encodeFieldType(field)) 531 isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) 532 docStoredFields[fieldID] = isf 533 } 534 535 if opts.IncludeDocValues() { 536 s.IncludeDocValues[fieldID] = true 537 } 538 539 err := ValidateDocFields(field) 540 if err != nil { 541 return 0, err 542 } 543 } 544 545 var curr int 546 547 s.metaBuf.Reset() 548 data = data[:0] 549 550 // _id field special case optimizes ExternalID() lookups 551 idFieldVal := docStoredFields[uint16(0)].vals[0] 552 _, err = metaEncode(uint64(len(idFieldVal))) 553 if err != nil { 554 return 0, err 555 } 556 557 // handle non-"_id" fields 558 for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { 559 isf, exists := docStoredFields[uint16(fieldID)] 560 if exists { 561 curr, data, err = persistStoredFieldValues( 562 fieldID, isf.vals, isf.typs, isf.arrayposs, 563 curr, metaEncode, data) 564 if err != nil { 565 return 0, err 566 } 567 } 568 } 569 570 metaBytes := s.metaBuf.Bytes() 571 572 compressed = snappy.Encode(compressed[:cap(compressed)], data) 573 574 docStoredOffsets[docNum] = uint64(s.w.Count()) 575 576 _, err := writeUvarints(s.w, 577 uint64(len(metaBytes)), 578 uint64(len(idFieldVal)+len(compressed))) 579 if err != nil { 580 return 0, err 581 } 582 583 _, err = s.w.Write(metaBytes) 584 if err != nil { 585 return 0, err 586 } 587 588 _, err = s.w.Write(idFieldVal) 589 if err != nil { 590 return 0, err 591 } 592 593 _, err = s.w.Write(compressed) 594 if err != nil { 595 return 0, err 596 } 597 } 598 599 storedIndexOffset = uint64(s.w.Count()) 600 601 for _, docStoredOffset := range docStoredOffsets { 602 err = binary.Write(s.w, binary.BigEndian, docStoredOffset) 603 if err != nil { 604 return 0, err 605 } 606 } 607 608 return storedIndexOffset, nil 609} 610 611func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { 612 dictOffsets = make([]uint64, len(s.FieldsInv)) 613 614 fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) 615 fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) 616 617 buf := s.grabBuf(binary.MaxVarintLen64) 618 619 // these int coders are initialized with chunk size 1024 620 // however this will be reset to the correct chunk size 621 // while processing each individual field-term section 622 tfEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1)) 623 locEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1)) 624 625 var docTermMap [][]byte 626 627 if s.builder == nil { 628 s.builder, err = vellum.New(&s.builderBuf, nil) 629 if err != nil { 630 return 0, nil, err 631 } 632 } 633 634 for fieldID, terms := range s.DictKeys { 635 if cap(docTermMap) < len(s.results) { 636 docTermMap = make([][]byte, len(s.results)) 637 } else { 638 docTermMap = docTermMap[0:len(s.results)] 639 for docNum := range docTermMap { // reset the docTermMap 640 docTermMap[docNum] = docTermMap[docNum][:0] 641 } 642 } 643 644 dict := s.Dicts[fieldID] 645 646 for _, term := range terms { // terms are already sorted 647 pid := dict[term] - 1 648 649 postingsBS := s.Postings[pid] 650 651 freqNorms := s.FreqNorms[pid] 652 freqNormOffset := 0 653 654 locs := s.Locs[pid] 655 locOffset := 0 656 657 chunkSize, err := getChunkSize(s.chunkMode, postingsBS.GetCardinality(), uint64(len(s.results))) 658 if err != nil { 659 return 0, nil, err 660 } 661 tfEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1)) 662 locEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1)) 663 664 postingsItr := postingsBS.Iterator() 665 for postingsItr.HasNext() { 666 docNum := uint64(postingsItr.Next()) 667 668 freqNorm := freqNorms[freqNormOffset] 669 670 err = tfEncoder.Add(docNum, 671 encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), 672 uint64(math.Float32bits(freqNorm.norm))) 673 if err != nil { 674 return 0, nil, err 675 } 676 677 if freqNorm.numLocs > 0 { 678 numBytesLocs := 0 679 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 680 numBytesLocs += totalUvarintBytes( 681 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 682 uint64(len(loc.arrayposs)), loc.arrayposs) 683 } 684 685 err = locEncoder.Add(docNum, uint64(numBytesLocs)) 686 if err != nil { 687 return 0, nil, err 688 } 689 690 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 691 err = locEncoder.Add(docNum, 692 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 693 uint64(len(loc.arrayposs))) 694 if err != nil { 695 return 0, nil, err 696 } 697 698 err = locEncoder.Add(docNum, loc.arrayposs...) 699 if err != nil { 700 return 0, nil, err 701 } 702 } 703 704 locOffset += freqNorm.numLocs 705 } 706 707 freqNormOffset++ 708 709 docTermMap[docNum] = append( 710 append(docTermMap[docNum], term...), 711 termSeparator) 712 } 713 714 tfEncoder.Close() 715 locEncoder.Close() 716 717 postingsOffset, err := 718 writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) 719 if err != nil { 720 return 0, nil, err 721 } 722 723 if postingsOffset > uint64(0) { 724 err = s.builder.Insert([]byte(term), postingsOffset) 725 if err != nil { 726 return 0, nil, err 727 } 728 } 729 730 tfEncoder.Reset() 731 locEncoder.Reset() 732 } 733 734 err = s.builder.Close() 735 if err != nil { 736 return 0, nil, err 737 } 738 739 // record where this dictionary starts 740 dictOffsets[fieldID] = uint64(s.w.Count()) 741 742 vellumData := s.builderBuf.Bytes() 743 744 // write out the length of the vellum data 745 n := binary.PutUvarint(buf, uint64(len(vellumData))) 746 _, err = s.w.Write(buf[:n]) 747 if err != nil { 748 return 0, nil, err 749 } 750 751 // write this vellum to disk 752 _, err = s.w.Write(vellumData) 753 if err != nil { 754 return 0, nil, err 755 } 756 757 // reset vellum for reuse 758 s.builderBuf.Reset() 759 760 err = s.builder.Reset(&s.builderBuf) 761 if err != nil { 762 return 0, nil, err 763 } 764 765 // write the field doc values 766 // NOTE: doc values continue to use legacy chunk mode 767 chunkSize, err := getChunkSize(LegacyChunkMode, 0, 0) 768 if err != nil { 769 return 0, nil, err 770 } 771 fdvEncoder := newChunkedContentCoder(chunkSize, uint64(len(s.results)-1), s.w, false) 772 if s.IncludeDocValues[fieldID] { 773 for docNum, docTerms := range docTermMap { 774 if len(docTerms) > 0 { 775 err = fdvEncoder.Add(uint64(docNum), docTerms) 776 if err != nil { 777 return 0, nil, err 778 } 779 } 780 } 781 err = fdvEncoder.Close() 782 if err != nil { 783 return 0, nil, err 784 } 785 786 fdvOffsetsStart[fieldID] = uint64(s.w.Count()) 787 788 _, err = fdvEncoder.Write() 789 if err != nil { 790 return 0, nil, err 791 } 792 793 fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) 794 795 fdvEncoder.Reset() 796 } else { 797 fdvOffsetsStart[fieldID] = fieldNotUninverted 798 fdvOffsetsEnd[fieldID] = fieldNotUninverted 799 } 800 } 801 802 fdvIndexOffset = uint64(s.w.Count()) 803 804 for i := 0; i < len(fdvOffsetsStart); i++ { 805 n := binary.PutUvarint(buf, fdvOffsetsStart[i]) 806 _, err := s.w.Write(buf[:n]) 807 if err != nil { 808 return 0, nil, err 809 } 810 n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) 811 _, err = s.w.Write(buf[:n]) 812 if err != nil { 813 return 0, nil, err 814 } 815 } 816 817 return fdvIndexOffset, dictOffsets, nil 818} 819 820func encodeFieldType(f document.Field) byte { 821 fieldType := byte('x') 822 switch f.(type) { 823 case *document.TextField: 824 fieldType = 't' 825 case *document.NumericField: 826 fieldType = 'n' 827 case *document.DateTimeField: 828 fieldType = 'd' 829 case *document.BooleanField: 830 fieldType = 'b' 831 case *document.GeoPointField: 832 fieldType = 'g' 833 case *document.CompositeField: 834 fieldType = 'c' 835 } 836 return fieldType 837} 838 839// returns the total # of bytes needed to encode the given uint64's 840// into binary.PutUVarint() encoding 841func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { 842 n = numUvarintBytes(a) 843 n += numUvarintBytes(b) 844 n += numUvarintBytes(c) 845 n += numUvarintBytes(d) 846 n += numUvarintBytes(e) 847 for _, v := range more { 848 n += numUvarintBytes(v) 849 } 850 return n 851} 852 853// returns # of bytes needed to encode x in binary.PutUvarint() encoding 854func numUvarintBytes(x uint64) (n int) { 855 for x >= 0x80 { 856 x >>= 7 857 n++ 858 } 859 return n + 1 860} 861