// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	index "github.com/blevesearch/bleve_index_api"
	segment "github.com/blevesearch/scorch_segment_api/v2"
	"github.com/blevesearch/vellum"
	"github.com/golang/snappy"
)

// NewSegmentBufferNumResultsBump is added to the number of documents being
// converted when estimating the initial size of the output buffer for a new
// segment.
var NewSegmentBufferNumResultsBump int = 100

// NewSegmentBufferNumResultsFactor is a multiplicative tuning factor applied
// when estimating the initial size of the output buffer for a new segment.
var NewSegmentBufferNumResultsFactor float64 = 1.0

// NewSegmentBufferAvgBytesPerDocFactor is a multiplicative tuning factor
// applied when estimating the initial size of the output buffer for a new
// segment.
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment, by default it does
// nothing.
// This API is experimental and may be removed at any time.
39var ValidateDocFields = func(field index.Field) error { 40 return nil 41} 42 43// New creates an in-memory zap-encoded SegmentBase from a set of Documents 44func (z *ZapPlugin) New(results []index.Document) ( 45 segment.Segment, uint64, error) { 46 return z.newWithChunkMode(results, DefaultChunkMode) 47} 48 49func (*ZapPlugin) newWithChunkMode(results []index.Document, 50 chunkMode uint32) (segment.Segment, uint64, error) { 51 s := interimPool.Get().(*interim) 52 53 var br bytes.Buffer 54 if s.lastNumDocs > 0 { 55 // use previous results to initialize the buf with an estimate 56 // size, but note that the interim instance comes from a 57 // global interimPool, so multiple scorch instances indexing 58 // different docs can lead to low quality estimates 59 estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * 60 NewSegmentBufferNumResultsFactor) 61 estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * 62 NewSegmentBufferAvgBytesPerDocFactor) 63 br.Grow(estimateAvgBytesPerDoc * estimateNumResults) 64 } 65 66 s.results = results 67 s.chunkMode = chunkMode 68 s.w = NewCountHashWriter(&br) 69 70 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, 71 err := s.convert() 72 if err != nil { 73 return nil, uint64(0), err 74 } 75 76 sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkMode, 77 s.FieldsMap, s.FieldsInv, uint64(len(results)), 78 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) 79 80 if err == nil && s.reset() == nil { 81 s.lastNumDocs = len(results) 82 s.lastOutSize = len(br.Bytes()) 83 interimPool.Put(s) 84 } 85 86 return sb, uint64(len(br.Bytes())), err 87} 88 89var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} 90 91// interim holds temporary working data used while converting from 92// analysis results to a zap-encoded segment 93type interim struct { 94 results []index.Document 95 96 chunkMode uint32 97 98 w *CountHashWriter 99 100 // FieldsMap adds 1 to 
field id to avoid zero value issues 101 // name -> field id + 1 102 FieldsMap map[string]uint16 103 104 // FieldsInv is the inverse of FieldsMap 105 // field id -> name 106 FieldsInv []string 107 108 // Term dictionaries for each field 109 // field id -> term -> postings list id + 1 110 Dicts []map[string]uint64 111 112 // Terms for each field, where terms are sorted ascending 113 // field id -> []term 114 DictKeys [][]string 115 116 // Fields whose IncludeDocValues is true 117 // field id -> bool 118 IncludeDocValues []bool 119 120 // postings id -> bitmap of docNums 121 Postings []*roaring.Bitmap 122 123 // postings id -> freq/norm's, one for each docNum in postings 124 FreqNorms [][]interimFreqNorm 125 freqNormsBacking []interimFreqNorm 126 127 // postings id -> locs, one for each freq 128 Locs [][]interimLoc 129 locsBacking []interimLoc 130 131 numTermsPerPostingsList []int // key is postings list id 132 numLocsPerPostingsList []int // key is postings list id 133 134 builder *vellum.Builder 135 builderBuf bytes.Buffer 136 137 metaBuf bytes.Buffer 138 139 tmp0 []byte 140 tmp1 []byte 141 142 lastNumDocs int 143 lastOutSize int 144} 145 146func (s *interim) reset() (err error) { 147 s.results = nil 148 s.chunkMode = 0 149 s.w = nil 150 s.FieldsMap = nil 151 s.FieldsInv = nil 152 for i := range s.Dicts { 153 s.Dicts[i] = nil 154 } 155 s.Dicts = s.Dicts[:0] 156 for i := range s.DictKeys { 157 s.DictKeys[i] = s.DictKeys[i][:0] 158 } 159 s.DictKeys = s.DictKeys[:0] 160 for i := range s.IncludeDocValues { 161 s.IncludeDocValues[i] = false 162 } 163 s.IncludeDocValues = s.IncludeDocValues[:0] 164 for _, idn := range s.Postings { 165 idn.Clear() 166 } 167 s.Postings = s.Postings[:0] 168 s.FreqNorms = s.FreqNorms[:0] 169 for i := range s.freqNormsBacking { 170 s.freqNormsBacking[i] = interimFreqNorm{} 171 } 172 s.freqNormsBacking = s.freqNormsBacking[:0] 173 s.Locs = s.Locs[:0] 174 for i := range s.locsBacking { 175 s.locsBacking[i] = interimLoc{} 176 } 177 s.locsBacking 
= s.locsBacking[:0] 178 s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] 179 s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] 180 s.builderBuf.Reset() 181 if s.builder != nil { 182 err = s.builder.Reset(&s.builderBuf) 183 } 184 s.metaBuf.Reset() 185 s.tmp0 = s.tmp0[:0] 186 s.tmp1 = s.tmp1[:0] 187 s.lastNumDocs = 0 188 s.lastOutSize = 0 189 190 return err 191} 192 193func (s *interim) grabBuf(size int) []byte { 194 buf := s.tmp0 195 if cap(buf) < size { 196 buf = make([]byte, size) 197 s.tmp0 = buf 198 } 199 return buf[0:size] 200} 201 202type interimStoredField struct { 203 vals [][]byte 204 typs []byte 205 arrayposs [][]uint64 // array positions 206} 207 208type interimFreqNorm struct { 209 freq uint64 210 norm float32 211 numLocs int 212} 213 214type interimLoc struct { 215 fieldID uint16 216 pos uint64 217 start uint64 218 end uint64 219 arrayposs []uint64 220} 221 222func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { 223 s.FieldsMap = map[string]uint16{} 224 225 s.getOrDefineField("_id") // _id field is fieldID 0 226 227 for _, result := range s.results { 228 result.VisitComposite(func(field index.CompositeField) { 229 s.getOrDefineField(field.Name()) 230 }) 231 result.VisitFields(func(field index.Field) { 232 s.getOrDefineField(field.Name()) 233 }) 234 } 235 236 sort.Strings(s.FieldsInv[1:]) // keep _id as first field 237 238 for fieldID, fieldName := range s.FieldsInv { 239 s.FieldsMap[fieldName] = uint16(fieldID + 1) 240 } 241 242 if cap(s.IncludeDocValues) >= len(s.FieldsInv) { 243 s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] 244 } else { 245 s.IncludeDocValues = make([]bool, len(s.FieldsInv)) 246 } 247 248 s.prepareDicts() 249 250 for _, dict := range s.DictKeys { 251 sort.Strings(dict) 252 } 253 254 s.processDocuments() 255 256 storedIndexOffset, err := s.writeStoredFields() 257 if err != nil { 258 return 0, 0, 0, nil, err 259 } 260 261 var fdvIndexOffset uint64 262 var dictOffsets []uint64 263 264 if 
len(s.results) > 0 { 265 fdvIndexOffset, dictOffsets, err = s.writeDicts() 266 if err != nil { 267 return 0, 0, 0, nil, err 268 } 269 } else { 270 dictOffsets = make([]uint64, len(s.FieldsInv)) 271 } 272 273 fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) 274 if err != nil { 275 return 0, 0, 0, nil, err 276 } 277 278 return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil 279} 280 281func (s *interim) getOrDefineField(fieldName string) int { 282 fieldIDPlus1, exists := s.FieldsMap[fieldName] 283 if !exists { 284 fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) 285 s.FieldsMap[fieldName] = fieldIDPlus1 286 s.FieldsInv = append(s.FieldsInv, fieldName) 287 288 s.Dicts = append(s.Dicts, make(map[string]uint64)) 289 290 n := len(s.DictKeys) 291 if n < cap(s.DictKeys) { 292 s.DictKeys = s.DictKeys[:n+1] 293 s.DictKeys[n] = s.DictKeys[n][:0] 294 } else { 295 s.DictKeys = append(s.DictKeys, []string(nil)) 296 } 297 } 298 299 return int(fieldIDPlus1 - 1) 300} 301 302// fill Dicts and DictKeys from analysis results 303func (s *interim) prepareDicts() { 304 var pidNext int 305 306 var totTFs int 307 var totLocs int 308 309 visitField := func(field index.Field) { 310 fieldID := uint16(s.getOrDefineField(field.Name())) 311 312 dict := s.Dicts[fieldID] 313 dictKeys := s.DictKeys[fieldID] 314 315 tfs := field.AnalyzedTokenFrequencies() 316 for term, tf := range tfs { 317 pidPlus1, exists := dict[term] 318 if !exists { 319 pidNext++ 320 pidPlus1 = uint64(pidNext) 321 322 dict[term] = pidPlus1 323 dictKeys = append(dictKeys, term) 324 325 s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) 326 s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) 327 } 328 329 pid := pidPlus1 - 1 330 331 s.numTermsPerPostingsList[pid] += 1 332 s.numLocsPerPostingsList[pid] += len(tf.Locations) 333 334 totLocs += len(tf.Locations) 335 } 336 337 totTFs += len(tfs) 338 339 s.DictKeys[fieldID] = dictKeys 340 } 341 342 for _, result := range 
s.results { 343 // walk each composite field 344 result.VisitComposite(func(field index.CompositeField) { 345 visitField(field) 346 }) 347 348 // walk each field 349 result.VisitFields(visitField) 350 } 351 352 numPostingsLists := pidNext 353 354 if cap(s.Postings) >= numPostingsLists { 355 s.Postings = s.Postings[:numPostingsLists] 356 } else { 357 postings := make([]*roaring.Bitmap, numPostingsLists) 358 copy(postings, s.Postings[:cap(s.Postings)]) 359 for i := 0; i < numPostingsLists; i++ { 360 if postings[i] == nil { 361 postings[i] = roaring.New() 362 } 363 } 364 s.Postings = postings 365 } 366 367 if cap(s.FreqNorms) >= numPostingsLists { 368 s.FreqNorms = s.FreqNorms[:numPostingsLists] 369 } else { 370 s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) 371 } 372 373 if cap(s.freqNormsBacking) >= totTFs { 374 s.freqNormsBacking = s.freqNormsBacking[:totTFs] 375 } else { 376 s.freqNormsBacking = make([]interimFreqNorm, totTFs) 377 } 378 379 freqNormsBacking := s.freqNormsBacking 380 for pid, numTerms := range s.numTermsPerPostingsList { 381 s.FreqNorms[pid] = freqNormsBacking[0:0] 382 freqNormsBacking = freqNormsBacking[numTerms:] 383 } 384 385 if cap(s.Locs) >= numPostingsLists { 386 s.Locs = s.Locs[:numPostingsLists] 387 } else { 388 s.Locs = make([][]interimLoc, numPostingsLists) 389 } 390 391 if cap(s.locsBacking) >= totLocs { 392 s.locsBacking = s.locsBacking[:totLocs] 393 } else { 394 s.locsBacking = make([]interimLoc, totLocs) 395 } 396 397 locsBacking := s.locsBacking 398 for pid, numLocs := range s.numLocsPerPostingsList { 399 s.Locs[pid] = locsBacking[0:0] 400 locsBacking = locsBacking[numLocs:] 401 } 402} 403 404func (s *interim) processDocuments() { 405 numFields := len(s.FieldsInv) 406 reuseFieldLens := make([]int, numFields) 407 reuseFieldTFs := make([]index.TokenFrequencies, numFields) 408 409 for docNum, result := range s.results { 410 for i := 0; i < numFields; i++ { // clear these for reuse 411 reuseFieldLens[i] = 0 412 
reuseFieldTFs[i] = nil 413 } 414 415 s.processDocument(uint64(docNum), result, 416 reuseFieldLens, reuseFieldTFs) 417 } 418} 419 420func (s *interim) processDocument(docNum uint64, 421 result index.Document, 422 fieldLens []int, fieldTFs []index.TokenFrequencies) { 423 visitField := func(field index.Field) { 424 fieldID := uint16(s.getOrDefineField(field.Name())) 425 fieldLens[fieldID] += field.AnalyzedLength() 426 427 existingFreqs := fieldTFs[fieldID] 428 if existingFreqs != nil { 429 existingFreqs.MergeAll(field.Name(), field.AnalyzedTokenFrequencies()) 430 } else { 431 fieldTFs[fieldID] = field.AnalyzedTokenFrequencies() 432 } 433 } 434 435 // walk each composite field 436 result.VisitComposite(func(field index.CompositeField) { 437 visitField(field) 438 }) 439 440 // walk each field 441 result.VisitFields(visitField) 442 443 // now that it's been rolled up into fieldTFs, walk that 444 for fieldID, tfs := range fieldTFs { 445 dict := s.Dicts[fieldID] 446 norm := math.Float32frombits(uint32(fieldLens[fieldID])) 447 448 for term, tf := range tfs { 449 pid := dict[term] - 1 450 bs := s.Postings[pid] 451 bs.Add(uint32(docNum)) 452 453 s.FreqNorms[pid] = append(s.FreqNorms[pid], 454 interimFreqNorm{ 455 freq: uint64(tf.Frequency()), 456 norm: norm, 457 numLocs: len(tf.Locations), 458 }) 459 460 if len(tf.Locations) > 0 { 461 locs := s.Locs[pid] 462 463 for _, loc := range tf.Locations { 464 var locf = uint16(fieldID) 465 if loc.Field != "" { 466 locf = uint16(s.getOrDefineField(loc.Field)) 467 } 468 var arrayposs []uint64 469 if len(loc.ArrayPositions) > 0 { 470 arrayposs = loc.ArrayPositions 471 } 472 locs = append(locs, interimLoc{ 473 fieldID: locf, 474 pos: uint64(loc.Position), 475 start: uint64(loc.Start), 476 end: uint64(loc.End), 477 arrayposs: arrayposs, 478 }) 479 } 480 481 s.Locs[pid] = locs 482 } 483 } 484 } 485} 486 487func (s *interim) writeStoredFields() ( 488 storedIndexOffset uint64, err error) { 489 varBuf := make([]byte, binary.MaxVarintLen64) 490 
metaEncode := func(val uint64) (int, error) { 491 wb := binary.PutUvarint(varBuf, val) 492 return s.metaBuf.Write(varBuf[:wb]) 493 } 494 495 data, compressed := s.tmp0[:0], s.tmp1[:0] 496 defer func() { s.tmp0, s.tmp1 = data, compressed }() 497 498 // keyed by docNum 499 docStoredOffsets := make([]uint64, len(s.results)) 500 501 // keyed by fieldID, for the current doc in the loop 502 docStoredFields := map[uint16]interimStoredField{} 503 504 for docNum, result := range s.results { 505 for fieldID := range docStoredFields { // reset for next doc 506 delete(docStoredFields, fieldID) 507 } 508 509 var validationErr error 510 result.VisitFields(func(field index.Field) { 511 fieldID := uint16(s.getOrDefineField(field.Name())) 512 513 if field.Options().IsStored() { 514 isf := docStoredFields[fieldID] 515 isf.vals = append(isf.vals, field.Value()) 516 isf.typs = append(isf.typs, field.EncodedFieldType()) 517 isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) 518 docStoredFields[fieldID] = isf 519 } 520 521 if field.Options().IncludeDocValues() { 522 s.IncludeDocValues[fieldID] = true 523 } 524 525 err := ValidateDocFields(field) 526 if err != nil && validationErr == nil { 527 validationErr = err 528 } 529 }) 530 if validationErr != nil { 531 return 0, validationErr 532 } 533 534 var curr int 535 536 s.metaBuf.Reset() 537 data = data[:0] 538 539 // _id field special case optimizes ExternalID() lookups 540 idFieldVal := docStoredFields[uint16(0)].vals[0] 541 _, err = metaEncode(uint64(len(idFieldVal))) 542 if err != nil { 543 return 0, err 544 } 545 546 // handle non-"_id" fields 547 for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { 548 isf, exists := docStoredFields[uint16(fieldID)] 549 if exists { 550 curr, data, err = persistStoredFieldValues( 551 fieldID, isf.vals, isf.typs, isf.arrayposs, 552 curr, metaEncode, data) 553 if err != nil { 554 return 0, err 555 } 556 } 557 } 558 559 metaBytes := s.metaBuf.Bytes() 560 561 compressed = 
snappy.Encode(compressed[:cap(compressed)], data) 562 563 docStoredOffsets[docNum] = uint64(s.w.Count()) 564 565 _, err := writeUvarints(s.w, 566 uint64(len(metaBytes)), 567 uint64(len(idFieldVal)+len(compressed))) 568 if err != nil { 569 return 0, err 570 } 571 572 _, err = s.w.Write(metaBytes) 573 if err != nil { 574 return 0, err 575 } 576 577 _, err = s.w.Write(idFieldVal) 578 if err != nil { 579 return 0, err 580 } 581 582 _, err = s.w.Write(compressed) 583 if err != nil { 584 return 0, err 585 } 586 } 587 588 storedIndexOffset = uint64(s.w.Count()) 589 590 for _, docStoredOffset := range docStoredOffsets { 591 err = binary.Write(s.w, binary.BigEndian, docStoredOffset) 592 if err != nil { 593 return 0, err 594 } 595 } 596 597 return storedIndexOffset, nil 598} 599 600func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { 601 dictOffsets = make([]uint64, len(s.FieldsInv)) 602 603 fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) 604 fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) 605 606 buf := s.grabBuf(binary.MaxVarintLen64) 607 608 // these int coders are initialized with chunk size 1024 609 // however this will be reset to the correct chunk size 610 // while processing each individual field-term section 611 tfEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1)) 612 locEncoder := newChunkedIntCoder(1024, uint64(len(s.results)-1)) 613 614 var docTermMap [][]byte 615 616 if s.builder == nil { 617 s.builder, err = vellum.New(&s.builderBuf, nil) 618 if err != nil { 619 return 0, nil, err 620 } 621 } 622 623 for fieldID, terms := range s.DictKeys { 624 if cap(docTermMap) < len(s.results) { 625 docTermMap = make([][]byte, len(s.results)) 626 } else { 627 docTermMap = docTermMap[0:len(s.results)] 628 for docNum := range docTermMap { // reset the docTermMap 629 docTermMap[docNum] = docTermMap[docNum][:0] 630 } 631 } 632 633 dict := s.Dicts[fieldID] 634 635 for _, term := range terms { // terms are already sorted 636 
pid := dict[term] - 1 637 638 postingsBS := s.Postings[pid] 639 640 freqNorms := s.FreqNorms[pid] 641 freqNormOffset := 0 642 643 locs := s.Locs[pid] 644 locOffset := 0 645 646 chunkSize, err := getChunkSize(s.chunkMode, postingsBS.GetCardinality(), uint64(len(s.results))) 647 if err != nil { 648 return 0, nil, err 649 } 650 tfEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1)) 651 locEncoder.SetChunkSize(chunkSize, uint64(len(s.results)-1)) 652 653 postingsItr := postingsBS.Iterator() 654 for postingsItr.HasNext() { 655 docNum := uint64(postingsItr.Next()) 656 657 freqNorm := freqNorms[freqNormOffset] 658 659 // check if freq/norm is enabled 660 if freqNorm.freq > 0 { 661 err = tfEncoder.Add(docNum, 662 encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), 663 uint64(math.Float32bits(freqNorm.norm))) 664 } else { 665 // if disabled, then skip the norm part 666 err = tfEncoder.Add(docNum, 667 encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0)) 668 } 669 if err != nil { 670 return 0, nil, err 671 } 672 673 if freqNorm.numLocs > 0 { 674 numBytesLocs := 0 675 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 676 numBytesLocs += totalUvarintBytes( 677 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 678 uint64(len(loc.arrayposs)), loc.arrayposs) 679 } 680 681 err = locEncoder.Add(docNum, uint64(numBytesLocs)) 682 if err != nil { 683 return 0, nil, err 684 } 685 686 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 687 err = locEncoder.Add(docNum, 688 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 689 uint64(len(loc.arrayposs))) 690 if err != nil { 691 return 0, nil, err 692 } 693 694 err = locEncoder.Add(docNum, loc.arrayposs...) 
695 if err != nil { 696 return 0, nil, err 697 } 698 } 699 700 locOffset += freqNorm.numLocs 701 } 702 703 freqNormOffset++ 704 705 docTermMap[docNum] = append( 706 append(docTermMap[docNum], term...), 707 termSeparator) 708 } 709 710 tfEncoder.Close() 711 locEncoder.Close() 712 713 postingsOffset, err := 714 writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) 715 if err != nil { 716 return 0, nil, err 717 } 718 719 if postingsOffset > uint64(0) { 720 err = s.builder.Insert([]byte(term), postingsOffset) 721 if err != nil { 722 return 0, nil, err 723 } 724 } 725 726 tfEncoder.Reset() 727 locEncoder.Reset() 728 } 729 730 err = s.builder.Close() 731 if err != nil { 732 return 0, nil, err 733 } 734 735 // record where this dictionary starts 736 dictOffsets[fieldID] = uint64(s.w.Count()) 737 738 vellumData := s.builderBuf.Bytes() 739 740 // write out the length of the vellum data 741 n := binary.PutUvarint(buf, uint64(len(vellumData))) 742 _, err = s.w.Write(buf[:n]) 743 if err != nil { 744 return 0, nil, err 745 } 746 747 // write this vellum to disk 748 _, err = s.w.Write(vellumData) 749 if err != nil { 750 return 0, nil, err 751 } 752 753 // reset vellum for reuse 754 s.builderBuf.Reset() 755 756 err = s.builder.Reset(&s.builderBuf) 757 if err != nil { 758 return 0, nil, err 759 } 760 761 // write the field doc values 762 // NOTE: doc values continue to use legacy chunk mode 763 chunkSize, err := getChunkSize(LegacyChunkMode, 0, 0) 764 if err != nil { 765 return 0, nil, err 766 } 767 fdvEncoder := newChunkedContentCoder(chunkSize, uint64(len(s.results)-1), s.w, false) 768 if s.IncludeDocValues[fieldID] { 769 for docNum, docTerms := range docTermMap { 770 if len(docTerms) > 0 { 771 err = fdvEncoder.Add(uint64(docNum), docTerms) 772 if err != nil { 773 return 0, nil, err 774 } 775 } 776 } 777 err = fdvEncoder.Close() 778 if err != nil { 779 return 0, nil, err 780 } 781 782 fdvOffsetsStart[fieldID] = uint64(s.w.Count()) 783 784 _, err = fdvEncoder.Write() 
785 if err != nil { 786 return 0, nil, err 787 } 788 789 fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) 790 791 fdvEncoder.Reset() 792 } else { 793 fdvOffsetsStart[fieldID] = fieldNotUninverted 794 fdvOffsetsEnd[fieldID] = fieldNotUninverted 795 } 796 } 797 798 fdvIndexOffset = uint64(s.w.Count()) 799 800 for i := 0; i < len(fdvOffsetsStart); i++ { 801 n := binary.PutUvarint(buf, fdvOffsetsStart[i]) 802 _, err := s.w.Write(buf[:n]) 803 if err != nil { 804 return 0, nil, err 805 } 806 n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) 807 _, err = s.w.Write(buf[:n]) 808 if err != nil { 809 return 0, nil, err 810 } 811 } 812 813 return fdvIndexOffset, dictOffsets, nil 814} 815 816// returns the total # of bytes needed to encode the given uint64's 817// into binary.PutUVarint() encoding 818func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { 819 n = numUvarintBytes(a) 820 n += numUvarintBytes(b) 821 n += numUvarintBytes(c) 822 n += numUvarintBytes(d) 823 n += numUvarintBytes(e) 824 for _, v := range more { 825 n += numUvarintBytes(v) 826 } 827 return n 828} 829 830// returns # of bytes needed to encode x in binary.PutUvarint() encoding 831func numUvarintBytes(x uint64) (n int) { 832 for x >= 0x80 { 833 x >>= 7 834 n++ 835 } 836 return n + 1 837} 838