1// Copyright (c) 2018 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zap 16 17import ( 18 "bytes" 19 "encoding/binary" 20 "math" 21 "sort" 22 "sync" 23 24 "github.com/RoaringBitmap/roaring" 25 index "github.com/blevesearch/bleve_index_api" 26 segment "github.com/blevesearch/scorch_segment_api/v2" 27 "github.com/blevesearch/vellum" 28 "github.com/golang/snappy" 29) 30 31var NewSegmentBufferNumResultsBump int = 100 32var NewSegmentBufferNumResultsFactor float64 = 1.0 33var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 34 35// ValidateDocFields can be set by applications to perform additional checks 36// on fields in a document being added to a new segment, by default it does 37// nothing. 38// This API is experimental and may be removed at any time. 39var ValidateDocFields = func(field index.Field) error { 40 return nil 41} 42 43var defaultChunkFactor uint32 = 1024 44 45// New creates an in-memory zap-encoded SegmentBase from a set of Documents 46func (z *ZapPlugin) New(results []index.Document) ( 47 segment.Segment, uint64, error) { 48 return z.newWithChunkFactor(results, defaultChunkFactor) 49} 50 51func (*ZapPlugin) newWithChunkFactor(results []index.Document, 52 chunkFactor uint32) (segment.Segment, uint64, error) { 53 s := interimPool.Get().(*interim) 54 55 var br bytes.Buffer 56 if s.lastNumDocs > 0 { 57 // use previous results to initialize the buf with an estimate 58 // size, but note that the interim instance comes from a 59 // global interimPool, so multiple scorch instances indexing 60 // different docs can lead to low quality estimates 61 estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * 62 NewSegmentBufferNumResultsFactor) 63 estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * 64 NewSegmentBufferAvgBytesPerDocFactor) 65 br.Grow(estimateAvgBytesPerDoc * estimateNumResults) 66 } 67 68 s.results = results 69 s.chunkFactor = chunkFactor 70 s.w = NewCountHashWriter(&br) 71 72 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, 73 err := s.convert() 74 if err != nil { 75 return nil, uint64(0), err 76 } 77 78 sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor, 79 s.FieldsMap, s.FieldsInv, uint64(len(results)), 80 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) 81 82 if err == nil && s.reset() == nil { 83 s.lastNumDocs = len(results) 84 s.lastOutSize = len(br.Bytes()) 85 interimPool.Put(s) 86 } 87 88 return sb, uint64(len(br.Bytes())), err 89} 90 91var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} 92 93// interim holds temporary working data used while converting from 94// analysis results to a zap-encoded segment 95type interim struct { 96 results []index.Document 97 98 chunkFactor uint32 99 100 w *CountHashWriter 101 102 // FieldsMap adds 1 to field id to avoid zero value issues 103 // name -> field id + 1 104 FieldsMap map[string]uint16 105 106 // FieldsInv is the inverse of FieldsMap 107 // field id -> name 108 FieldsInv []string 109 110 // Term dictionaries for each field 111 // field id -> term -> postings list id + 1 112 Dicts []map[string]uint64 113 114 // Terms for each field, where terms are sorted ascending 115 // field id -> []term 116 DictKeys [][]string 117 118 // Fields whose IncludeDocValues is true 119 // field id -> bool 120 IncludeDocValues []bool 121 122 // postings id -> bitmap of docNums 123 Postings []*roaring.Bitmap 124 125 // postings id -> freq/norm's, one for each docNum in postings 126 FreqNorms [][]interimFreqNorm 127 freqNormsBacking []interimFreqNorm 128 129 // postings id -> locs, one for each freq 130 Locs [][]interimLoc 131 locsBacking []interimLoc 132 133 numTermsPerPostingsList []int // key is postings list id 134 numLocsPerPostingsList []int // key is postings list id 135 136 builder *vellum.Builder 137 builderBuf bytes.Buffer 138 139 metaBuf bytes.Buffer 140 141 tmp0 []byte 142 tmp1 []byte 143 144 lastNumDocs int 145 lastOutSize int 146} 147 148func (s *interim) reset() (err error) { 149 s.results = nil 150 s.chunkFactor = 0 151 s.w = nil 152 s.FieldsMap = nil 153 s.FieldsInv = nil 154 for i := range s.Dicts { 155 s.Dicts[i] = nil 156 } 157 s.Dicts = s.Dicts[:0] 158 for i := range s.DictKeys { 159 s.DictKeys[i] = s.DictKeys[i][:0] 160 } 161 s.DictKeys = s.DictKeys[:0] 162 for i := range s.IncludeDocValues { 163 s.IncludeDocValues[i] = false 164 } 165 s.IncludeDocValues = s.IncludeDocValues[:0] 166 for _, idn := range s.Postings { 167 idn.Clear() 168 } 169 s.Postings = s.Postings[:0] 170 s.FreqNorms = s.FreqNorms[:0] 171 for i := range s.freqNormsBacking { 172 s.freqNormsBacking[i] = interimFreqNorm{} 173 } 174 s.freqNormsBacking = s.freqNormsBacking[:0] 175 s.Locs = s.Locs[:0] 176 for i := range s.locsBacking { 177 s.locsBacking[i] = interimLoc{} 178 } 179 s.locsBacking = s.locsBacking[:0] 180 s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] 181 s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] 182 s.builderBuf.Reset() 183 if s.builder != nil { 184 err = s.builder.Reset(&s.builderBuf) 185 } 186 s.metaBuf.Reset() 187 s.tmp0 = s.tmp0[:0] 188 s.tmp1 = s.tmp1[:0] 189 s.lastNumDocs = 0 190 s.lastOutSize = 0 191 192 return err 193} 194 195func (s *interim) grabBuf(size int) []byte { 196 buf := s.tmp0 197 if cap(buf) < size { 198 buf = make([]byte, size) 199 s.tmp0 = buf 200 } 201 return buf[0:size] 202} 203 204type interimStoredField struct { 205 vals [][]byte 206 typs []byte 207 arrayposs [][]uint64 // array positions 208} 209 210type interimFreqNorm struct { 211 freq uint64 212 norm float32 213 numLocs int 214} 215 216type interimLoc struct { 217 fieldID uint16 218 pos uint64 219 start uint64 220 end uint64 221 arrayposs []uint64 222} 223 224func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { 225 s.FieldsMap = map[string]uint16{} 226 227 s.getOrDefineField("_id") // _id field is fieldID 0 228 229 for _, result := range s.results { 230 result.VisitComposite(func(field index.CompositeField) { 231 s.getOrDefineField(field.Name()) 232 }) 233 result.VisitFields(func(field index.Field) { 234 s.getOrDefineField(field.Name()) 235 }) 236 } 237 238 sort.Strings(s.FieldsInv[1:]) // keep _id as first field 239 240 for fieldID, fieldName := range s.FieldsInv { 241 s.FieldsMap[fieldName] = uint16(fieldID + 1) 242 } 243 244 if cap(s.IncludeDocValues) >= len(s.FieldsInv) { 245 s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] 246 } else { 247 s.IncludeDocValues = make([]bool, len(s.FieldsInv)) 248 } 249 250 s.prepareDicts() 251 252 for _, dict := range s.DictKeys { 253 sort.Strings(dict) 254 } 255 256 s.processDocuments() 257 258 storedIndexOffset, err := s.writeStoredFields() 259 if err != nil { 260 return 0, 0, 0, nil, err 261 } 262 263 var fdvIndexOffset uint64 264 var dictOffsets []uint64 265 266 if len(s.results) > 0 { 267 fdvIndexOffset, dictOffsets, err = s.writeDicts() 268 if err != nil { 269 return 0, 0, 0, nil, err 270 } 271 } else { 272 dictOffsets = make([]uint64, len(s.FieldsInv)) 273 } 274 275 fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) 276 if err != nil { 277 return 0, 0, 0, nil, err 278 } 279 280 return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil 281} 282 283func (s *interim) getOrDefineField(fieldName string) int { 284 fieldIDPlus1, exists := s.FieldsMap[fieldName] 285 if !exists { 286 fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) 287 s.FieldsMap[fieldName] = fieldIDPlus1 288 s.FieldsInv = append(s.FieldsInv, fieldName) 289 290 s.Dicts = append(s.Dicts, make(map[string]uint64)) 291 292 n := len(s.DictKeys) 293 if n < cap(s.DictKeys) { 294 s.DictKeys = s.DictKeys[:n+1] 295 s.DictKeys[n] = s.DictKeys[n][:0] 296 } else { 297 s.DictKeys = append(s.DictKeys, []string(nil)) 298 } 299 } 300 301 return int(fieldIDPlus1 - 1) 302} 303 304// fill Dicts and DictKeys from analysis results 305func (s *interim) prepareDicts() { 306 var pidNext int 307 308 var totTFs int 309 var totLocs int 310 311 visitField := func(field index.Field) { 312 fieldID := uint16(s.getOrDefineField(field.Name())) 313 314 dict := s.Dicts[fieldID] 315 dictKeys := s.DictKeys[fieldID] 316 317 tfs := field.AnalyzedTokenFrequencies() 318 for term, tf := range tfs { 319 pidPlus1, exists := dict[term] 320 if !exists { 321 pidNext++ 322 pidPlus1 = uint64(pidNext) 323 324 dict[term] = pidPlus1 325 dictKeys = append(dictKeys, term) 326 327 s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) 328 s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) 329 } 330 331 pid := pidPlus1 - 1 332 333 s.numTermsPerPostingsList[pid] += 1 334 s.numLocsPerPostingsList[pid] += len(tf.Locations) 335 336 totLocs += len(tf.Locations) 337 } 338 339 totTFs += len(tfs) 340 341 s.DictKeys[fieldID] = dictKeys 342 } 343 344 for _, result := range s.results { 345 // walk each composite field 346 result.VisitComposite(func(field index.CompositeField) { 347 visitField(field) 348 }) 349 350 // walk each field 351 result.VisitFields(visitField) 352 } 353 354 numPostingsLists := pidNext 355 356 if cap(s.Postings) >= numPostingsLists { 357 s.Postings = s.Postings[:numPostingsLists] 358 } else { 359 postings := make([]*roaring.Bitmap, numPostingsLists) 360 copy(postings, s.Postings[:cap(s.Postings)]) 361 for i := 0; i < numPostingsLists; i++ { 362 if postings[i] == nil { 363 postings[i] = roaring.New() 364 } 365 } 366 s.Postings = postings 367 } 368 369 if cap(s.FreqNorms) >= numPostingsLists { 370 s.FreqNorms = s.FreqNorms[:numPostingsLists] 371 } else { 372 s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) 373 } 374 375 if cap(s.freqNormsBacking) >= totTFs { 376 s.freqNormsBacking = s.freqNormsBacking[:totTFs] 377 } else { 378 s.freqNormsBacking = make([]interimFreqNorm, totTFs) 379 } 380 381 freqNormsBacking := s.freqNormsBacking 382 for pid, numTerms := range s.numTermsPerPostingsList { 383 s.FreqNorms[pid] = freqNormsBacking[0:0] 384 freqNormsBacking = freqNormsBacking[numTerms:] 385 } 386 387 if cap(s.Locs) >= numPostingsLists { 388 s.Locs = s.Locs[:numPostingsLists] 389 } else { 390 s.Locs = make([][]interimLoc, numPostingsLists) 391 } 392 393 if cap(s.locsBacking) >= totLocs { 394 s.locsBacking = s.locsBacking[:totLocs] 395 } else { 396 s.locsBacking = make([]interimLoc, totLocs) 397 } 398 399 locsBacking := s.locsBacking 400 for pid, numLocs := range s.numLocsPerPostingsList { 401 s.Locs[pid] = locsBacking[0:0] 402 locsBacking = locsBacking[numLocs:] 403 } 404} 405 406func (s *interim) processDocuments() { 407 numFields := len(s.FieldsInv) 408 reuseFieldLens := make([]int, numFields) 409 reuseFieldTFs := make([]index.TokenFrequencies, numFields) 410 411 for docNum, result := range s.results { 412 for i := 0; i < numFields; i++ { // clear these for reuse 413 reuseFieldLens[i] = 0 414 reuseFieldTFs[i] = nil 415 } 416 417 s.processDocument(uint64(docNum), result, 418 reuseFieldLens, reuseFieldTFs) 419 } 420} 421 422func (s *interim) processDocument(docNum uint64, 423 result index.Document, 424 fieldLens []int, fieldTFs []index.TokenFrequencies) { 425 visitField := func(field index.Field) { 426 fieldID := uint16(s.getOrDefineField(field.Name())) 427 fieldLens[fieldID] += field.AnalyzedLength() 428 429 existingFreqs := fieldTFs[fieldID] 430 if existingFreqs != nil { 431 existingFreqs.MergeAll(field.Name(), field.AnalyzedTokenFrequencies()) 432 } else { 433 fieldTFs[fieldID] = field.AnalyzedTokenFrequencies() 434 } 435 } 436 437 // walk each composite field 438 result.VisitComposite(func(field index.CompositeField) { 439 visitField(field) 440 }) 441 442 // walk each field 443 result.VisitFields(visitField) 444 445 // now that it's been rolled up into fieldTFs, walk that 446 for fieldID, tfs := range fieldTFs { 447 dict := s.Dicts[fieldID] 448 norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) 449 450 for term, tf := range tfs { 451 pid := dict[term] - 1 452 bs := s.Postings[pid] 453 bs.Add(uint32(docNum)) 454 455 s.FreqNorms[pid] = append(s.FreqNorms[pid], 456 interimFreqNorm{ 457 freq: uint64(tf.Frequency()), 458 norm: norm, 459 numLocs: len(tf.Locations), 460 }) 461 462 if len(tf.Locations) > 0 { 463 locs := s.Locs[pid] 464 465 for _, loc := range tf.Locations { 466 var locf = uint16(fieldID) 467 if loc.Field != "" { 468 locf = uint16(s.getOrDefineField(loc.Field)) 469 } 470 var arrayposs []uint64 471 if len(loc.ArrayPositions) > 0 { 472 arrayposs = loc.ArrayPositions 473 } 474 locs = append(locs, interimLoc{ 475 fieldID: locf, 476 pos: uint64(loc.Position), 477 start: uint64(loc.Start), 478 end: uint64(loc.End), 479 arrayposs: arrayposs, 480 }) 481 } 482 483 s.Locs[pid] = locs 484 } 485 } 486 } 487} 488 489func (s *interim) writeStoredFields() ( 490 storedIndexOffset uint64, err error) { 491 varBuf := make([]byte, binary.MaxVarintLen64) 492 metaEncode := func(val uint64) (int, error) { 493 wb := binary.PutUvarint(varBuf, val) 494 return s.metaBuf.Write(varBuf[:wb]) 495 } 496 497 data, compressed := s.tmp0[:0], s.tmp1[:0] 498 defer func() { s.tmp0, s.tmp1 = data, compressed }() 499 500 // keyed by docNum 501 docStoredOffsets := make([]uint64, len(s.results)) 502 503 // keyed by fieldID, for the current doc in the loop 504 docStoredFields := map[uint16]interimStoredField{} 505 506 for docNum, result := range s.results { 507 for fieldID := range docStoredFields { // reset for next doc 508 delete(docStoredFields, fieldID) 509 } 510 511 var validationErr error 512 result.VisitFields(func(field index.Field) { 513 fieldID := uint16(s.getOrDefineField(field.Name())) 514 515 if field.Options().IsStored() { 516 isf := docStoredFields[fieldID] 517 isf.vals = append(isf.vals, field.Value()) 518 isf.typs = append(isf.typs, field.EncodedFieldType()) 519 isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) 520 docStoredFields[fieldID] = isf 521 } 522 523 if field.Options().IncludeDocValues() { 524 s.IncludeDocValues[fieldID] = true 525 } 526 527 err := ValidateDocFields(field) 528 if err != nil && validationErr == nil { 529 validationErr = err 530 } 531 }) 532 if validationErr != nil { 533 return 0, validationErr 534 } 535 536 var curr int 537 538 s.metaBuf.Reset() 539 data = data[:0] 540 541 // _id field special case optimizes ExternalID() lookups 542 idFieldVal := docStoredFields[uint16(0)].vals[0] 543 _, err = metaEncode(uint64(len(idFieldVal))) 544 if err != nil { 545 return 0, err 546 } 547 548 // handle non-"_id" fields 549 for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { 550 isf, exists := docStoredFields[uint16(fieldID)] 551 if exists { 552 curr, data, err = persistStoredFieldValues( 553 fieldID, isf.vals, isf.typs, isf.arrayposs, 554 curr, metaEncode, data) 555 if err != nil { 556 return 0, err 557 } 558 } 559 } 560 561 metaBytes := s.metaBuf.Bytes() 562 563 compressed = snappy.Encode(compressed[:cap(compressed)], data) 564 565 docStoredOffsets[docNum] = uint64(s.w.Count()) 566 567 _, err := writeUvarints(s.w, 568 uint64(len(metaBytes)), 569 uint64(len(idFieldVal)+len(compressed))) 570 if err != nil { 571 return 0, err 572 } 573 574 _, err = s.w.Write(metaBytes) 575 if err != nil { 576 return 0, err 577 } 578 579 _, err = s.w.Write(idFieldVal) 580 if err != nil { 581 return 0, err 582 } 583 584 _, err = s.w.Write(compressed) 585 if err != nil { 586 return 0, err 587 } 588 } 589 590 storedIndexOffset = uint64(s.w.Count()) 591 592 for _, docStoredOffset := range docStoredOffsets { 593 err = binary.Write(s.w, binary.BigEndian, docStoredOffset) 594 if err != nil { 595 return 0, err 596 } 597 } 598 599 return storedIndexOffset, nil 600} 601 602func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { 603 dictOffsets = make([]uint64, len(s.FieldsInv)) 604 605 fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) 606 fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) 607 608 buf := s.grabBuf(binary.MaxVarintLen64) 609 610 tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) 611 locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) 612 fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false) 613 614 var docTermMap [][]byte 615 616 if s.builder == nil { 617 s.builder, err = vellum.New(&s.builderBuf, nil) 618 if err != nil { 619 return 0, nil, err 620 } 621 } 622 623 for fieldID, terms := range s.DictKeys { 624 if cap(docTermMap) < len(s.results) { 625 docTermMap = make([][]byte, len(s.results)) 626 } else { 627 docTermMap = docTermMap[0:len(s.results)] 628 for docNum := range docTermMap { // reset the docTermMap 629 docTermMap[docNum] = docTermMap[docNum][:0] 630 } 631 } 632 633 dict := s.Dicts[fieldID] 634 635 for _, term := range terms { // terms are already sorted 636 pid := dict[term] - 1 637 638 postingsBS := s.Postings[pid] 639 640 freqNorms := s.FreqNorms[pid] 641 freqNormOffset := 0 642 643 locs := s.Locs[pid] 644 locOffset := 0 645 646 postingsItr := postingsBS.Iterator() 647 for postingsItr.HasNext() { 648 docNum := uint64(postingsItr.Next()) 649 650 freqNorm := freqNorms[freqNormOffset] 651 652 err = tfEncoder.Add(docNum, 653 encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), 654 uint64(math.Float32bits(freqNorm.norm))) 655 if err != nil { 656 return 0, nil, err 657 } 658 659 if freqNorm.numLocs > 0 { 660 numBytesLocs := 0 661 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 662 numBytesLocs += totalUvarintBytes( 663 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 664 uint64(len(loc.arrayposs)), loc.arrayposs) 665 } 666 667 err = locEncoder.Add(docNum, uint64(numBytesLocs)) 668 if err != nil { 669 return 0, nil, err 670 } 671 672 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 673 err = locEncoder.Add(docNum, 674 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 675 uint64(len(loc.arrayposs))) 676 if err != nil { 677 return 0, nil, err 678 } 679 680 err = locEncoder.Add(docNum, loc.arrayposs...) 681 if err != nil { 682 return 0, nil, err 683 } 684 } 685 686 locOffset += freqNorm.numLocs 687 } 688 689 freqNormOffset++ 690 691 docTermMap[docNum] = append( 692 append(docTermMap[docNum], term...), 693 termSeparator) 694 } 695 696 tfEncoder.Close() 697 locEncoder.Close() 698 699 postingsOffset, err := 700 writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) 701 if err != nil { 702 return 0, nil, err 703 } 704 705 if postingsOffset > uint64(0) { 706 err = s.builder.Insert([]byte(term), postingsOffset) 707 if err != nil { 708 return 0, nil, err 709 } 710 } 711 712 tfEncoder.Reset() 713 locEncoder.Reset() 714 } 715 716 err = s.builder.Close() 717 if err != nil { 718 return 0, nil, err 719 } 720 721 // record where this dictionary starts 722 dictOffsets[fieldID] = uint64(s.w.Count()) 723 724 vellumData := s.builderBuf.Bytes() 725 726 // write out the length of the vellum data 727 n := binary.PutUvarint(buf, uint64(len(vellumData))) 728 _, err = s.w.Write(buf[:n]) 729 if err != nil { 730 return 0, nil, err 731 } 732 733 // write this vellum to disk 734 _, err = s.w.Write(vellumData) 735 if err != nil { 736 return 0, nil, err 737 } 738 739 // reset vellum for reuse 740 s.builderBuf.Reset() 741 742 err = s.builder.Reset(&s.builderBuf) 743 if err != nil { 744 return 0, nil, err 745 } 746 747 // write the field doc values 748 if s.IncludeDocValues[fieldID] { 749 for docNum, docTerms := range docTermMap { 750 if len(docTerms) > 0 { 751 err = fdvEncoder.Add(uint64(docNum), docTerms) 752 if err != nil { 753 return 0, nil, err 754 } 755 } 756 } 757 err = fdvEncoder.Close() 758 if err != nil { 759 return 0, nil, err 760 } 761 762 fdvOffsetsStart[fieldID] = uint64(s.w.Count()) 763 764 _, err = fdvEncoder.Write() 765 if err != nil { 766 return 0, nil, err 767 } 768 769 fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) 770 771 fdvEncoder.Reset() 772 } else { 773 fdvOffsetsStart[fieldID] = fieldNotUninverted 774 fdvOffsetsEnd[fieldID] = fieldNotUninverted 775 } 776 } 777 778 fdvIndexOffset = uint64(s.w.Count()) 779 780 for i := 0; i < len(fdvOffsetsStart); i++ { 781 n := binary.PutUvarint(buf, fdvOffsetsStart[i]) 782 _, err := s.w.Write(buf[:n]) 783 if err != nil { 784 return 0, nil, err 785 } 786 n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) 787 _, err = s.w.Write(buf[:n]) 788 if err != nil { 789 return 0, nil, err 790 } 791 } 792 793 return fdvIndexOffset, dictOffsets, nil 794} 795 796// returns the total # of bytes needed to encode the given uint64's 797// into binary.PutUVarint() encoding 798func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { 799 n = numUvarintBytes(a) 800 n += numUvarintBytes(b) 801 n += numUvarintBytes(c) 802 n += numUvarintBytes(d) 803 n += numUvarintBytes(e) 804 for _, v := range more { 805 n += numUvarintBytes(v) 806 } 807 return n 808} 809 810// returns # of bytes needed to encode x in binary.PutUvarint() encoding 811func numUvarintBytes(x uint64) (n int) { 812 for x >= 0x80 { 813 x >>= 7 814 n++ 815 } 816 return n + 1 817} 818