// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bytes"
	"encoding/binary"
	"math"
	"sort"
	"sync"

	"github.com/RoaringBitmap/roaring"
	"github.com/blevesearch/bleve/analysis"
	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
	"github.com/couchbase/vellum"
	"github.com/golang/snappy"
)

// Tunables used to pre-size the output buffer for a new segment from
// the previous build's observed size; see AnalysisResultsToSegmentBase.
var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0

// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment, by default it does
// nothing.
// This API is experimental and may be removed at any time.
40var ValidateDocFields = func(field document.Field) error { 41 return nil 42} 43 44// AnalysisResultsToSegmentBase produces an in-memory zap-encoded 45// SegmentBase from analysis results 46func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, 47 chunkFactor uint32) (*SegmentBase, uint64, error) { 48 s := interimPool.Get().(*interim) 49 50 var br bytes.Buffer 51 if s.lastNumDocs > 0 { 52 // use previous results to initialize the buf with an estimate 53 // size, but note that the interim instance comes from a 54 // global interimPool, so multiple scorch instances indexing 55 // different docs can lead to low quality estimates 56 estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * 57 NewSegmentBufferNumResultsFactor) 58 estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * 59 NewSegmentBufferAvgBytesPerDocFactor) 60 br.Grow(estimateAvgBytesPerDoc * estimateNumResults) 61 } 62 63 s.results = results 64 s.chunkFactor = chunkFactor 65 s.w = NewCountHashWriter(&br) 66 67 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, 68 err := s.convert() 69 if err != nil { 70 return nil, uint64(0), err 71 } 72 73 sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor, 74 s.FieldsMap, s.FieldsInv, uint64(len(results)), 75 storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) 76 77 if err == nil && s.reset() == nil { 78 s.lastNumDocs = len(results) 79 s.lastOutSize = len(br.Bytes()) 80 interimPool.Put(s) 81 } 82 83 return sb, uint64(len(br.Bytes())), err 84} 85 86var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} 87 88// interim holds temporary working data used while converting from 89// analysis results to a zap-encoded segment 90type interim struct { 91 results []*index.AnalysisResult 92 93 chunkFactor uint32 94 95 w *CountHashWriter 96 97 // FieldsMap adds 1 to field id to avoid zero value issues 98 // name -> field id + 1 99 FieldsMap map[string]uint16 100 101 
// FieldsInv is the inverse of FieldsMap 102 // field id -> name 103 FieldsInv []string 104 105 // Term dictionaries for each field 106 // field id -> term -> postings list id + 1 107 Dicts []map[string]uint64 108 109 // Terms for each field, where terms are sorted ascending 110 // field id -> []term 111 DictKeys [][]string 112 113 // Fields whose IncludeDocValues is true 114 // field id -> bool 115 IncludeDocValues []bool 116 117 // postings id -> bitmap of docNums 118 Postings []*roaring.Bitmap 119 120 // postings id -> freq/norm's, one for each docNum in postings 121 FreqNorms [][]interimFreqNorm 122 freqNormsBacking []interimFreqNorm 123 124 // postings id -> locs, one for each freq 125 Locs [][]interimLoc 126 locsBacking []interimLoc 127 128 numTermsPerPostingsList []int // key is postings list id 129 numLocsPerPostingsList []int // key is postings list id 130 131 builder *vellum.Builder 132 builderBuf bytes.Buffer 133 134 metaBuf bytes.Buffer 135 136 tmp0 []byte 137 tmp1 []byte 138 139 lastNumDocs int 140 lastOutSize int 141} 142 143func (s *interim) reset() (err error) { 144 s.results = nil 145 s.chunkFactor = 0 146 s.w = nil 147 s.FieldsMap = nil 148 s.FieldsInv = nil 149 for i := range s.Dicts { 150 s.Dicts[i] = nil 151 } 152 s.Dicts = s.Dicts[:0] 153 for i := range s.DictKeys { 154 s.DictKeys[i] = s.DictKeys[i][:0] 155 } 156 s.DictKeys = s.DictKeys[:0] 157 for i := range s.IncludeDocValues { 158 s.IncludeDocValues[i] = false 159 } 160 s.IncludeDocValues = s.IncludeDocValues[:0] 161 for _, idn := range s.Postings { 162 idn.Clear() 163 } 164 s.Postings = s.Postings[:0] 165 s.FreqNorms = s.FreqNorms[:0] 166 for i := range s.freqNormsBacking { 167 s.freqNormsBacking[i] = interimFreqNorm{} 168 } 169 s.freqNormsBacking = s.freqNormsBacking[:0] 170 s.Locs = s.Locs[:0] 171 for i := range s.locsBacking { 172 s.locsBacking[i] = interimLoc{} 173 } 174 s.locsBacking = s.locsBacking[:0] 175 s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] 176 
s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] 177 s.builderBuf.Reset() 178 if s.builder != nil { 179 err = s.builder.Reset(&s.builderBuf) 180 } 181 s.metaBuf.Reset() 182 s.tmp0 = s.tmp0[:0] 183 s.tmp1 = s.tmp1[:0] 184 s.lastNumDocs = 0 185 s.lastOutSize = 0 186 187 return err 188} 189 190func (s *interim) grabBuf(size int) []byte { 191 buf := s.tmp0 192 if cap(buf) < size { 193 buf = make([]byte, size) 194 s.tmp0 = buf 195 } 196 return buf[0:size] 197} 198 199type interimStoredField struct { 200 vals [][]byte 201 typs []byte 202 arrayposs [][]uint64 // array positions 203} 204 205type interimFreqNorm struct { 206 freq uint64 207 norm float32 208 numLocs int 209} 210 211type interimLoc struct { 212 fieldID uint16 213 pos uint64 214 start uint64 215 end uint64 216 arrayposs []uint64 217} 218 219func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { 220 s.FieldsMap = map[string]uint16{} 221 222 s.getOrDefineField("_id") // _id field is fieldID 0 223 224 for _, result := range s.results { 225 for _, field := range result.Document.CompositeFields { 226 s.getOrDefineField(field.Name()) 227 } 228 for _, field := range result.Document.Fields { 229 s.getOrDefineField(field.Name()) 230 } 231 } 232 233 sort.Strings(s.FieldsInv[1:]) // keep _id as first field 234 235 for fieldID, fieldName := range s.FieldsInv { 236 s.FieldsMap[fieldName] = uint16(fieldID + 1) 237 } 238 239 if cap(s.IncludeDocValues) >= len(s.FieldsInv) { 240 s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] 241 } else { 242 s.IncludeDocValues = make([]bool, len(s.FieldsInv)) 243 } 244 245 s.prepareDicts() 246 247 for _, dict := range s.DictKeys { 248 sort.Strings(dict) 249 } 250 251 s.processDocuments() 252 253 storedIndexOffset, err := s.writeStoredFields() 254 if err != nil { 255 return 0, 0, 0, nil, err 256 } 257 258 var fdvIndexOffset uint64 259 var dictOffsets []uint64 260 261 if len(s.results) > 0 { 262 fdvIndexOffset, dictOffsets, err = s.writeDicts() 263 if err != 
nil { 264 return 0, 0, 0, nil, err 265 } 266 } else { 267 dictOffsets = make([]uint64, len(s.FieldsInv)) 268 } 269 270 fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) 271 if err != nil { 272 return 0, 0, 0, nil, err 273 } 274 275 return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil 276} 277 278func (s *interim) getOrDefineField(fieldName string) int { 279 fieldIDPlus1, exists := s.FieldsMap[fieldName] 280 if !exists { 281 fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) 282 s.FieldsMap[fieldName] = fieldIDPlus1 283 s.FieldsInv = append(s.FieldsInv, fieldName) 284 285 s.Dicts = append(s.Dicts, make(map[string]uint64)) 286 287 n := len(s.DictKeys) 288 if n < cap(s.DictKeys) { 289 s.DictKeys = s.DictKeys[:n+1] 290 s.DictKeys[n] = s.DictKeys[n][:0] 291 } else { 292 s.DictKeys = append(s.DictKeys, []string(nil)) 293 } 294 } 295 296 return int(fieldIDPlus1 - 1) 297} 298 299// fill Dicts and DictKeys from analysis results 300func (s *interim) prepareDicts() { 301 var pidNext int 302 303 var totTFs int 304 var totLocs int 305 306 visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) { 307 dict := s.Dicts[fieldID] 308 dictKeys := s.DictKeys[fieldID] 309 310 for term, tf := range tfs { 311 pidPlus1, exists := dict[term] 312 if !exists { 313 pidNext++ 314 pidPlus1 = uint64(pidNext) 315 316 dict[term] = pidPlus1 317 dictKeys = append(dictKeys, term) 318 319 s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) 320 s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) 321 } 322 323 pid := pidPlus1 - 1 324 325 s.numTermsPerPostingsList[pid] += 1 326 s.numLocsPerPostingsList[pid] += len(tf.Locations) 327 328 totLocs += len(tf.Locations) 329 } 330 331 totTFs += len(tfs) 332 333 s.DictKeys[fieldID] = dictKeys 334 } 335 336 for _, result := range s.results { 337 // walk each composite field 338 for _, field := range result.Document.CompositeFields { 339 fieldID := uint16(s.getOrDefineField(field.Name())) 340 
_, tf := field.Analyze() 341 visitField(fieldID, tf) 342 } 343 344 // walk each field 345 for i, field := range result.Document.Fields { 346 fieldID := uint16(s.getOrDefineField(field.Name())) 347 tf := result.Analyzed[i] 348 visitField(fieldID, tf) 349 } 350 } 351 352 numPostingsLists := pidNext 353 354 if cap(s.Postings) >= numPostingsLists { 355 s.Postings = s.Postings[:numPostingsLists] 356 } else { 357 postings := make([]*roaring.Bitmap, numPostingsLists) 358 copy(postings, s.Postings[:cap(s.Postings)]) 359 for i := 0; i < numPostingsLists; i++ { 360 if postings[i] == nil { 361 postings[i] = roaring.New() 362 } 363 } 364 s.Postings = postings 365 } 366 367 if cap(s.FreqNorms) >= numPostingsLists { 368 s.FreqNorms = s.FreqNorms[:numPostingsLists] 369 } else { 370 s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) 371 } 372 373 if cap(s.freqNormsBacking) >= totTFs { 374 s.freqNormsBacking = s.freqNormsBacking[:totTFs] 375 } else { 376 s.freqNormsBacking = make([]interimFreqNorm, totTFs) 377 } 378 379 freqNormsBacking := s.freqNormsBacking 380 for pid, numTerms := range s.numTermsPerPostingsList { 381 s.FreqNorms[pid] = freqNormsBacking[0:0] 382 freqNormsBacking = freqNormsBacking[numTerms:] 383 } 384 385 if cap(s.Locs) >= numPostingsLists { 386 s.Locs = s.Locs[:numPostingsLists] 387 } else { 388 s.Locs = make([][]interimLoc, numPostingsLists) 389 } 390 391 if cap(s.locsBacking) >= totLocs { 392 s.locsBacking = s.locsBacking[:totLocs] 393 } else { 394 s.locsBacking = make([]interimLoc, totLocs) 395 } 396 397 locsBacking := s.locsBacking 398 for pid, numLocs := range s.numLocsPerPostingsList { 399 s.Locs[pid] = locsBacking[0:0] 400 locsBacking = locsBacking[numLocs:] 401 } 402} 403 404func (s *interim) processDocuments() { 405 numFields := len(s.FieldsInv) 406 reuseFieldLens := make([]int, numFields) 407 reuseFieldTFs := make([]analysis.TokenFrequencies, numFields) 408 409 for docNum, result := range s.results { 410 for i := 0; i < numFields; i++ { // clear 
these for reuse 411 reuseFieldLens[i] = 0 412 reuseFieldTFs[i] = nil 413 } 414 415 s.processDocument(uint64(docNum), result, 416 reuseFieldLens, reuseFieldTFs) 417 } 418} 419 420func (s *interim) processDocument(docNum uint64, 421 result *index.AnalysisResult, 422 fieldLens []int, fieldTFs []analysis.TokenFrequencies) { 423 visitField := func(fieldID uint16, fieldName string, 424 ln int, tf analysis.TokenFrequencies) { 425 fieldLens[fieldID] += ln 426 427 existingFreqs := fieldTFs[fieldID] 428 if existingFreqs != nil { 429 existingFreqs.MergeAll(fieldName, tf) 430 } else { 431 fieldTFs[fieldID] = tf 432 } 433 } 434 435 // walk each composite field 436 for _, field := range result.Document.CompositeFields { 437 fieldID := uint16(s.getOrDefineField(field.Name())) 438 ln, tf := field.Analyze() 439 visitField(fieldID, field.Name(), ln, tf) 440 } 441 442 // walk each field 443 for i, field := range result.Document.Fields { 444 fieldID := uint16(s.getOrDefineField(field.Name())) 445 ln := result.Length[i] 446 tf := result.Analyzed[i] 447 visitField(fieldID, field.Name(), ln, tf) 448 } 449 450 // now that it's been rolled up into fieldTFs, walk that 451 for fieldID, tfs := range fieldTFs { 452 dict := s.Dicts[fieldID] 453 norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) 454 455 for term, tf := range tfs { 456 pid := dict[term] - 1 457 bs := s.Postings[pid] 458 bs.Add(uint32(docNum)) 459 460 s.FreqNorms[pid] = append(s.FreqNorms[pid], 461 interimFreqNorm{ 462 freq: uint64(tf.Frequency()), 463 norm: norm, 464 numLocs: len(tf.Locations), 465 }) 466 467 if len(tf.Locations) > 0 { 468 locs := s.Locs[pid] 469 470 for _, loc := range tf.Locations { 471 var locf = uint16(fieldID) 472 if loc.Field != "" { 473 locf = uint16(s.getOrDefineField(loc.Field)) 474 } 475 var arrayposs []uint64 476 if len(loc.ArrayPositions) > 0 { 477 arrayposs = loc.ArrayPositions 478 } 479 locs = append(locs, interimLoc{ 480 fieldID: locf, 481 pos: uint64(loc.Position), 482 start: 
uint64(loc.Start), 483 end: uint64(loc.End), 484 arrayposs: arrayposs, 485 }) 486 } 487 488 s.Locs[pid] = locs 489 } 490 } 491 } 492} 493 494func (s *interim) writeStoredFields() ( 495 storedIndexOffset uint64, err error) { 496 varBuf := make([]byte, binary.MaxVarintLen64) 497 metaEncode := func(val uint64) (int, error) { 498 wb := binary.PutUvarint(varBuf, val) 499 return s.metaBuf.Write(varBuf[:wb]) 500 } 501 502 data, compressed := s.tmp0[:0], s.tmp1[:0] 503 defer func() { s.tmp0, s.tmp1 = data, compressed }() 504 505 // keyed by docNum 506 docStoredOffsets := make([]uint64, len(s.results)) 507 508 // keyed by fieldID, for the current doc in the loop 509 docStoredFields := map[uint16]interimStoredField{} 510 511 for docNum, result := range s.results { 512 for fieldID := range docStoredFields { // reset for next doc 513 delete(docStoredFields, fieldID) 514 } 515 516 for _, field := range result.Document.Fields { 517 fieldID := uint16(s.getOrDefineField(field.Name())) 518 519 opts := field.Options() 520 521 if opts.IsStored() { 522 isf := docStoredFields[fieldID] 523 isf.vals = append(isf.vals, field.Value()) 524 isf.typs = append(isf.typs, encodeFieldType(field)) 525 isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) 526 docStoredFields[fieldID] = isf 527 } 528 529 if opts.IncludeDocValues() { 530 s.IncludeDocValues[fieldID] = true 531 } 532 533 err := ValidateDocFields(field) 534 if err != nil { 535 return 0, err 536 } 537 } 538 539 var curr int 540 541 s.metaBuf.Reset() 542 data = data[:0] 543 544 // _id field special case optimizes ExternalID() lookups 545 idFieldVal := docStoredFields[uint16(0)].vals[0] 546 _, err = metaEncode(uint64(len(idFieldVal))) 547 if err != nil { 548 return 0, err 549 } 550 551 // handle non-"_id" fields 552 for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { 553 isf, exists := docStoredFields[uint16(fieldID)] 554 if exists { 555 curr, data, err = persistStoredFieldValues( 556 fieldID, isf.vals, isf.typs, 
isf.arrayposs, 557 curr, metaEncode, data) 558 if err != nil { 559 return 0, err 560 } 561 } 562 } 563 564 metaBytes := s.metaBuf.Bytes() 565 566 compressed = snappy.Encode(compressed[:cap(compressed)], data) 567 568 docStoredOffsets[docNum] = uint64(s.w.Count()) 569 570 _, err := writeUvarints(s.w, 571 uint64(len(metaBytes)), 572 uint64(len(idFieldVal)+len(compressed))) 573 if err != nil { 574 return 0, err 575 } 576 577 _, err = s.w.Write(metaBytes) 578 if err != nil { 579 return 0, err 580 } 581 582 _, err = s.w.Write(idFieldVal) 583 if err != nil { 584 return 0, err 585 } 586 587 _, err = s.w.Write(compressed) 588 if err != nil { 589 return 0, err 590 } 591 } 592 593 storedIndexOffset = uint64(s.w.Count()) 594 595 for _, docStoredOffset := range docStoredOffsets { 596 err = binary.Write(s.w, binary.BigEndian, docStoredOffset) 597 if err != nil { 598 return 0, err 599 } 600 } 601 602 return storedIndexOffset, nil 603} 604 605func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { 606 dictOffsets = make([]uint64, len(s.FieldsInv)) 607 608 fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) 609 fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) 610 611 buf := s.grabBuf(binary.MaxVarintLen64) 612 613 tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) 614 locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) 615 fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false) 616 617 var docTermMap [][]byte 618 619 if s.builder == nil { 620 s.builder, err = vellum.New(&s.builderBuf, nil) 621 if err != nil { 622 return 0, nil, err 623 } 624 } 625 626 for fieldID, terms := range s.DictKeys { 627 if cap(docTermMap) < len(s.results) { 628 docTermMap = make([][]byte, len(s.results)) 629 } else { 630 docTermMap = docTermMap[0:len(s.results)] 631 for docNum := range docTermMap { // reset the docTermMap 632 docTermMap[docNum] = docTermMap[docNum][:0] 
633 } 634 } 635 636 dict := s.Dicts[fieldID] 637 638 for _, term := range terms { // terms are already sorted 639 pid := dict[term] - 1 640 641 postingsBS := s.Postings[pid] 642 643 freqNorms := s.FreqNorms[pid] 644 freqNormOffset := 0 645 646 locs := s.Locs[pid] 647 locOffset := 0 648 649 postingsItr := postingsBS.Iterator() 650 for postingsItr.HasNext() { 651 docNum := uint64(postingsItr.Next()) 652 653 freqNorm := freqNorms[freqNormOffset] 654 655 err = tfEncoder.Add(docNum, 656 encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), 657 uint64(math.Float32bits(freqNorm.norm))) 658 if err != nil { 659 return 0, nil, err 660 } 661 662 if freqNorm.numLocs > 0 { 663 numBytesLocs := 0 664 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 665 numBytesLocs += totalUvarintBytes( 666 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 667 uint64(len(loc.arrayposs)), loc.arrayposs) 668 } 669 670 err = locEncoder.Add(docNum, uint64(numBytesLocs)) 671 if err != nil { 672 return 0, nil, err 673 } 674 675 for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { 676 err = locEncoder.Add(docNum, 677 uint64(loc.fieldID), loc.pos, loc.start, loc.end, 678 uint64(len(loc.arrayposs))) 679 if err != nil { 680 return 0, nil, err 681 } 682 683 err = locEncoder.Add(docNum, loc.arrayposs...) 
684 if err != nil { 685 return 0, nil, err 686 } 687 } 688 689 locOffset += freqNorm.numLocs 690 } 691 692 freqNormOffset++ 693 694 docTermMap[docNum] = append( 695 append(docTermMap[docNum], term...), 696 termSeparator) 697 } 698 699 tfEncoder.Close() 700 locEncoder.Close() 701 702 postingsOffset, err := 703 writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) 704 if err != nil { 705 return 0, nil, err 706 } 707 708 if postingsOffset > uint64(0) { 709 err = s.builder.Insert([]byte(term), postingsOffset) 710 if err != nil { 711 return 0, nil, err 712 } 713 } 714 715 tfEncoder.Reset() 716 locEncoder.Reset() 717 } 718 719 err = s.builder.Close() 720 if err != nil { 721 return 0, nil, err 722 } 723 724 // record where this dictionary starts 725 dictOffsets[fieldID] = uint64(s.w.Count()) 726 727 vellumData := s.builderBuf.Bytes() 728 729 // write out the length of the vellum data 730 n := binary.PutUvarint(buf, uint64(len(vellumData))) 731 _, err = s.w.Write(buf[:n]) 732 if err != nil { 733 return 0, nil, err 734 } 735 736 // write this vellum to disk 737 _, err = s.w.Write(vellumData) 738 if err != nil { 739 return 0, nil, err 740 } 741 742 // reset vellum for reuse 743 s.builderBuf.Reset() 744 745 err = s.builder.Reset(&s.builderBuf) 746 if err != nil { 747 return 0, nil, err 748 } 749 750 // write the field doc values 751 if s.IncludeDocValues[fieldID] { 752 for docNum, docTerms := range docTermMap { 753 if len(docTerms) > 0 { 754 err = fdvEncoder.Add(uint64(docNum), docTerms) 755 if err != nil { 756 return 0, nil, err 757 } 758 } 759 } 760 err = fdvEncoder.Close() 761 if err != nil { 762 return 0, nil, err 763 } 764 765 fdvOffsetsStart[fieldID] = uint64(s.w.Count()) 766 767 _, err = fdvEncoder.Write() 768 if err != nil { 769 return 0, nil, err 770 } 771 772 fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) 773 774 fdvEncoder.Reset() 775 } else { 776 fdvOffsetsStart[fieldID] = fieldNotUninverted 777 fdvOffsetsEnd[fieldID] = fieldNotUninverted 778 } 779 } 780 
781 fdvIndexOffset = uint64(s.w.Count()) 782 783 for i := 0; i < len(fdvOffsetsStart); i++ { 784 n := binary.PutUvarint(buf, fdvOffsetsStart[i]) 785 _, err := s.w.Write(buf[:n]) 786 if err != nil { 787 return 0, nil, err 788 } 789 n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) 790 _, err = s.w.Write(buf[:n]) 791 if err != nil { 792 return 0, nil, err 793 } 794 } 795 796 return fdvIndexOffset, dictOffsets, nil 797} 798 799func encodeFieldType(f document.Field) byte { 800 fieldType := byte('x') 801 switch f.(type) { 802 case *document.TextField: 803 fieldType = 't' 804 case *document.NumericField: 805 fieldType = 'n' 806 case *document.DateTimeField: 807 fieldType = 'd' 808 case *document.BooleanField: 809 fieldType = 'b' 810 case *document.GeoPointField: 811 fieldType = 'g' 812 case *document.CompositeField: 813 fieldType = 'c' 814 } 815 return fieldType 816} 817 818// returns the total # of bytes needed to encode the given uint64's 819// into binary.PutUVarint() encoding 820func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { 821 n = numUvarintBytes(a) 822 n += numUvarintBytes(b) 823 n += numUvarintBytes(c) 824 n += numUvarintBytes(d) 825 n += numUvarintBytes(e) 826 for _, v := range more { 827 n += numUvarintBytes(v) 828 } 829 return n 830} 831 832// returns # of bytes needed to encode x in binary.PutUvarint() encoding 833func numUvarintBytes(x uint64) (n int) { 834 for x >= 0x80 { 835 x >>= 7 836 n++ 837 } 838 return n + 1 839} 840