// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package indexheader

import (
	"bufio"
	"context"
	"encoding/binary"
	"hash"
	"hash/crc32"
	"io"
	"io/ioutil"
	"math"
	"os"
	"path/filepath"
	"sort"
	"time"
	"unsafe"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
	"github.com/oklog/ulid"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/tsdb/encoding"
	"github.com/prometheus/prometheus/tsdb/fileutil"
	"github.com/prometheus/prometheus/tsdb/index"
	"github.com/thanos-io/thanos/pkg/block"
	"github.com/thanos-io/thanos/pkg/objstore"
	"github.com/thanos-io/thanos/pkg/runutil"
)

const (
	// BinaryFormatV1 represents the first version of the index-header file format.
	BinaryFormatV1 = 1

	indexTOCLen  = 6*8 + crc32.Size
	binaryTOCLen = 2*8 + crc32.Size
	// headerLen represents the number of bytes reserved at the start of the index-header file for the header.
	headerLen = 4 + 1 + 1 + 8

	// MagicIndex is the 4-byte magic number at the head of an index-header file.
	MagicIndex = 0xBAAAD792

	symbolFactor = 32

	postingLengthFieldSize = 4
)

// The table gets initialized with sync.Once but may still cause a race
// with any other use of the crc32 package anywhere. Thus we initialize it
// before.
var castagnoliTable *crc32.Table

func init() {
	castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
}

// newCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the
// polynomial may be easily changed in one location at a later time, if necessary.
func newCRC32() hash.Hash32 {
	return crc32.New(castagnoliTable)
}

// BinaryTOC is the table of contents of an index-header file.
type BinaryTOC struct {
	// Symbols holds the start offset of the same symbols section as in the index related to this index-header.
	Symbols uint64
	// PostingsOffsetTable holds the start offset of the same postings offset table section as in the index related to this index-header.
	PostingsOffsetTable uint64
}
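
// For orientation, the index-header file that WriteBinary below produces is laid
// out roughly as follows (a summary derived from the writer and reader code in
// this file rather than a formal specification):
//
//	magic                   4 bytes  (MagicIndex, big endian)
//	index-header version    1 byte   (BinaryFormatV1)
//	index version           1 byte   (version of the block's TSDB index)
//	postings table offset   8 bytes  (offset of the postings offset table in the original index)
//	symbols                 copied verbatim from the original index
//	postings offset table   copied verbatim from the original index (through the end of that index)
//	TOC                     symbols offset (8 bytes), postings offset table offset (8 bytes), CRC32 (4 bytes)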

// WriteBinary builds an index-header file from the pieces of the index stored in object storage.
func WriteBinary(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID, fn string) (err error) {
	ir, indexVersion, err := newChunkedIndexReader(ctx, bkt, id)
	if err != nil {
		return errors.Wrap(err, "new index reader")
	}

	// Buffer for copying and for the encoding buffers.
	// This also controls the size of the file writer buffer.
	buf := make([]byte, 32*1024)
	bw, err := newBinaryWriter(fn, buf)
	if err != nil {
		return errors.Wrap(err, "new binary index header writer")
	}
	defer runutil.CloseWithErrCapture(&err, bw, "close binary writer for %s", fn)

	if err := bw.AddIndexMeta(indexVersion, ir.toc.PostingsTable); err != nil {
		return errors.Wrap(err, "add index meta")
	}

	if err := ir.CopySymbols(bw.SymbolsWriter(), buf); err != nil {
		return err
	}

	if err := bw.f.Flush(); err != nil {
		return errors.Wrap(err, "flush")
	}

	if err := ir.CopyPostingsOffsets(bw.PostingOffsetsWriter(), buf); err != nil {
		return err
	}

	if err := bw.f.Flush(); err != nil {
		return errors.Wrap(err, "flush")
	}

	if err := bw.WriteTOC(); err != nil {
		return errors.Wrap(err, "write index header TOC")
	}
	return nil
}

type chunkedIndexReader struct {
	ctx  context.Context
	path string
	size uint64
	bkt  objstore.BucketReader
	toc  *index.TOC
}

func newChunkedIndexReader(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID) (*chunkedIndexReader, int, error) {
	indexFilepath := filepath.Join(id.String(), block.IndexFilename)
	size, err := bkt.ObjectSize(ctx, indexFilepath)
	if err != nil {
		return nil, 0, errors.Wrapf(err, "get object size of %s", indexFilepath)
	}

	rc, err := bkt.GetRange(ctx, indexFilepath, 0, index.HeaderLen)
	if err != nil {
		return nil, 0, errors.Wrapf(err, "get TOC from object storage of %s", indexFilepath)
	}

	b, err := ioutil.ReadAll(rc)
	if err != nil {
		runutil.CloseWithErrCapture(&err, rc, "close reader")
		return nil, 0, errors.Wrapf(err, "get header from object storage of %s", indexFilepath)
	}

	if err := rc.Close(); err != nil {
		return nil, 0, errors.Wrap(err, "close reader")
	}

	if m := binary.BigEndian.Uint32(b[0:4]); m != index.MagicIndex {
		return nil, 0, errors.Errorf("invalid magic number %x for %s", m, indexFilepath)
	}

	version := int(b[4:5][0])

	if version != index.FormatV1 && version != index.FormatV2 {
		return nil, 0, errors.Errorf("not supported index file version %d of %s", version, indexFilepath)
	}

	ir := &chunkedIndexReader{
		ctx:  ctx,
		path: indexFilepath,
		size: size,
		bkt:  bkt,
	}

	toc, err := ir.readTOC()
	if err != nil {
		return nil, 0, err
	}
	ir.toc = toc

	return ir, version, nil
}

func (r *chunkedIndexReader) readTOC() (*index.TOC, error) {
	rc, err := r.bkt.GetRange(r.ctx, r.path, int64(r.size-indexTOCLen-crc32.Size), indexTOCLen+crc32.Size)
	if err != nil {
		return nil, errors.Wrapf(err, "get TOC from object storage of %s", r.path)
	}

	tocBytes, err := ioutil.ReadAll(rc)
	if err != nil {
		runutil.CloseWithErrCapture(&err, rc, "close toc reader")
		return nil, errors.Wrapf(err, "get TOC from object storage of %s", r.path)
	}

	if err := rc.Close(); err != nil {
		return nil, errors.Wrap(err, "close toc reader")
	}

	toc, err := index.NewTOCFromByteSlice(realByteSlice(tocBytes))
	if err != nil {
		return nil, errors.Wrap(err, "new TOC")
	}
	return toc, nil
}

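// CopySymbols streams the symbols section of the original index (the bytes between
// the Symbols and Series TOC offsets) from object storage into w through a single
// ranged GET, reusing buf for the copy, so the full index never has to be downloaded.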
func (r *chunkedIndexReader) CopySymbols(w io.Writer, buf []byte) (err error) {
	rc, err := r.bkt.GetRange(r.ctx, r.path, int64(r.toc.Symbols), int64(r.toc.Series-r.toc.Symbols))
	if err != nil {
		return errors.Wrapf(err, "get symbols from object storage of %s", r.path)
	}
	defer runutil.CloseWithErrCapture(&err, rc, "close symbol reader")

	if _, err := io.CopyBuffer(w, rc, buf); err != nil {
		return errors.Wrap(err, "copy symbols")
	}

	return nil
}

func (r *chunkedIndexReader) CopyPostingsOffsets(w io.Writer, buf []byte) (err error) {
	rc, err := r.bkt.GetRange(r.ctx, r.path, int64(r.toc.PostingsTable), int64(r.size-r.toc.PostingsTable))
	if err != nil {
		return errors.Wrapf(err, "get posting offset table from object storage of %s", r.path)
	}
	defer runutil.CloseWithErrCapture(&err, rc, "close posting offsets reader")

	if _, err := io.CopyBuffer(w, rc, buf); err != nil {
		return errors.Wrap(err, "copy posting offsets")
	}

	return nil
}

// TODO(bwplotka): Add padding for efficient read.
type binaryWriter struct {
	f *FileWriter

	toc BinaryTOC

	// Reusable memory.
	buf encoding.Encbuf

	crc32 hash.Hash
}

func newBinaryWriter(fn string, buf []byte) (w *binaryWriter, err error) {
	dir := filepath.Dir(fn)

	df, err := fileutil.OpenDir(dir)
	if os.IsNotExist(err) {
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			return nil, err
		}
		df, err = fileutil.OpenDir(dir)
	}
	if err != nil {
		return nil, err
	}

	defer runutil.CloseWithErrCapture(&err, df, "dir close")

	if err := os.RemoveAll(fn); err != nil {
		return nil, errors.Wrap(err, "remove any existing index at path")
	}

	// Use a file writer with a buffer no larger than the reusable one.
	f, err := NewFileWriter(fn, len(buf))
	if err != nil {
		return nil, err
	}
	if err := df.Sync(); err != nil {
		return nil, errors.Wrap(err, "sync dir")
	}

	w = &binaryWriter{
		f: f,

		// Reusable memory.
		buf:   encoding.Encbuf{B: buf},
		crc32: newCRC32(),
	}

	w.buf.Reset()
	w.buf.PutBE32(MagicIndex)
	w.buf.PutByte(BinaryFormatV1)

	return w, w.f.Write(w.buf.Get())
}

type FileWriter struct {
	f    *os.File
	fbuf *bufio.Writer
	pos  uint64
	name string
}

// TODO(bwplotka): Added size to method, upstream this.
func NewFileWriter(name string, size int) (*FileWriter, error) {
	f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0666)
	if err != nil {
		return nil, err
	}
	return &FileWriter{
		f:    f,
		fbuf: bufio.NewWriterSize(f, size),
		pos:  0,
		name: name,
	}, nil
}

func (fw *FileWriter) Pos() uint64 {
	return fw.pos
}

func (fw *FileWriter) Write(bufs ...[]byte) error {
	for _, b := range bufs {
		n, err := fw.fbuf.Write(b)
		fw.pos += uint64(n)
		if err != nil {
			return err
		}
		// For now the index file must not grow beyond 64GiB. Some of the fixed-sized
		// offset references in v1 are only 4 bytes large.
		// Once we move to compressed/varint representations in those areas, this limitation
		// can be lifted.
		if fw.pos > 16*math.MaxUint32 {
			return errors.Errorf("%q exceeding max size of 64GiB", fw.name)
		}
	}
	return nil
}

func (fw *FileWriter) Flush() error {
	return fw.fbuf.Flush()
}

func (fw *FileWriter) WriteAt(buf []byte, pos uint64) error {
	if err := fw.Flush(); err != nil {
		return err
	}
	_, err := fw.f.WriteAt(buf, int64(pos))
	return err
}

// AddPadding adds zero byte padding until the file size is a multiple of size.
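// For example (illustrative): with the position at 10 and size 8, six zero bytes
// are written so that the position advances to 16, the next multiple of 8.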
func (fw *FileWriter) AddPadding(size int) error {
	p := fw.pos % uint64(size)
	if p == 0 {
		return nil
	}
	p = uint64(size) - p

	if err := fw.Write(make([]byte, p)); err != nil {
		return errors.Wrap(err, "add padding")
	}
	return nil
}

func (fw *FileWriter) Close() error {
	if err := fw.Flush(); err != nil {
		return err
	}
	if err := fw.f.Sync(); err != nil {
		return err
	}
	return fw.f.Close()
}

func (fw *FileWriter) Remove() error {
	return os.Remove(fw.name)
}

func (w *binaryWriter) AddIndexMeta(indexVersion int, indexPostingOffsetTable uint64) error {
	w.buf.Reset()
	w.buf.PutByte(byte(indexVersion))
	w.buf.PutBE64(indexPostingOffsetTable)
	return w.f.Write(w.buf.Get())
}

func (w *binaryWriter) SymbolsWriter() io.Writer {
	w.toc.Symbols = w.f.Pos()
	return w
}

func (w *binaryWriter) PostingOffsetsWriter() io.Writer {
	w.toc.PostingsOffsetTable = w.f.Pos()
	return w
}

func (w *binaryWriter) WriteTOC() error {
	w.buf.Reset()

	w.buf.PutBE64(w.toc.Symbols)
	w.buf.PutBE64(w.toc.PostingsOffsetTable)

	w.buf.PutHash(w.crc32)

	return w.f.Write(w.buf.Get())
}

func (w *binaryWriter) Write(p []byte) (int, error) {
	n := w.f.Pos()
	err := w.f.Write(p)
	return int(w.f.Pos() - n), err
}

func (w *binaryWriter) Close() error {
	return w.f.Close()
}

type postingValueOffsets struct {
	offsets       []postingOffset
	lastValOffset int64
}

type postingOffset struct {
	// Label value.
	value string
	// Offset of this entry in the posting offset table of the index-header file.
	tableOff int
}

type BinaryReader struct {
	b   index.ByteSlice
	toc *BinaryTOC

	// Closer that releases the underlying resources of the byte slice.
	c io.Closer

	// Map of label name to a list of some label values' positions in the offset table.
	// The first and last values for each name are always present.
	postings map[string]*postingValueOffsets
	// For the v1 format, labelname -> labelvalue -> offset.
	postingsV1 map[string]map[string]index.Range

	symbols     *index.Symbols
	nameSymbols map[uint32]string // Cache of the label name symbol lookups,
	// as there are not many and they are half of all lookups.

	dec *index.Decoder

	version             int
	indexVersion        int
	indexLastPostingEnd int64
}

// NewBinaryReader loads the index-header for the given block from disk, or builds a new one if it is not present.
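// It first tries to mmap an existing file at dir/<block id>/<index-header file name>;
// if that fails for any reason, the header is rebuilt from the block's index in the
// bucket and then read from disk again.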
func NewBinaryReader(ctx context.Context, logger log.Logger, bkt objstore.BucketReader, dir string, id ulid.ULID) (*BinaryReader, error) {
	binfn := filepath.Join(dir, id.String(), block.IndexHeaderFilename)
	br, err := newFileBinaryReader(binfn)
	if err == nil {
		return br, nil
	}

	level.Debug(logger).Log("msg", "failed to read index-header from disk; recreating", "path", binfn, "err", err)

	start := time.Now()
	if err := WriteBinary(ctx, bkt, id, binfn); err != nil {
		return nil, errors.Wrap(err, "write index header")
	}

	level.Debug(logger).Log("msg", "built index-header file", "path", binfn, "elapsed", time.Since(start))

	return newFileBinaryReader(binfn)
}

func newFileBinaryReader(path string) (bw *BinaryReader, err error) {
	f, err := fileutil.OpenMmapFile(path)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			runutil.CloseWithErrCapture(&err, f, "index header close")
		}
	}()

	r := &BinaryReader{
		b:        realByteSlice(f.Bytes()),
		c:        f,
		postings: map[string]*postingValueOffsets{},
	}

	// Verify header.
	if r.b.Len() < headerLen {
		return nil, errors.Wrap(encoding.ErrInvalidSize, "index header's header")
	}
	if m := binary.BigEndian.Uint32(r.b.Range(0, 4)); m != MagicIndex {
		return nil, errors.Errorf("invalid magic number %x", m)
	}
	r.version = int(r.b.Range(4, 5)[0])
	r.indexVersion = int(r.b.Range(5, 6)[0])

	r.indexLastPostingEnd = int64(binary.BigEndian.Uint64(r.b.Range(6, headerLen)))

	if r.version != BinaryFormatV1 {
		return nil, errors.Errorf("unknown index header file version %d", r.version)
	}

	r.toc, err = newBinaryTOCFromByteSlice(r.b)
	if err != nil {
		return nil, errors.Wrap(err, "read index header TOC")
	}

	r.symbols, err = index.NewSymbols(r.b, r.indexVersion, int(r.toc.Symbols))
	if err != nil {
		return nil, errors.Wrap(err, "read symbols")
	}

	var lastKey []string
	if r.indexVersion == index.FormatV1 {
		// Earlier V1 formats don't have a sorted postings offset table, so
		// load the whole offset table into memory.
		r.postingsV1 = map[string]map[string]index.Range{}

		var prevRng index.Range
		if err := index.ReadOffsetTable(r.b, r.toc.PostingsOffsetTable, func(key []string, off uint64, _ int) error {
			if len(key) != 2 {
				return errors.Errorf("unexpected key length for posting table %d", len(key))
			}

			if lastKey != nil {
				prevRng.End = int64(off - crc32.Size)
				r.postingsV1[lastKey[0]][lastKey[1]] = prevRng
			}

			if _, ok := r.postingsV1[key[0]]; !ok {
				r.postingsV1[key[0]] = map[string]index.Range{}
				r.postings[key[0]] = nil // Used to get a list of labelnames in places.
			}

			lastKey = key
			prevRng = index.Range{Start: int64(off + postingLengthFieldSize)}
			return nil
		}); err != nil {
			return nil, errors.Wrap(err, "read postings table")
		}
		if lastKey != nil {
			prevRng.End = r.indexLastPostingEnd - crc32.Size
			r.postingsV1[lastKey[0]][lastKey[1]] = prevRng
		}
	} else {
		lastTableOff := 0
		valueCount := 0

		// For the postings offset table we keep every label name but only every nth
		// label value (plus the first and last one), to save memory.
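		// With symbolFactor == 32 this keeps roughly one in every 32 entries per label
		// name in memory; postingsOffset and LabelValues below scan the table forward
		// from the nearest sampled entry to locate exact values.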
		if err := index.ReadOffsetTable(r.b, r.toc.PostingsOffsetTable, func(key []string, off uint64, tableOff int) error {
			if len(key) != 2 {
				return errors.Errorf("unexpected key length for posting table %d", len(key))
			}

			if _, ok := r.postings[key[0]]; !ok {
				// Next label name.
				r.postings[key[0]] = &postingValueOffsets{}
				if lastKey != nil {
					if valueCount%symbolFactor != 0 {
						// Always include the last value for each label name.
						r.postings[lastKey[0]].offsets = append(r.postings[lastKey[0]].offsets, postingOffset{value: lastKey[1], tableOff: lastTableOff})
					}
					r.postings[lastKey[0]].lastValOffset = int64(off - crc32.Size)
					lastKey = nil
				}
				valueCount = 0
			}

			lastKey = key
			if valueCount%symbolFactor == 0 {
				r.postings[key[0]].offsets = append(r.postings[key[0]].offsets, postingOffset{value: key[1], tableOff: tableOff})
				valueCount++
				return nil
			}

			lastTableOff = tableOff
			valueCount++
			return nil
		}); err != nil {
			return nil, errors.Wrap(err, "read postings table")
		}
		if lastKey != nil {
			if valueCount%symbolFactor != 0 {
				r.postings[lastKey[0]].offsets = append(r.postings[lastKey[0]].offsets, postingOffset{value: lastKey[1], tableOff: lastTableOff})
			}
			r.postings[lastKey[0]].lastValOffset = r.indexLastPostingEnd - crc32.Size
		}
		// Trim any extra space in the slices.
		for k, v := range r.postings {
			l := make([]postingOffset, len(v.offsets))
			copy(l, v.offsets)
			r.postings[k].offsets = l
		}
	}

	r.nameSymbols = make(map[uint32]string, len(r.postings))
	for k := range r.postings {
		if k == "" {
			continue
		}
		off, err := r.symbols.ReverseLookup(k)
		if err != nil {
			return nil, errors.Wrap(err, "reverse symbol lookup")
		}
		r.nameSymbols[off] = k
	}

	r.dec = &index.Decoder{LookupSymbol: r.LookupSymbol}

	return r, nil
}

// newBinaryTOCFromByteSlice returns the parsed TOC from the given index-header byte slice.
func newBinaryTOCFromByteSlice(bs index.ByteSlice) (*BinaryTOC, error) {
	if bs.Len() < binaryTOCLen {
		return nil, encoding.ErrInvalidSize
	}
	b := bs.Range(bs.Len()-binaryTOCLen, bs.Len())

	expCRC := binary.BigEndian.Uint32(b[len(b)-4:])
	d := encoding.Decbuf{B: b[:len(b)-4]}

	if d.Crc32(castagnoliTable) != expCRC {
		return nil, errors.Wrap(encoding.ErrInvalidChecksum, "read index header TOC")
	}

	if err := d.Err(); err != nil {
		return nil, err
	}

	return &BinaryTOC{
		Symbols:             d.Be64(),
		PostingsOffsetTable: d.Be64(),
	}, nil
}

func (r BinaryReader) IndexVersion() int {
	return r.indexVersion
}

// TODO(bwplotka): Get advantage of multi value offset fetch.
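// PostingsOffset returns the byte range within the block's index file that holds the
// postings list for the given label name and value, or NotFoundRangeErr if the pair
// is not present.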
func (r BinaryReader) PostingsOffset(name string, value string) (index.Range, error) {
	rngs, err := r.postingsOffset(name, value)
	if err != nil {
		return index.Range{}, err
	}
	if len(rngs) != 1 {
		return index.Range{}, NotFoundRangeErr
	}
	return rngs[0], nil
}

func (r BinaryReader) postingsOffset(name string, values ...string) ([]index.Range, error) {
	rngs := make([]index.Range, 0, len(values))
	if r.indexVersion == index.FormatV1 {
		e, ok := r.postingsV1[name]
		if !ok {
			return nil, nil
		}
		for _, v := range values {
			rng, ok := e[v]
			if !ok {
				continue
			}
			rngs = append(rngs, rng)
		}
		return rngs, nil
	}

	e, ok := r.postings[name]
	if !ok {
		return nil, nil
	}

	if len(values) == 0 {
		return nil, nil
	}

	skip := 0
	valueIndex := 0
	for valueIndex < len(values) && values[valueIndex] < e.offsets[0].value {
		// Discard values before the start.
		valueIndex++
	}

	var tmpRngs []index.Range // The start, end offsets in the postings table in the original index file.
	for valueIndex < len(values) {
		value := values[valueIndex]

		i := sort.Search(len(e.offsets), func(i int) bool { return e.offsets[i].value >= value })
		if i == len(e.offsets) {
			// We're past the end.
			break
		}
		if i > 0 && e.offsets[i].value != value {
			// Need to look from previous entry.
			i--
		}
		// Don't Crc32 the entire postings offset table, this is very slow,
		// so hope any issues were caught at startup.
		d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsOffsetTable), nil)
		d.Skip(e.offsets[i].tableOff)

		tmpRngs = tmpRngs[:0]
		// Iterate on the offset table.
		for d.Err() == nil {
			if skip == 0 {
				// These are always the same number of bytes,
				// and it's faster to skip than parse.
				skip = d.Len()
				d.Uvarint()      // Keycount.
				d.UvarintBytes() // Label name.
				skip -= d.Len()
			} else {
				d.Skip(skip)
			}
			v := d.UvarintBytes()                 // Label value.
			postingOffset := int64(d.Uvarint64()) // Offset.
			for string(v) >= value {
				if string(v) == value {
					tmpRngs = append(tmpRngs, index.Range{Start: postingOffset + postingLengthFieldSize})
				}
				valueIndex++
				if valueIndex == len(values) {
					break
				}
				value = values[valueIndex]
			}
			if i+1 == len(e.offsets) {
				for i := range tmpRngs {
					tmpRngs[i].End = e.lastValOffset
				}
				rngs = append(rngs, tmpRngs...)
				// Need to go to a later postings offset entry, if there is one.
				break
			}

			if value >= e.offsets[i+1].value || valueIndex == len(values) {
				d.Skip(skip)
				d.UvarintBytes()                      // Label value.
				postingOffset := int64(d.Uvarint64()) // Offset.
				for j := range tmpRngs {
					tmpRngs[j].End = postingOffset - crc32.Size
				}
				rngs = append(rngs, tmpRngs...)
				// Need to go to a later postings offset entry, if there is one.
				break
			}
		}
		if d.Err() != nil {
			return nil, errors.Wrap(d.Err(), "get postings offset entry")
		}
	}

	return rngs, nil
}

func (r BinaryReader) LookupSymbol(o uint32) (string, error) {
	if s, ok := r.nameSymbols[o]; ok {
		return s, nil
	}

	if r.indexVersion == index.FormatV1 {
		// For v1 a little trick is needed. Refs are actual offsets inside the index, not
		// the index-header; the difference is the header length difference between the two files.
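		// (headerLen here is 14 bytes, whereas index.HeaderLen covers only the original
		// index's 4-byte magic and 1-byte version, hence the shift by the difference.)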
		o += headerLen - index.HeaderLen
	}

	return r.symbols.Lookup(o)
}

func (r BinaryReader) LabelValues(name string) ([]string, error) {
	if r.indexVersion == index.FormatV1 {
		e, ok := r.postingsV1[name]
		if !ok {
			return nil, nil
		}
		values := make([]string, 0, len(e))
		for k := range e {
			values = append(values, k)
		}
		sort.Strings(values)
		return values, nil
	}

	e, ok := r.postings[name]
	if !ok {
		return nil, nil
	}
	if len(e.offsets) == 0 {
		return nil, nil
	}
	values := make([]string, 0, len(e.offsets)*symbolFactor)

	d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsOffsetTable), nil)
	d.Skip(e.offsets[0].tableOff)
	lastVal := e.offsets[len(e.offsets)-1].value

	skip := 0
	for d.Err() == nil {
		if skip == 0 {
			// These are always the same number of bytes,
			// and it's faster to skip than parse.
			skip = d.Len()
			d.Uvarint()      // Keycount.
			d.UvarintBytes() // Label name.
			skip -= d.Len()
		} else {
			d.Skip(skip)
		}
		s := yoloString(d.UvarintBytes()) // Label value.
		values = append(values, s)
		if s == lastVal {
			break
		}
		d.Uvarint64() // Offset.
	}
	if d.Err() != nil {
		return nil, errors.Wrap(d.Err(), "get postings offset entry")
	}
	return values, nil
}

func yoloString(b []byte) string {
	return *((*string)(unsafe.Pointer(&b)))
}

func (r BinaryReader) LabelNames() []string {
	allPostingsKeyName, _ := index.AllPostingsKey()
	labelNames := make([]string, 0, len(r.postings))
	for name := range r.postings {
		if name == allPostingsKeyName {
			// This is not from any metric.
			continue
		}
		labelNames = append(labelNames, name)
	}
	sort.Strings(labelNames)
	return labelNames
}

func (r *BinaryReader) Close() error { return r.c.Close() }
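
// Example usage (an illustrative sketch; ctx, logger, bkt, dataDir, and blockID are
// assumed to be provided by the caller):
//
//	r, err := NewBinaryReader(ctx, logger, bkt, dataDir, blockID)
//	if err != nil {
//		return err
//	}
//	defer r.Close()
//
//	// Byte range of the postings list for a given label pair in the block's index.
//	rng, err := r.PostingsOffset("__name__", "up")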