// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package indexheader

import (
	"bufio"
	"context"
	"encoding/binary"
	"hash"
	"hash/crc32"
	"io"
	"io/ioutil"
	"math"
	"os"
	"path/filepath"
	"sort"
	"time"
	"unsafe"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
	"github.com/oklog/ulid"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/tsdb/encoding"
	"github.com/prometheus/prometheus/tsdb/fileutil"
	"github.com/prometheus/prometheus/tsdb/index"
	"github.com/thanos-io/thanos/pkg/block"
	"github.com/thanos-io/thanos/pkg/objstore"
	"github.com/thanos-io/thanos/pkg/runutil"
)

const (
	// BinaryFormatV1 represents the first version of the index-header file format.
	BinaryFormatV1 = 1

	// indexTOCLen is the length of the TOC at the end of a TSDB index file.
	indexTOCLen = 6*8 + crc32.Size
	// binaryTOCLen is the length of the TOC at the end of an index-header file.
	binaryTOCLen = 2*8 + crc32.Size
	// headerLen is the number of bytes reserved at the start of an index-header file
	// for its header: magic (4) + index-header version (1) + index version (1) +
	// offset of the source index's postings offset table (8).
	headerLen = 4 + 1 + 1 + 8

	// MagicIndex is the 4-byte magic number at the head of an index-header file.
	MagicIndex = 0xBAAAD792

	// symbolFactor is the sampling rate of the in-memory postings offset table: only
	// every symbolFactor-th label value offset per label name is kept in memory
	// (plus always the first and the last one).
	symbolFactor = 32

	// postingLengthFieldSize is the size of the length field that precedes each
	// postings list in the index.
	postingLengthFieldSize = 4
)
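
// An index-header file, as built by WriteBinary and parsed by newFileBinaryReader
// below, is laid out as follows (all integers are big-endian):
//
//	magic <4b>                        MagicIndex
//	index-header version <1b>         BinaryFormatV1
//	index version <1b>                version of the source TSDB index (1 or 2)
//	index postings table offset <8b>  offset of the source index's postings offset
//	                                  table, used as the end bound of its last
//	                                  postings list
//	symbols                           copied verbatim from the source index
//	postings offset table             copied from the source index, from its start
//	                                  offset to the end of the index file
//	TOC                               Symbols <8b>, PostingsOffsetTable <8b>,
//	                                  CRC32 (Castagnoli) of the TOC <4b>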

// The crc32 table is initialized lazily with sync.Once inside the crc32 package,
// which may still race with any other first use of the package elsewhere. Thus we
// initialize it eagerly in init.
var castagnoliTable *crc32.Table

func init() {
	castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
}

// newCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the
// polynomial may be easily changed in one location at a later time, if necessary.
func newCRC32() hash.Hash32 {
	return crc32.New(castagnoliTable)
}

// BinaryTOC is the table of contents of an index-header file.
type BinaryTOC struct {
	// Symbols holds the start offset of the symbols section, copied verbatim from the
	// index this header was built from.
	Symbols uint64
	// PostingsOffsetTable holds the start offset of the postings offset table section,
	// copied from the same index.
	PostingsOffsetTable uint64
}

// WriteBinary builds an index-header file from pieces of the index stored in object storage.
func WriteBinary(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID, fn string) (err error) {
	ir, indexVersion, err := newChunkedIndexReader(ctx, bkt, id)
	if err != nil {
		return errors.Wrap(err, "new index reader")
	}

	// Reusable buffer for copying sections and for the encoding buffers.
	// It also controls the size of the file writer's buffer.
	buf := make([]byte, 32*1024)
	bw, err := newBinaryWriter(fn, buf)
	if err != nil {
		return errors.Wrap(err, "new binary index header writer")
	}
	defer runutil.CloseWithErrCapture(&err, bw, "close binary writer for %s", fn)

	if err := bw.AddIndexMeta(indexVersion, ir.toc.PostingsTable); err != nil {
		return errors.Wrap(err, "add index meta")
	}

	if err := ir.CopySymbols(bw.SymbolsWriter(), buf); err != nil {
		return err
	}

	if err := bw.f.Flush(); err != nil {
		return errors.Wrap(err, "flush")
	}

	if err := ir.CopyPostingsOffsets(bw.PostingOffsetsWriter(), buf); err != nil {
		return err
	}

	if err := bw.f.Flush(); err != nil {
		return errors.Wrap(err, "flush")
	}

	if err := bw.WriteTOC(); err != nil {
		return errors.Wrap(err, "write index header TOC")
	}
	return nil
}

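// chunkedIndexReader reads pieces of a block's TSDB index directly from object
// storage using range requests.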
type chunkedIndexReader struct {
	ctx  context.Context
	path string
	size uint64
	bkt  objstore.BucketReader
	toc  *index.TOC
}

func newChunkedIndexReader(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID) (*chunkedIndexReader, int, error) {
	indexFilepath := filepath.Join(id.String(), block.IndexFilename)
	size, err := bkt.ObjectSize(ctx, indexFilepath)
	if err != nil {
		return nil, 0, errors.Wrapf(err, "get object size of %s", indexFilepath)
	}

	rc, err := bkt.GetRange(ctx, indexFilepath, 0, index.HeaderLen)
	if err != nil {
		return nil, 0, errors.Wrapf(err, "get header from object storage of %s", indexFilepath)
	}

	b, err := ioutil.ReadAll(rc)
	if err != nil {
		runutil.CloseWithErrCapture(&err, rc, "close reader")
		return nil, 0, errors.Wrapf(err, "get header from object storage of %s", indexFilepath)
	}

	if err := rc.Close(); err != nil {
		return nil, 0, errors.Wrap(err, "close reader")
	}

	if m := binary.BigEndian.Uint32(b[0:4]); m != index.MagicIndex {
		return nil, 0, errors.Errorf("invalid magic number %x for %s", m, indexFilepath)
	}

	version := int(b[4])

	if version != index.FormatV1 && version != index.FormatV2 {
		return nil, 0, errors.Errorf("unsupported index file version %d of %s", version, indexFilepath)
	}

	ir := &chunkedIndexReader{
		ctx:  ctx,
		path: indexFilepath,
		size: size,
		bkt:  bkt,
	}

	toc, err := ir.readTOC()
	if err != nil {
		return nil, 0, err
	}
	ir.toc = toc

	return ir, version, nil
}

func (r *chunkedIndexReader) readTOC() (*index.TOC, error) {
	rc, err := r.bkt.GetRange(r.ctx, r.path, int64(r.size-indexTOCLen-crc32.Size), indexTOCLen+crc32.Size)
	if err != nil {
		return nil, errors.Wrapf(err, "get TOC from object storage of %s", r.path)
	}

	tocBytes, err := ioutil.ReadAll(rc)
	if err != nil {
		runutil.CloseWithErrCapture(&err, rc, "close toc reader")
		return nil, errors.Wrapf(err, "get TOC from object storage of %s", r.path)
	}

	if err := rc.Close(); err != nil {
		return nil, errors.Wrap(err, "close toc reader")
	}

	toc, err := index.NewTOCFromByteSlice(realByteSlice(tocBytes))
	if err != nil {
		return nil, errors.Wrap(err, "new TOC")
	}
	return toc, nil
}

func (r *chunkedIndexReader) CopySymbols(w io.Writer, buf []byte) (err error) {
	rc, err := r.bkt.GetRange(r.ctx, r.path, int64(r.toc.Symbols), int64(r.toc.Series-r.toc.Symbols))
	if err != nil {
		return errors.Wrapf(err, "get symbols from object storage of %s", r.path)
	}
	defer runutil.CloseWithErrCapture(&err, rc, "close symbol reader")

	if _, err := io.CopyBuffer(w, rc, buf); err != nil {
		return errors.Wrap(err, "copy symbols")
	}

	return nil
}

func (r *chunkedIndexReader) CopyPostingsOffsets(w io.Writer, buf []byte) (err error) {
	rc, err := r.bkt.GetRange(r.ctx, r.path, int64(r.toc.PostingsTable), int64(r.size-r.toc.PostingsTable))
	if err != nil {
		return errors.Wrapf(err, "get posting offset table from object storage of %s", r.path)
	}
	defer runutil.CloseWithErrCapture(&err, rc, "close posting offsets reader")

	if _, err := io.CopyBuffer(w, rc, buf); err != nil {
		return errors.Wrap(err, "copy posting offsets")
	}

	return nil
}

// TODO(bwplotka): Add padding for efficient read.
type binaryWriter struct {
	f *FileWriter

	toc BinaryTOC

	// Reusable memory.
	buf encoding.Encbuf

	crc32 hash.Hash
}

func newBinaryWriter(fn string, buf []byte) (w *binaryWriter, err error) {
	dir := filepath.Dir(fn)

	df, err := fileutil.OpenDir(dir)
	if os.IsNotExist(err) {
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			return nil, err
		}
		df, err = fileutil.OpenDir(dir)
	}
	if err != nil {
		return nil, err
	}

	defer runutil.CloseWithErrCapture(&err, df, "dir close")

	if err := os.RemoveAll(fn); err != nil {
		return nil, errors.Wrap(err, "remove any existing index at path")
	}

	// Use a file writer with a buffer no larger than the reusable one.
	f, err := NewFileWriter(fn, len(buf))
	if err != nil {
		return nil, err
	}
	if err := df.Sync(); err != nil {
		return nil, errors.Wrap(err, "sync dir")
	}

	w = &binaryWriter{
		f: f,

		// Reusable memory.
		buf:   encoding.Encbuf{B: buf},
		crc32: newCRC32(),
	}

	w.buf.Reset()
	w.buf.PutBE32(MagicIndex)
	w.buf.PutByte(BinaryFormatV1)

	return w, w.f.Write(w.buf.Get())
}

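// FileWriter is a buffered writer over an *os.File that tracks the current write
// position.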
type FileWriter struct {
	f    *os.File
	fbuf *bufio.Writer
	pos  uint64
	name string
}

// TODO(bwplotka): The buffer size parameter was added to this constructor; upstream this change.
func NewFileWriter(name string, size int) (*FileWriter, error) {
	f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0666)
	if err != nil {
		return nil, err
	}
	return &FileWriter{
		f:    f,
		fbuf: bufio.NewWriterSize(f, size),
		pos:  0,
		name: name,
	}, nil
}

func (fw *FileWriter) Pos() uint64 {
	return fw.pos
}

func (fw *FileWriter) Write(bufs ...[]byte) error {
	for _, b := range bufs {
		n, err := fw.fbuf.Write(b)
		fw.pos += uint64(n)
		if err != nil {
			return err
		}
		// For now the index file must not grow beyond 64GiB. Some of the fixed-sized
		// offset references in v1 are only 4 bytes large.
		// Once we move to compressed/varint representations in those areas, this limitation
		// can be lifted.
		if fw.pos > 16*math.MaxUint32 {
			return errors.Errorf("%q exceeding max size of 64GiB", fw.name)
		}
	}
	return nil
}

func (fw *FileWriter) Flush() error {
	return fw.fbuf.Flush()
}

func (fw *FileWriter) WriteAt(buf []byte, pos uint64) error {
	if err := fw.Flush(); err != nil {
		return err
	}
	_, err := fw.f.WriteAt(buf, int64(pos))
	return err
}

// AddPadding adds zero-byte padding until the file size is a multiple of size.
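// For example, with size 16 and a current position of 10, six zero bytes are written.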
func (fw *FileWriter) AddPadding(size int) error {
	p := fw.pos % uint64(size)
	if p == 0 {
		return nil
	}
	p = uint64(size) - p

	if err := fw.Write(make([]byte, p)); err != nil {
		return errors.Wrap(err, "add padding")
	}
	return nil
}

func (fw *FileWriter) Close() error {
	if err := fw.Flush(); err != nil {
		return err
	}
	if err := fw.f.Sync(); err != nil {
		return err
	}
	return fw.f.Close()
}

func (fw *FileWriter) Remove() error {
	return os.Remove(fw.name)
}

// AddIndexMeta writes the source index's version and the offset of its postings
// offset table into the index-header's header.
func (w *binaryWriter) AddIndexMeta(indexVersion int, indexPostingOffsetTable uint64) error {
	w.buf.Reset()
	w.buf.PutByte(byte(indexVersion))
	w.buf.PutBE64(indexPostingOffsetTable)
	return w.f.Write(w.buf.Get())
}

// SymbolsWriter records the current position as the start of the symbols section in
// the TOC and returns a writer for it.
func (w *binaryWriter) SymbolsWriter() io.Writer {
	w.toc.Symbols = w.f.Pos()
	return w
}

// PostingOffsetsWriter records the current position as the start of the postings
// offset table section in the TOC and returns a writer for it.
func (w *binaryWriter) PostingOffsetsWriter() io.Writer {
	w.toc.PostingsOffsetTable = w.f.Pos()
	return w
}

// WriteTOC appends the index-header TOC (the section offsets plus their CRC32) at
// the end of the file.
func (w *binaryWriter) WriteTOC() error {
	w.buf.Reset()

	w.buf.PutBE64(w.toc.Symbols)
	w.buf.PutBE64(w.toc.PostingsOffsetTable)

	w.buf.PutHash(w.crc32)

	return w.f.Write(w.buf.Get())
}

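// Write appends p through the underlying file writer and reports the number of bytes
// written as the change in file position, making binaryWriter an io.Writer usable
// with io.CopyBuffer in CopySymbols and CopyPostingsOffsets.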
func (w *binaryWriter) Write(p []byte) (int, error) {
	n := w.f.Pos()
	err := w.f.Write(p)
	return int(w.f.Pos() - n), err
}

func (w *binaryWriter) Close() error {
	return w.f.Close()
}

type postingValueOffsets struct {
	// offsets holds the positions in the postings offset table of a sampled subset of
	// this name's label values: every symbolFactor-th value, plus always the first and
	// the last one.
	offsets []postingOffset
	// lastValOffset is the offset in the original index at which the postings of the
	// last value for this name end.
	lastValOffset int64
}

type postingOffset struct {
	// value is the label value.
	value string
	// tableOff is the offset of this entry within the postings offset table of the
	// index-header file.
	tableOff int
}

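// BinaryReader is an index-header reader backed by a memory-mapped index-header file.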
type BinaryReader struct {
	b   index.ByteSlice
	toc *BinaryTOC

	// Closer that releases the underlying resources of the byte slice.
	c io.Closer

	// Map of label name to the positions of a sampled subset of its label values in
	// the postings offset table. The first and last value for each name are always
	// present.
	postings map[string]*postingValueOffsets
	// For the v1 format, labelname -> labelvalue -> offset.
	postingsV1 map[string]map[string]index.Range

	symbols     *index.Symbols
	nameSymbols map[uint32]string // Cache of the label name symbol lookups,
	// as there are not many and they are half of all lookups.

	dec *index.Decoder

	version             int
	indexVersion        int
	indexLastPostingEnd int64
}

// NewBinaryReader loads an index-header from disk, or builds and writes it from the
// index in object storage if it is missing or unreadable.
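//
// A minimal usage sketch (the bucket, cache directory and block ID are illustrative):
//
//	r, err := NewBinaryReader(ctx, logger, bkt, "/var/thanos/index-cache", blockID)
//	if err != nil {
//		return err
//	}
//	defer r.Close()
//	// rng is the byte range of this postings list inside the block's index file.
//	rng, err := r.PostingsOffset("job", "prometheus")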
func NewBinaryReader(ctx context.Context, logger log.Logger, bkt objstore.BucketReader, dir string, id ulid.ULID) (*BinaryReader, error) {
	binfn := filepath.Join(dir, id.String(), block.IndexHeaderFilename)
	br, err := newFileBinaryReader(binfn)
	if err == nil {
		return br, nil
	}

	level.Debug(logger).Log("msg", "failed to read index-header from disk; recreating", "path", binfn, "err", err)

	start := time.Now()
	if err := WriteBinary(ctx, bkt, id, binfn); err != nil {
		return nil, errors.Wrap(err, "write index header")
	}

	level.Debug(logger).Log("msg", "built index-header file", "path", binfn, "elapsed", time.Since(start))

	return newFileBinaryReader(binfn)
}

func newFileBinaryReader(path string) (bw *BinaryReader, err error) {
	f, err := fileutil.OpenMmapFile(path)
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			runutil.CloseWithErrCapture(&err, f, "index header close")
		}
	}()

	r := &BinaryReader{
		b:        realByteSlice(f.Bytes()),
		c:        f,
		postings: map[string]*postingValueOffsets{},
	}

	// Verify header.
	if r.b.Len() < headerLen {
		return nil, errors.Wrap(encoding.ErrInvalidSize, "index header's header")
	}
	if m := binary.BigEndian.Uint32(r.b.Range(0, 4)); m != MagicIndex {
		return nil, errors.Errorf("invalid magic number %x", m)
	}
	r.version = int(r.b.Range(4, 5)[0])
	r.indexVersion = int(r.b.Range(5, 6)[0])

	r.indexLastPostingEnd = int64(binary.BigEndian.Uint64(r.b.Range(6, headerLen)))

	if r.version != BinaryFormatV1 {
		return nil, errors.Errorf("unknown index header file version %d", r.version)
	}

	r.toc, err = newBinaryTOCFromByteSlice(r.b)
	if err != nil {
		return nil, errors.Wrap(err, "read index header TOC")
	}

	r.symbols, err = index.NewSymbols(r.b, r.indexVersion, int(r.toc.Symbols))
	if err != nil {
		return nil, errors.Wrap(err, "read symbols")
	}

	var lastKey []string
	if r.indexVersion == index.FormatV1 {
		// Earlier V1 formats don't have a sorted postings offset table, so
		// load the whole offset table into memory.
		r.postingsV1 = map[string]map[string]index.Range{}

		var prevRng index.Range
		if err := index.ReadOffsetTable(r.b, r.toc.PostingsOffsetTable, func(key []string, off uint64, _ int) error {
			if len(key) != 2 {
				return errors.Errorf("unexpected key length for posting table %d", len(key))
			}

			if lastKey != nil {
				prevRng.End = int64(off - crc32.Size)
				r.postingsV1[lastKey[0]][lastKey[1]] = prevRng
			}

			if _, ok := r.postingsV1[key[0]]; !ok {
				r.postingsV1[key[0]] = map[string]index.Range{}
				r.postings[key[0]] = nil // Used to get a list of label names elsewhere.
			}

			lastKey = key
			prevRng = index.Range{Start: int64(off + postingLengthFieldSize)}
			return nil
		}); err != nil {
			return nil, errors.Wrap(err, "read postings table")
		}
		if lastKey != nil {
			prevRng.End = r.indexLastPostingEnd - crc32.Size
			r.postingsV1[lastKey[0]][lastKey[1]] = prevRng
		}
	} else {
		lastTableOff := 0
		valueCount := 0

		// For the postings offset table we keep every label name but only every nth
		// label value (plus the first and last one), to save memory.
		if err := index.ReadOffsetTable(r.b, r.toc.PostingsOffsetTable, func(key []string, off uint64, tableOff int) error {
			if len(key) != 2 {
				return errors.Errorf("unexpected key length for posting table %d", len(key))
			}

			if _, ok := r.postings[key[0]]; !ok {
				// Next label name.
				r.postings[key[0]] = &postingValueOffsets{}
				if lastKey != nil {
					if valueCount%symbolFactor != 0 {
						// Always include last value for each label name.
						r.postings[lastKey[0]].offsets = append(r.postings[lastKey[0]].offsets, postingOffset{value: lastKey[1], tableOff: lastTableOff})
					}
					r.postings[lastKey[0]].lastValOffset = int64(off - crc32.Size)
					lastKey = nil
				}
				valueCount = 0
			}

			lastKey = key
			if valueCount%symbolFactor == 0 {
				r.postings[key[0]].offsets = append(r.postings[key[0]].offsets, postingOffset{value: key[1], tableOff: tableOff})
				valueCount++
				return nil
			}

			lastTableOff = tableOff
			valueCount++
			return nil
		}); err != nil {
			return nil, errors.Wrap(err, "read postings table")
		}
		if lastKey != nil {
			if valueCount%symbolFactor != 0 {
				r.postings[lastKey[0]].offsets = append(r.postings[lastKey[0]].offsets, postingOffset{value: lastKey[1], tableOff: lastTableOff})
			}
			r.postings[lastKey[0]].lastValOffset = r.indexLastPostingEnd - crc32.Size
		}
		// Trim any extra space in the slices.
		for k, v := range r.postings {
			l := make([]postingOffset, len(v.offsets))
			copy(l, v.offsets)
			r.postings[k].offsets = l
		}
	}

	r.nameSymbols = make(map[uint32]string, len(r.postings))
	for k := range r.postings {
		if k == "" {
			continue
		}
		off, err := r.symbols.ReverseLookup(k)
		if err != nil {
			return nil, errors.Wrap(err, "reverse symbol lookup")
		}
		r.nameSymbols[off] = k
	}

	r.dec = &index.Decoder{LookupSymbol: r.LookupSymbol}

	return r, nil
}

// newBinaryTOCFromByteSlice returns the parsed TOC from the given index-header byte slice.
func newBinaryTOCFromByteSlice(bs index.ByteSlice) (*BinaryTOC, error) {
	if bs.Len() < binaryTOCLen {
		return nil, encoding.ErrInvalidSize
	}
	b := bs.Range(bs.Len()-binaryTOCLen, bs.Len())

	expCRC := binary.BigEndian.Uint32(b[len(b)-4:])
	d := encoding.Decbuf{B: b[:len(b)-4]}

	if d.Crc32(castagnoliTable) != expCRC {
		return nil, errors.Wrap(encoding.ErrInvalidChecksum, "read index header TOC")
	}

	if err := d.Err(); err != nil {
		return nil, err
	}

	return &BinaryTOC{
		Symbols:             d.Be64(),
		PostingsOffsetTable: d.Be64(),
	}, nil
}

func (r BinaryReader) IndexVersion() int {
	return r.indexVersion
}

// TODO(bwplotka): Take advantage of fetching multiple value offsets at once.
func (r BinaryReader) PostingsOffset(name string, value string) (index.Range, error) {
	rngs, err := r.postingsOffset(name, value)
	if err != nil {
		return index.Range{}, err
	}
	if len(rngs) != 1 {
		return index.Range{}, NotFoundRangeErr
	}
	return rngs[0], nil
}

func (r BinaryReader) postingsOffset(name string, values ...string) ([]index.Range, error) {
	rngs := make([]index.Range, 0, len(values))
	if r.indexVersion == index.FormatV1 {
		e, ok := r.postingsV1[name]
		if !ok {
			return nil, nil
		}
		for _, v := range values {
			rng, ok := e[v]
			if !ok {
				continue
			}
			rngs = append(rngs, rng)
		}
		return rngs, nil
	}

	e, ok := r.postings[name]
	if !ok {
		return nil, nil
	}

	if len(values) == 0 {
		return nil, nil
	}

	skip := 0
	valueIndex := 0
	for valueIndex < len(values) && values[valueIndex] < e.offsets[0].value {
		// Discard values before the start.
		valueIndex++
	}

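	// Only every symbolFactor-th label value offset is kept in memory (see
	// newFileBinaryReader), so for each requested value we binary search the sampled
	// offsets for the closest entry at or before it and then scan the on-disk
	// postings offset table forward from there.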
	var tmpRngs []index.Range // The start, end offsets in the postings table in the original index file.
	for valueIndex < len(values) {
		value := values[valueIndex]

		i := sort.Search(len(e.offsets), func(i int) bool { return e.offsets[i].value >= value })
		if i == len(e.offsets) {
			// We're past the end.
			break
		}
		if i > 0 && e.offsets[i].value != value {
			// Need to look from previous entry.
			i--
		}
		// Don't CRC32-check the entire postings offset table here; that is very slow,
		// so we rely on any corruption having been caught at startup.
		d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsOffsetTable), nil)
		d.Skip(e.offsets[i].tableOff)

		tmpRngs = tmpRngs[:0]
		// Iterate on the offset table.
		for d.Err() == nil {
			if skip == 0 {
				// These are always the same number of bytes,
				// and it's faster to skip than parse.
				skip = d.Len()
				d.Uvarint()      // Keycount.
				d.UvarintBytes() // Label name.
				skip -= d.Len()
			} else {
				d.Skip(skip)
			}
			v := d.UvarintBytes()                 // Label value.
			postingOffset := int64(d.Uvarint64()) // Offset.
			for string(v) >= value {
				if string(v) == value {
					tmpRngs = append(tmpRngs, index.Range{Start: postingOffset + postingLengthFieldSize})
				}
				valueIndex++
				if valueIndex == len(values) {
					break
				}
				value = values[valueIndex]
			}
			if i+1 == len(e.offsets) {
				for i := range tmpRngs {
					tmpRngs[i].End = e.lastValOffset
				}
				rngs = append(rngs, tmpRngs...)
				// No more in-memory offsets for this name; end the pending ranges at
				// the last value's postings end.
				break
			}

			if value >= e.offsets[i+1].value || valueIndex == len(values) {
				d.Skip(skip)
				d.UvarintBytes()                      // Label value.
				postingOffset := int64(d.Uvarint64()) // Offset.
				for j := range tmpRngs {
					tmpRngs[j].End = postingOffset - crc32.Size
				}
				rngs = append(rngs, tmpRngs...)
				// Need to go to a later postings offset entry, if there is one.
				break
			}
		}
		if d.Err() != nil {
			return nil, errors.Wrap(d.Err(), "get postings offset entry")
		}
	}

	return rngs, nil
}

func (r BinaryReader) LookupSymbol(o uint32) (string, error) {
	if s, ok := r.nameSymbols[o]; ok {
		return s, nil
	}

	if r.indexVersion == index.FormatV1 {
		// For v1 a small trick is needed: refs are offsets into the original index file,
		// not into the index-header. The two files differ by the length of their headers
		// before the symbols section, so shift the ref by that difference.
		o += headerLen - index.HeaderLen
	}

	return r.symbols.Lookup(o)
}

func (r BinaryReader) LabelValues(name string) ([]string, error) {
	if r.indexVersion == index.FormatV1 {
		e, ok := r.postingsV1[name]
		if !ok {
			return nil, nil
		}
		values := make([]string, 0, len(e))
		for k := range e {
			values = append(values, k)
		}
		sort.Strings(values)
		return values, nil
	}
	e, ok := r.postings[name]
	if !ok {
		return nil, nil
	}
	if len(e.offsets) == 0 {
		return nil, nil
	}
	values := make([]string, 0, len(e.offsets)*symbolFactor)

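	// Walk the on-disk postings offset table starting at the first sampled offset for
	// this name and collect every value until the last in-memory value is reached.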
	d := encoding.NewDecbufAt(r.b, int(r.toc.PostingsOffsetTable), nil)
	d.Skip(e.offsets[0].tableOff)
	lastVal := e.offsets[len(e.offsets)-1].value

	skip := 0
	for d.Err() == nil {
		if skip == 0 {
			// These are always the same number of bytes,
			// and it's faster to skip than parse.
			skip = d.Len()
			d.Uvarint()      // Keycount.
			d.UvarintBytes() // Label name.
			skip -= d.Len()
		} else {
			d.Skip(skip)
		}
		s := yoloString(d.UvarintBytes()) // Label value.
		values = append(values, s)
		if s == lastVal {
			break
		}
		d.Uvarint64() // Offset.
	}
	if d.Err() != nil {
		return nil, errors.Wrap(d.Err(), "get postings offset entry")
	}
	return values, nil
}

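// yoloString converts a byte slice to a string without copying. The returned string
// aliases b, so it must not be used after the underlying mmapped data is released.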
func yoloString(b []byte) string {
	return *((*string)(unsafe.Pointer(&b)))
}

func (r BinaryReader) LabelNames() []string {
	allPostingsKeyName, _ := index.AllPostingsKey()
	labelNames := make([]string, 0, len(r.postings))
	for name := range r.postings {
		if name == allPostingsKeyName {
			// This is not from any metric.
			continue
		}
		labelNames = append(labelNames, name)
	}
	sort.Strings(labelNames)
	return labelNames
}

func (r *BinaryReader) Close() error { return r.c.Close() }