1// Copyright 2018 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14//go:generate go get -u modernc.org/golex
15//go:generate golex -o=openmetricslex.l.go openmetricslex.l
16
17package textparse
18
19import (
20	"bytes"
21	"fmt"
22	"io"
23	"math"
24	"sort"
25	"strings"
26	"unicode/utf8"
27
28	"github.com/pkg/errors"
29
30	"github.com/prometheus/prometheus/pkg/exemplar"
31	"github.com/prometheus/prometheus/pkg/labels"
32	"github.com/prometheus/prometheus/pkg/value"
33)
34
// allowedSuffixes lists the metric name suffixes for which an exemplar is
// accepted on a sample line.
var allowedSuffixes = [][]byte{
	[]byte("_total"),
	[]byte("_bucket"),
}
36
// openMetricsLexer carries the scanning state used to tokenize OpenMetrics
// text input.
type openMetricsLexer struct {
	// b is the input being scanned.
	b []byte
	// i is the current read position in b and start is the position at
	// which the current token begins; the token spans b[start:i].
	i, start int
	// err is set to io.EOF once the input is exhausted, or to the error
	// reported through Error.
	err error
	// state is the current lexer state; next consults it to decide whether
	// null bytes may be skipped.
	state int
}
44
45// buf returns the buffer of the current token.
46func (l *openMetricsLexer) buf() []byte {
47	return l.b[l.start:l.i]
48}
49
50func (l *openMetricsLexer) cur() byte {
51	if l.i < len(l.b) {
52		return l.b[l.i]
53	}
54	return byte(' ')
55}
56
57// next advances the openMetricsLexer to the next character.
58func (l *openMetricsLexer) next() byte {
59	l.i++
60	if l.i >= len(l.b) {
61		l.err = io.EOF
62		return byte(tEOF)
63	}
64	// Lex struggles with null bytes. If we are in a label value or help string, where
65	// they are allowed, consume them here immediately.
66	for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) {
67		l.i++
68		if l.i >= len(l.b) {
69			l.err = io.EOF
70			return byte(tEOF)
71		}
72	}
73	return l.b[l.i]
74}
75
76func (l *openMetricsLexer) Error(es string) {
77	l.err = errors.New(es)
78}
79
80// OpenMetricsParser parses samples from a byte slice of samples in the official
81// OpenMetrics text exposition format.
82// This is based on the working draft https://docs.google.com/document/u/1/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit
83type OpenMetricsParser struct {
84	l       *openMetricsLexer
85	series  []byte
86	text    []byte
87	mtype   MetricType
88	val     float64
89	ts      int64
90	hasTS   bool
91	start   int
92	offsets []int
93
94	eOffsets      []int
95	exemplar      []byte
96	exemplarVal   float64
97	exemplarTs    int64
98	hasExemplarTs bool
99}
100
101// NewOpenMetricsParser returns a new parser of the byte slice.
102func NewOpenMetricsParser(b []byte) Parser {
103	return &OpenMetricsParser{l: &openMetricsLexer{b: b}}
104}
105
106// Series returns the bytes of the series, the timestamp if set, and the value
107// of the current sample.
108func (p *OpenMetricsParser) Series() ([]byte, *int64, float64) {
109	if p.hasTS {
110		ts := p.ts
111		return p.series, &ts, p.val
112	}
113	return p.series, nil, p.val
114}
115
116// Help returns the metric name and help text in the current entry.
117// Must only be called after Next returned a help entry.
118// The returned byte slices become invalid after the next call to Next.
119func (p *OpenMetricsParser) Help() ([]byte, []byte) {
120	m := p.l.b[p.offsets[0]:p.offsets[1]]
121
122	// Replacer causes allocations. Replace only when necessary.
123	if strings.IndexByte(yoloString(p.text), byte('\\')) >= 0 {
124		// OpenMetrics always uses the Prometheus format label value escaping.
125		return m, []byte(lvalReplacer.Replace(string(p.text)))
126	}
127	return m, p.text
128}
129
130// Type returns the metric name and type in the current entry.
131// Must only be called after Next returned a type entry.
132// The returned byte slices become invalid after the next call to Next.
133func (p *OpenMetricsParser) Type() ([]byte, MetricType) {
134	return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype
135}
136
137// Unit returns the metric name and unit in the current entry.
138// Must only be called after Next returned a unit entry.
139// The returned byte slices become invalid after the next call to Next.
140func (p *OpenMetricsParser) Unit() ([]byte, []byte) {
141	// The Prometheus format does not have units.
142	return p.l.b[p.offsets[0]:p.offsets[1]], p.text
143}
144
145// Comment returns the text of the current comment.
146// Must only be called after Next returned a comment entry.
147// The returned byte slice becomes invalid after the next call to Next.
148func (p *OpenMetricsParser) Comment() []byte {
149	return p.text
150}
151
152// Metric writes the labels of the current sample into the passed labels.
153// It returns the string from which the metric was parsed.
154func (p *OpenMetricsParser) Metric(l *labels.Labels) string {
155	// Allocate the full immutable string immediately, so we just
156	// have to create references on it below.
157	s := string(p.series)
158
159	*l = append(*l, labels.Label{
160		Name:  labels.MetricName,
161		Value: s[:p.offsets[0]-p.start],
162	})
163
164	for i := 1; i < len(p.offsets); i += 4 {
165		a := p.offsets[i] - p.start
166		b := p.offsets[i+1] - p.start
167		c := p.offsets[i+2] - p.start
168		d := p.offsets[i+3] - p.start
169
170		// Replacer causes allocations. Replace only when necessary.
171		if strings.IndexByte(s[c:d], byte('\\')) >= 0 {
172			*l = append(*l, labels.Label{Name: s[a:b], Value: lvalReplacer.Replace(s[c:d])})
173			continue
174		}
175		*l = append(*l, labels.Label{Name: s[a:b], Value: s[c:d]})
176	}
177
178	// Sort labels. We can skip the first entry since the metric name is
179	// already at the right place.
180	sort.Sort((*l)[1:])
181
182	return s
183}
184
185// Exemplar writes the exemplar of the current sample into the passed
186// exemplar. It returns the whether an exemplar exists.
187func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool {
188	if len(p.exemplar) == 0 {
189		return false
190	}
191
192	// Allocate the full immutable string immediately, so we just
193	// have to create references on it below.
194	s := string(p.exemplar)
195
196	e.Value = p.exemplarVal
197	if p.hasExemplarTs {
198		e.HasTs = true
199		e.Ts = p.exemplarTs
200	}
201
202	for i := 0; i < len(p.eOffsets); i += 4 {
203		a := p.eOffsets[i] - p.start
204		b := p.eOffsets[i+1] - p.start
205		c := p.eOffsets[i+2] - p.start
206		d := p.eOffsets[i+3] - p.start
207
208		e.Labels = append(e.Labels, labels.Label{Name: s[a:b], Value: s[c:d]})
209	}
210
211	// Sort the labels.
212	sort.Sort(e.Labels)
213
214	return true
215}
216
217// nextToken returns the next token from the openMetricsLexer.
218func (p *OpenMetricsParser) nextToken() token {
219	tok := p.l.Lex()
220	return tok
221}
222
223// Next advances the parser to the next sample. It returns false if no
224// more samples were read or an error occurred.
225func (p *OpenMetricsParser) Next() (Entry, error) {
226	var err error
227
228	p.start = p.l.i
229	p.offsets = p.offsets[:0]
230	p.eOffsets = p.eOffsets[:0]
231	p.exemplar = p.exemplar[:0]
232	p.exemplarVal = 0
233	p.hasExemplarTs = false
234
235	switch t := p.nextToken(); t {
236	case tEOFWord:
237		if t := p.nextToken(); t != tEOF {
238			return EntryInvalid, errors.New("unexpected data after # EOF")
239		}
240		return EntryInvalid, io.EOF
241	case tEOF:
242		return EntryInvalid, errors.New("data does not end with # EOF")
243	case tHelp, tType, tUnit:
244		switch t := p.nextToken(); t {
245		case tMName:
246			p.offsets = append(p.offsets, p.l.start, p.l.i)
247		default:
248			return EntryInvalid, parseError("expected metric name after HELP", t)
249		}
250		switch t := p.nextToken(); t {
251		case tText:
252			if len(p.l.buf()) > 1 {
253				p.text = p.l.buf()[1 : len(p.l.buf())-1]
254			} else {
255				p.text = []byte{}
256			}
257		default:
258			return EntryInvalid, parseError("expected text in HELP", t)
259		}
260		switch t {
261		case tType:
262			switch s := yoloString(p.text); s {
263			case "counter":
264				p.mtype = MetricTypeCounter
265			case "gauge":
266				p.mtype = MetricTypeGauge
267			case "histogram":
268				p.mtype = MetricTypeHistogram
269			case "gaugehistogram":
270				p.mtype = MetricTypeGaugeHistogram
271			case "summary":
272				p.mtype = MetricTypeSummary
273			case "info":
274				p.mtype = MetricTypeInfo
275			case "stateset":
276				p.mtype = MetricTypeStateset
277			case "unknown":
278				p.mtype = MetricTypeUnknown
279			default:
280				return EntryInvalid, errors.Errorf("invalid metric type %q", s)
281			}
282		case tHelp:
283			if !utf8.Valid(p.text) {
284				return EntryInvalid, errors.New("help text is not a valid utf8 string")
285			}
286		}
287		switch t {
288		case tHelp:
289			return EntryHelp, nil
290		case tType:
291			return EntryType, nil
292		case tUnit:
293			m := yoloString(p.l.b[p.offsets[0]:p.offsets[1]])
294			u := yoloString(p.text)
295			if len(u) > 0 {
296				if !strings.HasSuffix(m, u) || len(m) < len(u)+1 || p.l.b[p.offsets[1]-len(u)-1] != '_' {
297					return EntryInvalid, errors.Errorf("unit not a suffix of metric %q", m)
298				}
299			}
300			return EntryUnit, nil
301		}
302
303	case tMName:
304		p.offsets = append(p.offsets, p.l.i)
305		p.series = p.l.b[p.start:p.l.i]
306
307		t2 := p.nextToken()
308		if t2 == tBraceOpen {
309			p.offsets, err = p.parseLVals(p.offsets)
310			if err != nil {
311				return EntryInvalid, err
312			}
313			p.series = p.l.b[p.start:p.l.i]
314			t2 = p.nextToken()
315		}
316		p.val, err = p.getFloatValue(t2, "metric")
317		if err != nil {
318			return EntryInvalid, err
319		}
320
321		p.hasTS = false
322		switch t2 := p.nextToken(); t2 {
323		case tEOF:
324			return EntryInvalid, errors.New("data does not end with # EOF")
325		case tLinebreak:
326			break
327		case tComment:
328			if err := p.parseComment(); err != nil {
329				return EntryInvalid, err
330			}
331		case tTimestamp:
332			p.hasTS = true
333			var ts float64
334			// A float is enough to hold what we need for millisecond resolution.
335			if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil {
336				return EntryInvalid, err
337			}
338			if math.IsNaN(ts) || math.IsInf(ts, 0) {
339				return EntryInvalid, errors.New("invalid timestamp")
340			}
341			p.ts = int64(ts * 1000)
342			switch t3 := p.nextToken(); t3 {
343			case tLinebreak:
344			case tComment:
345				if err := p.parseComment(); err != nil {
346					return EntryInvalid, err
347				}
348			default:
349				return EntryInvalid, parseError("expected next entry after timestamp", t3)
350			}
351		default:
352			return EntryInvalid, parseError("expected timestamp or # symbol", t2)
353		}
354		return EntrySeries, nil
355
356	default:
357		err = errors.Errorf("%q %q is not a valid start token", t, string(p.l.cur()))
358	}
359	return EntryInvalid, err
360}
361
362func (p *OpenMetricsParser) parseComment() error {
363	// Validate the name of the metric. It must have _total or _bucket as
364	// suffix for exemplars to be supported.
365	if err := p.validateNameForExemplar(p.series[:p.offsets[0]-p.start]); err != nil {
366		return err
367	}
368
369	var err error
370	// Parse the labels.
371	p.eOffsets, err = p.parseLVals(p.eOffsets)
372	if err != nil {
373		return err
374	}
375	p.exemplar = p.l.b[p.start:p.l.i]
376
377	// Get the value.
378	p.exemplarVal, err = p.getFloatValue(p.nextToken(), "exemplar labels")
379	if err != nil {
380		return err
381	}
382
383	// Read the optional timestamp.
384	p.hasExemplarTs = false
385	switch t2 := p.nextToken(); t2 {
386	case tEOF:
387		return errors.New("data does not end with # EOF")
388	case tLinebreak:
389		break
390	case tTimestamp:
391		p.hasExemplarTs = true
392		var ts float64
393		// A float is enough to hold what we need for millisecond resolution.
394		if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil {
395			return err
396		}
397		if math.IsNaN(ts) || math.IsInf(ts, 0) {
398			return errors.New("invalid exemplar timestamp")
399		}
400		p.exemplarTs = int64(ts * 1000)
401		switch t3 := p.nextToken(); t3 {
402		case tLinebreak:
403		default:
404			return parseError("expected next entry after exemplar timestamp", t3)
405		}
406	default:
407		return parseError("expected timestamp or comment", t2)
408	}
409	return nil
410}
411
412func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) {
413	first := true
414	for {
415		t := p.nextToken()
416		switch t {
417		case tBraceClose:
418			return offsets, nil
419		case tComma:
420			if first {
421				return nil, parseError("expected label name or left brace", t)
422			}
423			t = p.nextToken()
424			if t != tLName {
425				return nil, parseError("expected label name", t)
426			}
427		case tLName:
428			if !first {
429				return nil, parseError("expected comma", t)
430			}
431		default:
432			if first {
433				return nil, parseError("expected label name or left brace", t)
434			}
435			return nil, parseError("expected comma or left brace", t)
436
437		}
438		first = false
439		// t is now a label name.
440
441		offsets = append(offsets, p.l.start, p.l.i)
442
443		if t := p.nextToken(); t != tEqual {
444			return nil, parseError("expected equal", t)
445		}
446		if t := p.nextToken(); t != tLValue {
447			return nil, parseError("expected label value", t)
448		}
449		if !utf8.Valid(p.l.buf()) {
450			return nil, errors.New("invalid UTF-8 label value")
451		}
452
453		// The openMetricsLexer ensures the value string is quoted. Strip first
454		// and last character.
455		offsets = append(offsets, p.l.start+1, p.l.i-1)
456	}
457}
458
459func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error) {
460	if t != tValue {
461		return 0, parseError(fmt.Sprintf("expected value after %v", after), t)
462	}
463	val, err := parseFloat(yoloString(p.l.buf()[1:]))
464	if err != nil {
465		return 0, err
466	}
467	// Ensure canonical NaN value.
468	if math.IsNaN(p.exemplarVal) {
469		val = math.Float64frombits(value.NormalNaN)
470	}
471	return val, nil
472}
473
474func (p *OpenMetricsParser) validateNameForExemplar(name []byte) error {
475	for _, suffix := range allowedSuffixes {
476		if bytes.HasSuffix(name, suffix) {
477			return nil
478		}
479	}
480	return fmt.Errorf("metric name %v does not support exemplars", string(name))
481}
482