1// Copyright 2018 The Prometheus Authors 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13 14//go:generate go get -u modernc.org/golex 15//go:generate golex -o=openmetricslex.l.go openmetricslex.l 16 17package textparse 18 19import ( 20 "bytes" 21 "fmt" 22 "io" 23 "math" 24 "sort" 25 "strings" 26 "unicode/utf8" 27 28 "github.com/pkg/errors" 29 30 "github.com/prometheus/prometheus/pkg/exemplar" 31 "github.com/prometheus/prometheus/pkg/labels" 32 "github.com/prometheus/prometheus/pkg/value" 33) 34 35var allowedSuffixes = [][]byte{[]byte("_total"), []byte("_bucket")} 36 37type openMetricsLexer struct { 38 b []byte 39 i int 40 start int 41 err error 42 state int 43} 44 45// buf returns the buffer of the current token. 46func (l *openMetricsLexer) buf() []byte { 47 return l.b[l.start:l.i] 48} 49 50func (l *openMetricsLexer) cur() byte { 51 if l.i < len(l.b) { 52 return l.b[l.i] 53 } 54 return byte(' ') 55} 56 57// next advances the openMetricsLexer to the next character. 58func (l *openMetricsLexer) next() byte { 59 l.i++ 60 if l.i >= len(l.b) { 61 l.err = io.EOF 62 return byte(tEOF) 63 } 64 // Lex struggles with null bytes. If we are in a label value or help string, where 65 // they are allowed, consume them here immediately. 66 for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) { 67 l.i++ 68 if l.i >= len(l.b) { 69 l.err = io.EOF 70 return byte(tEOF) 71 } 72 } 73 return l.b[l.i] 74} 75 76func (l *openMetricsLexer) Error(es string) { 77 l.err = errors.New(es) 78} 79 80// OpenMetricsParser parses samples from a byte slice of samples in the official 81// OpenMetrics text exposition format. 82// This is based on the working draft https://docs.google.com/document/u/1/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit 83type OpenMetricsParser struct { 84 l *openMetricsLexer 85 series []byte 86 text []byte 87 mtype MetricType 88 val float64 89 ts int64 90 hasTS bool 91 start int 92 offsets []int 93 94 eOffsets []int 95 exemplar []byte 96 exemplarVal float64 97 exemplarTs int64 98 hasExemplarTs bool 99} 100 101// NewOpenMetricsParser returns a new parser of the byte slice. 102func NewOpenMetricsParser(b []byte) Parser { 103 return &OpenMetricsParser{l: &openMetricsLexer{b: b}} 104} 105 106// Series returns the bytes of the series, the timestamp if set, and the value 107// of the current sample. 108func (p *OpenMetricsParser) Series() ([]byte, *int64, float64) { 109 if p.hasTS { 110 ts := p.ts 111 return p.series, &ts, p.val 112 } 113 return p.series, nil, p.val 114} 115 116// Help returns the metric name and help text in the current entry. 117// Must only be called after Next returned a help entry. 118// The returned byte slices become invalid after the next call to Next. 119func (p *OpenMetricsParser) Help() ([]byte, []byte) { 120 m := p.l.b[p.offsets[0]:p.offsets[1]] 121 122 // Replacer causes allocations. Replace only when necessary. 123 if strings.IndexByte(yoloString(p.text), byte('\\')) >= 0 { 124 // OpenMetrics always uses the Prometheus format label value escaping. 125 return m, []byte(lvalReplacer.Replace(string(p.text))) 126 } 127 return m, p.text 128} 129 130// Type returns the metric name and type in the current entry. 131// Must only be called after Next returned a type entry. 132// The returned byte slices become invalid after the next call to Next. 133func (p *OpenMetricsParser) Type() ([]byte, MetricType) { 134 return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype 135} 136 137// Unit returns the metric name and unit in the current entry. 138// Must only be called after Next returned a unit entry. 139// The returned byte slices become invalid after the next call to Next. 140func (p *OpenMetricsParser) Unit() ([]byte, []byte) { 141 // The Prometheus format does not have units. 142 return p.l.b[p.offsets[0]:p.offsets[1]], p.text 143} 144 145// Comment returns the text of the current comment. 146// Must only be called after Next returned a comment entry. 147// The returned byte slice becomes invalid after the next call to Next. 148func (p *OpenMetricsParser) Comment() []byte { 149 return p.text 150} 151 152// Metric writes the labels of the current sample into the passed labels. 153// It returns the string from which the metric was parsed. 154func (p *OpenMetricsParser) Metric(l *labels.Labels) string { 155 // Allocate the full immutable string immediately, so we just 156 // have to create references on it below. 157 s := string(p.series) 158 159 *l = append(*l, labels.Label{ 160 Name: labels.MetricName, 161 Value: s[:p.offsets[0]-p.start], 162 }) 163 164 for i := 1; i < len(p.offsets); i += 4 { 165 a := p.offsets[i] - p.start 166 b := p.offsets[i+1] - p.start 167 c := p.offsets[i+2] - p.start 168 d := p.offsets[i+3] - p.start 169 170 // Replacer causes allocations. Replace only when necessary. 171 if strings.IndexByte(s[c:d], byte('\\')) >= 0 { 172 *l = append(*l, labels.Label{Name: s[a:b], Value: lvalReplacer.Replace(s[c:d])}) 173 continue 174 } 175 *l = append(*l, labels.Label{Name: s[a:b], Value: s[c:d]}) 176 } 177 178 // Sort labels. We can skip the first entry since the metric name is 179 // already at the right place. 180 sort.Sort((*l)[1:]) 181 182 return s 183} 184 185// Exemplar writes the exemplar of the current sample into the passed 186// exemplar. It returns the whether an exemplar exists. 187func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool { 188 if len(p.exemplar) == 0 { 189 return false 190 } 191 192 // Allocate the full immutable string immediately, so we just 193 // have to create references on it below. 194 s := string(p.exemplar) 195 196 e.Value = p.exemplarVal 197 if p.hasExemplarTs { 198 e.HasTs = true 199 e.Ts = p.exemplarTs 200 } 201 202 for i := 0; i < len(p.eOffsets); i += 4 { 203 a := p.eOffsets[i] - p.start 204 b := p.eOffsets[i+1] - p.start 205 c := p.eOffsets[i+2] - p.start 206 d := p.eOffsets[i+3] - p.start 207 208 e.Labels = append(e.Labels, labels.Label{Name: s[a:b], Value: s[c:d]}) 209 } 210 211 // Sort the labels. 212 sort.Sort(e.Labels) 213 214 return true 215} 216 217// nextToken returns the next token from the openMetricsLexer. 218func (p *OpenMetricsParser) nextToken() token { 219 tok := p.l.Lex() 220 return tok 221} 222 223// Next advances the parser to the next sample. It returns false if no 224// more samples were read or an error occurred. 225func (p *OpenMetricsParser) Next() (Entry, error) { 226 var err error 227 228 p.start = p.l.i 229 p.offsets = p.offsets[:0] 230 p.eOffsets = p.eOffsets[:0] 231 p.exemplar = p.exemplar[:0] 232 p.exemplarVal = 0 233 p.hasExemplarTs = false 234 235 switch t := p.nextToken(); t { 236 case tEOFWord: 237 if t := p.nextToken(); t != tEOF { 238 return EntryInvalid, errors.New("unexpected data after # EOF") 239 } 240 return EntryInvalid, io.EOF 241 case tEOF: 242 return EntryInvalid, errors.New("data does not end with # EOF") 243 case tHelp, tType, tUnit: 244 switch t := p.nextToken(); t { 245 case tMName: 246 p.offsets = append(p.offsets, p.l.start, p.l.i) 247 default: 248 return EntryInvalid, parseError("expected metric name after HELP", t) 249 } 250 switch t := p.nextToken(); t { 251 case tText: 252 if len(p.l.buf()) > 1 { 253 p.text = p.l.buf()[1 : len(p.l.buf())-1] 254 } else { 255 p.text = []byte{} 256 } 257 default: 258 return EntryInvalid, parseError("expected text in HELP", t) 259 } 260 switch t { 261 case tType: 262 switch s := yoloString(p.text); s { 263 case "counter": 264 p.mtype = MetricTypeCounter 265 case "gauge": 266 p.mtype = MetricTypeGauge 267 case "histogram": 268 p.mtype = MetricTypeHistogram 269 case "gaugehistogram": 270 p.mtype = MetricTypeGaugeHistogram 271 case "summary": 272 p.mtype = MetricTypeSummary 273 case "info": 274 p.mtype = MetricTypeInfo 275 case "stateset": 276 p.mtype = MetricTypeStateset 277 case "unknown": 278 p.mtype = MetricTypeUnknown 279 default: 280 return EntryInvalid, errors.Errorf("invalid metric type %q", s) 281 } 282 case tHelp: 283 if !utf8.Valid(p.text) { 284 return EntryInvalid, errors.New("help text is not a valid utf8 string") 285 } 286 } 287 switch t { 288 case tHelp: 289 return EntryHelp, nil 290 case tType: 291 return EntryType, nil 292 case tUnit: 293 m := yoloString(p.l.b[p.offsets[0]:p.offsets[1]]) 294 u := yoloString(p.text) 295 if len(u) > 0 { 296 if !strings.HasSuffix(m, u) || len(m) < len(u)+1 || p.l.b[p.offsets[1]-len(u)-1] != '_' { 297 return EntryInvalid, errors.Errorf("unit not a suffix of metric %q", m) 298 } 299 } 300 return EntryUnit, nil 301 } 302 303 case tMName: 304 p.offsets = append(p.offsets, p.l.i) 305 p.series = p.l.b[p.start:p.l.i] 306 307 t2 := p.nextToken() 308 if t2 == tBraceOpen { 309 p.offsets, err = p.parseLVals(p.offsets) 310 if err != nil { 311 return EntryInvalid, err 312 } 313 p.series = p.l.b[p.start:p.l.i] 314 t2 = p.nextToken() 315 } 316 p.val, err = p.getFloatValue(t2, "metric") 317 if err != nil { 318 return EntryInvalid, err 319 } 320 321 p.hasTS = false 322 switch t2 := p.nextToken(); t2 { 323 case tEOF: 324 return EntryInvalid, errors.New("data does not end with # EOF") 325 case tLinebreak: 326 break 327 case tComment: 328 if err := p.parseComment(); err != nil { 329 return EntryInvalid, err 330 } 331 case tTimestamp: 332 p.hasTS = true 333 var ts float64 334 // A float is enough to hold what we need for millisecond resolution. 335 if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { 336 return EntryInvalid, err 337 } 338 if math.IsNaN(ts) || math.IsInf(ts, 0) { 339 return EntryInvalid, errors.New("invalid timestamp") 340 } 341 p.ts = int64(ts * 1000) 342 switch t3 := p.nextToken(); t3 { 343 case tLinebreak: 344 case tComment: 345 if err := p.parseComment(); err != nil { 346 return EntryInvalid, err 347 } 348 default: 349 return EntryInvalid, parseError("expected next entry after timestamp", t3) 350 } 351 default: 352 return EntryInvalid, parseError("expected timestamp or # symbol", t2) 353 } 354 return EntrySeries, nil 355 356 default: 357 err = errors.Errorf("%q %q is not a valid start token", t, string(p.l.cur())) 358 } 359 return EntryInvalid, err 360} 361 362func (p *OpenMetricsParser) parseComment() error { 363 // Validate the name of the metric. It must have _total or _bucket as 364 // suffix for exemplars to be supported. 365 if err := p.validateNameForExemplar(p.series[:p.offsets[0]-p.start]); err != nil { 366 return err 367 } 368 369 var err error 370 // Parse the labels. 371 p.eOffsets, err = p.parseLVals(p.eOffsets) 372 if err != nil { 373 return err 374 } 375 p.exemplar = p.l.b[p.start:p.l.i] 376 377 // Get the value. 378 p.exemplarVal, err = p.getFloatValue(p.nextToken(), "exemplar labels") 379 if err != nil { 380 return err 381 } 382 383 // Read the optional timestamp. 384 p.hasExemplarTs = false 385 switch t2 := p.nextToken(); t2 { 386 case tEOF: 387 return errors.New("data does not end with # EOF") 388 case tLinebreak: 389 break 390 case tTimestamp: 391 p.hasExemplarTs = true 392 var ts float64 393 // A float is enough to hold what we need for millisecond resolution. 394 if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { 395 return err 396 } 397 if math.IsNaN(ts) || math.IsInf(ts, 0) { 398 return errors.New("invalid exemplar timestamp") 399 } 400 p.exemplarTs = int64(ts * 1000) 401 switch t3 := p.nextToken(); t3 { 402 case tLinebreak: 403 default: 404 return parseError("expected next entry after exemplar timestamp", t3) 405 } 406 default: 407 return parseError("expected timestamp or comment", t2) 408 } 409 return nil 410} 411 412func (p *OpenMetricsParser) parseLVals(offsets []int) ([]int, error) { 413 first := true 414 for { 415 t := p.nextToken() 416 switch t { 417 case tBraceClose: 418 return offsets, nil 419 case tComma: 420 if first { 421 return nil, parseError("expected label name or left brace", t) 422 } 423 t = p.nextToken() 424 if t != tLName { 425 return nil, parseError("expected label name", t) 426 } 427 case tLName: 428 if !first { 429 return nil, parseError("expected comma", t) 430 } 431 default: 432 if first { 433 return nil, parseError("expected label name or left brace", t) 434 } 435 return nil, parseError("expected comma or left brace", t) 436 437 } 438 first = false 439 // t is now a label name. 440 441 offsets = append(offsets, p.l.start, p.l.i) 442 443 if t := p.nextToken(); t != tEqual { 444 return nil, parseError("expected equal", t) 445 } 446 if t := p.nextToken(); t != tLValue { 447 return nil, parseError("expected label value", t) 448 } 449 if !utf8.Valid(p.l.buf()) { 450 return nil, errors.New("invalid UTF-8 label value") 451 } 452 453 // The openMetricsLexer ensures the value string is quoted. Strip first 454 // and last character. 455 offsets = append(offsets, p.l.start+1, p.l.i-1) 456 } 457} 458 459func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error) { 460 if t != tValue { 461 return 0, parseError(fmt.Sprintf("expected value after %v", after), t) 462 } 463 val, err := parseFloat(yoloString(p.l.buf()[1:])) 464 if err != nil { 465 return 0, err 466 } 467 // Ensure canonical NaN value. 468 if math.IsNaN(p.exemplarVal) { 469 val = math.Float64frombits(value.NormalNaN) 470 } 471 return val, nil 472} 473 474func (p *OpenMetricsParser) validateNameForExemplar(name []byte) error { 475 for _, suffix := range allowedSuffixes { 476 if bytes.HasSuffix(name, suffix) { 477 return nil 478 } 479 } 480 return fmt.Errorf("metric name %v does not support exemplars", string(name)) 481} 482