1// Copyright 2017 The Prometheus Authors 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13 14//go:generate go get -u modernc.org/golex 15//go:generate golex -o=promlex.l.go promlex.l 16 17package textparse 18 19import ( 20 "fmt" 21 "io" 22 "math" 23 "sort" 24 "strconv" 25 "strings" 26 "unicode/utf8" 27 "unsafe" 28 29 "github.com/pkg/errors" 30 31 "github.com/prometheus/prometheus/pkg/exemplar" 32 "github.com/prometheus/prometheus/pkg/labels" 33 "github.com/prometheus/prometheus/pkg/value" 34) 35 36type promlexer struct { 37 b []byte 38 i int 39 start int 40 err error 41 state int 42} 43 44type token int 45 46const ( 47 tInvalid token = -1 48 tEOF token = 0 49 tLinebreak token = iota 50 tWhitespace 51 tHelp 52 tType 53 tUnit 54 tEofWord 55 tText 56 tComment 57 tBlank 58 tMName 59 tBraceOpen 60 tBraceClose 61 tLName 62 tLValue 63 tComma 64 tEqual 65 tTimestamp 66 tValue 67) 68 69func (t token) String() string { 70 switch t { 71 case tInvalid: 72 return "INVALID" 73 case tEOF: 74 return "EOF" 75 case tLinebreak: 76 return "LINEBREAK" 77 case tWhitespace: 78 return "WHITESPACE" 79 case tHelp: 80 return "HELP" 81 case tType: 82 return "TYPE" 83 case tUnit: 84 return "UNIT" 85 case tEofWord: 86 return "EOFWORD" 87 case tText: 88 return "TEXT" 89 case tComment: 90 return "COMMENT" 91 case tBlank: 92 return "BLANK" 93 case tMName: 94 return "MNAME" 95 case tBraceOpen: 96 return "BOPEN" 97 case tBraceClose: 98 return "BCLOSE" 99 case tLName: 100 return "LNAME" 101 case tLValue: 102 return "LVALUE" 103 case tEqual: 104 return "EQUAL" 105 case tComma: 106 return "COMMA" 107 case tTimestamp: 108 return "TIMESTAMP" 109 case tValue: 110 return "VALUE" 111 } 112 return fmt.Sprintf("<invalid: %d>", t) 113} 114 115// buf returns the buffer of the current token. 116func (l *promlexer) buf() []byte { 117 return l.b[l.start:l.i] 118} 119 120func (l *promlexer) cur() byte { 121 return l.b[l.i] 122} 123 124// next advances the promlexer to the next character. 125func (l *promlexer) next() byte { 126 l.i++ 127 if l.i >= len(l.b) { 128 l.err = io.EOF 129 return byte(tEOF) 130 } 131 // Lex struggles with null bytes. If we are in a label value or help string, where 132 // they are allowed, consume them here immediately. 133 for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) { 134 l.i++ 135 } 136 return l.b[l.i] 137} 138 139func (l *promlexer) Error(es string) { 140 l.err = errors.New(es) 141} 142 143// PromParser parses samples from a byte slice of samples in the official 144// Prometheus text exposition format. 145type PromParser struct { 146 l *promlexer 147 series []byte 148 text []byte 149 mtype MetricType 150 val float64 151 ts int64 152 hasTS bool 153 start int 154 offsets []int 155} 156 157// NewPromParser returns a new parser of the byte slice. 158func NewPromParser(b []byte) Parser { 159 return &PromParser{l: &promlexer{b: append(b, '\n')}} 160} 161 162// Series returns the bytes of the series, the timestamp if set, and the value 163// of the current sample. 164func (p *PromParser) Series() ([]byte, *int64, float64) { 165 if p.hasTS { 166 return p.series, &p.ts, p.val 167 } 168 return p.series, nil, p.val 169} 170 171// Help returns the metric name and help text in the current entry. 172// Must only be called after Next returned a help entry. 173// The returned byte slices become invalid after the next call to Next. 174func (p *PromParser) Help() ([]byte, []byte) { 175 m := p.l.b[p.offsets[0]:p.offsets[1]] 176 177 // Replacer causes allocations. Replace only when necessary. 178 if strings.IndexByte(yoloString(p.text), byte('\\')) >= 0 { 179 return m, []byte(helpReplacer.Replace(string(p.text))) 180 } 181 return m, p.text 182} 183 184// Type returns the metric name and type in the current entry. 185// Must only be called after Next returned a type entry. 186// The returned byte slices become invalid after the next call to Next. 187func (p *PromParser) Type() ([]byte, MetricType) { 188 return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype 189} 190 191// Unit returns the metric name and unit in the current entry. 192// Must only be called after Next returned a unit entry. 193// The returned byte slices become invalid after the next call to Next. 194func (p *PromParser) Unit() ([]byte, []byte) { 195 // The Prometheus format does not have units. 196 return nil, nil 197} 198 199// Comment returns the text of the current comment. 200// Must only be called after Next returned a comment entry. 201// The returned byte slice becomes invalid after the next call to Next. 202func (p *PromParser) Comment() []byte { 203 return p.text 204} 205 206// Metric writes the labels of the current sample into the passed labels. 207// It returns the string from which the metric was parsed. 208func (p *PromParser) Metric(l *labels.Labels) string { 209 // Allocate the full immutable string immediately, so we just 210 // have to create references on it below. 211 s := string(p.series) 212 213 *l = append(*l, labels.Label{ 214 Name: labels.MetricName, 215 Value: s[:p.offsets[0]-p.start], 216 }) 217 218 for i := 1; i < len(p.offsets); i += 4 { 219 a := p.offsets[i] - p.start 220 b := p.offsets[i+1] - p.start 221 c := p.offsets[i+2] - p.start 222 d := p.offsets[i+3] - p.start 223 224 // Replacer causes allocations. Replace only when necessary. 225 if strings.IndexByte(s[c:d], byte('\\')) >= 0 { 226 *l = append(*l, labels.Label{Name: s[a:b], Value: lvalReplacer.Replace(s[c:d])}) 227 continue 228 } 229 *l = append(*l, labels.Label{Name: s[a:b], Value: s[c:d]}) 230 } 231 232 // Sort labels to maintain the sorted labels invariant. 233 sort.Sort(*l) 234 235 return s 236} 237 238// Exemplar writes the exemplar of the current sample into the passed 239// exemplar. It returns if an exemplar exists. 240func (p *PromParser) Exemplar(e *exemplar.Exemplar) bool { 241 return false 242} 243 244// nextToken returns the next token from the promlexer. It skips over tabs 245// and spaces. 246func (p *PromParser) nextToken() token { 247 for { 248 if tok := p.l.Lex(); tok != tWhitespace { 249 return tok 250 } 251 } 252} 253 254func parseError(exp string, got token) error { 255 return errors.Errorf("%s, got %q", exp, got) 256} 257 258// Next advances the parser to the next sample. It returns false if no 259// more samples were read or an error occurred. 260func (p *PromParser) Next() (Entry, error) { 261 var err error 262 263 p.start = p.l.i 264 p.offsets = p.offsets[:0] 265 266 switch t := p.nextToken(); t { 267 case tEOF: 268 return EntryInvalid, io.EOF 269 case tLinebreak: 270 // Allow full blank lines. 271 return p.Next() 272 273 case tHelp, tType: 274 switch t := p.nextToken(); t { 275 case tMName: 276 p.offsets = append(p.offsets, p.l.start, p.l.i) 277 default: 278 return EntryInvalid, parseError("expected metric name after HELP", t) 279 } 280 switch t := p.nextToken(); t { 281 case tText: 282 if len(p.l.buf()) > 1 { 283 p.text = p.l.buf()[1:] 284 } else { 285 p.text = []byte{} 286 } 287 default: 288 return EntryInvalid, parseError("expected text in HELP", t) 289 } 290 switch t { 291 case tType: 292 switch s := yoloString(p.text); s { 293 case "counter": 294 p.mtype = MetricTypeCounter 295 case "gauge": 296 p.mtype = MetricTypeGauge 297 case "histogram": 298 p.mtype = MetricTypeHistogram 299 case "summary": 300 p.mtype = MetricTypeSummary 301 case "untyped": 302 p.mtype = MetricTypeUnknown 303 default: 304 return EntryInvalid, errors.Errorf("invalid metric type %q", s) 305 } 306 case tHelp: 307 if !utf8.Valid(p.text) { 308 return EntryInvalid, errors.Errorf("help text is not a valid utf8 string") 309 } 310 } 311 if t := p.nextToken(); t != tLinebreak { 312 return EntryInvalid, parseError("linebreak expected after metadata", t) 313 } 314 switch t { 315 case tHelp: 316 return EntryHelp, nil 317 case tType: 318 return EntryType, nil 319 } 320 case tComment: 321 p.text = p.l.buf() 322 if t := p.nextToken(); t != tLinebreak { 323 return EntryInvalid, parseError("linebreak expected after comment", t) 324 } 325 return EntryComment, nil 326 327 case tMName: 328 p.offsets = append(p.offsets, p.l.i) 329 p.series = p.l.b[p.start:p.l.i] 330 331 t2 := p.nextToken() 332 if t2 == tBraceOpen { 333 if err := p.parseLVals(); err != nil { 334 return EntryInvalid, err 335 } 336 p.series = p.l.b[p.start:p.l.i] 337 t2 = p.nextToken() 338 } 339 if t2 != tValue { 340 return EntryInvalid, parseError("expected value after metric", t) 341 } 342 if p.val, err = parseFloat(yoloString(p.l.buf())); err != nil { 343 return EntryInvalid, err 344 } 345 // Ensure canonical NaN value. 346 if math.IsNaN(p.val) { 347 p.val = math.Float64frombits(value.NormalNaN) 348 } 349 p.hasTS = false 350 switch p.nextToken() { 351 case tLinebreak: 352 break 353 case tTimestamp: 354 p.hasTS = true 355 if p.ts, err = strconv.ParseInt(yoloString(p.l.buf()), 10, 64); err != nil { 356 return EntryInvalid, err 357 } 358 if t2 := p.nextToken(); t2 != tLinebreak { 359 return EntryInvalid, parseError("expected next entry after timestamp", t) 360 } 361 default: 362 return EntryInvalid, parseError("expected timestamp or new record", t) 363 } 364 return EntrySeries, nil 365 366 default: 367 err = errors.Errorf("%q is not a valid start token", t) 368 } 369 return EntryInvalid, err 370} 371 372func (p *PromParser) parseLVals() error { 373 t := p.nextToken() 374 for { 375 switch t { 376 case tBraceClose: 377 return nil 378 case tLName: 379 default: 380 return parseError("expected label name", t) 381 } 382 p.offsets = append(p.offsets, p.l.start, p.l.i) 383 384 if t := p.nextToken(); t != tEqual { 385 return parseError("expected equal", t) 386 } 387 if t := p.nextToken(); t != tLValue { 388 return parseError("expected label value", t) 389 } 390 if !utf8.Valid(p.l.buf()) { 391 return errors.Errorf("invalid UTF-8 label value") 392 } 393 394 // The promlexer ensures the value string is quoted. Strip first 395 // and last character. 396 p.offsets = append(p.offsets, p.l.start+1, p.l.i-1) 397 398 // Free trailing commas are allowed. 399 if t = p.nextToken(); t == tComma { 400 t = p.nextToken() 401 } 402 } 403} 404 405var lvalReplacer = strings.NewReplacer( 406 `\"`, "\"", 407 `\\`, "\\", 408 `\n`, "\n", 409) 410 411var helpReplacer = strings.NewReplacer( 412 `\\`, "\\", 413 `\n`, "\n", 414) 415 416func yoloString(b []byte) string { 417 return *((*string)(unsafe.Pointer(&b))) 418} 419 420func parseFloat(s string) (float64, error) { 421 // Keep to pre-Go 1.13 float formats. 422 if strings.ContainsAny(s, "pP_") { 423 return 0, fmt.Errorf("unsupported character in float") 424 } 425 return strconv.ParseFloat(s, 64) 426} 427