1// Copyright 2017 The Prometheus Authors 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13 14//go:generate go get -u modernc.org/golex 15//go:generate golex -o=promlex.l.go promlex.l 16 17package textparse 18 19import ( 20 "fmt" 21 "io" 22 "math" 23 "sort" 24 "strconv" 25 "strings" 26 "unicode/utf8" 27 "unsafe" 28 29 "github.com/pkg/errors" 30 31 "github.com/prometheus/prometheus/pkg/labels" 32 "github.com/prometheus/prometheus/pkg/value" 33) 34 35type promlexer struct { 36 b []byte 37 i int 38 start int 39 err error 40 state int 41} 42 43type token int 44 45const ( 46 tInvalid token = -1 47 tEOF token = 0 48 tLinebreak token = iota 49 tWhitespace 50 tHelp 51 tType 52 tUnit 53 tEofWord 54 tText 55 tComment 56 tBlank 57 tMName 58 tBraceOpen 59 tBraceClose 60 tLName 61 tLValue 62 tComma 63 tEqual 64 tTimestamp 65 tValue 66) 67 68func (t token) String() string { 69 switch t { 70 case tInvalid: 71 return "INVALID" 72 case tEOF: 73 return "EOF" 74 case tLinebreak: 75 return "LINEBREAK" 76 case tWhitespace: 77 return "WHITESPACE" 78 case tHelp: 79 return "HELP" 80 case tType: 81 return "TYPE" 82 case tUnit: 83 return "UNIT" 84 case tEofWord: 85 return "EOFWORD" 86 case tText: 87 return "TEXT" 88 case tComment: 89 return "COMMENT" 90 case tBlank: 91 return "BLANK" 92 case tMName: 93 return "MNAME" 94 case tBraceOpen: 95 return "BOPEN" 96 case tBraceClose: 97 return "BCLOSE" 98 case tLName: 99 return "LNAME" 100 case tLValue: 101 return "LVALUE" 102 case tEqual: 103 return "EQUAL" 104 case tComma: 105 return "COMMA" 106 case tTimestamp: 107 return "TIMESTAMP" 108 case tValue: 109 return "VALUE" 110 } 111 return fmt.Sprintf("<invalid: %d>", t) 112} 113 114// buf returns the buffer of the current token. 115func (l *promlexer) buf() []byte { 116 return l.b[l.start:l.i] 117} 118 119func (l *promlexer) cur() byte { 120 return l.b[l.i] 121} 122 123// next advances the promlexer to the next character. 124func (l *promlexer) next() byte { 125 l.i++ 126 if l.i >= len(l.b) { 127 l.err = io.EOF 128 return byte(tEOF) 129 } 130 // Lex struggles with null bytes. If we are in a label value or help string, where 131 // they are allowed, consume them here immediately. 132 for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) { 133 l.i++ 134 } 135 return l.b[l.i] 136} 137 138func (l *promlexer) Error(es string) { 139 l.err = errors.New(es) 140} 141 142// PromParser parses samples from a byte slice of samples in the official 143// Prometheus text exposition format. 144type PromParser struct { 145 l *promlexer 146 series []byte 147 text []byte 148 mtype MetricType 149 val float64 150 ts int64 151 hasTS bool 152 start int 153 offsets []int 154} 155 156// New returns a new parser of the byte slice. 157func NewPromParser(b []byte) Parser { 158 return &PromParser{l: &promlexer{b: append(b, '\n')}} 159} 160 161// Series returns the bytes of the series, the timestamp if set, and the value 162// of the current sample. 163func (p *PromParser) Series() ([]byte, *int64, float64) { 164 if p.hasTS { 165 return p.series, &p.ts, p.val 166 } 167 return p.series, nil, p.val 168} 169 170// Help returns the metric name and help text in the current entry. 171// Must only be called after Next returned a help entry. 172// The returned byte slices become invalid after the next call to Next. 173func (p *PromParser) Help() ([]byte, []byte) { 174 m := p.l.b[p.offsets[0]:p.offsets[1]] 175 176 // Replacer causes allocations. Replace only when necessary. 177 if strings.IndexByte(yoloString(p.text), byte('\\')) >= 0 { 178 return m, []byte(helpReplacer.Replace(string(p.text))) 179 } 180 return m, p.text 181} 182 183// Type returns the metric name and type in the current entry. 184// Must only be called after Next returned a type entry. 185// The returned byte slices become invalid after the next call to Next. 186func (p *PromParser) Type() ([]byte, MetricType) { 187 return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype 188} 189 190// Unit returns the metric name and unit in the current entry. 191// Must only be called after Next returned a unit entry. 192// The returned byte slices become invalid after the next call to Next. 193func (p *PromParser) Unit() ([]byte, []byte) { 194 // The Prometheus format does not have units. 195 return nil, nil 196} 197 198// Comment returns the text of the current comment. 199// Must only be called after Next returned a comment entry. 200// The returned byte slice becomes invalid after the next call to Next. 201func (p *PromParser) Comment() []byte { 202 return p.text 203} 204 205// Metric writes the labels of the current sample into the passed labels. 206// It returns the string from which the metric was parsed. 207func (p *PromParser) Metric(l *labels.Labels) string { 208 // Allocate the full immutable string immediately, so we just 209 // have to create references on it below. 210 s := string(p.series) 211 212 *l = append(*l, labels.Label{ 213 Name: labels.MetricName, 214 Value: s[:p.offsets[0]-p.start], 215 }) 216 217 for i := 1; i < len(p.offsets); i += 4 { 218 a := p.offsets[i] - p.start 219 b := p.offsets[i+1] - p.start 220 c := p.offsets[i+2] - p.start 221 d := p.offsets[i+3] - p.start 222 223 // Replacer causes allocations. Replace only when necessary. 224 if strings.IndexByte(s[c:d], byte('\\')) >= 0 { 225 *l = append(*l, labels.Label{Name: s[a:b], Value: lvalReplacer.Replace(s[c:d])}) 226 continue 227 } 228 *l = append(*l, labels.Label{Name: s[a:b], Value: s[c:d]}) 229 } 230 231 // Sort labels to maintain the sorted labels invariant. 232 sort.Sort(*l) 233 234 return s 235} 236 237// nextToken returns the next token from the promlexer. It skips over tabs 238// and spaces. 239func (p *PromParser) nextToken() token { 240 for { 241 if tok := p.l.Lex(); tok != tWhitespace { 242 return tok 243 } 244 } 245} 246 247func parseError(exp string, got token) error { 248 return errors.Errorf("%s, got %q", exp, got) 249} 250 251// Next advances the parser to the next sample. It returns false if no 252// more samples were read or an error occurred. 253func (p *PromParser) Next() (Entry, error) { 254 var err error 255 256 p.start = p.l.i 257 p.offsets = p.offsets[:0] 258 259 switch t := p.nextToken(); t { 260 case tEOF: 261 return EntryInvalid, io.EOF 262 case tLinebreak: 263 // Allow full blank lines. 264 return p.Next() 265 266 case tHelp, tType: 267 switch t := p.nextToken(); t { 268 case tMName: 269 p.offsets = append(p.offsets, p.l.start, p.l.i) 270 default: 271 return EntryInvalid, parseError("expected metric name after HELP", t) 272 } 273 switch t := p.nextToken(); t { 274 case tText: 275 if len(p.l.buf()) > 1 { 276 p.text = p.l.buf()[1:] 277 } else { 278 p.text = []byte{} 279 } 280 default: 281 return EntryInvalid, parseError("expected text in HELP", t) 282 } 283 switch t { 284 case tType: 285 switch s := yoloString(p.text); s { 286 case "counter": 287 p.mtype = MetricTypeCounter 288 case "gauge": 289 p.mtype = MetricTypeGauge 290 case "histogram": 291 p.mtype = MetricTypeHistogram 292 case "summary": 293 p.mtype = MetricTypeSummary 294 case "untyped": 295 p.mtype = MetricTypeUnknown 296 default: 297 return EntryInvalid, errors.Errorf("invalid metric type %q", s) 298 } 299 case tHelp: 300 if !utf8.Valid(p.text) { 301 return EntryInvalid, errors.Errorf("help text is not a valid utf8 string") 302 } 303 } 304 if t := p.nextToken(); t != tLinebreak { 305 return EntryInvalid, parseError("linebreak expected after metadata", t) 306 } 307 switch t { 308 case tHelp: 309 return EntryHelp, nil 310 case tType: 311 return EntryType, nil 312 } 313 case tComment: 314 p.text = p.l.buf() 315 if t := p.nextToken(); t != tLinebreak { 316 return EntryInvalid, parseError("linebreak expected after comment", t) 317 } 318 return EntryComment, nil 319 320 case tMName: 321 p.offsets = append(p.offsets, p.l.i) 322 p.series = p.l.b[p.start:p.l.i] 323 324 t2 := p.nextToken() 325 if t2 == tBraceOpen { 326 if err := p.parseLVals(); err != nil { 327 return EntryInvalid, err 328 } 329 p.series = p.l.b[p.start:p.l.i] 330 t2 = p.nextToken() 331 } 332 if t2 != tValue { 333 return EntryInvalid, parseError("expected value after metric", t) 334 } 335 if p.val, err = strconv.ParseFloat(yoloString(p.l.buf()), 64); err != nil { 336 return EntryInvalid, err 337 } 338 // Ensure canonical NaN value. 339 if math.IsNaN(p.val) { 340 p.val = math.Float64frombits(value.NormalNaN) 341 } 342 p.hasTS = false 343 switch p.nextToken() { 344 case tLinebreak: 345 break 346 case tTimestamp: 347 p.hasTS = true 348 if p.ts, err = strconv.ParseInt(yoloString(p.l.buf()), 10, 64); err != nil { 349 return EntryInvalid, err 350 } 351 if t2 := p.nextToken(); t2 != tLinebreak { 352 return EntryInvalid, parseError("expected next entry after timestamp", t) 353 } 354 default: 355 return EntryInvalid, parseError("expected timestamp or new record", t) 356 } 357 return EntrySeries, nil 358 359 default: 360 err = errors.Errorf("%q is not a valid start token", t) 361 } 362 return EntryInvalid, err 363} 364 365func (p *PromParser) parseLVals() error { 366 t := p.nextToken() 367 for { 368 switch t { 369 case tBraceClose: 370 return nil 371 case tLName: 372 default: 373 return parseError("expected label name", t) 374 } 375 p.offsets = append(p.offsets, p.l.start, p.l.i) 376 377 if t := p.nextToken(); t != tEqual { 378 return parseError("expected equal", t) 379 } 380 if t := p.nextToken(); t != tLValue { 381 return parseError("expected label value", t) 382 } 383 if !utf8.Valid(p.l.buf()) { 384 return errors.Errorf("invalid UTF-8 label value") 385 } 386 387 // The promlexer ensures the value string is quoted. Strip first 388 // and last character. 389 p.offsets = append(p.offsets, p.l.start+1, p.l.i-1) 390 391 // Free trailing commas are allowed. 392 if t = p.nextToken(); t == tComma { 393 t = p.nextToken() 394 } 395 } 396} 397 398var lvalReplacer = strings.NewReplacer( 399 `\"`, "\"", 400 `\\`, "\\", 401 `\n`, "\n", 402) 403 404var helpReplacer = strings.NewReplacer( 405 `\\`, "\\", 406 `\n`, "\n", 407) 408 409func yoloString(b []byte) string { 410 return *((*string)(unsafe.Pointer(&b))) 411} 412