1// Copyright 2017 The Prometheus Authors 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13 14//go:generate go get github.com/cznic/golex 15//go:generate golex -o=promlex.l.go promlex.l 16 17package textparse 18 19import ( 20 "errors" 21 "fmt" 22 "io" 23 "math" 24 "sort" 25 "strconv" 26 "strings" 27 "unicode/utf8" 28 "unsafe" 29 30 "github.com/prometheus/prometheus/pkg/labels" 31 "github.com/prometheus/prometheus/pkg/value" 32) 33 34type promlexer struct { 35 b []byte 36 i int 37 start int 38 err error 39 state int 40} 41 42type token int 43 44const ( 45 tInvalid token = -1 46 tEOF token = 0 47 tLinebreak token = iota 48 tWhitespace 49 tHelp 50 tType 51 tUnit 52 tEofWord 53 tText 54 tComment 55 tBlank 56 tMName 57 tBraceOpen 58 tBraceClose 59 tLName 60 tLValue 61 tComma 62 tEqual 63 tTimestamp 64 tValue 65) 66 67func (t token) String() string { 68 switch t { 69 case tInvalid: 70 return "INVALID" 71 case tEOF: 72 return "EOF" 73 case tLinebreak: 74 return "LINEBREAK" 75 case tWhitespace: 76 return "WHITESPACE" 77 case tHelp: 78 return "HELP" 79 case tType: 80 return "TYPE" 81 case tUnit: 82 return "UNIT" 83 case tEofWord: 84 return "EOFWORD" 85 case tText: 86 return "TEXT" 87 case tComment: 88 return "COMMENT" 89 case tBlank: 90 return "BLANK" 91 case tMName: 92 return "MNAME" 93 case tBraceOpen: 94 return "BOPEN" 95 case tBraceClose: 96 return "BCLOSE" 97 case tLName: 98 return "LNAME" 99 case tLValue: 100 return "LVALUE" 101 case tEqual: 102 return "EQUAL" 103 case tComma: 104 return "COMMA" 105 case tTimestamp: 106 return "TIMESTAMP" 107 case tValue: 108 return "VALUE" 109 } 110 return fmt.Sprintf("<invalid: %d>", t) 111} 112 113// buf returns the buffer of the current token. 114func (l *promlexer) buf() []byte { 115 return l.b[l.start:l.i] 116} 117 118func (l *promlexer) cur() byte { 119 return l.b[l.i] 120} 121 122// next advances the promlexer to the next character. 123func (l *promlexer) next() byte { 124 l.i++ 125 if l.i >= len(l.b) { 126 l.err = io.EOF 127 return byte(tEOF) 128 } 129 // Lex struggles with null bytes. If we are in a label value or help string, where 130 // they are allowed, consume them here immediately. 131 for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) { 132 l.i++ 133 } 134 return l.b[l.i] 135} 136 137func (l *promlexer) Error(es string) { 138 l.err = errors.New(es) 139} 140 141// PromParser parses samples from a byte slice of samples in the official 142// Prometheus text exposition format. 143type PromParser struct { 144 l *promlexer 145 series []byte 146 text []byte 147 mtype MetricType 148 val float64 149 ts int64 150 hasTS bool 151 start int 152 offsets []int 153} 154 155// New returns a new parser of the byte slice. 156func NewPromParser(b []byte) Parser { 157 return &PromParser{l: &promlexer{b: append(b, '\n')}} 158} 159 160// Series returns the bytes of the series, the timestamp if set, and the value 161// of the current sample. 162func (p *PromParser) Series() ([]byte, *int64, float64) { 163 if p.hasTS { 164 return p.series, &p.ts, p.val 165 } 166 return p.series, nil, p.val 167} 168 169// Help returns the metric name and help text in the current entry. 170// Must only be called after Next returned a help entry. 171// The returned byte slices become invalid after the next call to Next. 172func (p *PromParser) Help() ([]byte, []byte) { 173 m := p.l.b[p.offsets[0]:p.offsets[1]] 174 175 // Replacer causes allocations. Replace only when necessary. 176 if strings.IndexByte(yoloString(p.text), byte('\\')) >= 0 { 177 return m, []byte(helpReplacer.Replace(string(p.text))) 178 } 179 return m, p.text 180} 181 182// Type returns the metric name and type in the current entry. 183// Must only be called after Next returned a type entry. 184// The returned byte slices become invalid after the next call to Next. 185func (p *PromParser) Type() ([]byte, MetricType) { 186 return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype 187} 188 189// Unit returns the metric name and unit in the current entry. 190// Must only be called after Next returned a unit entry. 191// The returned byte slices become invalid after the next call to Next. 192func (p *PromParser) Unit() ([]byte, []byte) { 193 // The Prometheus format does not have units. 194 return nil, nil 195} 196 197// Comment returns the text of the current comment. 198// Must only be called after Next returned a comment entry. 199// The returned byte slice becomes invalid after the next call to Next. 200func (p *PromParser) Comment() []byte { 201 return p.text 202} 203 204// Metric writes the labels of the current sample into the passed labels. 205// It returns the string from which the metric was parsed. 206func (p *PromParser) Metric(l *labels.Labels) string { 207 // Allocate the full immutable string immediately, so we just 208 // have to create references on it below. 209 s := string(p.series) 210 211 *l = append(*l, labels.Label{ 212 Name: labels.MetricName, 213 Value: s[:p.offsets[0]-p.start], 214 }) 215 216 for i := 1; i < len(p.offsets); i += 4 { 217 a := p.offsets[i] - p.start 218 b := p.offsets[i+1] - p.start 219 c := p.offsets[i+2] - p.start 220 d := p.offsets[i+3] - p.start 221 222 // Replacer causes allocations. Replace only when necessary. 223 if strings.IndexByte(s[c:d], byte('\\')) >= 0 { 224 *l = append(*l, labels.Label{Name: s[a:b], Value: lvalReplacer.Replace(s[c:d])}) 225 continue 226 } 227 *l = append(*l, labels.Label{Name: s[a:b], Value: s[c:d]}) 228 } 229 230 // Sort labels. We can skip the first entry since the metric name is 231 // already at the right place. 232 sort.Sort((*l)[1:]) 233 234 return s 235} 236 237// nextToken returns the next token from the promlexer. It skips over tabs 238// and spaces. 239func (p *PromParser) nextToken() token { 240 for { 241 if tok := p.l.Lex(); tok != tWhitespace { 242 return tok 243 } 244 } 245} 246 247func parseError(exp string, got token) error { 248 return fmt.Errorf("%s, got %q", exp, got) 249} 250 251// Next advances the parser to the next sample. It returns false if no 252// more samples were read or an error occurred. 253func (p *PromParser) Next() (Entry, error) { 254 var err error 255 256 p.start = p.l.i 257 p.offsets = p.offsets[:0] 258 259 switch t := p.nextToken(); t { 260 case tEOF: 261 return EntryInvalid, io.EOF 262 case tLinebreak: 263 // Allow full blank lines. 264 return p.Next() 265 266 case tHelp, tType: 267 switch t := p.nextToken(); t { 268 case tMName: 269 p.offsets = append(p.offsets, p.l.start, p.l.i) 270 default: 271 return EntryInvalid, parseError("expected metric name after HELP", t) 272 } 273 switch t := p.nextToken(); t { 274 case tText: 275 if len(p.l.buf()) > 1 { 276 p.text = p.l.buf()[1:] 277 } else { 278 p.text = []byte{} 279 } 280 default: 281 return EntryInvalid, parseError("expected text in HELP", t) 282 } 283 switch t { 284 case tType: 285 switch s := yoloString(p.text); s { 286 case "counter": 287 p.mtype = MetricTypeCounter 288 case "gauge": 289 p.mtype = MetricTypeGauge 290 case "histogram": 291 p.mtype = MetricTypeHistogram 292 case "summary": 293 p.mtype = MetricTypeSummary 294 case "untyped": 295 p.mtype = MetricTypeUnknown 296 default: 297 return EntryInvalid, fmt.Errorf("invalid metric type %q", s) 298 } 299 case tHelp: 300 if !utf8.Valid(p.text) { 301 return EntryInvalid, fmt.Errorf("help text is not a valid utf8 string") 302 } 303 } 304 if t := p.nextToken(); t != tLinebreak { 305 return EntryInvalid, parseError("linebreak expected after metadata", t) 306 } 307 switch t { 308 case tHelp: 309 return EntryHelp, nil 310 case tType: 311 return EntryType, nil 312 } 313 case tComment: 314 p.text = p.l.buf() 315 if t := p.nextToken(); t != tLinebreak { 316 return EntryInvalid, parseError("linebreak expected after comment", t) 317 } 318 return EntryComment, nil 319 320 case tMName: 321 p.offsets = append(p.offsets, p.l.i) 322 p.series = p.l.b[p.start:p.l.i] 323 324 t2 := p.nextToken() 325 if t2 == tBraceOpen { 326 if err := p.parseLVals(); err != nil { 327 return EntryInvalid, err 328 } 329 p.series = p.l.b[p.start:p.l.i] 330 t2 = p.nextToken() 331 } 332 if t2 != tValue { 333 return EntryInvalid, parseError("expected value after metric", t) 334 } 335 if p.val, err = strconv.ParseFloat(yoloString(p.l.buf()), 64); err != nil { 336 return EntryInvalid, err 337 } 338 // Ensure canonical NaN value. 339 if math.IsNaN(p.val) { 340 p.val = math.Float64frombits(value.NormalNaN) 341 } 342 p.hasTS = false 343 switch p.nextToken() { 344 case tLinebreak: 345 break 346 case tTimestamp: 347 p.hasTS = true 348 if p.ts, err = strconv.ParseInt(yoloString(p.l.buf()), 10, 64); err != nil { 349 return EntryInvalid, err 350 } 351 if t2 := p.nextToken(); t2 != tLinebreak { 352 return EntryInvalid, parseError("expected next entry after timestamp", t) 353 } 354 default: 355 return EntryInvalid, parseError("expected timestamp or new record", t) 356 } 357 return EntrySeries, nil 358 359 default: 360 err = fmt.Errorf("%q is not a valid start token", t) 361 } 362 return EntryInvalid, err 363} 364 365func (p *PromParser) parseLVals() error { 366 t := p.nextToken() 367 for { 368 switch t { 369 case tBraceClose: 370 return nil 371 case tLName: 372 default: 373 return parseError("expected label name", t) 374 } 375 p.offsets = append(p.offsets, p.l.start, p.l.i) 376 377 if t := p.nextToken(); t != tEqual { 378 return parseError("expected equal", t) 379 } 380 if t := p.nextToken(); t != tLValue { 381 return parseError("expected label value", t) 382 } 383 if !utf8.Valid(p.l.buf()) { 384 return fmt.Errorf("invalid UTF-8 label value") 385 } 386 387 // The promlexer ensures the value string is quoted. Strip first 388 // and last character. 389 p.offsets = append(p.offsets, p.l.start+1, p.l.i-1) 390 391 // Free trailing commas are allowed. 392 if t = p.nextToken(); t == tComma { 393 t = p.nextToken() 394 } 395 } 396} 397 398var lvalReplacer = strings.NewReplacer( 399 `\"`, "\"", 400 `\\`, "\\", 401 `\n`, "\n", 402) 403 404var helpReplacer = strings.NewReplacer( 405 `\\`, "\\", 406 `\n`, "\n", 407) 408 409func yoloString(b []byte) string { 410 return *((*string)(unsafe.Pointer(&b))) 411} 412