package hclsyntax

import (
    "bytes"

    "github.com/hashicorp/hcl2/hcl"
)

// This file is generated from scan_tokens.rl. DO NOT EDIT.
%%{
  # (except when you are actually in scan_tokens.rl here, so edit away!)

  machine hcltok;
  write data;
}%%

// scanTokens runs the hcltok state machine over data and returns the
// tokens it accumulated.
//
// data is first passed through stripUTF8BOM, and start.Byte is advanced by
// however many bytes that removed. filename and start seed the tokenAccum
// that records each token. mode selects the machine's entry point:
// scanNormal starts in "main", scanTemplate in "bareTemplate" and
// scanIdentOnly in "identOnly"; any other mode panics.
//
// If the machine stops outside a final state, the remaining bytes from ts to
// the end of data are emitted as a single TokenStringLit (top level of a bare
// template) or TokenInvalid (everything else). A TokenEOF positioned at the
// end of data is always the last token emitted.
func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
    stripData := stripUTF8BOM(data)
    start.Byte += len(data) - len(stripData)
    data = stripData

    f := &tokenAccum{
        Filename:  filename,
        Bytes:     data,
        Pos:       start,
        StartByte: start.Byte,
    }

    %%{
        include UnicodeDerived "unicode_derived.rl";

        # Byte-level description of a UTF-8 sequence: one lead byte followed
        # by the number of continuation bytes that lead byte calls for.
        # Anything that does not fit is BrokenUTF8.
        UTF8Cont = 0x80 .. 0xBF;
        AnyUTF8 = (
            0x00..0x7F |
            0xC0..0xDF . UTF8Cont |
            0xE0..0xEF . UTF8Cont . UTF8Cont |
            0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
        );
        BrokenUTF8 = any - AnyUTF8;

        # A number literal starts with a digit and may continue with digits,
        # dots and exponent markers, but may not end on a dot.
        NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
        NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
        Ident = (ID_Start | '_') (ID_Continue | '-')*;

        # Symbols that just represent themselves are handled as a single rule.
        SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`" | "'";

        EqualOp = "==";
        NotEqual = "!=";
        GreaterThanEqual = ">=";
        LessThanEqual = "<=";
        LogicalAnd = "&&";
        LogicalOr = "||";

        Ellipsis = "...";
        FatArrow = "=>";

        Newline = '\r' ? '\n';
        EndOfLine = Newline;

        BeginStringTmpl = '"';
        BeginHeredocTmpl = '<<' ('-')? Ident Newline;

        Comment = (
            # The :>> operator in these is a "finish-guarded concatenation",
            # which terminates the sequence on its left when it completes
            # the sequence on its right.
            # In the single-line comment cases this is allowing us to make
            # the trailing EndOfLine optional while still having the overall
            # pattern terminate. In the multi-line case it ensures that
            # the first comment in the file ends at the first */, rather than
            # gobbling up all of the "any*" until the _final_ */ in the file.
            ("#" (any - EndOfLine)* :>> EndOfLine?) |
            ("//" (any - EndOfLine)* :>> EndOfLine?) |
            ("/*" any* :>> "*/")
        );

        # Note: hclwrite assumes that only ASCII spaces appear between tokens,
        # and uses this assumption to recreate the spaces between tokens by
        # looking at byte offset differences. This means it will produce
        # incorrect results in the presence of tabs, but that's acceptable
        # because the canonical style (which hclwrite itself can impose
        # automatically) is to never use tabs.
        Spaces = (' ' | 0x09)+;

        # Emits the opening quote and calls into the stringTemplate scanner;
        # endStringTemplate is the matching return.
        action beginStringTemplate {
            token(TokenOQuote);
            fcall stringTemplate;
        }

        action endStringTemplate {
            token(TokenCQuote);
            fret;
        }

        # Emits the heredoc introducer, remembers its closing marker on the
        # heredocs stack and calls into the heredocTemplate scanner.
        action beginHeredocTemplate {
            token(TokenOHeredoc);
            // the token is currently the whole heredoc introducer, like
            // <<EOT or <<-EOT, followed by a newline. We want to extract
            // just the "EOT" portion that we'll use as the closing marker.

            // Skip the leading "<<" and the trailing "\n"; an optional "-"
            // prefix and an optional "\r" suffix are trimmed below.
            marker := data[ts+2:te-1]
            if marker[0] == '-' {
                marker = marker[1:]
            }
            if marker[len(marker)-1] == '\r' {
                marker = marker[:len(marker)-1]
            }

            heredocs = append(heredocs, heredocInProgress{
                Marker:      marker,
                StartOfLine: true,
            })

            fcall heredocTemplate;
        }

        action heredocLiteralEOL {
            // This action is called specifically when a heredoc literal
            // ends with a newline character.

            // This might actually be our end marker.
            topdoc := &heredocs[len(heredocs)-1]
            if topdoc.StartOfLine {
                maybeMarker := bytes.TrimSpace(data[ts:te])
                if bytes.Equal(maybeMarker, topdoc.Marker) {
                    // We actually emit two tokens here: the end-of-heredoc
                    // marker first, and then separately the newline that
                    // follows it. This then avoids issues with the closing
                    // marker consuming a newline that would normally be used
                    // to mark the end of an attribute definition.
                    // We might have either a \n sequence or an \r\n sequence
                    // here, so we must handle both.
                    // nls..nle is the byte range of the newline sequence;
                    // te is pulled back so the marker token excludes it.
                    nls := te-1
                    nle := te
                    te--
                    if data[te-1] == '\r' {
                        // back up one more byte
                        nls--
                        te--
                    }
                    token(TokenCHeredoc);
                    ts = nls
                    te = nle
                    token(TokenNewline);
                    // This heredoc is finished: pop it and return to the
                    // scanner that was active when it was opened.
                    heredocs = heredocs[:len(heredocs)-1]
                    fret;
                }
            }

            // Not the closing marker: it is ordinary literal text, and the
            // next literal begins at the start of a line.
            topdoc.StartOfLine = true;
            token(TokenStringLit);
        }

        action heredocLiteralMidline {
            // This action is called when a heredoc literal _doesn't_ end
            // with a newline character, e.g. because we're about to enter
            // an interpolation sequence.
            heredocs[len(heredocs)-1].StartOfLine = false;
            token(TokenStringLit);
        }

        action bareTemplateLiteral {
            token(TokenStringLit);
        }

        # Opens an interpolation sequence. The new brace depth is pushed onto
        # retBraces so that closeBrace knows which closing brace ends the
        # sequence and must return rather than emit an ordinary TokenCBrace.
        action beginTemplateInterp {
            token(TokenTemplateInterp);
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                // Whatever literal follows the sequence is no longer at the
                // start of a line, so heredocLiteralEOL will not test it
                // against the closing marker.
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        # Same bookkeeping as beginTemplateInterp, for a control sequence.
        action beginTemplateControl {
            token(TokenTemplateControl);
            braces++;
            retBraces = append(retBraces, braces);
            if len(heredocs) > 0 {
                heredocs[len(heredocs)-1].StartOfLine = false;
            }
            fcall main;
        }

        action openBrace {
            token(TokenOBrace);
            braces++;
        }

        # A closing brace ends a template sequence only when the current
        # depth equals the depth recorded on top of retBraces; otherwise it
        # is an ordinary closing brace.
        action closeBrace {
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                token(TokenCBrace);
                braces--;
            }
        }

        action closeTemplateSeqEatWhitespace {
            // Only consume from the retBraces stack and return if we are at
            // a suitable brace nesting level, otherwise things will get
            // confused. (Not entering this branch indicates a syntax error,
            // which we will catch in the parser.)
            if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
                token(TokenTemplateSeqEnd);
                braces--;
                retBraces = retBraces[0:len(retBraces)-1]
                fret;
            } else {
                // We intentionally generate a TokenTemplateSeqEnd here,
                // even though the user apparently wanted a brace, because
                // we want to allow the parser to catch the incorrect use
                // of a ~} to balance a generic opening brace, rather than
                // a template sequence.
                token(TokenTemplateSeqEnd);
                braces--;
            }
        }

        TemplateInterp = "${" ("~")?;
        TemplateControl = "%{" ("~")?;
        EndStringTmpl = '"';
        NewlineChars = ("\r"|"\n");
        NewlineCharsSeq = NewlineChars+;
        StringLiteralChars = (AnyUTF8 - NewlineChars);
        # Matches one character that is not an opening brace and then holds
        # it back (fhold) so it is scanned again as part of what follows.
        TemplateIgnoredNonBrace = (^'{' %{ fhold; });
        TemplateNotInterp = '$' (TemplateIgnoredNonBrace | TemplateInterp);
        TemplateNotControl = '%' (TemplateIgnoredNonBrace | TemplateControl);
        QuotedStringLiteralWithEsc = ('\\' StringLiteralChars) | (StringLiteralChars - ("$" | '%' | '"' | "\\"));
        TemplateStringLiteral = (
            (TemplateNotInterp) |
            (TemplateNotControl) |
            (QuotedStringLiteralWithEsc)+
        );
        HeredocStringLiteral = (
            (TemplateNotInterp) |
            (TemplateNotControl) |
            (StringLiteralChars - ("$" | '%'))*
        );
        BareStringLiteral = (
            (TemplateNotInterp) |
            (TemplateNotControl) |
            (StringLiteralChars - ("$" | '%'))*
        ) Newline?;

        # NOTE(review): in the scanners below the order of the alternatives
        # is significant. Per the Ragel guide a scanner prefers the longest
        # match and, on a tie, the alternative listed first. Keep the
        # catch-all rules at the bottom.

        # Contents of a quoted string template. Entered from
        # beginStringTemplate; the closing quote returns via
        # endStringTemplate.
        stringTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            EndStringTmpl         => endStringTemplate;
            TemplateStringLiteral => { token(TokenQuotedLit); };
            NewlineCharsSeq       => { token(TokenQuotedNewline); };
            AnyUTF8               => { token(TokenInvalid); };
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        # Contents of a heredoc template. Entered from beginHeredocTemplate;
        # heredocLiteralEOL returns once it recognizes the closing marker.
        heredocTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            HeredocStringLiteral EndOfLine => heredocLiteralEOL;
            HeredocStringLiteral  => heredocLiteralMidline;
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        # Entry point for scanTemplate mode: a template with no surrounding
        # quotes or heredoc markers.
        bareTemplate := |*
            TemplateInterp        => beginTemplateInterp;
            TemplateControl       => beginTemplateControl;
            BareStringLiteral     => bareTemplateLiteral;
            BrokenUTF8            => { token(TokenBadUTF8); };
        *|;

        # Entry point for scanIdentOnly mode: identifiers are recognized and
        # everything else is reported as bad UTF-8 or invalid.
        identOnly := |*
            Ident            => { token(TokenIdent) };
            BrokenUTF8       => { token(TokenBadUTF8) };
            AnyUTF8          => { token(TokenInvalid) };
        *|;

        # Entry point for scanNormal mode, and also the scanner that
        # beginTemplateInterp and beginTemplateControl call into.
        main := |*
            Spaces           => {};
            NumberLit        => { token(TokenNumberLit) };
            Ident            => { token(TokenIdent) };

            Comment          => { token(TokenComment) };
            Newline          => { token(TokenNewline) };

            EqualOp          => { token(TokenEqualOp); };
            NotEqual         => { token(TokenNotEqual); };
            GreaterThanEqual => { token(TokenGreaterThanEq); };
            LessThanEqual    => { token(TokenLessThanEq); };
            LogicalAnd       => { token(TokenAnd); };
            LogicalOr        => { token(TokenOr); };
            Ellipsis         => { token(TokenEllipsis); };
            FatArrow         => { token(TokenFatArrow); };
            SelfToken        => { selfToken() };

            "{"              => openBrace;
            "}"              => closeBrace;

            "~}"             => closeTemplateSeqEatWhitespace;

            BeginStringTmpl  => beginStringTemplate;
            BeginHeredocTmpl => beginHeredocTemplate;

            BrokenUTF8       => { token(TokenBadUTF8) };
            AnyUTF8          => { token(TokenInvalid) };
        *|;

    }%%

    // Ragel state
    p := 0  // "Pointer" into data
    pe := len(data) // End-of-data "pointer"
    ts := 0
    te := 0
    act := 0
    eof := pe
    var stack []int
    var top int

    // The entry state depends on what kind of input the caller says it has.
    var cs int // current state
    switch mode {
    case scanNormal:
        cs = hcltok_en_main
    case scanTemplate:
        cs = hcltok_en_bareTemplate
    case scanIdentOnly:
        cs = hcltok_en_identOnly
    default:
        panic("invalid scanMode")
    }

    braces := 0
    var retBraces []int // stack of brace levels that cause us to use fret
    var heredocs []heredocInProgress // stack of heredocs we're currently processing

    %%{
        # Keep the call stack exactly as deep as the current fcall nesting:
        # grow it by one slot before each push and trim it after each pop.
        # The len(stack) == 0 test after the exec block relies on this.
        prepush {
            stack = append(stack, 0);
        }
        postpop {
            stack = stack[:len(stack)-1];
        }
    }%%

    // Make Go compiler happy
    _ = ts
    _ = te
    _ = act
    _ = eof

    // token emits a token of the given type covering the current match,
    // data[ts:te].
    token := func (ty TokenType) {
        f.emitToken(ty, ts, te)
    }
    // selfToken emits the current single-byte match using the byte's own
    // value as the token type.
    selfToken := func () {
        b := data[ts:te]
        if len(b) != 1 {
            // should never happen
            panic("selfToken only works for single-character tokens")
        }
        f.emitToken(TokenType(b[0]), ts, te)
    }

    %%{
        # "nocs": cs was already chosen from mode above, so the generated
        # initialization must leave it alone.
        write init nocs;
        write exec;
    }%%

    // If we fall out here without being in a final state then we've
    // encountered something that the scanner can't match, which we'll
    // deal with as an invalid.
    if cs < hcltok_first_final {
        if mode == scanTemplate && len(stack) == 0 {
            // If we're scanning a bare template then any straggling
            // top-level stuff is actually literal string, rather than
            // invalid. This handles the case where the template ends
            // with a single "$" or "%", which trips us up because we
            // want to see another character to decide if it's a sequence
            // or an escape.
            f.emitToken(TokenStringLit, ts, len(data))
        } else {
            f.emitToken(TokenInvalid, ts, len(data))
        }
    }

    // We always emit a synthetic EOF token at the end, since it gives the
    // parser position information for an "unexpected EOF" diagnostic.
    f.emitToken(TokenEOF, len(data), len(data))

    return f.Tokens
}