1package hclwrite 2 3import ( 4 "fmt" 5 "sort" 6 7 "github.com/hashicorp/hcl/v2" 8 "github.com/hashicorp/hcl/v2/hclsyntax" 9 "github.com/zclconf/go-cty/cty" 10) 11 12// Our "parser" here is actually not doing any parsing of its own. Instead, 13// it leans on the native parser in hclsyntax, and then uses the source ranges 14// from the AST to partition the raw token sequence to match the raw tokens 15// up to AST nodes. 16// 17// This strategy feels somewhat counter-intuitive, since most of the work the 18// parser does is thrown away here, but this strategy is chosen because the 19// normal parsing work done by hclsyntax is considered to be the "main case", 20// while modifying and re-printing source is more of an edge case, used only 21// in ancillary tools, and so it's good to keep all the main parsing logic 22// with the main case but keep all of the extra complexity of token wrangling 23// out of the main parser, which is already rather complex just serving the 24// use-cases it already serves. 25// 26// If the parsing step produces any errors, the returned File is nil because 27// we can't reliably extract tokens from the partial AST produced by an 28// erroneous parse. 29func parse(src []byte, filename string, start hcl.Pos) (*File, hcl.Diagnostics) { 30 file, diags := hclsyntax.ParseConfig(src, filename, start) 31 if diags.HasErrors() { 32 return nil, diags 33 } 34 35 // To do our work here, we use the "native" tokens (those from hclsyntax) 36 // to match against source ranges in the AST, but ultimately produce 37 // slices from our sequence of "writer" tokens, which contain only 38 // *relative* position information that is more appropriate for 39 // transformation/writing use-cases. 40 nativeTokens, diags := hclsyntax.LexConfig(src, filename, start) 41 if diags.HasErrors() { 42 // should never happen, since we would've caught these diags in 43 // the first call above. 44 return nil, diags 45 } 46 writerTokens := writerTokens(nativeTokens) 47 48 from := inputTokens{ 49 nativeTokens: nativeTokens, 50 writerTokens: writerTokens, 51 } 52 53 before, root, after := parseBody(file.Body.(*hclsyntax.Body), from) 54 ret := &File{ 55 inTree: newInTree(), 56 57 srcBytes: src, 58 body: root, 59 } 60 61 nodes := ret.inTree.children 62 nodes.Append(before.Tokens()) 63 nodes.AppendNode(root) 64 nodes.Append(after.Tokens()) 65 66 return ret, diags 67} 68 69type inputTokens struct { 70 nativeTokens hclsyntax.Tokens 71 writerTokens Tokens 72} 73 74func (it inputTokens) Partition(rng hcl.Range) (before, within, after inputTokens) { 75 start, end := partitionTokens(it.nativeTokens, rng) 76 before = it.Slice(0, start) 77 within = it.Slice(start, end) 78 after = it.Slice(end, len(it.nativeTokens)) 79 return 80} 81 82func (it inputTokens) PartitionType(ty hclsyntax.TokenType) (before, within, after inputTokens) { 83 for i, t := range it.writerTokens { 84 if t.Type == ty { 85 return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens)) 86 } 87 } 88 panic(fmt.Sprintf("didn't find any token of type %s", ty)) 89} 90 91func (it inputTokens) PartitionTypeOk(ty hclsyntax.TokenType) (before, within, after inputTokens, ok bool) { 92 for i, t := range it.writerTokens { 93 if t.Type == ty { 94 return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens)), true 95 } 96 } 97 98 return inputTokens{}, inputTokens{}, inputTokens{}, false 99} 100 101func (it inputTokens) PartitionTypeSingle(ty hclsyntax.TokenType) (before inputTokens, found *Token, after inputTokens) { 102 before, within, after := it.PartitionType(ty) 103 if within.Len() != 1 { 104 panic("PartitionType found more than one token") 105 } 106 return before, within.Tokens()[0], after 107} 108 109// PartitionIncludeComments is like Partition except the returned "within" 110// range includes any lead and line comments associated with the range. 111func (it inputTokens) PartitionIncludingComments(rng hcl.Range) (before, within, after inputTokens) { 112 start, end := partitionTokens(it.nativeTokens, rng) 113 start = partitionLeadCommentTokens(it.nativeTokens[:start]) 114 _, afterNewline := partitionLineEndTokens(it.nativeTokens[end:]) 115 end += afterNewline 116 117 before = it.Slice(0, start) 118 within = it.Slice(start, end) 119 after = it.Slice(end, len(it.nativeTokens)) 120 return 121 122} 123 124// PartitionBlockItem is similar to PartitionIncludeComments but it returns 125// the comments as separate token sequences so that they can be captured into 126// AST attributes. It makes assumptions that apply only to block items, so 127// should not be used for other constructs. 128func (it inputTokens) PartitionBlockItem(rng hcl.Range) (before, leadComments, within, lineComments, newline, after inputTokens) { 129 before, within, after = it.Partition(rng) 130 before, leadComments = before.PartitionLeadComments() 131 lineComments, newline, after = after.PartitionLineEndTokens() 132 return 133} 134 135func (it inputTokens) PartitionLeadComments() (before, within inputTokens) { 136 start := partitionLeadCommentTokens(it.nativeTokens) 137 before = it.Slice(0, start) 138 within = it.Slice(start, len(it.nativeTokens)) 139 return 140} 141 142func (it inputTokens) PartitionLineEndTokens() (comments, newline, after inputTokens) { 143 afterComments, afterNewline := partitionLineEndTokens(it.nativeTokens) 144 comments = it.Slice(0, afterComments) 145 newline = it.Slice(afterComments, afterNewline) 146 after = it.Slice(afterNewline, len(it.nativeTokens)) 147 return 148} 149 150func (it inputTokens) Slice(start, end int) inputTokens { 151 // When we slice, we create a new slice with no additional capacity because 152 // we expect that these slices will be mutated in order to insert 153 // new code into the AST, and we want to ensure that a new underlying 154 // array gets allocated in that case, rather than writing into some 155 // following slice and corrupting it. 156 return inputTokens{ 157 nativeTokens: it.nativeTokens[start:end:end], 158 writerTokens: it.writerTokens[start:end:end], 159 } 160} 161 162func (it inputTokens) Len() int { 163 return len(it.nativeTokens) 164} 165 166func (it inputTokens) Tokens() Tokens { 167 return it.writerTokens 168} 169 170func (it inputTokens) Types() []hclsyntax.TokenType { 171 ret := make([]hclsyntax.TokenType, len(it.nativeTokens)) 172 for i, tok := range it.nativeTokens { 173 ret[i] = tok.Type 174 } 175 return ret 176} 177 178// parseBody locates the given body within the given input tokens and returns 179// the resulting *Body object as well as the tokens that appeared before and 180// after it. 181func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *node, inputTokens) { 182 before, within, after := from.PartitionIncludingComments(nativeBody.SrcRange) 183 184 // The main AST doesn't retain the original source ordering of the 185 // body items, so we need to reconstruct that ordering by inspecting 186 // their source ranges. 187 nativeItems := make([]hclsyntax.Node, 0, len(nativeBody.Attributes)+len(nativeBody.Blocks)) 188 for _, nativeAttr := range nativeBody.Attributes { 189 nativeItems = append(nativeItems, nativeAttr) 190 } 191 for _, nativeBlock := range nativeBody.Blocks { 192 nativeItems = append(nativeItems, nativeBlock) 193 } 194 sort.Sort(nativeNodeSorter{nativeItems}) 195 196 body := &Body{ 197 inTree: newInTree(), 198 items: newNodeSet(), 199 } 200 201 remain := within 202 for _, nativeItem := range nativeItems { 203 beforeItem, item, afterItem := parseBodyItem(nativeItem, remain) 204 205 if beforeItem.Len() > 0 { 206 body.AppendUnstructuredTokens(beforeItem.Tokens()) 207 } 208 body.appendItemNode(item) 209 210 remain = afterItem 211 } 212 213 if remain.Len() > 0 { 214 body.AppendUnstructuredTokens(remain.Tokens()) 215 } 216 217 return before, newNode(body), after 218} 219 220func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, *node, inputTokens) { 221 before, leadComments, within, lineComments, newline, after := from.PartitionBlockItem(nativeItem.Range()) 222 223 var item *node 224 225 switch tItem := nativeItem.(type) { 226 case *hclsyntax.Attribute: 227 item = parseAttribute(tItem, within, leadComments, lineComments, newline) 228 case *hclsyntax.Block: 229 item = parseBlock(tItem, within, leadComments, lineComments, newline) 230 default: 231 // should never happen if caller is behaving 232 panic("unsupported native item type") 233 } 234 235 return before, item, after 236} 237 238func parseAttribute(nativeAttr *hclsyntax.Attribute, from, leadComments, lineComments, newline inputTokens) *node { 239 attr := &Attribute{ 240 inTree: newInTree(), 241 } 242 children := attr.inTree.children 243 244 { 245 cn := newNode(newComments(leadComments.Tokens())) 246 attr.leadComments = cn 247 children.AppendNode(cn) 248 } 249 250 before, nameTokens, from := from.Partition(nativeAttr.NameRange) 251 { 252 children.AppendUnstructuredTokens(before.Tokens()) 253 if nameTokens.Len() != 1 { 254 // Should never happen with valid input 255 panic("attribute name is not exactly one token") 256 } 257 token := nameTokens.Tokens()[0] 258 in := newNode(newIdentifier(token)) 259 attr.name = in 260 children.AppendNode(in) 261 } 262 263 before, equalsTokens, from := from.Partition(nativeAttr.EqualsRange) 264 children.AppendUnstructuredTokens(before.Tokens()) 265 children.AppendUnstructuredTokens(equalsTokens.Tokens()) 266 267 before, exprTokens, from := from.Partition(nativeAttr.Expr.Range()) 268 { 269 children.AppendUnstructuredTokens(before.Tokens()) 270 exprNode := parseExpression(nativeAttr.Expr, exprTokens) 271 attr.expr = exprNode 272 children.AppendNode(exprNode) 273 } 274 275 { 276 cn := newNode(newComments(lineComments.Tokens())) 277 attr.lineComments = cn 278 children.AppendNode(cn) 279 } 280 281 children.AppendUnstructuredTokens(newline.Tokens()) 282 283 // Collect any stragglers, though there shouldn't be any 284 children.AppendUnstructuredTokens(from.Tokens()) 285 286 return newNode(attr) 287} 288 289func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments, newline inputTokens) *node { 290 block := &Block{ 291 inTree: newInTree(), 292 } 293 children := block.inTree.children 294 295 { 296 cn := newNode(newComments(leadComments.Tokens())) 297 block.leadComments = cn 298 children.AppendNode(cn) 299 } 300 301 before, typeTokens, from := from.Partition(nativeBlock.TypeRange) 302 { 303 children.AppendUnstructuredTokens(before.Tokens()) 304 if typeTokens.Len() != 1 { 305 // Should never happen with valid input 306 panic("block type name is not exactly one token") 307 } 308 token := typeTokens.Tokens()[0] 309 in := newNode(newIdentifier(token)) 310 block.typeName = in 311 children.AppendNode(in) 312 } 313 314 before, labelsNode, from := parseBlockLabels(nativeBlock, from) 315 block.labels = labelsNode 316 children.AppendNode(labelsNode) 317 318 before, oBrace, from := from.Partition(nativeBlock.OpenBraceRange) 319 children.AppendUnstructuredTokens(before.Tokens()) 320 block.open = children.AppendUnstructuredTokens(oBrace.Tokens()) 321 322 // We go a bit out of order here: we go hunting for the closing brace 323 // so that we have a delimited body, but then we'll deal with the body 324 // before we actually append the closing brace and any straggling tokens 325 // that appear after it. 326 bodyTokens, cBrace, from := from.Partition(nativeBlock.CloseBraceRange) 327 before, body, after := parseBody(nativeBlock.Body, bodyTokens) 328 children.AppendUnstructuredTokens(before.Tokens()) 329 block.body = body 330 children.AppendNode(body) 331 children.AppendUnstructuredTokens(after.Tokens()) 332 333 block.close = children.AppendUnstructuredTokens(cBrace.Tokens()) 334 335 // stragglers 336 children.AppendUnstructuredTokens(from.Tokens()) 337 if lineComments.Len() > 0 { 338 // blocks don't actually have line comments, so we'll just treat 339 // them as extra stragglers 340 children.AppendUnstructuredTokens(lineComments.Tokens()) 341 } 342 children.AppendUnstructuredTokens(newline.Tokens()) 343 344 return newNode(block) 345} 346 347func parseBlockLabels(nativeBlock *hclsyntax.Block, from inputTokens) (inputTokens, *node, inputTokens) { 348 labelsObj := newBlockLabels(nil) 349 children := labelsObj.children 350 351 var beforeAll inputTokens 352 for i, rng := range nativeBlock.LabelRanges { 353 var before, labelTokens inputTokens 354 before, labelTokens, from = from.Partition(rng) 355 if i == 0 { 356 beforeAll = before 357 } else { 358 children.AppendUnstructuredTokens(before.Tokens()) 359 } 360 tokens := labelTokens.Tokens() 361 var ln *node 362 if len(tokens) == 1 && tokens[0].Type == hclsyntax.TokenIdent { 363 ln = newNode(newIdentifier(tokens[0])) 364 } else { 365 ln = newNode(newQuoted(tokens)) 366 } 367 labelsObj.items.Add(ln) 368 children.AppendNode(ln) 369 } 370 371 after := from 372 return beforeAll, newNode(labelsObj), after 373} 374 375func parseExpression(nativeExpr hclsyntax.Expression, from inputTokens) *node { 376 expr := newExpression() 377 children := expr.inTree.children 378 379 nativeVars := nativeExpr.Variables() 380 381 for _, nativeTraversal := range nativeVars { 382 before, traversal, after := parseTraversal(nativeTraversal, from) 383 children.AppendUnstructuredTokens(before.Tokens()) 384 children.AppendNode(traversal) 385 expr.absTraversals.Add(traversal) 386 from = after 387 } 388 // Attach any stragglers that don't belong to a traversal to the expression 389 // itself. In an expression with no traversals at all, this is just the 390 // entirety of "from". 391 children.AppendUnstructuredTokens(from.Tokens()) 392 393 return newNode(expr) 394} 395 396func parseTraversal(nativeTraversal hcl.Traversal, from inputTokens) (before inputTokens, n *node, after inputTokens) { 397 traversal := newTraversal() 398 children := traversal.inTree.children 399 before, from, after = from.Partition(nativeTraversal.SourceRange()) 400 401 stepAfter := from 402 for _, nativeStep := range nativeTraversal { 403 before, step, after := parseTraversalStep(nativeStep, stepAfter) 404 children.AppendUnstructuredTokens(before.Tokens()) 405 children.AppendNode(step) 406 traversal.steps.Add(step) 407 stepAfter = after 408 } 409 410 return before, newNode(traversal), after 411} 412 413func parseTraversalStep(nativeStep hcl.Traverser, from inputTokens) (before inputTokens, n *node, after inputTokens) { 414 var children *nodes 415 switch tNativeStep := nativeStep.(type) { 416 417 case hcl.TraverseRoot, hcl.TraverseAttr: 418 step := newTraverseName() 419 children = step.inTree.children 420 before, from, after = from.Partition(nativeStep.SourceRange()) 421 inBefore, token, inAfter := from.PartitionTypeSingle(hclsyntax.TokenIdent) 422 name := newIdentifier(token) 423 children.AppendUnstructuredTokens(inBefore.Tokens()) 424 step.name = children.Append(name) 425 children.AppendUnstructuredTokens(inAfter.Tokens()) 426 return before, newNode(step), after 427 428 case hcl.TraverseIndex: 429 step := newTraverseIndex() 430 children = step.inTree.children 431 before, from, after = from.Partition(nativeStep.SourceRange()) 432 433 if inBefore, dot, from, ok := from.PartitionTypeOk(hclsyntax.TokenDot); ok { 434 children.AppendUnstructuredTokens(inBefore.Tokens()) 435 children.AppendUnstructuredTokens(dot.Tokens()) 436 437 valBefore, valToken, valAfter := from.PartitionTypeSingle(hclsyntax.TokenNumberLit) 438 children.AppendUnstructuredTokens(valBefore.Tokens()) 439 key := newNumber(valToken) 440 step.key = children.Append(key) 441 children.AppendUnstructuredTokens(valAfter.Tokens()) 442 443 return before, newNode(step), after 444 } 445 446 var inBefore, oBrack, keyTokens, cBrack inputTokens 447 inBefore, oBrack, from = from.PartitionType(hclsyntax.TokenOBrack) 448 children.AppendUnstructuredTokens(inBefore.Tokens()) 449 children.AppendUnstructuredTokens(oBrack.Tokens()) 450 keyTokens, cBrack, from = from.PartitionType(hclsyntax.TokenCBrack) 451 452 keyVal := tNativeStep.Key 453 switch keyVal.Type() { 454 case cty.String: 455 key := newQuoted(keyTokens.Tokens()) 456 step.key = children.Append(key) 457 case cty.Number: 458 valBefore, valToken, valAfter := keyTokens.PartitionTypeSingle(hclsyntax.TokenNumberLit) 459 children.AppendUnstructuredTokens(valBefore.Tokens()) 460 key := newNumber(valToken) 461 step.key = children.Append(key) 462 children.AppendUnstructuredTokens(valAfter.Tokens()) 463 } 464 465 children.AppendUnstructuredTokens(cBrack.Tokens()) 466 children.AppendUnstructuredTokens(from.Tokens()) 467 468 return before, newNode(step), after 469 default: 470 panic(fmt.Sprintf("unsupported traversal step type %T", nativeStep)) 471 } 472 473} 474 475// writerTokens takes a sequence of tokens as produced by the main hclsyntax 476// package and transforms it into an equivalent sequence of tokens using 477// this package's own token model. 478// 479// The resulting list contains the same number of tokens and uses the same 480// indices as the input, allowing the two sets of tokens to be correlated 481// by index. 482func writerTokens(nativeTokens hclsyntax.Tokens) Tokens { 483 // Ultimately we want a slice of token _pointers_, but since we can 484 // predict how much memory we're going to devote to tokens we'll allocate 485 // it all as a single flat buffer and thus give the GC less work to do. 486 tokBuf := make([]Token, len(nativeTokens)) 487 var lastByteOffset int 488 for i, mainToken := range nativeTokens { 489 // Create a copy of the bytes so that we can mutate without 490 // corrupting the original token stream. 491 bytes := make([]byte, len(mainToken.Bytes)) 492 copy(bytes, mainToken.Bytes) 493 494 tokBuf[i] = Token{ 495 Type: mainToken.Type, 496 Bytes: bytes, 497 498 // We assume here that spaces are always ASCII spaces, since 499 // that's what the scanner also assumes, and thus the number 500 // of bytes skipped is also the number of space characters. 501 SpacesBefore: mainToken.Range.Start.Byte - lastByteOffset, 502 } 503 504 lastByteOffset = mainToken.Range.End.Byte 505 } 506 507 // Now make a slice of pointers into the previous slice. 508 ret := make(Tokens, len(tokBuf)) 509 for i := range ret { 510 ret[i] = &tokBuf[i] 511 } 512 513 return ret 514} 515 516// partitionTokens takes a sequence of tokens and a hcl.Range and returns 517// two indices within the token sequence that correspond with the range 518// boundaries, such that the slice operator could be used to produce 519// three token sequences for before, within, and after respectively: 520// 521// start, end := partitionTokens(toks, rng) 522// before := toks[:start] 523// within := toks[start:end] 524// after := toks[end:] 525// 526// This works best when the range is aligned with token boundaries (e.g. 527// because it was produced in terms of the scanner's result) but if that isn't 528// true then it will make a best effort that may produce strange results at 529// the boundaries. 530// 531// Native hclsyntax tokens are used here, because they contain the necessary 532// absolute position information. However, since writerTokens produces a 533// correlatable sequence of writer tokens, the resulting indices can be 534// used also to index into its result, allowing the partitioning of writer 535// tokens to be driven by the partitioning of native tokens. 536// 537// The tokens are assumed to be in source order and non-overlapping, which 538// will be true if the token sequence from the scanner is used directly. 539func partitionTokens(toks hclsyntax.Tokens, rng hcl.Range) (start, end int) { 540 // We use a linear search here because we assume that in most cases our 541 // target range is close to the beginning of the sequence, and the sequences 542 // are generally small for most reasonable files anyway. 543 for i := 0; ; i++ { 544 if i >= len(toks) { 545 // No tokens for the given range at all! 546 return len(toks), len(toks) 547 } 548 549 if toks[i].Range.Start.Byte >= rng.Start.Byte { 550 start = i 551 break 552 } 553 } 554 555 for i := start; ; i++ { 556 if i >= len(toks) { 557 // The range "hangs off" the end of the token sequence 558 return start, len(toks) 559 } 560 561 if toks[i].Range.Start.Byte >= rng.End.Byte { 562 end = i // end marker is exclusive 563 break 564 } 565 } 566 567 return start, end 568} 569 570// partitionLeadCommentTokens takes a sequence of tokens that is assumed 571// to immediately precede a construct that can have lead comment tokens, 572// and returns the index into that sequence where the lead comments begin. 573// 574// Lead comments are defined as whole lines containing only comment tokens 575// with no blank lines between. If no such lines are found, the returned 576// index will be len(toks). 577func partitionLeadCommentTokens(toks hclsyntax.Tokens) int { 578 // single-line comments (which is what we're interested in here) 579 // consume their trailing newline, so we can just walk backwards 580 // until we stop seeing comment tokens. 581 for i := len(toks) - 1; i >= 0; i-- { 582 if toks[i].Type != hclsyntax.TokenComment { 583 return i + 1 584 } 585 } 586 return 0 587} 588 589// partitionLineEndTokens takes a sequence of tokens that is assumed 590// to immediately follow a construct that can have a line comment, and 591// returns first the index where any line comments end and then second 592// the index immediately after the trailing newline. 593// 594// Line comments are defined as comments that appear immediately after 595// a construct on the same line where its significant tokens ended. 596// 597// Since single-line comment tokens (# and //) include the newline that 598// terminates them, in the presence of these the two returned indices 599// will be the same since the comment itself serves as the line end. 600func partitionLineEndTokens(toks hclsyntax.Tokens) (afterComment, afterNewline int) { 601 for i := 0; i < len(toks); i++ { 602 tok := toks[i] 603 if tok.Type != hclsyntax.TokenComment { 604 switch tok.Type { 605 case hclsyntax.TokenNewline: 606 return i, i + 1 607 case hclsyntax.TokenEOF: 608 // Although this is valid, we mustn't include the EOF 609 // itself as our "newline" or else strange things will 610 // happen when we try to append new items. 611 return i, i 612 default: 613 // If we have well-formed input here then nothing else should be 614 // possible. This path should never happen, because we only try 615 // to extract tokens from the sequence if the parser succeeded, 616 // and it should catch this problem itself. 617 panic("malformed line trailers: expected only comments and newlines") 618 } 619 } 620 621 if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' { 622 // Newline at the end of a single-line comment serves both as 623 // the end of comments *and* the end of the line. 624 return i + 1, i + 1 625 } 626 } 627 return len(toks), len(toks) 628} 629 630// lexConfig uses the hclsyntax scanner to get a token stream and then 631// rewrites it into this package's token model. 632// 633// Any errors produced during scanning are ignored, so the results of this 634// function should be used with care. 635func lexConfig(src []byte) Tokens { 636 mainTokens, _ := hclsyntax.LexConfig(src, "", hcl.Pos{Byte: 0, Line: 1, Column: 1}) 637 return writerTokens(mainTokens) 638} 639