1package hclwrite
2
3import (
4	"fmt"
5	"sort"
6
7	"github.com/hashicorp/hcl/v2"
8	"github.com/hashicorp/hcl/v2/hclsyntax"
9	"github.com/zclconf/go-cty/cty"
10)
11
12// Our "parser" here is actually not doing any parsing of its own. Instead,
13// it leans on the native parser in hclsyntax, and then uses the source ranges
14// from the AST to partition the raw token sequence to match the raw tokens
15// up to AST nodes.
16//
17// This strategy feels somewhat counter-intuitive, since most of the work the
18// parser does is thrown away here, but this strategy is chosen because the
19// normal parsing work done by hclsyntax is considered to be the "main case",
20// while modifying and re-printing source is more of an edge case, used only
21// in ancillary tools, and so it's good to keep all the main parsing logic
22// with the main case but keep all of the extra complexity of token wrangling
23// out of the main parser, which is already rather complex just serving the
24// use-cases it already serves.
25//
26// If the parsing step produces any errors, the returned File is nil because
27// we can't reliably extract tokens from the partial AST produced by an
28// erroneous parse.
29func parse(src []byte, filename string, start hcl.Pos) (*File, hcl.Diagnostics) {
30	file, diags := hclsyntax.ParseConfig(src, filename, start)
31	if diags.HasErrors() {
32		return nil, diags
33	}
34
35	// To do our work here, we use the "native" tokens (those from hclsyntax)
36	// to match against source ranges in the AST, but ultimately produce
37	// slices from our sequence of "writer" tokens, which contain only
38	// *relative* position information that is more appropriate for
39	// transformation/writing use-cases.
40	nativeTokens, diags := hclsyntax.LexConfig(src, filename, start)
41	if diags.HasErrors() {
42		// should never happen, since we would've caught these diags in
43		// the first call above.
44		return nil, diags
45	}
46	writerTokens := writerTokens(nativeTokens)
47
48	from := inputTokens{
49		nativeTokens: nativeTokens,
50		writerTokens: writerTokens,
51	}
52
53	before, root, after := parseBody(file.Body.(*hclsyntax.Body), from)
54	ret := &File{
55		inTree: newInTree(),
56
57		srcBytes: src,
58		body:     root,
59	}
60
61	nodes := ret.inTree.children
62	nodes.Append(before.Tokens())
63	nodes.AppendNode(root)
64	nodes.Append(after.Tokens())
65
66	return ret, diags
67}
68
// inputTokens pairs the native (hclsyntax) tokens for a region of source
// with the corresponding "writer" tokens produced by writerTokens. The two
// slices always have the same length and use the same indices, so partition
// decisions made against the native tokens (which carry absolute source
// positions) apply directly to the writer tokens too.
type inputTokens struct {
	nativeTokens hclsyntax.Tokens
	writerTokens Tokens
}
73
74func (it inputTokens) Partition(rng hcl.Range) (before, within, after inputTokens) {
75	start, end := partitionTokens(it.nativeTokens, rng)
76	before = it.Slice(0, start)
77	within = it.Slice(start, end)
78	after = it.Slice(end, len(it.nativeTokens))
79	return
80}
81
82func (it inputTokens) PartitionType(ty hclsyntax.TokenType) (before, within, after inputTokens) {
83	for i, t := range it.writerTokens {
84		if t.Type == ty {
85			return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens))
86		}
87	}
88	panic(fmt.Sprintf("didn't find any token of type %s", ty))
89}
90
91func (it inputTokens) PartitionTypeOk(ty hclsyntax.TokenType) (before, within, after inputTokens, ok bool) {
92	for i, t := range it.writerTokens {
93		if t.Type == ty {
94			return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens)), true
95		}
96	}
97
98	return inputTokens{}, inputTokens{}, inputTokens{}, false
99}
100
101func (it inputTokens) PartitionTypeSingle(ty hclsyntax.TokenType) (before inputTokens, found *Token, after inputTokens) {
102	before, within, after := it.PartitionType(ty)
103	if within.Len() != 1 {
104		panic("PartitionType found more than one token")
105	}
106	return before, within.Tokens()[0], after
107}
108
109// PartitionIncludeComments is like Partition except the returned "within"
110// range includes any lead and line comments associated with the range.
111func (it inputTokens) PartitionIncludingComments(rng hcl.Range) (before, within, after inputTokens) {
112	start, end := partitionTokens(it.nativeTokens, rng)
113	start = partitionLeadCommentTokens(it.nativeTokens[:start])
114	_, afterNewline := partitionLineEndTokens(it.nativeTokens[end:])
115	end += afterNewline
116
117	before = it.Slice(0, start)
118	within = it.Slice(start, end)
119	after = it.Slice(end, len(it.nativeTokens))
120	return
121
122}
123
// PartitionBlockItem is similar to PartitionIncludingComments but it returns
// the comments as separate token sequences so that they can be captured into
// AST attributes. It makes assumptions that apply only to block items, so
// should not be used for other constructs.
func (it inputTokens) PartitionBlockItem(rng hcl.Range) (before, leadComments, within, lineComments, newline, after inputTokens) {
	before, within, after = it.Partition(rng)
	before, leadComments = before.PartitionLeadComments()
	lineComments, newline, after = after.PartitionLineEndTokens()
	return
}
134
135func (it inputTokens) PartitionLeadComments() (before, within inputTokens) {
136	start := partitionLeadCommentTokens(it.nativeTokens)
137	before = it.Slice(0, start)
138	within = it.Slice(start, len(it.nativeTokens))
139	return
140}
141
142func (it inputTokens) PartitionLineEndTokens() (comments, newline, after inputTokens) {
143	afterComments, afterNewline := partitionLineEndTokens(it.nativeTokens)
144	comments = it.Slice(0, afterComments)
145	newline = it.Slice(afterComments, afterNewline)
146	after = it.Slice(afterNewline, len(it.nativeTokens))
147	return
148}
149
150func (it inputTokens) Slice(start, end int) inputTokens {
151	// When we slice, we create a new slice with no additional capacity because
152	// we expect that these slices will be mutated in order to insert
153	// new code into the AST, and we want to ensure that a new underlying
154	// array gets allocated in that case, rather than writing into some
155	// following slice and corrupting it.
156	return inputTokens{
157		nativeTokens: it.nativeTokens[start:end:end],
158		writerTokens: it.writerTokens[start:end:end],
159	}
160}
161
// Len returns the number of tokens in the sequence.
func (it inputTokens) Len() int {
	return len(it.nativeTokens)
}
165
// Tokens returns the writer tokens for the sequence, which carry only
// relative position information.
func (it inputTokens) Tokens() Tokens {
	return it.writerTokens
}
169
170func (it inputTokens) Types() []hclsyntax.TokenType {
171	ret := make([]hclsyntax.TokenType, len(it.nativeTokens))
172	for i, tok := range it.nativeTokens {
173		ret[i] = tok.Type
174	}
175	return ret
176}
177
178// parseBody locates the given body within the given input tokens and returns
179// the resulting *Body object as well as the tokens that appeared before and
180// after it.
181func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *node, inputTokens) {
182	before, within, after := from.PartitionIncludingComments(nativeBody.SrcRange)
183
184	// The main AST doesn't retain the original source ordering of the
185	// body items, so we need to reconstruct that ordering by inspecting
186	// their source ranges.
187	nativeItems := make([]hclsyntax.Node, 0, len(nativeBody.Attributes)+len(nativeBody.Blocks))
188	for _, nativeAttr := range nativeBody.Attributes {
189		nativeItems = append(nativeItems, nativeAttr)
190	}
191	for _, nativeBlock := range nativeBody.Blocks {
192		nativeItems = append(nativeItems, nativeBlock)
193	}
194	sort.Sort(nativeNodeSorter{nativeItems})
195
196	body := &Body{
197		inTree: newInTree(),
198		items:  newNodeSet(),
199	}
200
201	remain := within
202	for _, nativeItem := range nativeItems {
203		beforeItem, item, afterItem := parseBodyItem(nativeItem, remain)
204
205		if beforeItem.Len() > 0 {
206			body.AppendUnstructuredTokens(beforeItem.Tokens())
207		}
208		body.appendItemNode(item)
209
210		remain = afterItem
211	}
212
213	if remain.Len() > 0 {
214		body.AppendUnstructuredTokens(remain.Tokens())
215	}
216
217	return before, newNode(body), after
218}
219
220func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, *node, inputTokens) {
221	before, leadComments, within, lineComments, newline, after := from.PartitionBlockItem(nativeItem.Range())
222
223	var item *node
224
225	switch tItem := nativeItem.(type) {
226	case *hclsyntax.Attribute:
227		item = parseAttribute(tItem, within, leadComments, lineComments, newline)
228	case *hclsyntax.Block:
229		item = parseBlock(tItem, within, leadComments, lineComments, newline)
230	default:
231		// should never happen if caller is behaving
232		panic("unsupported native item type")
233	}
234
235	return before, item, after
236}
237
// parseAttribute constructs an *Attribute node from the given native
// attribute, consuming the "from" tokens that cover it. The lead comments,
// line comments, and trailing newline were already separated out by
// PartitionBlockItem and are re-attached here in their proper positions.
//
// Note that each Partition call below reassigns "from" to the remaining
// (unconsumed) tokens, so the sequence is consumed strictly left-to-right.
func parseAttribute(nativeAttr *hclsyntax.Attribute, from, leadComments, lineComments, newline inputTokens) *node {
	attr := &Attribute{
		inTree: newInTree(),
	}
	children := attr.inTree.children

	{
		// Comments on the lines immediately above the attribute belong to it.
		cn := newNode(newComments(leadComments.Tokens()))
		attr.leadComments = cn
		children.AppendNode(cn)
	}

	before, nameTokens, from := from.Partition(nativeAttr.NameRange)
	{
		children.AppendUnstructuredTokens(before.Tokens())
		if nameTokens.Len() != 1 {
			// Should never happen with valid input
			panic("attribute name is not exactly one token")
		}
		token := nameTokens.Tokens()[0]
		in := newNode(newIdentifier(token))
		attr.name = in
		children.AppendNode(in)
	}

	before, equalsTokens, from := from.Partition(nativeAttr.EqualsRange)
	children.AppendUnstructuredTokens(before.Tokens())
	children.AppendUnstructuredTokens(equalsTokens.Tokens())

	before, exprTokens, from := from.Partition(nativeAttr.Expr.Range())
	{
		children.AppendUnstructuredTokens(before.Tokens())
		exprNode := parseExpression(nativeAttr.Expr, exprTokens)
		attr.expr = exprNode
		children.AppendNode(exprNode)
	}

	{
		// Any comment on the same line, after the expression.
		cn := newNode(newComments(lineComments.Tokens()))
		attr.lineComments = cn
		children.AppendNode(cn)
	}

	children.AppendUnstructuredTokens(newline.Tokens())

	// Collect any stragglers, though there shouldn't be any
	children.AppendUnstructuredTokens(from.Tokens())

	return newNode(attr)
}
288
// parseBlock constructs a *Block node from the given native block, consuming
// the "from" tokens that cover it. As with parseAttribute, the surrounding
// comment and newline token sequences were pre-split by PartitionBlockItem,
// and each Partition call reassigns "from" to the remaining tokens.
func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments, newline inputTokens) *node {
	block := &Block{
		inTree: newInTree(),
	}
	children := block.inTree.children

	{
		// Comments on the lines immediately above the block belong to it.
		cn := newNode(newComments(leadComments.Tokens()))
		block.leadComments = cn
		children.AppendNode(cn)
	}

	before, typeTokens, from := from.Partition(nativeBlock.TypeRange)
	{
		children.AppendUnstructuredTokens(before.Tokens())
		if typeTokens.Len() != 1 {
			// Should never happen with valid input
			panic("block type name is not exactly one token")
		}
		token := typeTokens.Tokens()[0]
		in := newNode(newIdentifier(token))
		block.typeName = in
		children.AppendNode(in)
	}

	before, labelsNode, from := parseBlockLabels(nativeBlock, from)
	block.labels = labelsNode
	children.AppendNode(labelsNode)

	before, oBrace, from := from.Partition(nativeBlock.OpenBraceRange)
	children.AppendUnstructuredTokens(before.Tokens())
	block.open = children.AppendUnstructuredTokens(oBrace.Tokens())

	// We go a bit out of order here: we go hunting for the closing brace
	// so that we have a delimited body, but then we'll deal with the body
	// before we actually append the closing brace and any straggling tokens
	// that appear after it.
	bodyTokens, cBrace, from := from.Partition(nativeBlock.CloseBraceRange)
	before, body, after := parseBody(nativeBlock.Body, bodyTokens)
	children.AppendUnstructuredTokens(before.Tokens())
	block.body = body
	children.AppendNode(body)
	children.AppendUnstructuredTokens(after.Tokens())

	block.close = children.AppendUnstructuredTokens(cBrace.Tokens())

	// stragglers
	children.AppendUnstructuredTokens(from.Tokens())
	if lineComments.Len() > 0 {
		// blocks don't actually have line comments, so we'll just treat
		// them as extra stragglers
		children.AppendUnstructuredTokens(lineComments.Tokens())
	}
	children.AppendUnstructuredTokens(newline.Tokens())

	return newNode(block)
}
346
347func parseBlockLabels(nativeBlock *hclsyntax.Block, from inputTokens) (inputTokens, *node, inputTokens) {
348	labelsObj := newBlockLabels(nil)
349	children := labelsObj.children
350
351	var beforeAll inputTokens
352	for i, rng := range nativeBlock.LabelRanges {
353		var before, labelTokens inputTokens
354		before, labelTokens, from = from.Partition(rng)
355		if i == 0 {
356			beforeAll = before
357		} else {
358			children.AppendUnstructuredTokens(before.Tokens())
359		}
360		tokens := labelTokens.Tokens()
361		var ln *node
362		if len(tokens) == 1 && tokens[0].Type == hclsyntax.TokenIdent {
363			ln = newNode(newIdentifier(tokens[0]))
364		} else {
365			ln = newNode(newQuoted(tokens))
366		}
367		labelsObj.items.Add(ln)
368		children.AppendNode(ln)
369	}
370
371	after := from
372	return beforeAll, newNode(labelsObj), after
373}
374
375func parseExpression(nativeExpr hclsyntax.Expression, from inputTokens) *node {
376	expr := newExpression()
377	children := expr.inTree.children
378
379	nativeVars := nativeExpr.Variables()
380
381	for _, nativeTraversal := range nativeVars {
382		before, traversal, after := parseTraversal(nativeTraversal, from)
383		children.AppendUnstructuredTokens(before.Tokens())
384		children.AppendNode(traversal)
385		expr.absTraversals.Add(traversal)
386		from = after
387	}
388	// Attach any stragglers that don't belong to a traversal to the expression
389	// itself. In an expression with no traversals at all, this is just the
390	// entirety of "from".
391	children.AppendUnstructuredTokens(from.Tokens())
392
393	return newNode(expr)
394}
395
// parseTraversal constructs a *traversal node for the given absolute
// traversal, returning also the tokens before and after its source range.
func parseTraversal(nativeTraversal hcl.Traversal, from inputTokens) (before inputTokens, n *node, after inputTokens) {
	traversal := newTraversal()
	children := traversal.inTree.children
	// Narrow "from" down to just the traversal's own tokens; the named
	// results before/after capture what surrounds it.
	before, from, after = from.Partition(nativeTraversal.SourceRange())

	stepAfter := from
	for _, nativeStep := range nativeTraversal {
		// Note: the short-declared before/after here shadow the named
		// results only within the loop body; the returned values are the
		// ones assigned above.
		before, step, after := parseTraversalStep(nativeStep, stepAfter)
		children.AppendUnstructuredTokens(before.Tokens())
		children.AppendNode(step)
		traversal.steps.Add(step)
		stepAfter = after
	}

	return before, newNode(traversal), after
}
412
// parseTraversalStep constructs a node for a single traversal step — either
// a name (root or attribute access) or an index — returning also the tokens
// before and after the step's source range.
func parseTraversalStep(nativeStep hcl.Traverser, from inputTokens) (before inputTokens, n *node, after inputTokens) {
	var children *nodes
	switch tNativeStep := nativeStep.(type) {

	case hcl.TraverseRoot, hcl.TraverseAttr:
		step := newTraverseName()
		children = step.inTree.children
		before, from, after = from.Partition(nativeStep.SourceRange())
		inBefore, token, inAfter := from.PartitionTypeSingle(hclsyntax.TokenIdent)
		name := newIdentifier(token)
		children.AppendUnstructuredTokens(inBefore.Tokens())
		step.name = children.Append(name)
		children.AppendUnstructuredTokens(inAfter.Tokens())
		return before, newNode(step), after

	case hcl.TraverseIndex:
		step := newTraverseIndex()
		children = step.inTree.children
		before, from, after = from.Partition(nativeStep.SourceRange())

		// An index step may be written in attribute-like "dot" form
		// (e.g. foo.0) rather than brackets; a dot token here indicates
		// that form, which we handle and return from early. Note that
		// "from" is shadowed inside this if statement.
		if inBefore, dot, from, ok := from.PartitionTypeOk(hclsyntax.TokenDot); ok {
			children.AppendUnstructuredTokens(inBefore.Tokens())
			children.AppendUnstructuredTokens(dot.Tokens())

			valBefore, valToken, valAfter := from.PartitionTypeSingle(hclsyntax.TokenNumberLit)
			children.AppendUnstructuredTokens(valBefore.Tokens())
			key := newNumber(valToken)
			step.key = children.Append(key)
			children.AppendUnstructuredTokens(valAfter.Tokens())

			return before, newNode(step), after
		}

		// Otherwise this is the bracketed form: [ key ]
		var inBefore, oBrack, keyTokens, cBrack inputTokens
		inBefore, oBrack, from = from.PartitionType(hclsyntax.TokenOBrack)
		children.AppendUnstructuredTokens(inBefore.Tokens())
		children.AppendUnstructuredTokens(oBrack.Tokens())
		keyTokens, cBrack, from = from.PartitionType(hclsyntax.TokenCBrack)

		// A valid traversal index key is either a string or a number;
		// any other key type leaves the key tokens unstructured.
		keyVal := tNativeStep.Key
		switch keyVal.Type() {
		case cty.String:
			key := newQuoted(keyTokens.Tokens())
			step.key = children.Append(key)
		case cty.Number:
			valBefore, valToken, valAfter := keyTokens.PartitionTypeSingle(hclsyntax.TokenNumberLit)
			children.AppendUnstructuredTokens(valBefore.Tokens())
			key := newNumber(valToken)
			step.key = children.Append(key)
			children.AppendUnstructuredTokens(valAfter.Tokens())
		}

		children.AppendUnstructuredTokens(cBrack.Tokens())
		children.AppendUnstructuredTokens(from.Tokens())

		return before, newNode(step), after
	default:
		panic(fmt.Sprintf("unsupported traversal step type %T", nativeStep))
	}

}
474
475// writerTokens takes a sequence of tokens as produced by the main hclsyntax
476// package and transforms it into an equivalent sequence of tokens using
477// this package's own token model.
478//
479// The resulting list contains the same number of tokens and uses the same
480// indices as the input, allowing the two sets of tokens to be correlated
481// by index.
482func writerTokens(nativeTokens hclsyntax.Tokens) Tokens {
483	// Ultimately we want a slice of token _pointers_, but since we can
484	// predict how much memory we're going to devote to tokens we'll allocate
485	// it all as a single flat buffer and thus give the GC less work to do.
486	tokBuf := make([]Token, len(nativeTokens))
487	var lastByteOffset int
488	for i, mainToken := range nativeTokens {
489		// Create a copy of the bytes so that we can mutate without
490		// corrupting the original token stream.
491		bytes := make([]byte, len(mainToken.Bytes))
492		copy(bytes, mainToken.Bytes)
493
494		tokBuf[i] = Token{
495			Type:  mainToken.Type,
496			Bytes: bytes,
497
498			// We assume here that spaces are always ASCII spaces, since
499			// that's what the scanner also assumes, and thus the number
500			// of bytes skipped is also the number of space characters.
501			SpacesBefore: mainToken.Range.Start.Byte - lastByteOffset,
502		}
503
504		lastByteOffset = mainToken.Range.End.Byte
505	}
506
507	// Now make a slice of pointers into the previous slice.
508	ret := make(Tokens, len(tokBuf))
509	for i := range ret {
510		ret[i] = &tokBuf[i]
511	}
512
513	return ret
514}
515
516// partitionTokens takes a sequence of tokens and a hcl.Range and returns
517// two indices within the token sequence that correspond with the range
518// boundaries, such that the slice operator could be used to produce
519// three token sequences for before, within, and after respectively:
520//
521//     start, end := partitionTokens(toks, rng)
522//     before := toks[:start]
523//     within := toks[start:end]
524//     after := toks[end:]
525//
526// This works best when the range is aligned with token boundaries (e.g.
527// because it was produced in terms of the scanner's result) but if that isn't
528// true then it will make a best effort that may produce strange results at
529// the boundaries.
530//
531// Native hclsyntax tokens are used here, because they contain the necessary
532// absolute position information. However, since writerTokens produces a
533// correlatable sequence of writer tokens, the resulting indices can be
534// used also to index into its result, allowing the partitioning of writer
535// tokens to be driven by the partitioning of native tokens.
536//
537// The tokens are assumed to be in source order and non-overlapping, which
538// will be true if the token sequence from the scanner is used directly.
539func partitionTokens(toks hclsyntax.Tokens, rng hcl.Range) (start, end int) {
540	// We use a linear search here because we assume that in most cases our
541	// target range is close to the beginning of the sequence, and the sequences
542	// are generally small for most reasonable files anyway.
543	for i := 0; ; i++ {
544		if i >= len(toks) {
545			// No tokens for the given range at all!
546			return len(toks), len(toks)
547		}
548
549		if toks[i].Range.Start.Byte >= rng.Start.Byte {
550			start = i
551			break
552		}
553	}
554
555	for i := start; ; i++ {
556		if i >= len(toks) {
557			// The range "hangs off" the end of the token sequence
558			return start, len(toks)
559		}
560
561		if toks[i].Range.Start.Byte >= rng.End.Byte {
562			end = i // end marker is exclusive
563			break
564		}
565	}
566
567	return start, end
568}
569
570// partitionLeadCommentTokens takes a sequence of tokens that is assumed
571// to immediately precede a construct that can have lead comment tokens,
572// and returns the index into that sequence where the lead comments begin.
573//
574// Lead comments are defined as whole lines containing only comment tokens
575// with no blank lines between. If no such lines are found, the returned
576// index will be len(toks).
577func partitionLeadCommentTokens(toks hclsyntax.Tokens) int {
578	// single-line comments (which is what we're interested in here)
579	// consume their trailing newline, so we can just walk backwards
580	// until we stop seeing comment tokens.
581	for i := len(toks) - 1; i >= 0; i-- {
582		if toks[i].Type != hclsyntax.TokenComment {
583			return i + 1
584		}
585	}
586	return 0
587}
588
589// partitionLineEndTokens takes a sequence of tokens that is assumed
590// to immediately follow a construct that can have a line comment, and
591// returns first the index where any line comments end and then second
592// the index immediately after the trailing newline.
593//
594// Line comments are defined as comments that appear immediately after
595// a construct on the same line where its significant tokens ended.
596//
597// Since single-line comment tokens (# and //) include the newline that
598// terminates them, in the presence of these the two returned indices
599// will be the same since the comment itself serves as the line end.
600func partitionLineEndTokens(toks hclsyntax.Tokens) (afterComment, afterNewline int) {
601	for i := 0; i < len(toks); i++ {
602		tok := toks[i]
603		if tok.Type != hclsyntax.TokenComment {
604			switch tok.Type {
605			case hclsyntax.TokenNewline:
606				return i, i + 1
607			case hclsyntax.TokenEOF:
608				// Although this is valid, we mustn't include the EOF
609				// itself as our "newline" or else strange things will
610				// happen when we try to append new items.
611				return i, i
612			default:
613				// If we have well-formed input here then nothing else should be
614				// possible. This path should never happen, because we only try
615				// to extract tokens from the sequence if the parser succeeded,
616				// and it should catch this problem itself.
617				panic("malformed line trailers: expected only comments and newlines")
618			}
619		}
620
621		if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
622			// Newline at the end of a single-line comment serves both as
623			// the end of comments *and* the end of the line.
624			return i + 1, i + 1
625		}
626	}
627	return len(toks), len(toks)
628}
629
630// lexConfig uses the hclsyntax scanner to get a token stream and then
631// rewrites it into this package's token model.
632//
633// Any errors produced during scanning are ignored, so the results of this
634// function should be used with care.
635func lexConfig(src []byte) Tokens {
636	mainTokens, _ := hclsyntax.LexConfig(src, "", hcl.Pos{Byte: 0, Line: 1, Column: 1})
637	return writerTokens(mainTokens)
638}
639