package parser

import (
	"fmt"
5	"io/ioutil"
6	"strings"
7
8	"github.com/goccy/go-yaml/ast"
9	"github.com/goccy/go-yaml/internal/errors"
10	"github.com/goccy/go-yaml/lexer"
11	"github.com/goccy/go-yaml/token"
12	"golang.org/x/xerrors"
13)
14
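// parser builds an abstract syntax tree from the token stream produced by the lexer.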
type parser struct{}

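// parseMapping parses a flow style mapping such as {a: 1, b: 2}, starting at the
// MappingStart token and ending at the matching MappingEnd token.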
func (p *parser) parseMapping(ctx *context) (ast.Node, error) {
	node := ast.Mapping(ctx.currentToken(), true)
	ctx.progress(1) // skip MappingStart token
	for ctx.next() {
		tk := ctx.currentToken()
		if tk.Type == token.MappingEndType {
			node.End = tk
			break
		} else if tk.Type == token.CollectEntryType {
			ctx.progress(1)
			continue
		}

		value, err := p.parseMappingValue(ctx)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse mapping value in mapping node")
		}
		mvnode, ok := value.(*ast.MappingValueNode)
		if !ok {
			return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken())
		}
		node.Values = append(node.Values, mvnode)
		ctx.progress(1)
	}
	return node, nil
}

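// parseSequence parses a flow style sequence such as [1, 2], starting at the
// SequenceStart token and ending at the matching SequenceEnd token.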
func (p *parser) parseSequence(ctx *context) (ast.Node, error) {
	node := ast.Sequence(ctx.currentToken(), true)
	ctx.progress(1) // skip SequenceStart token
	for ctx.next() {
		tk := ctx.currentToken()
		if tk.Type == token.SequenceEndType {
			node.End = tk
			break
		} else if tk.Type == token.CollectEntryType {
			ctx.progress(1)
			continue
		}

		value, err := p.parseToken(ctx, tk)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse sequence value in flow sequence node")
		}
		node.Values = append(node.Values, value)
		ctx.progress(1)
	}
	return node, nil
}

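// parseTag parses a tag token such as !!str or !!map together with the value it
// decorates. Reserved tags select a dedicated parse path, while unknown tags are
// treated as custom tags and their value is parsed generically.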
func (p *parser) parseTag(ctx *context) (ast.Node, error) {
	tagToken := ctx.currentToken()
	node := ast.Tag(tagToken)
	ctx.progress(1) // skip tag token
	var (
		value ast.Node
		err   error
	)
	switch token.ReservedTagKeyword(tagToken.Value) {
	case token.MappingTag,
		token.OrderedMapTag:
		value, err = p.parseMapping(ctx)
	case token.IntegerTag,
		token.FloatTag,
		token.StringTag,
		token.BinaryTag,
		token.TimestampTag,
		token.NullTag:
		typ := ctx.currentToken().Type
		if typ == token.LiteralType || typ == token.FoldedType {
			value, err = p.parseLiteral(ctx)
		} else {
			value = p.parseScalarValue(ctx.currentToken())
		}
	case token.SequenceTag,
		token.SetTag:
		err = errors.ErrSyntax(fmt.Sprintf("sorry, %s tag is currently not supported", tagToken.Value), tagToken)
	default:
		// custom tag
		value, err = p.parseToken(ctx, ctx.currentToken())
	}
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse tag value")
	}
	node.Value = value
	return node, nil
}

func (p *parser) removeLeftSideNewLineCharacter(src string) string {
	// CR or LF or CRLF
	return strings.TrimLeft(src, "\r\n")
}

func (p *parser) existsNewLineCharacter(src string) bool {
	if strings.Index(src, "\n") > 0 {
		return true
	}
	if strings.Index(src, "\r") > 0 {
		return true
	}
	return false
}

func (p *parser) validateMapKey(tk *token.Token) error {
	if tk.Type != token.StringType {
		return nil
	}
	origin := p.removeLeftSideNewLineCharacter(tk.Origin)
	if p.existsNewLineCharacter(origin) {
		return errors.ErrSyntax("unexpected key name", tk)
	}
	return nil
}

func (p *parser) createNullToken(base *token.Token) *token.Token {
	pos := *(base.Position)
	pos.Column++
	return token.New("null", "null", &pos)
}

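// parseMapValue parses the value side of a 'key: value' pair. When the value is
// missing (the following token starts a new entry at the same or a smaller column),
// an implicit null token is inserted and a Null node is returned.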
func (p *parser) parseMapValue(ctx *context, key ast.Node, colonToken *token.Token) (ast.Node, error) {
	tk := ctx.currentToken()
	if tk == nil {
		nullToken := p.createNullToken(colonToken)
		ctx.insertToken(ctx.idx, nullToken)
		return ast.Null(nullToken), nil
	}

	if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType {
		// in this case,
		// ----
		// key: <value is not defined>
		// next
		nullToken := p.createNullToken(colonToken)
		ctx.insertToken(ctx.idx, nullToken)
		return ast.Null(nullToken), nil
	}

	if tk.Position.Column < key.GetToken().Position.Column {
		// in this case,
		// ----
		//   key: <value is not defined>
		// next
		nullToken := p.createNullToken(colonToken)
		ctx.insertToken(ctx.idx, nullToken)
		return ast.Null(nullToken), nil
	}

	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse mapping 'value' node")
	}
	return value, nil
}

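// validateMapValue rejects a string value that starts in the same column as its key
// but is not itself followed by ':' or '-', since such a value is really the next
// entry and indicates a missing ':' token.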
func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error {
	keyColumn := key.GetToken().Position.Column
	valueColumn := value.GetToken().Position.Column
	if keyColumn != valueColumn {
		return nil
	}
	if value.Type() != ast.StringType {
		return nil
	}
	ntk := ctx.nextToken()
	if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) {
183		return errors.ErrSyntax("could not found expected ':' token", value.GetToken())
	}
	return nil
}

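// parseMappingValue parses a block style 'key: value' entry together with any
// following entries that start in the same column, collecting them into a single
// mapping node. A single entry is returned as a bare MappingValueNode.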
func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) {
	key, err := p.parseMapKey(ctx)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse map key")
	}
	if err := p.validateMapKey(key.GetToken()); err != nil {
		return nil, errors.Wrapf(err, "failed to validate map key")
	}
	ctx.progress(1)          // progress to mapping value token
	tk := ctx.currentToken() // get mapping value token
	if tk == nil {
		return nil, errors.ErrSyntax("unexpected map", key.GetToken())
	}
	ctx.progress(1) // progress to value token
	if err := p.setSameLineCommentIfExists(ctx, key); err != nil {
		return nil, errors.Wrapf(err, "failed to set same line comment to node")
	}
	if key.GetComment() != nil {
		// if the current token was a comment, it is now attached to the key;
		// progress to the value token
		ctx.progressIgnoreComment(1)
	}

	value, err := p.parseMapValue(ctx, key, tk)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse map value")
	}
	if err := p.validateMapValue(ctx, key, value); err != nil {
		return nil, errors.Wrapf(err, "failed to validate map value")
	}

	mvnode := ast.MappingValue(tk, key, value)
	node := ast.Mapping(tk, false, mvnode)

	ntk := ctx.nextNotCommentToken()
	antk := ctx.afterNextNotCommentToken()
	for antk != nil && antk.Type == token.MappingValueType &&
		ntk.Position.Column == key.GetToken().Position.Column {
		ctx.progressIgnoreComment(1)
		value, err := p.parseToken(ctx, ctx.currentToken())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse mapping node")
		}
		switch value.Type() {
		case ast.MappingType:
			c := value.(*ast.MappingNode)
			comment := c.GetComment()
			for idx, v := range c.Values {
				if idx == 0 && comment != nil {
					if err := v.SetComment(comment); err != nil {
						return nil, errors.Wrapf(err, "failed to set comment token to node")
					}
				}
				node.Values = append(node.Values, v)
			}
		case ast.MappingValueType:
			node.Values = append(node.Values, value.(*ast.MappingValueNode))
		default:
			return nil, xerrors.Errorf("failed to parse mapping value node. node is %s", value.Type())
		}
		ntk = ctx.nextNotCommentToken()
		antk = ctx.afterNextNotCommentToken()
	}
	if len(node.Values) == 1 {
		return mvnode, nil
	}
	return node, nil
}

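// parseSequenceEntry parses a block style sequence ('- item'), consuming consecutive
// '-' entries that start in the same column.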
func (p *parser) parseSequenceEntry(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	sequenceNode := ast.Sequence(tk, false)
	curColumn := tk.Position.Column
	for tk.Type == token.SequenceEntryType {
		ctx.progress(1) // skip sequence token
		value, err := p.parseToken(ctx, ctx.currentToken())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse sequence")
		}
		sequenceNode.Values = append(sequenceNode.Values, value)
		tk = ctx.nextNotCommentToken()
		if tk == nil {
			break
		}
		if tk.Type != token.SequenceEntryType {
			break
		}
		if tk.Position.Column != curColumn {
			break
		}
		ctx.progressIgnoreComment(1)
	}
	return sequenceNode, nil
}

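// parseAnchor parses an '&name value' anchor: the token after the anchor token is
// the anchor name and the tokens that follow form the anchored value.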
func (p *parser) parseAnchor(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	anchor := ast.Anchor(tk)
	ntk := ctx.nextToken()
	if ntk == nil {
		return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk)
	}
	ctx.progress(1) // skip anchor token
	name, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse anchor name node")
	}
	anchor.Name = name
	ntk = ctx.nextToken()
	if ntk == nil {
		return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", ctx.currentToken())
	}
	ctx.progress(1)
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse anchor value node")
	}
	anchor.Value = value
	return anchor, nil
}

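// parseAlias parses a '*name' alias; the token after the alias token is the alias name.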
func (p *parser) parseAlias(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	alias := ast.Alias(tk)
	ntk := ctx.nextToken()
	if ntk == nil {
		return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk)
	}
	ctx.progress(1) // skip alias token
	name, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse alias name node")
	}
	alias.Value = name
	return alias, nil
}

func (p *parser) parseMapKey(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	if value := p.parseScalarValue(tk); value != nil {
		return value, nil
	}
	switch tk.Type {
	case token.MergeKeyType:
		return ast.MergeKey(tk), nil
	case token.MappingKeyType:
		return p.parseMappingKey(ctx)
	}
	return nil, errors.ErrSyntax("unexpected mapping key", tk)
}

func (p *parser) parseStringValue(tk *token.Token) ast.Node {
	switch tk.Type {
	case token.StringType,
		token.SingleQuoteType,
		token.DoubleQuoteType:
		return ast.String(tk)
	}
	return nil
}

func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.Node, error) {
	node := p.parseScalarValue(tk)
	if node == nil {
		return nil, nil
	}
	if p.isSameLineComment(ctx.nextToken(), node) {
		ctx.progress(1)
		if err := p.setSameLineCommentIfExists(ctx, node); err != nil {
			return nil, errors.Wrapf(err, "failed to set same line comment to node")
		}
	}
	return node, nil
}

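// parseScalarValue converts a scalar token (null, bool, integer, float, infinity,
// NaN or string) into the corresponding AST node, or returns nil if the token is
// not a scalar.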
func (p *parser) parseScalarValue(tk *token.Token) ast.Node {
	if node := p.parseStringValue(tk); node != nil {
		return node
	}
	switch tk.Type {
	case token.NullType:
		return ast.Null(tk)
	case token.BoolType:
		return ast.Bool(tk)
	case token.IntegerType,
		token.BinaryIntegerType,
		token.OctetIntegerType,
		token.HexIntegerType:
		return ast.Integer(tk)
	case token.FloatType:
		return ast.Float(tk)
	case token.InfinityType:
		return ast.Infinity(tk)
	case token.NanType:
		return ast.Nan(tk)
	}
	return nil
}

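// parseDirective parses a '%' directive and requires it to be followed by a
// document header ('---') token; otherwise a syntax error is reported.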
func (p *parser) parseDirective(ctx *context) (ast.Node, error) {
	node := ast.Directive(ctx.currentToken())
	ctx.progress(1) // skip directive token
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse directive value")
	}
	node.Value = value
	ctx.progress(1)
	tk := ctx.currentToken()
	if tk == nil {
		// Since current token is nil, use the previous token to specify
		// the syntax error location.
		return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.previousToken())
	}
	if tk.Type != token.DocumentHeaderType {
		return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.currentToken())
	}
	return node, nil
}

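// parseLiteral parses a literal ('|') or folded ('>') block scalar; the token that
// follows must be a string token.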
func (p *parser) parseLiteral(ctx *context) (ast.Node, error) {
	node := ast.Literal(ctx.currentToken())
	ctx.progress(1) // skip literal/folded token
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse literal/folded value")
	}
	snode, ok := value.(*ast.StringNode)
	if !ok {
		return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken())
	}
	node.Value = snode
	return node, nil
}

func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool {
	if tk == nil {
		return false
	}
	if tk.Type != token.CommentType {
		return false
	}
	return tk.Position.Line == node.GetToken().Position.Line
}

func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error {
	tk := ctx.currentToken()
	if !p.isSameLineComment(tk, node) {
		return nil
	}
	if err := node.SetComment(tk); err != nil {
		return errors.Wrapf(err, "failed to set comment token to ast.Node")
	}
	return nil
}

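// parseDocument parses a document that starts at a document header ('---') token
// and, when present, records the matching document end ('...') token.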
func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) {
	startTk := ctx.currentToken()
	ctx.progress(1) // skip document header token
	body, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse document body")
	}
	node := ast.Document(startTk, body)
	if ntk := ctx.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType {
		node.End = ntk
		ctx.progress(1)
	}
	return node, nil
}

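// parseComment collects consecutive comment tokens, merges them into a single token,
// and attaches the result to the node that follows, or returns a standalone Comment
// node when no node follows.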
func (p *parser) parseComment(ctx *context) (ast.Node, error) {
	commentTokens := []*token.Token{}
	for {
		tk := ctx.currentToken()
		if tk == nil {
			break
		}
		if tk.Type != token.CommentType {
			break
		}
		commentTokens = append(commentTokens, tk)
		ctx.progressIgnoreComment(1) // skip comment token
	}
	// TODO: support token group. currently merge tokens to one token
	firstToken := commentTokens[0]
	values := []string{}
	origins := []string{}
	for _, tk := range commentTokens {
		values = append(values, tk.Value)
		origins = append(origins, tk.Origin)
	}
	firstToken.Value = strings.Join(values, "")
	firstToken.Origin = strings.Join(origins, "")
	node, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse node after comment")
	}
	if node == nil {
		return ast.Comment(firstToken), nil
	}
	if err := node.SetComment(firstToken); err != nil {
		return nil, errors.Wrapf(err, "failed to set comment token to node")
	}
	return node, nil
}

func (p *parser) parseMappingKey(ctx *context) (ast.Node, error) {
	node := ast.MappingKey(ctx.currentToken())
	ctx.progress(1) // skip mapping key token
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse map key")
	}
	node.Value = value
	return node, nil
}

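// parseToken dispatches on the current token type and parses the corresponding node.
// A token that is followed by ':' is parsed as a mapping value; scalar tokens are
// handled before the remaining structural token types.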
func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) {
	if tk == nil {
		return nil, nil
	}
	if tk.NextType() == token.MappingValueType {
		return p.parseMappingValue(ctx)
	}
	node, err := p.parseScalarValueWithComment(ctx, tk)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse scalar value")
	}
	if node != nil {
		return node, nil
	}
	switch tk.Type {
	case token.CommentType:
		return p.parseComment(ctx)
	case token.MappingKeyType:
		return p.parseMappingKey(ctx)
	case token.DocumentHeaderType:
		return p.parseDocument(ctx)
	case token.MappingStartType:
		return p.parseMapping(ctx)
	case token.SequenceStartType:
		return p.parseSequence(ctx)
	case token.SequenceEntryType:
		return p.parseSequenceEntry(ctx)
	case token.AnchorType:
		return p.parseAnchor(ctx)
	case token.AliasType:
		return p.parseAlias(ctx)
	case token.DirectiveType:
		return p.parseDirective(ctx)
	case token.TagType:
		return p.parseTag(ctx)
	case token.LiteralType, token.FoldedType:
		return p.parseLiteral(ctx)
	}
	return nil, nil
}

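// parse converts the token stream into an *ast.File, wrapping any top-level node
// that is not already a document in an implicit DocumentNode.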
func (p *parser) parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
	ctx := newContext(tokens, mode)
	file := &ast.File{Docs: []*ast.DocumentNode{}}
	for ctx.next() {
		node, err := p.parseToken(ctx, ctx.currentToken())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse")
		}
		ctx.progressIgnoreComment(1)
		if node == nil {
			continue
		}
		if doc, ok := node.(*ast.DocumentNode); ok {
			file.Docs = append(file.Docs, doc)
		} else {
			file.Docs = append(file.Docs, ast.Document(nil, node))
		}
	}
	return file, nil
}

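// Mode is a set of flags that controls optional parser behavior.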
type Mode uint

const (
	ParseComments Mode = 1 << iota // parse comments and add them to AST
)

// ParseBytes parses the given byte slice and returns an *ast.File.
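//
// A minimal usage sketch (error handling elided; a mode value of 0 disables
// comment parsing):
//
//	f, err := parser.ParseBytes([]byte("a: 1\nb: [2, 3]\n"), 0)
//	if err != nil {
//		// handle error
//	}
//	_ = f.Docs // one *ast.DocumentNode per YAML document in the input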
func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) {
	tokens := lexer.Tokenize(string(bytes))
	f, err := Parse(tokens, mode)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse")
	}
	return f, nil
}

// Parse parses the given token stream and returns an *ast.File.
func Parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
	var p parser
	f, err := p.parse(tokens, mode)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse")
	}
	return f, nil
}

// ParseFile parses the file at the given path and returns an *ast.File.
func ParseFile(filename string, mode Mode) (*ast.File, error) {
	file, err := os.ReadFile(filename)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to read file: %s", filename)
	}
	f, err := ParseBytes(file, mode)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse")
	}
	f.Name = filename
	return f, nil
}