package parser

import (
	"fmt"
	"io/ioutil"
	"strings"

	"github.com/goccy/go-yaml/ast"
	"github.com/goccy/go-yaml/internal/errors"
	"github.com/goccy/go-yaml/lexer"
	"github.com/goccy/go-yaml/token"
	"golang.org/x/xerrors"
)

type parser struct{}

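// parseMapping parses a flow mapping ( "{ ... }" ) and returns an ast.MappingNode.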
func (p *parser) parseMapping(ctx *context) (ast.Node, error) {
	node := ast.Mapping(ctx.currentToken(), true)
	ctx.progress(1) // skip MappingStart token
	for ctx.next() {
		tk := ctx.currentToken()
		if tk.Type == token.MappingEndType {
			node.End = tk
			break
		} else if tk.Type == token.CollectEntryType {
			ctx.progress(1)
			continue
		}

		value, err := p.parseMappingValue(ctx)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse mapping value in mapping node")
		}
		mvnode, ok := value.(*ast.MappingValueNode)
		if !ok {
			return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken())
		}
		node.Values = append(node.Values, mvnode)
		ctx.progress(1)
	}
	return node, nil
}

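// parseSequence parses a flow sequence ( "[ ... ]" ) and returns an ast.SequenceNode.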
func (p *parser) parseSequence(ctx *context) (ast.Node, error) {
	node := ast.Sequence(ctx.currentToken(), true)
	ctx.progress(1) // skip SequenceStart token
	for ctx.next() {
		tk := ctx.currentToken()
		if tk.Type == token.SequenceEndType {
			node.End = tk
			break
		} else if tk.Type == token.CollectEntryType {
			ctx.progress(1)
			continue
		}

		value, err := p.parseToken(ctx, tk)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse sequence value in flow sequence node")
		}
		node.Values = append(node.Values, value)
		ctx.progress(1)
	}
	return node, nil
}

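// parseTag parses a tag token ( e.g. "!!str", "!!map" ) and the value it applies to,
// returning an ast.TagNode.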
func (p *parser) parseTag(ctx *context) (ast.Node, error) {
	tagToken := ctx.currentToken()
	node := ast.Tag(tagToken)
	ctx.progress(1) // skip tag token
	var (
		value ast.Node
		err   error
	)
	switch token.ReservedTagKeyword(tagToken.Value) {
	case token.MappingTag,
		token.OrderedMapTag:
		value, err = p.parseMapping(ctx)
	case token.IntegerTag,
		token.FloatTag,
		token.StringTag,
		token.BinaryTag,
		token.TimestampTag,
		token.NullTag:
		typ := ctx.currentToken().Type
		if typ == token.LiteralType || typ == token.FoldedType {
			value, err = p.parseLiteral(ctx)
		} else {
			value = p.parseScalarValue(ctx.currentToken())
		}
	case token.SequenceTag,
		token.SetTag:
		err = errors.ErrSyntax(fmt.Sprintf("sorry, %s tag is not supported yet", tagToken.Value), tagToken)
	default:
		// custom tag
		value, err = p.parseToken(ctx, ctx.currentToken())
	}
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse tag value")
	}
	node.Value = value
	return node, nil
}

func (p *parser) removeLeftSideNewLineCharacter(src string) string {
	// CR or LF or CRLF
	return strings.TrimLeft(src, "\r\n")
}

func (p *parser) existsNewLineCharacter(src string) bool {
	if strings.Index(src, "\n") > 0 {
		return true
	}
	if strings.Index(src, "\r") > 0 {
		return true
	}
	return false
}

func (p *parser) validateMapKey(tk *token.Token) error {
	if tk.Type != token.StringType {
		return nil
	}
	origin := p.removeLeftSideNewLineCharacter(tk.Origin)
	if p.existsNewLineCharacter(origin) {
		return errors.ErrSyntax("unexpected key name", tk)
	}
	return nil
}

func (p *parser) createNullToken(base *token.Token) *token.Token {
	pos := *(base.Position)
	pos.Column++
	return token.New("null", "null", &pos)
}

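// parseMapValue parses the value part of a "key: value" pair. If the value is
// missing (the next token starts a new entry at the same or a lower column),
// a synthetic null token is inserted and an ast.NullNode is returned.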
func (p *parser) parseMapValue(ctx *context, key ast.Node, colonToken *token.Token) (ast.Node, error) {
	tk := ctx.currentToken()
	if tk == nil {
		nullToken := p.createNullToken(colonToken)
		ctx.insertToken(ctx.idx, nullToken)
		return ast.Null(nullToken), nil
	}

	if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType {
		// in this case,
		// ----
		// key: <value is not defined>
		// next
		nullToken := p.createNullToken(colonToken)
		ctx.insertToken(ctx.idx, nullToken)
		return ast.Null(nullToken), nil
	}

	if tk.Position.Column < key.GetToken().Position.Column {
		// in this case,
		// ----
		//   key: <value is not defined>
		// next
		nullToken := p.createNullToken(colonToken)
		ctx.insertToken(ctx.idx, nullToken)
		return ast.Null(nullToken), nil
	}

	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse mapping 'value' node")
	}
	return value, nil
}

func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error {
	keyColumn := key.GetToken().Position.Column
	valueColumn := value.GetToken().Position.Column
	if keyColumn != valueColumn {
		return nil
	}
	if value.Type() != ast.StringType {
		return nil
	}
	ntk := ctx.nextToken()
	if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) {
		return errors.ErrSyntax("could not find expected ':' token", value.GetToken())
	}
	return nil
}

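// parseMappingValue parses one or more block-style "key: value" entries that share
// the same column. A single entry is returned as an ast.MappingValueNode; multiple
// entries are collected into an ast.MappingNode.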
func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) {
	key, err := p.parseMapKey(ctx)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse map key")
	}
	if err := p.validateMapKey(key.GetToken()); err != nil {
		return nil, errors.Wrapf(err, "failed to validate mapping key")
	}
	ctx.progress(1)          // progress to mapping value token
	tk := ctx.currentToken() // get mapping value token
	ctx.progress(1)          // progress to value token
	if err := p.setSameLineCommentIfExists(ctx, key); err != nil {
		return nil, errors.Wrapf(err, "failed to set same line comment to node")
	}
	if key.GetComment() != nil {
		// if the current token is a comment, GetComment() returns non-nil.
		// in that case, progress to the value token.
		ctx.progressIgnoreComment(1)
	}

	value, err := p.parseMapValue(ctx, key, tk)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse map value")
	}
	if err := p.validateMapValue(ctx, key, value); err != nil {
		return nil, errors.Wrapf(err, "failed to validate map value")
	}

	mvnode := ast.MappingValue(tk, key, value)
	node := ast.Mapping(tk, false, mvnode)

	ntk := ctx.nextNotCommentToken()
	antk := ctx.afterNextNotCommentToken()
	for antk != nil && antk.Type == token.MappingValueType &&
		ntk.Position.Column == key.GetToken().Position.Column {
		ctx.progressIgnoreComment(1)
		value, err := p.parseToken(ctx, ctx.currentToken())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse mapping node")
		}
		switch value.Type() {
		case ast.MappingType:
			c := value.(*ast.MappingNode)
			comment := c.GetComment()
			for idx, v := range c.Values {
				if idx == 0 && comment != nil {
					if err := v.SetComment(comment); err != nil {
						return nil, errors.Wrapf(err, "failed to set comment token to node")
					}
				}
				node.Values = append(node.Values, v)
			}
		case ast.MappingValueType:
			node.Values = append(node.Values, value.(*ast.MappingValueNode))
		default:
			return nil, xerrors.Errorf("failed to parse mapping value node: node is %s", value.Type())
		}
		ntk = ctx.nextNotCommentToken()
		antk = ctx.afterNextNotCommentToken()
	}
	if len(node.Values) == 1 {
		return mvnode, nil
	}
	return node, nil
}

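// parseSequenceEntry parses a block-style sequence ( "- item" entries ) whose
// entries start at the same column, returning an ast.SequenceNode.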
func (p *parser) parseSequenceEntry(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	sequenceNode := ast.Sequence(tk, false)
	curColumn := tk.Position.Column
	for tk.Type == token.SequenceEntryType {
		ctx.progress(1) // skip sequence token
		value, err := p.parseToken(ctx, ctx.currentToken())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse sequence")
		}
		sequenceNode.Values = append(sequenceNode.Values, value)
		tk = ctx.nextNotCommentToken()
		if tk == nil {
			break
		}
		if tk.Type != token.SequenceEntryType {
			break
		}
		if tk.Position.Column != curColumn {
			break
		}
		ctx.progressIgnoreComment(1)
	}
	return sequenceNode, nil
}

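// parseAnchor parses an anchor definition ( "&name value" ) and returns an ast.AnchorNode.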
func (p *parser) parseAnchor(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	anchor := ast.Anchor(tk)
	ntk := ctx.nextToken()
	if ntk == nil {
		return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk)
	}
	ctx.progress(1) // skip anchor token
	name, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse anchor name node")
	}
	anchor.Name = name
	ntk = ctx.nextToken()
	if ntk == nil {
		return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", ctx.currentToken())
	}
	ctx.progress(1)
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse anchor value node")
	}
	anchor.Value = value
	return anchor, nil
}

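// parseAlias parses an alias reference ( "*name" ) and returns an ast.AliasNode.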
func (p *parser) parseAlias(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	alias := ast.Alias(tk)
	ntk := ctx.nextToken()
	if ntk == nil {
		return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk)
	}
	ctx.progress(1) // skip alias token
	name, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse alias name node")
	}
	alias.Value = name
	return alias, nil
}

func (p *parser) parseMapKey(ctx *context) (ast.Node, error) {
	tk := ctx.currentToken()
	if value := p.parseScalarValue(tk); value != nil {
		return value, nil
	}
	switch tk.Type {
	case token.MergeKeyType:
		return ast.MergeKey(tk), nil
	case token.MappingKeyType:
		return p.parseMappingKey(ctx)
	}
	return nil, errors.ErrSyntax("unexpected mapping key", tk)
}

func (p *parser) parseStringValue(tk *token.Token) ast.Node {
	switch tk.Type {
	case token.StringType,
		token.SingleQuoteType,
		token.DoubleQuoteType:
		return ast.String(tk)
	}
	return nil
}

func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.Node, error) {
	node := p.parseScalarValue(tk)
	if node == nil {
		return nil, nil
	}
	if p.isSameLineComment(ctx.nextToken(), node) {
		ctx.progress(1)
		if err := p.setSameLineCommentIfExists(ctx, node); err != nil {
			return nil, errors.Wrapf(err, "failed to set same line comment to node")
		}
	}
	return node, nil
}

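// parseScalarValue maps a scalar token ( string, null, bool, integer, float,
// infinity or NaN ) to the corresponding ast node; it returns nil for non-scalar tokens.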
func (p *parser) parseScalarValue(tk *token.Token) ast.Node {
	if node := p.parseStringValue(tk); node != nil {
		return node
	}
	switch tk.Type {
	case token.NullType:
		return ast.Null(tk)
	case token.BoolType:
		return ast.Bool(tk)
	case token.IntegerType,
		token.BinaryIntegerType,
		token.OctetIntegerType,
		token.HexIntegerType:
		return ast.Integer(tk)
	case token.FloatType:
		return ast.Float(tk)
	case token.InfinityType:
		return ast.Infinity(tk)
	case token.NanType:
		return ast.Nan(tk)
	}
	return nil
}

func (p *parser) parseDirective(ctx *context) (ast.Node, error) {
	node := ast.Directive(ctx.currentToken())
	ctx.progress(1) // skip directive token
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse directive value")
	}
	node.Value = value
	ctx.progress(1)
	if ctx.currentToken().Type != token.DocumentHeaderType {
		return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.currentToken())
	}
	return node, nil
}

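// parseLiteral parses a block scalar ( literal "|" or folded ">" style ) and
// returns an ast.LiteralNode wrapping the string value.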
func (p *parser) parseLiteral(ctx *context) (ast.Node, error) {
	node := ast.Literal(ctx.currentToken())
	ctx.progress(1) // skip literal/folded token
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse literal/folded value")
	}
	snode, ok := value.(*ast.StringNode)
	if !ok {
		return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken())
	}
	node.Value = snode
	return node, nil
}

func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool {
	if tk == nil {
		return false
	}
	if tk.Type != token.CommentType {
		return false
	}
	return tk.Position.Line == node.GetToken().Position.Line
}

func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error {
	tk := ctx.currentToken()
	if !p.isSameLineComment(tk, node) {
		return nil
	}
	if err := node.SetComment(tk); err != nil {
		return errors.Wrapf(err, "failed to set comment token to ast.Node")
	}
	return nil
}

func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) {
	startTk := ctx.currentToken()
	ctx.progress(1) // skip document header token
	body, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse document body")
	}
	node := ast.Document(startTk, body)
	if ntk := ctx.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType {
		node.End = ntk
		ctx.progress(1)
	}
	return node, nil
}

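// parseComment collects consecutive comment tokens, merges them into a single
// token, and attaches the result to the following node (or returns a standalone
// ast.CommentNode when no node follows).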
func (p *parser) parseComment(ctx *context) (ast.Node, error) {
	commentTokens := []*token.Token{}
	for {
		tk := ctx.currentToken()
		if tk == nil {
			break
		}
		if tk.Type != token.CommentType {
			break
		}
		commentTokens = append(commentTokens, tk)
		ctx.progressIgnoreComment(1) // skip comment token
	}
	// TODO: support token group. currently merge tokens to one token
	firstToken := commentTokens[0]
	values := []string{}
	origins := []string{}
	for _, tk := range commentTokens {
		values = append(values, tk.Value)
		origins = append(origins, tk.Origin)
	}
	firstToken.Value = strings.Join(values, "")
	firstToken.Origin = strings.Join(origins, "")
	node, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse node after comment")
	}
	if node == nil {
		return ast.Comment(firstToken), nil
	}
	if err := node.SetComment(firstToken); err != nil {
		return nil, errors.Wrapf(err, "failed to set comment token to node")
	}
	return node, nil
}

func (p *parser) parseMappingKey(ctx *context) (ast.Node, error) {
	node := ast.MappingKey(ctx.currentToken())
	ctx.progress(1) // skip mapping key token
	value, err := p.parseToken(ctx, ctx.currentToken())
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse map key")
	}
	node.Value = value
	return node, nil
}

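// parseToken dispatches on the token type and returns the corresponding ast node.
// It returns (nil, nil) when the token is nil or does not start a node.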
func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) {
	if tk == nil {
		return nil, nil
	}
	if tk.NextType() == token.MappingValueType {
		return p.parseMappingValue(ctx)
	}
	node, err := p.parseScalarValueWithComment(ctx, tk)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse scalar value")
	}
	if node != nil {
		return node, nil
	}
	switch tk.Type {
	case token.CommentType:
		return p.parseComment(ctx)
	case token.MappingKeyType:
		return p.parseMappingKey(ctx)
	case token.DocumentHeaderType:
		return p.parseDocument(ctx)
	case token.MappingStartType:
		return p.parseMapping(ctx)
	case token.SequenceStartType:
		return p.parseSequence(ctx)
	case token.SequenceEntryType:
		return p.parseSequenceEntry(ctx)
	case token.AnchorType:
		return p.parseAnchor(ctx)
	case token.AliasType:
		return p.parseAlias(ctx)
	case token.DirectiveType:
		return p.parseDirective(ctx)
	case token.TagType:
		return p.parseTag(ctx)
	case token.LiteralType, token.FoldedType:
		return p.parseLiteral(ctx)
	}
	return nil, nil
}

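// parse builds an ast.File from the token stream, wrapping top-level nodes that
// are not explicit documents in implicit ast.DocumentNodes.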
func (p *parser) parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
	ctx := newContext(tokens, mode)
	file := &ast.File{Docs: []*ast.DocumentNode{}}
	for ctx.next() {
		node, err := p.parseToken(ctx, ctx.currentToken())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to parse")
		}
		ctx.progressIgnoreComment(1)
		if node == nil {
			continue
		}
		if doc, ok := node.(*ast.DocumentNode); ok {
			file.Docs = append(file.Docs, doc)
		} else {
			file.Docs = append(file.Docs, ast.Document(nil, node))
		}
	}
	return file, nil
}

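// Mode is a bit mask of flags that control optional parser behavior.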
type Mode uint

const (
	ParseComments Mode = 1 << iota // parse comments and add them to AST
)

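// Example usage (a minimal sketch; the YAML source and output handling below are
// illustrative only):
//
//	tokens := lexer.Tokenize("a: 1\nb: [2, 3]\n")
//	file, err := Parse(tokens, ParseComments)
//	if err != nil {
//		// handle error
//	}
//	for _, doc := range file.Docs {
//		fmt.Println(doc) // each document implements ast.Node
//	}
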
// ParseBytes parses the given byte slice and returns an ast.File.
func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) {
	tokens := lexer.Tokenize(string(bytes))
	f, err := Parse(tokens, mode)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse")
	}
	return f, nil
}

// Parse parses the given tokens and returns an ast.File.
func Parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
	var p parser
	f, err := p.parse(tokens, mode)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse")
	}
	return f, nil
}

// ParseFile parses the file at the given path and returns an ast.File.
func ParseFile(filename string, mode Mode) (*ast.File, error) {
	file, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to read file: %s", filename)
	}
	f, err := ParseBytes(file, mode)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse")
	}
	f.Name = filename
	return f, nil
}