1package parser 2 3import ( 4 "fmt" 5 "io/ioutil" 6 "strings" 7 8 "github.com/goccy/go-yaml/ast" 9 "github.com/goccy/go-yaml/internal/errors" 10 "github.com/goccy/go-yaml/lexer" 11 "github.com/goccy/go-yaml/token" 12 "golang.org/x/xerrors" 13) 14 15type parser struct{} 16 17func (p *parser) parseMapping(ctx *context) (ast.Node, error) { 18 node := ast.Mapping(ctx.currentToken(), true) 19 ctx.progress(1) // skip MappingStart token 20 for ctx.next() { 21 tk := ctx.currentToken() 22 if tk.Type == token.MappingEndType { 23 node.End = tk 24 break 25 } else if tk.Type == token.CollectEntryType { 26 ctx.progress(1) 27 continue 28 } 29 30 value, err := p.parseMappingValue(ctx) 31 if err != nil { 32 return nil, errors.Wrapf(err, "failed to parse mapping value in mapping node") 33 } 34 mvnode, ok := value.(*ast.MappingValueNode) 35 if !ok { 36 return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken()) 37 } 38 node.Values = append(node.Values, mvnode) 39 ctx.progress(1) 40 } 41 return node, nil 42} 43 44func (p *parser) parseSequence(ctx *context) (ast.Node, error) { 45 node := ast.Sequence(ctx.currentToken(), true) 46 ctx.progress(1) // skip SequenceStart token 47 for ctx.next() { 48 tk := ctx.currentToken() 49 if tk.Type == token.SequenceEndType { 50 node.End = tk 51 break 52 } else if tk.Type == token.CollectEntryType { 53 ctx.progress(1) 54 continue 55 } 56 57 value, err := p.parseToken(ctx, tk) 58 if err != nil { 59 return nil, errors.Wrapf(err, "failed to parse sequence value in flow sequence node") 60 } 61 node.Values = append(node.Values, value) 62 ctx.progress(1) 63 } 64 return node, nil 65} 66 67func (p *parser) parseTag(ctx *context) (ast.Node, error) { 68 tagToken := ctx.currentToken() 69 node := ast.Tag(tagToken) 70 ctx.progress(1) // skip tag token 71 var ( 72 value ast.Node 73 err error 74 ) 75 switch token.ReservedTagKeyword(tagToken.Value) { 76 case token.MappingTag, 77 token.OrderedMapTag: 78 value, err = p.parseMapping(ctx) 79 case token.IntegerTag, 80 token.FloatTag, 81 token.StringTag, 82 token.BinaryTag, 83 token.TimestampTag, 84 token.NullTag: 85 typ := ctx.currentToken().Type 86 if typ == token.LiteralType || typ == token.FoldedType { 87 value, err = p.parseLiteral(ctx) 88 } else { 89 value = p.parseScalarValue(ctx.currentToken()) 90 } 91 case token.SequenceTag, 92 token.SetTag: 93 err = errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagToken.Value), tagToken) 94 default: 95 // custom tag 96 value, err = p.parseToken(ctx, ctx.currentToken()) 97 } 98 if err != nil { 99 return nil, errors.Wrapf(err, "failed to parse tag value") 100 } 101 node.Value = value 102 return node, nil 103} 104 105func (p *parser) removeLeftSideNewLineCharacter(src string) string { 106 // CR or LF or CRLF 107 return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n") 108} 109 110func (p *parser) existsNewLineCharacter(src string) bool { 111 if strings.Index(src, "\n") > 0 { 112 return true 113 } 114 if strings.Index(src, "\r") > 0 { 115 return true 116 } 117 return false 118} 119 120func (p *parser) validateMapKey(tk *token.Token) error { 121 if tk.Type != token.StringType { 122 return nil 123 } 124 origin := p.removeLeftSideNewLineCharacter(tk.Origin) 125 if p.existsNewLineCharacter(origin) { 126 return errors.ErrSyntax("unexpected key name", tk) 127 } 128 return nil 129} 130 131func (p *parser) createNullToken(base *token.Token) *token.Token { 132 pos := *(base.Position) 133 pos.Column++ 134 return token.New("null", "null", &pos) 135} 136 137func (p *parser) parseMapValue(ctx *context, key ast.Node, colonToken *token.Token) (ast.Node, error) { 138 tk := ctx.currentToken() 139 if tk == nil { 140 nullToken := p.createNullToken(colonToken) 141 ctx.insertToken(ctx.idx, nullToken) 142 return ast.Null(nullToken), nil 143 } 144 145 if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType { 146 // in this case, 147 // ---- 148 // key: <value does not defined> 149 // next 150 nullToken := p.createNullToken(colonToken) 151 ctx.insertToken(ctx.idx, nullToken) 152 return ast.Null(nullToken), nil 153 } 154 155 if tk.Position.Column < key.GetToken().Position.Column { 156 // in this case, 157 // ---- 158 // key: <value does not defined> 159 // next 160 nullToken := p.createNullToken(colonToken) 161 ctx.insertToken(ctx.idx, nullToken) 162 return ast.Null(nullToken), nil 163 } 164 165 value, err := p.parseToken(ctx, ctx.currentToken()) 166 if err != nil { 167 return nil, errors.Wrapf(err, "failed to parse mapping 'value' node") 168 } 169 return value, nil 170} 171 172func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error { 173 keyColumn := key.GetToken().Position.Column 174 valueColumn := value.GetToken().Position.Column 175 if keyColumn != valueColumn { 176 return nil 177 } 178 if value.Type() != ast.StringType { 179 return nil 180 } 181 ntk := ctx.nextToken() 182 if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) { 183 return errors.ErrSyntax("could not found expected ':' token", value.GetToken()) 184 } 185 return nil 186} 187 188func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) { 189 key, err := p.parseMapKey(ctx) 190 if err != nil { 191 return nil, errors.Wrapf(err, "failed to parse map key") 192 } 193 if err := p.validateMapKey(key.GetToken()); err != nil { 194 return nil, errors.Wrapf(err, "validate mapping key error") 195 } 196 ctx.progress(1) // progress to mapping value token 197 tk := ctx.currentToken() // get mapping value token 198 if tk == nil { 199 return nil, errors.ErrSyntax("unexpected map", key.GetToken()) 200 } 201 ctx.progress(1) // progress to value token 202 if err := p.setSameLineCommentIfExists(ctx, key); err != nil { 203 return nil, errors.Wrapf(err, "failed to set same line comment to node") 204 } 205 if key.GetComment() != nil { 206 // if current token is comment, GetComment() is not nil. 207 // then progress to value token 208 ctx.progressIgnoreComment(1) 209 } 210 211 value, err := p.parseMapValue(ctx, key, tk) 212 if err != nil { 213 return nil, errors.Wrapf(err, "failed to parse map value") 214 } 215 if err := p.validateMapValue(ctx, key, value); err != nil { 216 return nil, errors.Wrapf(err, "failed to validate map value") 217 } 218 219 mvnode := ast.MappingValue(tk, key, value) 220 node := ast.Mapping(tk, false, mvnode) 221 222 ntk := ctx.nextNotCommentToken() 223 antk := ctx.afterNextNotCommentToken() 224 for antk != nil && antk.Type == token.MappingValueType && 225 ntk.Position.Column == key.GetToken().Position.Column { 226 ctx.progressIgnoreComment(1) 227 value, err := p.parseToken(ctx, ctx.currentToken()) 228 if err != nil { 229 return nil, errors.Wrapf(err, "failed to parse mapping node") 230 } 231 switch value.Type() { 232 case ast.MappingType: 233 c := value.(*ast.MappingNode) 234 comment := c.GetComment() 235 for idx, v := range c.Values { 236 if idx == 0 && comment != nil { 237 if err := v.SetComment(comment); err != nil { 238 return nil, errors.Wrapf(err, "failed to set comment token to node") 239 } 240 } 241 node.Values = append(node.Values, v) 242 } 243 case ast.MappingValueType: 244 node.Values = append(node.Values, value.(*ast.MappingValueNode)) 245 default: 246 return nil, xerrors.Errorf("failed to parse mapping value node node is %s", value.Type()) 247 } 248 ntk = ctx.nextNotCommentToken() 249 antk = ctx.afterNextNotCommentToken() 250 } 251 if len(node.Values) == 1 { 252 return mvnode, nil 253 } 254 return node, nil 255} 256 257func (p *parser) parseSequenceEntry(ctx *context) (ast.Node, error) { 258 tk := ctx.currentToken() 259 sequenceNode := ast.Sequence(tk, false) 260 curColumn := tk.Position.Column 261 for tk.Type == token.SequenceEntryType { 262 ctx.progress(1) // skip sequence token 263 value, err := p.parseToken(ctx, ctx.currentToken()) 264 if err != nil { 265 return nil, errors.Wrapf(err, "failed to parse sequence") 266 } 267 sequenceNode.Values = append(sequenceNode.Values, value) 268 tk = ctx.nextNotCommentToken() 269 if tk == nil { 270 break 271 } 272 if tk.Type != token.SequenceEntryType { 273 break 274 } 275 if tk.Position.Column != curColumn { 276 break 277 } 278 ctx.progressIgnoreComment(1) 279 } 280 return sequenceNode, nil 281} 282 283func (p *parser) parseAnchor(ctx *context) (ast.Node, error) { 284 tk := ctx.currentToken() 285 anchor := ast.Anchor(tk) 286 ntk := ctx.nextToken() 287 if ntk == nil { 288 return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk) 289 } 290 ctx.progress(1) // skip anchor token 291 name, err := p.parseToken(ctx, ctx.currentToken()) 292 if err != nil { 293 return nil, errors.Wrapf(err, "failed to parser anchor name node") 294 } 295 anchor.Name = name 296 ntk = ctx.nextToken() 297 if ntk == nil { 298 return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", ctx.currentToken()) 299 } 300 ctx.progress(1) 301 value, err := p.parseToken(ctx, ctx.currentToken()) 302 if err != nil { 303 return nil, errors.Wrapf(err, "failed to parser anchor name node") 304 } 305 anchor.Value = value 306 return anchor, nil 307} 308 309func (p *parser) parseAlias(ctx *context) (ast.Node, error) { 310 tk := ctx.currentToken() 311 alias := ast.Alias(tk) 312 ntk := ctx.nextToken() 313 if ntk == nil { 314 return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk) 315 } 316 ctx.progress(1) // skip alias token 317 name, err := p.parseToken(ctx, ctx.currentToken()) 318 if err != nil { 319 return nil, errors.Wrapf(err, "failed to parser alias name node") 320 } 321 alias.Value = name 322 return alias, nil 323} 324 325func (p *parser) parseMapKey(ctx *context) (ast.Node, error) { 326 tk := ctx.currentToken() 327 if value := p.parseScalarValue(tk); value != nil { 328 return value, nil 329 } 330 switch tk.Type { 331 case token.MergeKeyType: 332 return ast.MergeKey(tk), nil 333 case token.MappingKeyType: 334 return p.parseMappingKey(ctx) 335 } 336 return nil, errors.ErrSyntax("unexpected mapping key", tk) 337} 338 339func (p *parser) parseStringValue(tk *token.Token) ast.Node { 340 switch tk.Type { 341 case token.StringType, 342 token.SingleQuoteType, 343 token.DoubleQuoteType: 344 return ast.String(tk) 345 } 346 return nil 347} 348 349func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.Node, error) { 350 node := p.parseScalarValue(tk) 351 if node == nil { 352 return nil, nil 353 } 354 if p.isSameLineComment(ctx.nextToken(), node) { 355 ctx.progress(1) 356 if err := p.setSameLineCommentIfExists(ctx, node); err != nil { 357 return nil, errors.Wrapf(err, "failed to set same line comment to node") 358 } 359 } 360 return node, nil 361} 362 363func (p *parser) parseScalarValue(tk *token.Token) ast.Node { 364 if node := p.parseStringValue(tk); node != nil { 365 return node 366 } 367 switch tk.Type { 368 case token.NullType: 369 return ast.Null(tk) 370 case token.BoolType: 371 return ast.Bool(tk) 372 case token.IntegerType, 373 token.BinaryIntegerType, 374 token.OctetIntegerType, 375 token.HexIntegerType: 376 return ast.Integer(tk) 377 case token.FloatType: 378 return ast.Float(tk) 379 case token.InfinityType: 380 return ast.Infinity(tk) 381 case token.NanType: 382 return ast.Nan(tk) 383 } 384 return nil 385} 386 387func (p *parser) parseDirective(ctx *context) (ast.Node, error) { 388 node := ast.Directive(ctx.currentToken()) 389 ctx.progress(1) // skip directive token 390 value, err := p.parseToken(ctx, ctx.currentToken()) 391 if err != nil { 392 return nil, errors.Wrapf(err, "failed to parse directive value") 393 } 394 node.Value = value 395 ctx.progress(1) 396 tk := ctx.currentToken() 397 if tk == nil { 398 // Since current token is nil, use the previous token to specify 399 // the syntax error location. 400 return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.previousToken()) 401 } 402 if tk.Type != token.DocumentHeaderType { 403 return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.currentToken()) 404 } 405 return node, nil 406} 407 408func (p *parser) parseLiteral(ctx *context) (ast.Node, error) { 409 node := ast.Literal(ctx.currentToken()) 410 ctx.progress(1) // skip literal/folded token 411 value, err := p.parseToken(ctx, ctx.currentToken()) 412 if err != nil { 413 return nil, errors.Wrapf(err, "failed to parse literal/folded value") 414 } 415 snode, ok := value.(*ast.StringNode) 416 if !ok { 417 return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken()) 418 } 419 node.Value = snode 420 return node, nil 421} 422 423func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool { 424 if tk == nil { 425 return false 426 } 427 if tk.Type != token.CommentType { 428 return false 429 } 430 return tk.Position.Line == node.GetToken().Position.Line 431} 432 433func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error { 434 tk := ctx.currentToken() 435 if !p.isSameLineComment(tk, node) { 436 return nil 437 } 438 if err := node.SetComment(tk); err != nil { 439 return errors.Wrapf(err, "failed to set comment token to ast.Node") 440 } 441 return nil 442} 443 444func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) { 445 startTk := ctx.currentToken() 446 ctx.progress(1) // skip document header token 447 body, err := p.parseToken(ctx, ctx.currentToken()) 448 if err != nil { 449 return nil, errors.Wrapf(err, "failed to parse document body") 450 } 451 node := ast.Document(startTk, body) 452 if ntk := ctx.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType { 453 node.End = ntk 454 ctx.progress(1) 455 } 456 return node, nil 457} 458 459func (p *parser) parseComment(ctx *context) (ast.Node, error) { 460 commentTokens := []*token.Token{} 461 for { 462 tk := ctx.currentToken() 463 if tk == nil { 464 break 465 } 466 if tk.Type != token.CommentType { 467 break 468 } 469 commentTokens = append(commentTokens, tk) 470 ctx.progressIgnoreComment(1) // skip comment token 471 } 472 // TODO: support token group. currently merge tokens to one token 473 firstToken := commentTokens[0] 474 values := []string{} 475 origins := []string{} 476 for _, tk := range commentTokens { 477 values = append(values, tk.Value) 478 origins = append(origins, tk.Origin) 479 } 480 firstToken.Value = strings.Join(values, "") 481 firstToken.Value = strings.Join(origins, "") 482 node, err := p.parseToken(ctx, ctx.currentToken()) 483 if err != nil { 484 return nil, errors.Wrapf(err, "failed to parse node after comment") 485 } 486 if node == nil { 487 return ast.Comment(firstToken), nil 488 } 489 if err := node.SetComment(firstToken); err != nil { 490 return nil, errors.Wrapf(err, "failed to set comment token to node") 491 } 492 return node, nil 493} 494 495func (p *parser) parseMappingKey(ctx *context) (ast.Node, error) { 496 node := ast.MappingKey(ctx.currentToken()) 497 ctx.progress(1) // skip mapping key token 498 value, err := p.parseToken(ctx, ctx.currentToken()) 499 if err != nil { 500 return nil, errors.Wrapf(err, "failed to parse map key") 501 } 502 node.Value = value 503 return node, nil 504} 505 506func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) { 507 if tk == nil { 508 return nil, nil 509 } 510 if tk.NextType() == token.MappingValueType { 511 node, err := p.parseMappingValue(ctx) 512 return node, err 513 } 514 node, err := p.parseScalarValueWithComment(ctx, tk) 515 if err != nil { 516 return nil, errors.Wrapf(err, "failed to parse scalar value") 517 } 518 if node != nil { 519 return node, nil 520 } 521 switch tk.Type { 522 case token.CommentType: 523 return p.parseComment(ctx) 524 case token.MappingKeyType: 525 return p.parseMappingKey(ctx) 526 case token.DocumentHeaderType: 527 return p.parseDocument(ctx) 528 case token.MappingStartType: 529 return p.parseMapping(ctx) 530 case token.SequenceStartType: 531 return p.parseSequence(ctx) 532 case token.SequenceEntryType: 533 return p.parseSequenceEntry(ctx) 534 case token.AnchorType: 535 return p.parseAnchor(ctx) 536 case token.AliasType: 537 return p.parseAlias(ctx) 538 case token.DirectiveType: 539 return p.parseDirective(ctx) 540 case token.TagType: 541 return p.parseTag(ctx) 542 case token.LiteralType, token.FoldedType: 543 return p.parseLiteral(ctx) 544 } 545 return nil, nil 546} 547 548func (p *parser) parse(tokens token.Tokens, mode Mode) (*ast.File, error) { 549 ctx := newContext(tokens, mode) 550 file := &ast.File{Docs: []*ast.DocumentNode{}} 551 for ctx.next() { 552 node, err := p.parseToken(ctx, ctx.currentToken()) 553 if err != nil { 554 return nil, errors.Wrapf(err, "failed to parse") 555 } 556 ctx.progressIgnoreComment(1) 557 if node == nil { 558 continue 559 } 560 if doc, ok := node.(*ast.DocumentNode); ok { 561 file.Docs = append(file.Docs, doc) 562 } else { 563 file.Docs = append(file.Docs, ast.Document(nil, node)) 564 } 565 } 566 return file, nil 567} 568 569type Mode uint 570 571const ( 572 ParseComments Mode = 1 << iota // parse comments and add them to AST 573) 574 575// ParseBytes parse from byte slice, and returns ast.File 576func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) { 577 tokens := lexer.Tokenize(string(bytes)) 578 f, err := Parse(tokens, mode) 579 if err != nil { 580 return nil, errors.Wrapf(err, "failed to parse") 581 } 582 return f, nil 583} 584 585// Parse parse from token instances, and returns ast.File 586func Parse(tokens token.Tokens, mode Mode) (*ast.File, error) { 587 var p parser 588 f, err := p.parse(tokens, mode) 589 if err != nil { 590 return nil, errors.Wrapf(err, "failed to parse") 591 } 592 return f, nil 593} 594 595// Parse parse from filename, and returns ast.File 596func ParseFile(filename string, mode Mode) (*ast.File, error) { 597 file, err := ioutil.ReadFile(filename) 598 if err != nil { 599 return nil, errors.Wrapf(err, "failed to read file: %s", filename) 600 } 601 f, err := ParseBytes(file, mode) 602 if err != nil { 603 return nil, errors.Wrapf(err, "failed to parse") 604 } 605 f.Name = filename 606 return f, nil 607} 608