1package parser 2 3import ( 4 "fmt" 5 "io/ioutil" 6 "strings" 7 8 "github.com/goccy/go-yaml/ast" 9 "github.com/goccy/go-yaml/internal/errors" 10 "github.com/goccy/go-yaml/lexer" 11 "github.com/goccy/go-yaml/token" 12 "golang.org/x/xerrors" 13) 14 15type parser struct{} 16 17func (p *parser) parseMapping(ctx *context) (ast.Node, error) { 18 node := ast.Mapping(ctx.currentToken(), true) 19 ctx.progress(1) // skip MappingStart token 20 for ctx.next() { 21 tk := ctx.currentToken() 22 if tk.Type == token.MappingEndType { 23 node.End = tk 24 break 25 } else if tk.Type == token.CollectEntryType { 26 ctx.progress(1) 27 continue 28 } 29 30 value, err := p.parseMappingValue(ctx) 31 if err != nil { 32 return nil, errors.Wrapf(err, "failed to parse mapping value in mapping node") 33 } 34 mvnode, ok := value.(*ast.MappingValueNode) 35 if !ok { 36 return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken()) 37 } 38 node.Values = append(node.Values, mvnode) 39 ctx.progress(1) 40 } 41 return node, nil 42} 43 44func (p *parser) parseSequence(ctx *context) (ast.Node, error) { 45 node := ast.Sequence(ctx.currentToken(), true) 46 ctx.progress(1) // skip SequenceStart token 47 for ctx.next() { 48 tk := ctx.currentToken() 49 if tk.Type == token.SequenceEndType { 50 node.End = tk 51 break 52 } else if tk.Type == token.CollectEntryType { 53 ctx.progress(1) 54 continue 55 } 56 57 value, err := p.parseToken(ctx, tk) 58 if err != nil { 59 return nil, errors.Wrapf(err, "failed to parse sequence value in flow sequence node") 60 } 61 node.Values = append(node.Values, value) 62 ctx.progress(1) 63 } 64 return node, nil 65} 66 67func (p *parser) parseTag(ctx *context) (ast.Node, error) { 68 tagToken := ctx.currentToken() 69 node := ast.Tag(tagToken) 70 ctx.progress(1) // skip tag token 71 var ( 72 value ast.Node 73 err error 74 ) 75 switch token.ReservedTagKeyword(tagToken.Value) { 76 case token.MappingTag, 77 token.OrderedMapTag: 78 value, err = p.parseMapping(ctx) 79 case token.IntegerTag, 80 token.FloatTag, 81 token.StringTag, 82 token.BinaryTag, 83 token.TimestampTag, 84 token.NullTag: 85 typ := ctx.currentToken().Type 86 if typ == token.LiteralType || typ == token.FoldedType { 87 value, err = p.parseLiteral(ctx) 88 } else { 89 value = p.parseScalarValue(ctx.currentToken()) 90 } 91 case token.SequenceTag, 92 token.SetTag: 93 err = errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagToken.Value), tagToken) 94 default: 95 // custom tag 96 value, err = p.parseToken(ctx, ctx.currentToken()) 97 } 98 if err != nil { 99 return nil, errors.Wrapf(err, "failed to parse tag value") 100 } 101 node.Value = value 102 return node, nil 103} 104 105func (p *parser) removeLeftSideNewLineCharacter(src string) string { 106 // CR or LF or CRLF 107 return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n") 108} 109 110func (p *parser) existsNewLineCharacter(src string) bool { 111 if strings.Index(src, "\n") > 0 { 112 return true 113 } 114 if strings.Index(src, "\r") > 0 { 115 return true 116 } 117 return false 118} 119 120func (p *parser) validateMapKey(tk *token.Token) error { 121 if tk.Type != token.StringType { 122 return nil 123 } 124 origin := p.removeLeftSideNewLineCharacter(tk.Origin) 125 if p.existsNewLineCharacter(origin) { 126 return errors.ErrSyntax("unexpected key name", tk) 127 } 128 return nil 129} 130 131func (p *parser) createNullToken(base *token.Token) *token.Token { 132 pos := *(base.Position) 133 pos.Column++ 134 return token.New("null", "null", &pos) 135} 136 137func (p *parser) parseMapValue(ctx *context, key ast.Node, colonToken *token.Token) (ast.Node, error) { 138 tk := ctx.currentToken() 139 if tk == nil { 140 nullToken := p.createNullToken(colonToken) 141 ctx.insertToken(ctx.idx, nullToken) 142 return ast.Null(nullToken), nil 143 } 144 145 if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType { 146 // in this case, 147 // ---- 148 // key: <value does not defined> 149 // next 150 nullToken := p.createNullToken(colonToken) 151 ctx.insertToken(ctx.idx, nullToken) 152 return ast.Null(nullToken), nil 153 } 154 155 if tk.Position.Column < key.GetToken().Position.Column { 156 // in this case, 157 // ---- 158 // key: <value does not defined> 159 // next 160 nullToken := p.createNullToken(colonToken) 161 ctx.insertToken(ctx.idx, nullToken) 162 return ast.Null(nullToken), nil 163 } 164 165 value, err := p.parseToken(ctx, ctx.currentToken()) 166 if err != nil { 167 return nil, errors.Wrapf(err, "failed to parse mapping 'value' node") 168 } 169 return value, nil 170} 171 172func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error { 173 keyColumn := key.GetToken().Position.Column 174 valueColumn := value.GetToken().Position.Column 175 if keyColumn != valueColumn { 176 return nil 177 } 178 if value.Type() != ast.StringType { 179 return nil 180 } 181 ntk := ctx.nextToken() 182 if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) { 183 return errors.ErrSyntax("could not found expected ':' token", value.GetToken()) 184 } 185 return nil 186} 187 188func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) { 189 key, err := p.parseMapKey(ctx) 190 if err != nil { 191 return nil, errors.Wrapf(err, "failed to parse map key") 192 } 193 if err := p.validateMapKey(key.GetToken()); err != nil { 194 return nil, errors.Wrapf(err, "validate mapping key error") 195 } 196 ctx.progress(1) // progress to mapping value token 197 tk := ctx.currentToken() // get mapping value token 198 ctx.progress(1) // progress to value token 199 if err := p.setSameLineCommentIfExists(ctx, key); err != nil { 200 return nil, errors.Wrapf(err, "failed to set same line comment to node") 201 } 202 if key.GetComment() != nil { 203 // if current token is comment, GetComment() is not nil. 204 // then progress to value token 205 ctx.progressIgnoreComment(1) 206 } 207 208 value, err := p.parseMapValue(ctx, key, tk) 209 if err != nil { 210 return nil, errors.Wrapf(err, "failed to parse map value") 211 } 212 if err := p.validateMapValue(ctx, key, value); err != nil { 213 return nil, errors.Wrapf(err, "failed to validate map value") 214 } 215 216 mvnode := ast.MappingValue(tk, key, value) 217 node := ast.Mapping(tk, false, mvnode) 218 219 ntk := ctx.nextNotCommentToken() 220 antk := ctx.afterNextNotCommentToken() 221 for antk != nil && antk.Type == token.MappingValueType && 222 ntk.Position.Column == key.GetToken().Position.Column { 223 ctx.progressIgnoreComment(1) 224 value, err := p.parseToken(ctx, ctx.currentToken()) 225 if err != nil { 226 return nil, errors.Wrapf(err, "failed to parse mapping node") 227 } 228 switch value.Type() { 229 case ast.MappingType: 230 c := value.(*ast.MappingNode) 231 comment := c.GetComment() 232 for idx, v := range c.Values { 233 if idx == 0 && comment != nil { 234 if err := v.SetComment(comment); err != nil { 235 return nil, errors.Wrapf(err, "failed to set comment token to node") 236 } 237 } 238 node.Values = append(node.Values, v) 239 } 240 case ast.MappingValueType: 241 node.Values = append(node.Values, value.(*ast.MappingValueNode)) 242 default: 243 return nil, xerrors.Errorf("failed to parse mapping value node node is %s", value.Type()) 244 } 245 ntk = ctx.nextNotCommentToken() 246 antk = ctx.afterNextNotCommentToken() 247 } 248 if len(node.Values) == 1 { 249 return mvnode, nil 250 } 251 return node, nil 252} 253 254func (p *parser) parseSequenceEntry(ctx *context) (ast.Node, error) { 255 tk := ctx.currentToken() 256 sequenceNode := ast.Sequence(tk, false) 257 curColumn := tk.Position.Column 258 for tk.Type == token.SequenceEntryType { 259 ctx.progress(1) // skip sequence token 260 value, err := p.parseToken(ctx, ctx.currentToken()) 261 if err != nil { 262 return nil, errors.Wrapf(err, "failed to parse sequence") 263 } 264 sequenceNode.Values = append(sequenceNode.Values, value) 265 tk = ctx.nextNotCommentToken() 266 if tk == nil { 267 break 268 } 269 if tk.Type != token.SequenceEntryType { 270 break 271 } 272 if tk.Position.Column != curColumn { 273 break 274 } 275 ctx.progressIgnoreComment(1) 276 } 277 return sequenceNode, nil 278} 279 280func (p *parser) parseAnchor(ctx *context) (ast.Node, error) { 281 tk := ctx.currentToken() 282 anchor := ast.Anchor(tk) 283 ntk := ctx.nextToken() 284 if ntk == nil { 285 return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk) 286 } 287 ctx.progress(1) // skip anchor token 288 name, err := p.parseToken(ctx, ctx.currentToken()) 289 if err != nil { 290 return nil, errors.Wrapf(err, "failed to parser anchor name node") 291 } 292 anchor.Name = name 293 ntk = ctx.nextToken() 294 if ntk == nil { 295 return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", ctx.currentToken()) 296 } 297 ctx.progress(1) 298 value, err := p.parseToken(ctx, ctx.currentToken()) 299 if err != nil { 300 return nil, errors.Wrapf(err, "failed to parser anchor name node") 301 } 302 anchor.Value = value 303 return anchor, nil 304} 305 306func (p *parser) parseAlias(ctx *context) (ast.Node, error) { 307 tk := ctx.currentToken() 308 alias := ast.Alias(tk) 309 ntk := ctx.nextToken() 310 if ntk == nil { 311 return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk) 312 } 313 ctx.progress(1) // skip alias token 314 name, err := p.parseToken(ctx, ctx.currentToken()) 315 if err != nil { 316 return nil, errors.Wrapf(err, "failed to parser alias name node") 317 } 318 alias.Value = name 319 return alias, nil 320} 321 322func (p *parser) parseMapKey(ctx *context) (ast.Node, error) { 323 tk := ctx.currentToken() 324 if value := p.parseScalarValue(tk); value != nil { 325 return value, nil 326 } 327 switch tk.Type { 328 case token.MergeKeyType: 329 return ast.MergeKey(tk), nil 330 case token.MappingKeyType: 331 return p.parseMappingKey(ctx) 332 } 333 return nil, errors.ErrSyntax("unexpected mapping key", tk) 334} 335 336func (p *parser) parseStringValue(tk *token.Token) ast.Node { 337 switch tk.Type { 338 case token.StringType, 339 token.SingleQuoteType, 340 token.DoubleQuoteType: 341 return ast.String(tk) 342 } 343 return nil 344} 345 346func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.Node, error) { 347 node := p.parseScalarValue(tk) 348 if node == nil { 349 return nil, nil 350 } 351 if p.isSameLineComment(ctx.nextToken(), node) { 352 ctx.progress(1) 353 if err := p.setSameLineCommentIfExists(ctx, node); err != nil { 354 return nil, errors.Wrapf(err, "failed to set same line comment to node") 355 } 356 } 357 return node, nil 358} 359 360func (p *parser) parseScalarValue(tk *token.Token) ast.Node { 361 if node := p.parseStringValue(tk); node != nil { 362 return node 363 } 364 switch tk.Type { 365 case token.NullType: 366 return ast.Null(tk) 367 case token.BoolType: 368 return ast.Bool(tk) 369 case token.IntegerType, 370 token.BinaryIntegerType, 371 token.OctetIntegerType, 372 token.HexIntegerType: 373 return ast.Integer(tk) 374 case token.FloatType: 375 return ast.Float(tk) 376 case token.InfinityType: 377 return ast.Infinity(tk) 378 case token.NanType: 379 return ast.Nan(tk) 380 } 381 return nil 382} 383 384func (p *parser) parseDirective(ctx *context) (ast.Node, error) { 385 node := ast.Directive(ctx.currentToken()) 386 ctx.progress(1) // skip directive token 387 value, err := p.parseToken(ctx, ctx.currentToken()) 388 if err != nil { 389 return nil, errors.Wrapf(err, "failed to parse directive value") 390 } 391 node.Value = value 392 ctx.progress(1) 393 if ctx.currentToken().Type != token.DocumentHeaderType { 394 return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.currentToken()) 395 } 396 return node, nil 397} 398 399func (p *parser) parseLiteral(ctx *context) (ast.Node, error) { 400 node := ast.Literal(ctx.currentToken()) 401 ctx.progress(1) // skip literal/folded token 402 value, err := p.parseToken(ctx, ctx.currentToken()) 403 if err != nil { 404 return nil, errors.Wrapf(err, "failed to parse literal/folded value") 405 } 406 snode, ok := value.(*ast.StringNode) 407 if !ok { 408 return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken()) 409 } 410 node.Value = snode 411 return node, nil 412} 413 414func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool { 415 if tk == nil { 416 return false 417 } 418 if tk.Type != token.CommentType { 419 return false 420 } 421 return tk.Position.Line == node.GetToken().Position.Line 422} 423 424func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error { 425 tk := ctx.currentToken() 426 if !p.isSameLineComment(tk, node) { 427 return nil 428 } 429 if err := node.SetComment(tk); err != nil { 430 return errors.Wrapf(err, "failed to set comment token to ast.Node") 431 } 432 return nil 433} 434 435func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) { 436 startTk := ctx.currentToken() 437 ctx.progress(1) // skip document header token 438 body, err := p.parseToken(ctx, ctx.currentToken()) 439 if err != nil { 440 return nil, errors.Wrapf(err, "failed to parse document body") 441 } 442 node := ast.Document(startTk, body) 443 if ntk := ctx.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType { 444 node.End = ntk 445 ctx.progress(1) 446 } 447 return node, nil 448} 449 450func (p *parser) parseComment(ctx *context) (ast.Node, error) { 451 commentTokens := []*token.Token{} 452 for { 453 tk := ctx.currentToken() 454 if tk == nil { 455 break 456 } 457 if tk.Type != token.CommentType { 458 break 459 } 460 commentTokens = append(commentTokens, tk) 461 ctx.progressIgnoreComment(1) // skip comment token 462 } 463 // TODO: support token group. currently merge tokens to one token 464 firstToken := commentTokens[0] 465 values := []string{} 466 origins := []string{} 467 for _, tk := range commentTokens { 468 values = append(values, tk.Value) 469 origins = append(origins, tk.Origin) 470 } 471 firstToken.Value = strings.Join(values, "") 472 firstToken.Value = strings.Join(origins, "") 473 node, err := p.parseToken(ctx, ctx.currentToken()) 474 if err != nil { 475 return nil, errors.Wrapf(err, "failed to parse node after comment") 476 } 477 if node == nil { 478 return ast.Comment(firstToken), nil 479 } 480 if err := node.SetComment(firstToken); err != nil { 481 return nil, errors.Wrapf(err, "failed to set comment token to node") 482 } 483 return node, nil 484} 485 486func (p *parser) parseMappingKey(ctx *context) (ast.Node, error) { 487 node := ast.MappingKey(ctx.currentToken()) 488 ctx.progress(1) // skip mapping key token 489 value, err := p.parseToken(ctx, ctx.currentToken()) 490 if err != nil { 491 return nil, errors.Wrapf(err, "failed to parse map key") 492 } 493 node.Value = value 494 return node, nil 495} 496 497func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) { 498 if tk == nil { 499 return nil, nil 500 } 501 if tk.NextType() == token.MappingValueType { 502 node, err := p.parseMappingValue(ctx) 503 return node, err 504 } 505 node, err := p.parseScalarValueWithComment(ctx, tk) 506 if err != nil { 507 return nil, errors.Wrapf(err, "failed to parse scalar value") 508 } 509 if node != nil { 510 return node, nil 511 } 512 switch tk.Type { 513 case token.CommentType: 514 return p.parseComment(ctx) 515 case token.MappingKeyType: 516 return p.parseMappingKey(ctx) 517 case token.DocumentHeaderType: 518 return p.parseDocument(ctx) 519 case token.MappingStartType: 520 return p.parseMapping(ctx) 521 case token.SequenceStartType: 522 return p.parseSequence(ctx) 523 case token.SequenceEntryType: 524 return p.parseSequenceEntry(ctx) 525 case token.AnchorType: 526 return p.parseAnchor(ctx) 527 case token.AliasType: 528 return p.parseAlias(ctx) 529 case token.DirectiveType: 530 return p.parseDirective(ctx) 531 case token.TagType: 532 return p.parseTag(ctx) 533 case token.LiteralType, token.FoldedType: 534 return p.parseLiteral(ctx) 535 } 536 return nil, nil 537} 538 539func (p *parser) parse(tokens token.Tokens, mode Mode) (*ast.File, error) { 540 ctx := newContext(tokens, mode) 541 file := &ast.File{Docs: []*ast.DocumentNode{}} 542 for ctx.next() { 543 node, err := p.parseToken(ctx, ctx.currentToken()) 544 if err != nil { 545 return nil, errors.Wrapf(err, "failed to parse") 546 } 547 ctx.progressIgnoreComment(1) 548 if node == nil { 549 continue 550 } 551 if doc, ok := node.(*ast.DocumentNode); ok { 552 file.Docs = append(file.Docs, doc) 553 } else { 554 file.Docs = append(file.Docs, ast.Document(nil, node)) 555 } 556 } 557 return file, nil 558} 559 560type Mode uint 561 562const ( 563 ParseComments Mode = 1 << iota // parse comments and add them to AST 564) 565 566// ParseBytes parse from byte slice, and returns ast.File 567func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) { 568 tokens := lexer.Tokenize(string(bytes)) 569 f, err := Parse(tokens, mode) 570 if err != nil { 571 return nil, errors.Wrapf(err, "failed to parse") 572 } 573 return f, nil 574} 575 576// Parse parse from token instances, and returns ast.File 577func Parse(tokens token.Tokens, mode Mode) (*ast.File, error) { 578 var p parser 579 f, err := p.parse(tokens, mode) 580 if err != nil { 581 return nil, errors.Wrapf(err, "failed to parse") 582 } 583 return f, nil 584} 585 586// Parse parse from filename, and returns ast.File 587func ParseFile(filename string, mode Mode) (*ast.File, error) { 588 file, err := ioutil.ReadFile(filename) 589 if err != nil { 590 return nil, errors.Wrapf(err, "failed to read file: %s", filename) 591 } 592 f, err := ParseBytes(file, mode) 593 if err != nil { 594 return nil, errors.Wrapf(err, "failed to parse") 595 } 596 f.Name = filename 597 return f, nil 598} 599