1// 2// Copyright (c) 2011-2019 Canonical Ltd 3// Copyright (c) 2006-2010 Kirill Simonov 4// 5// Permission is hereby granted, free of charge, to any person obtaining a copy of 6// this software and associated documentation files (the "Software"), to deal in 7// the Software without restriction, including without limitation the rights to 8// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9// of the Software, and to permit persons to whom the Software is furnished to do 10// so, subject to the following conditions: 11// 12// The above copyright notice and this permission notice shall be included in all 13// copies or substantial portions of the Software. 14// 15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21// SOFTWARE. 22 23package yaml 24 25import ( 26 "bytes" 27 "fmt" 28) 29 30// Introduction 31// ************ 32// 33// The following notes assume that you are familiar with the YAML specification 34// (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in 35// some cases we are less restrictive that it requires. 36// 37// The process of transforming a YAML stream into a sequence of events is 38// divided on two steps: Scanning and Parsing. 39// 40// The Scanner transforms the input stream into a sequence of tokens, while the 41// parser transform the sequence of tokens produced by the Scanner into a 42// sequence of parsing events. 43// 44// The Scanner is rather clever and complicated. 
// The Parser, on the contrary,
// is a straightforward implementation of a recursive-descent parser (or,
// LL(1) parser, as it is usually called).
//
// Actually there are two issues of Scanning that might be called "clever", the
// rest is quite straightforward.  The issues are "block collection start" and
// "simple keys".  Both issues are explained below in detail.
//
// Here the Scanning step is explained and implemented.  We start with the list
// of all the tokens produced by the Scanner together with short descriptions.
//
// Now, tokens:
//
//      STREAM-START(encoding)          # The stream start.
//      STREAM-END                      # The stream end.
//      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
//      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
//      DOCUMENT-START                  # '---'
//      DOCUMENT-END                    # '...'
//      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
//      BLOCK-MAPPING-START             # sequence or a block mapping.
//      BLOCK-END                       # Indentation decrease.
//      FLOW-SEQUENCE-START             # '['
//      FLOW-SEQUENCE-END               # ']'
//      FLOW-MAPPING-START              # '{'
//      FLOW-MAPPING-END                # '}'
//      BLOCK-ENTRY                     # '-'
//      FLOW-ENTRY                      # ','
//      KEY                             # '?' or nothing (simple keys).
//      VALUE                           # ':'
//      ALIAS(anchor)                   # '*anchor'
//      ANCHOR(anchor)                  # '&anchor'
//      TAG(handle,suffix)              # '!handle!suffix'
//      SCALAR(value,style)             # A scalar.
//
// The following two tokens are "virtual" tokens denoting the beginning and the
// end of the stream:
//
//      STREAM-START(encoding)
//      STREAM-END
//
// We pass the information about the input stream encoding with the
// STREAM-START token.
//
// The next two tokens are responsible for directives:
//
//      VERSION-DIRECTIVE(major,minor)
//      TAG-DIRECTIVE(handle,prefix)
//
// Example:
//
//      %YAML 1.1
//      %TAG ! !foo
//      %TAG !yaml!
//              tag:yaml.org,2002:
//      ---
//
// The corresponding sequence of tokens:
//
//      STREAM-START(utf-8)
//      VERSION-DIRECTIVE(1,1)
//      TAG-DIRECTIVE("!","!foo")
//      TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:")
//      DOCUMENT-START
//      STREAM-END
//
// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
// line.
//
// The document start and end indicators are represented by:
//
//      DOCUMENT-START
//      DOCUMENT-END
//
// Note that if a YAML stream contains an implicit document (without '---'
// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
// produced.
//
// In the following examples, we present whole documents together with the
// produced tokens.
//
//  1. An implicit document:
//
//      'a scalar'
//
// Tokens:
//
//      STREAM-START(utf-8)
//      SCALAR("a scalar",single-quoted)
//      STREAM-END
//
//  2. An explicit document:
//
//      ---
//      'a scalar'
//      ...
//
// Tokens:
//
//      STREAM-START(utf-8)
//      DOCUMENT-START
//      SCALAR("a scalar",single-quoted)
//      DOCUMENT-END
//      STREAM-END
//
//  3. Several documents in a stream:
//
//      'a scalar'
//      ---
//      'another scalar'
//      ---
//      'yet another scalar'
//
// Tokens:
//
//      STREAM-START(utf-8)
//      SCALAR("a scalar",single-quoted)
//      DOCUMENT-START
//      SCALAR("another scalar",single-quoted)
//      DOCUMENT-START
//      SCALAR("yet another scalar",single-quoted)
//      STREAM-END
//
// We have already introduced the SCALAR token above.  The following tokens are
// used to describe aliases, anchors, tags, and scalars:
//
//      ALIAS(anchor)
//      ANCHOR(anchor)
//      TAG(handle,suffix)
//      SCALAR(value,style)
//
// The following series of examples illustrates the usage of these tokens:
//
//  1.
// A recursive sequence:
//
//      &A [ *A ]
//
// Tokens:
//
//      STREAM-START(utf-8)
//      ANCHOR("A")
//      FLOW-SEQUENCE-START
//      ALIAS("A")
//      FLOW-SEQUENCE-END
//      STREAM-END
//
//  2. A tagged scalar:
//
//      !!float "3.14"  # A good approximation.
//
// Tokens:
//
//      STREAM-START(utf-8)
//      TAG("!!","float")
//      SCALAR("3.14",double-quoted)
//      STREAM-END
//
//  3. Various scalar styles:
//
//      --- # Implicit empty plain scalars do not produce tokens.
//      --- a plain scalar
//      --- 'a single-quoted scalar'
//      --- "a double-quoted scalar"
//      --- |-
//        a literal scalar
//      --- >-
//        a folded
//        scalar
//
// Tokens:
//
//      STREAM-START(utf-8)
//      DOCUMENT-START
//      DOCUMENT-START
//      SCALAR("a plain scalar",plain)
//      DOCUMENT-START
//      SCALAR("a single-quoted scalar",single-quoted)
//      DOCUMENT-START
//      SCALAR("a double-quoted scalar",double-quoted)
//      DOCUMENT-START
//      SCALAR("a literal scalar",literal)
//      DOCUMENT-START
//      SCALAR("a folded scalar",folded)
//      STREAM-END
//
// Now it's time to review collection-related tokens. We will start with
// flow collections:
//
//      FLOW-SEQUENCE-START
//      FLOW-SEQUENCE-END
//      FLOW-MAPPING-START
//      FLOW-MAPPING-END
//      FLOW-ENTRY
//      KEY
//      VALUE
//
// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
// correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
// indicators '?' and ':', which are used for denoting mapping keys and values,
// are represented by the KEY and VALUE tokens.
//
// The following examples show flow collections:
//
//  1.
// A flow sequence:
//
//      [item 1, item 2, item 3]
//
// Tokens:
//
//      STREAM-START(utf-8)
//      FLOW-SEQUENCE-START
//      SCALAR("item 1",plain)
//      FLOW-ENTRY
//      SCALAR("item 2",plain)
//      FLOW-ENTRY
//      SCALAR("item 3",plain)
//      FLOW-SEQUENCE-END
//      STREAM-END
//
//  2. A flow mapping:
//
//      {
//          a simple key: a value,  # Note that the KEY token is produced.
//          ? a complex key: another value,
//      }
//
// Tokens:
//
//      STREAM-START(utf-8)
//      FLOW-MAPPING-START
//      KEY
//      SCALAR("a simple key",plain)
//      VALUE
//      SCALAR("a value",plain)
//      FLOW-ENTRY
//      KEY
//      SCALAR("a complex key",plain)
//      VALUE
//      SCALAR("another value",plain)
//      FLOW-ENTRY
//      FLOW-MAPPING-END
//      STREAM-END
//
// A simple key is a key which is not denoted by the '?' indicator.  Note that
// the Scanner still produces the KEY token whenever it encounters a simple key.
//
// For scanning block collections, the following tokens are used (note that we
// repeat KEY and VALUE here):
//
//      BLOCK-SEQUENCE-START
//      BLOCK-MAPPING-START
//      BLOCK-END
//      BLOCK-ENTRY
//      KEY
//      VALUE
//
// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
// increase that precedes a block collection (cf. the INDENT token in Python).
// The token BLOCK-END denotes indentation decrease that ends a block collection
// (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
// that make detection of these tokens more complex.
//
// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
// '-', '?', and ':' correspondingly.
//
// The following examples show how the tokens BLOCK-SEQUENCE-START,
// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
//
//  1.
// Block sequences:
//
//      - item 1
//      - item 2
//      -
//        - item 3.1
//        - item 3.2
//      -
//        key 1: value 1
//        key 2: value 2
//
// Tokens:
//
//      STREAM-START(utf-8)
//      BLOCK-SEQUENCE-START
//      BLOCK-ENTRY
//      SCALAR("item 1",plain)
//      BLOCK-ENTRY
//      SCALAR("item 2",plain)
//      BLOCK-ENTRY
//      BLOCK-SEQUENCE-START
//      BLOCK-ENTRY
//      SCALAR("item 3.1",plain)
//      BLOCK-ENTRY
//      SCALAR("item 3.2",plain)
//      BLOCK-END
//      BLOCK-ENTRY
//      BLOCK-MAPPING-START
//      KEY
//      SCALAR("key 1",plain)
//      VALUE
//      SCALAR("value 1",plain)
//      KEY
//      SCALAR("key 2",plain)
//      VALUE
//      SCALAR("value 2",plain)
//      BLOCK-END
//      BLOCK-END
//      STREAM-END
//
//  2. Block mappings:
//
//      a simple key: a value   # The KEY token is produced here.
//      ? a complex key
//      : another value
//      a mapping:
//        key 1: value 1
//        key 2: value 2
//      a sequence:
//        - item 1
//        - item 2
//
// Tokens:
//
//      STREAM-START(utf-8)
//      BLOCK-MAPPING-START
//      KEY
//      SCALAR("a simple key",plain)
//      VALUE
//      SCALAR("a value",plain)
//      KEY
//      SCALAR("a complex key",plain)
//      VALUE
//      SCALAR("another value",plain)
//      KEY
//      SCALAR("a mapping",plain)
//      VALUE
//      BLOCK-MAPPING-START
//      KEY
//      SCALAR("key 1",plain)
//      VALUE
//      SCALAR("value 1",plain)
//      KEY
//      SCALAR("key 2",plain)
//      VALUE
//      SCALAR("value 2",plain)
//      BLOCK-END
//      KEY
//      SCALAR("a sequence",plain)
//      VALUE
//      BLOCK-SEQUENCE-START
//      BLOCK-ENTRY
//      SCALAR("item 1",plain)
//      BLOCK-ENTRY
//      SCALAR("item 2",plain)
//      BLOCK-END
//      BLOCK-END
//      STREAM-END
//
// YAML does not always require a new block collection to start on a new
// line.  If the current line contains only '-', '?', and ':' indicators, a new
// block collection may start at the current line.  The following examples
// illustrate this case:
//
//  1.
// Collections in a sequence:
//
//      - - item 1
//        - item 2
//      - key 1: value 1
//        key 2: value 2
//      - ? complex key
//        : complex value
//
// Tokens:
//
//      STREAM-START(utf-8)
//      BLOCK-SEQUENCE-START
//      BLOCK-ENTRY
//      BLOCK-SEQUENCE-START
//      BLOCK-ENTRY
//      SCALAR("item 1",plain)
//      BLOCK-ENTRY
//      SCALAR("item 2",plain)
//      BLOCK-END
//      BLOCK-ENTRY
//      BLOCK-MAPPING-START
//      KEY
//      SCALAR("key 1",plain)
//      VALUE
//      SCALAR("value 1",plain)
//      KEY
//      SCALAR("key 2",plain)
//      VALUE
//      SCALAR("value 2",plain)
//      BLOCK-END
//      BLOCK-ENTRY
//      BLOCK-MAPPING-START
//      KEY
//      SCALAR("complex key",plain)
//      VALUE
//      SCALAR("complex value",plain)
//      BLOCK-END
//      BLOCK-END
//      STREAM-END
//
//  2. Collections in a mapping:
//
//      ? a sequence
//      : - item 1
//        - item 2
//      ? a mapping
//      : key 1: value 1
//        key 2: value 2
//
// Tokens:
//
//      STREAM-START(utf-8)
//      BLOCK-MAPPING-START
//      KEY
//      SCALAR("a sequence",plain)
//      VALUE
//      BLOCK-SEQUENCE-START
//      BLOCK-ENTRY
//      SCALAR("item 1",plain)
//      BLOCK-ENTRY
//      SCALAR("item 2",plain)
//      BLOCK-END
//      KEY
//      SCALAR("a mapping",plain)
//      VALUE
//      BLOCK-MAPPING-START
//      KEY
//      SCALAR("key 1",plain)
//      VALUE
//      SCALAR("value 1",plain)
//      KEY
//      SCALAR("key 2",plain)
//      VALUE
//      SCALAR("value 2",plain)
//      BLOCK-END
//      BLOCK-END
//      STREAM-END
//
// YAML also permits non-indented sequences if they are included in a block
// mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
//
//      key:
//      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
489// - item 2 490// 491// Tokens: 492// 493// STREAM-START(utf-8) 494// BLOCK-MAPPING-START 495// KEY 496// SCALAR("key",plain) 497// VALUE 498// BLOCK-ENTRY 499// SCALAR("item 1",plain) 500// BLOCK-ENTRY 501// SCALAR("item 2",plain) 502// BLOCK-END 503// 504 505// Ensure that the buffer contains the required number of characters. 506// Return true on success, false on failure (reader error or memory error). 507func cache(parser *yaml_parser_t, length int) bool { 508 // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) 509 return parser.unread >= length || yaml_parser_update_buffer(parser, length) 510} 511 512// Advance the buffer pointer. 513func skip(parser *yaml_parser_t) { 514 if !is_blank(parser.buffer, parser.buffer_pos) { 515 parser.newlines = 0 516 } 517 parser.mark.index++ 518 parser.mark.column++ 519 parser.unread-- 520 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 521} 522 523func skip_line(parser *yaml_parser_t) { 524 if is_crlf(parser.buffer, parser.buffer_pos) { 525 parser.mark.index += 2 526 parser.mark.column = 0 527 parser.mark.line++ 528 parser.unread -= 2 529 parser.buffer_pos += 2 530 parser.newlines++ 531 } else if is_break(parser.buffer, parser.buffer_pos) { 532 parser.mark.index++ 533 parser.mark.column = 0 534 parser.mark.line++ 535 parser.unread-- 536 parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) 537 parser.newlines++ 538 } 539} 540 541// Copy a character to a string buffer and advance pointers. 
542func read(parser *yaml_parser_t, s []byte) []byte { 543 if !is_blank(parser.buffer, parser.buffer_pos) { 544 parser.newlines = 0 545 } 546 w := width(parser.buffer[parser.buffer_pos]) 547 if w == 0 { 548 panic("invalid character sequence") 549 } 550 if len(s) == 0 { 551 s = make([]byte, 0, 32) 552 } 553 if w == 1 && len(s)+w <= cap(s) { 554 s = s[:len(s)+1] 555 s[len(s)-1] = parser.buffer[parser.buffer_pos] 556 parser.buffer_pos++ 557 } else { 558 s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) 559 parser.buffer_pos += w 560 } 561 parser.mark.index++ 562 parser.mark.column++ 563 parser.unread-- 564 return s 565} 566 567// Copy a line break character to a string buffer and advance pointers. 568func read_line(parser *yaml_parser_t, s []byte) []byte { 569 buf := parser.buffer 570 pos := parser.buffer_pos 571 switch { 572 case buf[pos] == '\r' && buf[pos+1] == '\n': 573 // CR LF . LF 574 s = append(s, '\n') 575 parser.buffer_pos += 2 576 parser.mark.index++ 577 parser.unread-- 578 case buf[pos] == '\r' || buf[pos] == '\n': 579 // CR|LF . LF 580 s = append(s, '\n') 581 parser.buffer_pos += 1 582 case buf[pos] == '\xC2' && buf[pos+1] == '\x85': 583 // NEL . LF 584 s = append(s, '\n') 585 parser.buffer_pos += 2 586 case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): 587 // LS|PS . LS|PS 588 s = append(s, buf[parser.buffer_pos:pos+3]...) 589 parser.buffer_pos += 3 590 default: 591 return s 592 } 593 parser.mark.index++ 594 parser.mark.column = 0 595 parser.mark.line++ 596 parser.unread-- 597 parser.newlines++ 598 return s 599} 600 601// Get the next token. 602func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { 603 // Erase the token object. 604 *token = yaml_token_t{} // [Go] Is this necessary? 605 606 // No tokens after STREAM-END or error. 
607 if parser.stream_end_produced || parser.error != yaml_NO_ERROR { 608 return true 609 } 610 611 // Ensure that the tokens queue contains enough tokens. 612 if !parser.token_available { 613 if !yaml_parser_fetch_more_tokens(parser) { 614 return false 615 } 616 } 617 618 // Fetch the next token from the queue. 619 *token = parser.tokens[parser.tokens_head] 620 parser.tokens_head++ 621 parser.tokens_parsed++ 622 parser.token_available = false 623 624 if token.typ == yaml_STREAM_END_TOKEN { 625 parser.stream_end_produced = true 626 } 627 return true 628} 629 630// Set the scanner error and return false. 631func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { 632 parser.error = yaml_SCANNER_ERROR 633 parser.context = context 634 parser.context_mark = context_mark 635 parser.problem = problem 636 parser.problem_mark = parser.mark 637 return false 638} 639 640func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { 641 context := "while parsing a tag" 642 if directive { 643 context = "while parsing a %TAG directive" 644 } 645 return yaml_parser_set_scanner_error(parser, context, context_mark, problem) 646} 647 648func trace(args ...interface{}) func() { 649 pargs := append([]interface{}{"+++"}, args...) 650 fmt.Println(pargs...) 651 pargs = append([]interface{}{"---"}, args...) 652 return func() { fmt.Println(pargs...) } 653} 654 655// Ensure that the tokens queue contains at least one token which can be 656// returned to the Parser. 657func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { 658 // While we need more tokens to fetch, do it. 
659 for { 660 // [Go] The comment parsing logic requires a lookahead of two tokens 661 // so that foot comments may be parsed in time of associating them 662 // with the tokens that are parsed before them, and also for line 663 // comments to be transformed into head comments in some edge cases. 664 if parser.tokens_head < len(parser.tokens)-2 { 665 // If a potential simple key is at the head position, we need to fetch 666 // the next token to disambiguate it. 667 head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] 668 if !ok { 669 break 670 } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { 671 return false 672 } else if !valid { 673 break 674 } 675 } 676 // Fetch the next token. 677 if !yaml_parser_fetch_next_token(parser) { 678 return false 679 } 680 } 681 682 parser.token_available = true 683 return true 684} 685 686// The dispatcher for token fetchers. 687func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) { 688 // Ensure that the buffer is initialized. 689 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 690 return false 691 } 692 693 // Check if we just started scanning. Fetch STREAM-START then. 694 if !parser.stream_start_produced { 695 return yaml_parser_fetch_stream_start(parser) 696 } 697 698 scan_mark := parser.mark 699 700 // Eat whitespaces and comments until we reach the next token. 701 if !yaml_parser_scan_to_next_token(parser) { 702 return false 703 } 704 705 // [Go] While unrolling indents, transform the head comments of prior 706 // indentation levels observed after scan_start into foot comments at 707 // the respective indexes. 708 709 // Check the indentation level against the current column. 710 if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) { 711 return false 712 } 713 714 // Ensure that the buffer contains at least 4 characters. 4 is the length 715 // of the longest indicators ('--- ' and '... '). 
716 if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { 717 return false 718 } 719 720 // Is it the end of the stream? 721 if is_z(parser.buffer, parser.buffer_pos) { 722 return yaml_parser_fetch_stream_end(parser) 723 } 724 725 // Is it a directive? 726 if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { 727 return yaml_parser_fetch_directive(parser) 728 } 729 730 buf := parser.buffer 731 pos := parser.buffer_pos 732 733 // Is it the document start indicator? 734 if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { 735 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) 736 } 737 738 // Is it the document end indicator? 739 if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { 740 return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) 741 } 742 743 comment_mark := parser.mark 744 if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') { 745 // Associate any following comments with the prior token. 746 comment_mark = parser.tokens[len(parser.tokens)-1].start_mark 747 } 748 defer func() { 749 if !ok { 750 return 751 } 752 if !yaml_parser_scan_line_comment(parser, comment_mark) { 753 ok = false 754 return 755 } 756 }() 757 758 // Is it the flow sequence start indicator? 759 if buf[pos] == '[' { 760 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) 761 } 762 763 // Is it the flow mapping start indicator? 764 if parser.buffer[parser.buffer_pos] == '{' { 765 return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) 766 } 767 768 // Is it the flow sequence end indicator? 
769 if parser.buffer[parser.buffer_pos] == ']' { 770 return yaml_parser_fetch_flow_collection_end(parser, 771 yaml_FLOW_SEQUENCE_END_TOKEN) 772 } 773 774 // Is it the flow mapping end indicator? 775 if parser.buffer[parser.buffer_pos] == '}' { 776 return yaml_parser_fetch_flow_collection_end(parser, 777 yaml_FLOW_MAPPING_END_TOKEN) 778 } 779 780 // Is it the flow entry indicator? 781 if parser.buffer[parser.buffer_pos] == ',' { 782 return yaml_parser_fetch_flow_entry(parser) 783 } 784 785 // Is it the block entry indicator? 786 if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { 787 return yaml_parser_fetch_block_entry(parser) 788 } 789 790 // Is it the key indicator? 791 if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 792 return yaml_parser_fetch_key(parser) 793 } 794 795 // Is it the value indicator? 796 if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { 797 return yaml_parser_fetch_value(parser) 798 } 799 800 // Is it an alias? 801 if parser.buffer[parser.buffer_pos] == '*' { 802 return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) 803 } 804 805 // Is it an anchor? 806 if parser.buffer[parser.buffer_pos] == '&' { 807 return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) 808 } 809 810 // Is it a tag? 811 if parser.buffer[parser.buffer_pos] == '!' { 812 return yaml_parser_fetch_tag(parser) 813 } 814 815 // Is it a literal scalar? 816 if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { 817 return yaml_parser_fetch_block_scalar(parser, true) 818 } 819 820 // Is it a folded scalar? 821 if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { 822 return yaml_parser_fetch_block_scalar(parser, false) 823 } 824 825 // Is it a single-quoted scalar? 
826 if parser.buffer[parser.buffer_pos] == '\'' { 827 return yaml_parser_fetch_flow_scalar(parser, true) 828 } 829 830 // Is it a double-quoted scalar? 831 if parser.buffer[parser.buffer_pos] == '"' { 832 return yaml_parser_fetch_flow_scalar(parser, false) 833 } 834 835 // Is it a plain scalar? 836 // 837 // A plain scalar may start with any non-blank characters except 838 // 839 // '-', '?', ':', ',', '[', ']', '{', '}', 840 // '#', '&', '*', '!', '|', '>', '\'', '\"', 841 // '%', '@', '`'. 842 // 843 // In the block context (and, for the '-' indicator, in the flow context 844 // too), it may also start with the characters 845 // 846 // '-', '?', ':' 847 // 848 // if it is followed by a non-space character. 849 // 850 // The last rule is more restrictive than the specification requires. 851 // [Go] TODO Make this logic more reasonable. 852 //switch parser.buffer[parser.buffer_pos] { 853 //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': 854 //} 855 if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || 856 parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || 857 parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || 858 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || 859 parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || 860 parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || 861 parser.buffer[parser.buffer_pos] == '!' 
|| parser.buffer[parser.buffer_pos] == '|' || 862 parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || 863 parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || 864 parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || 865 (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || 866 (parser.flow_level == 0 && 867 (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && 868 !is_blankz(parser.buffer, parser.buffer_pos+1)) { 869 return yaml_parser_fetch_plain_scalar(parser) 870 } 871 872 // If we don't determine the token type so far, it is an error. 873 return yaml_parser_set_scanner_error(parser, 874 "while scanning for the next token", parser.mark, 875 "found character that cannot start any token") 876} 877 878func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) { 879 if !simple_key.possible { 880 return false, true 881 } 882 883 // The 1.2 specification says: 884 // 885 // "If the ? indicator is omitted, parsing needs to see past the 886 // implicit key to recognize it as such. To limit the amount of 887 // lookahead required, the “:” indicator must appear at most 1024 888 // Unicode characters beyond the start of the key. In addition, the key 889 // is restricted to a single line." 890 // 891 if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { 892 // Check if the potential simple key to be removed is required. 893 if simple_key.required { 894 return false, yaml_parser_set_scanner_error(parser, 895 "while scanning a simple key", simple_key.mark, 896 "could not find expected ':'") 897 } 898 simple_key.possible = false 899 return false, true 900 } 901 return true, true 902} 903 904// Check if a simple key may start at the current position and add it if 905// needed. 
906func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { 907 // A simple key is required at the current position if the scanner is in 908 // the block context and the current column coincides with the indentation 909 // level. 910 911 required := parser.flow_level == 0 && parser.indent == parser.mark.column 912 913 // 914 // If the current position may start a simple key, save it. 915 // 916 if parser.simple_key_allowed { 917 simple_key := yaml_simple_key_t{ 918 possible: true, 919 required: required, 920 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 921 mark: parser.mark, 922 } 923 924 if !yaml_parser_remove_simple_key(parser) { 925 return false 926 } 927 parser.simple_keys[len(parser.simple_keys)-1] = simple_key 928 parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 929 } 930 return true 931} 932 933// Remove a potential simple key at the current flow level. 934func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { 935 i := len(parser.simple_keys) - 1 936 if parser.simple_keys[i].possible { 937 // If the key is required, it is an error. 938 if parser.simple_keys[i].required { 939 return yaml_parser_set_scanner_error(parser, 940 "while scanning a simple key", parser.simple_keys[i].mark, 941 "could not find expected ':'") 942 } 943 // Remove the key from the stack. 944 parser.simple_keys[i].possible = false 945 delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) 946 } 947 return true 948} 949 950// max_flow_level limits the flow_level 951const max_flow_level = 10000 952 953// Increase the flow level and resize the simple key list if needed. 954func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { 955 // Reset the simple key on the next level. 
956 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ 957 possible: false, 958 required: false, 959 token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), 960 mark: parser.mark, 961 }) 962 963 // Increase the flow level. 964 parser.flow_level++ 965 if parser.flow_level > max_flow_level { 966 return yaml_parser_set_scanner_error(parser, 967 "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, 968 fmt.Sprintf("exceeded max depth of %d", max_flow_level)) 969 } 970 return true 971} 972 973// Decrease the flow level. 974func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { 975 if parser.flow_level > 0 { 976 parser.flow_level-- 977 last := len(parser.simple_keys) - 1 978 delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) 979 parser.simple_keys = parser.simple_keys[:last] 980 } 981 return true 982} 983 984// max_indents limits the indents stack size 985const max_indents = 10000 986 987// Push the current indentation level to the stack and set the new level 988// the current column is greater than the indentation level. In this case, 989// append or insert the specified token into the token queue. 990func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { 991 // In the flow context, do nothing. 992 if parser.flow_level > 0 { 993 return true 994 } 995 996 if parser.indent < column { 997 // Push the current indentation level to the stack and set the new 998 // indentation level. 999 parser.indents = append(parser.indents, parser.indent) 1000 parser.indent = column 1001 if len(parser.indents) > max_indents { 1002 return yaml_parser_set_scanner_error(parser, 1003 "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, 1004 fmt.Sprintf("exceeded max depth of %d", max_indents)) 1005 } 1006 1007 // Create a token and insert it into the queue. 
1008 token := yaml_token_t{ 1009 typ: typ, 1010 start_mark: mark, 1011 end_mark: mark, 1012 } 1013 if number > -1 { 1014 number -= parser.tokens_parsed 1015 } 1016 yaml_insert_token(parser, number, &token) 1017 } 1018 return true 1019} 1020 1021// Pop indentation levels from the indents stack until the current level 1022// becomes less or equal to the column. For each indentation level, append 1023// the BLOCK-END token. 1024func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool { 1025 // In the flow context, do nothing. 1026 if parser.flow_level > 0 { 1027 return true 1028 } 1029 1030 block_mark := scan_mark 1031 block_mark.index-- 1032 1033 // Loop through the indentation levels in the stack. 1034 for parser.indent > column { 1035 1036 // [Go] Reposition the end token before potential following 1037 // foot comments of parent blocks. For that, search 1038 // backwards for recent comments that were at the same 1039 // indent as the block that is ending now. 1040 stop_index := block_mark.index 1041 for i := len(parser.comments) - 1; i >= 0; i-- { 1042 comment := &parser.comments[i] 1043 1044 if comment.end_mark.index < stop_index { 1045 // Don't go back beyond the start of the comment/whitespace scan, unless column < 0. 1046 // If requested indent column is < 0, then the document is over and everything else 1047 // is a foot anyway. 1048 break 1049 } 1050 if comment.start_mark.column == parser.indent+1 { 1051 // This is a good match. But maybe there's a former comment 1052 // at that same indent level, so keep searching. 1053 block_mark = comment.start_mark 1054 } 1055 1056 // While the end of the former comment matches with 1057 // the start of the following one, we know there's 1058 // nothing in between and scanning is still safe. 1059 stop_index = comment.scan_mark.index 1060 } 1061 1062 // Create a token and append it to the queue. 
1063 token := yaml_token_t{ 1064 typ: yaml_BLOCK_END_TOKEN, 1065 start_mark: block_mark, 1066 end_mark: block_mark, 1067 } 1068 yaml_insert_token(parser, -1, &token) 1069 1070 // Pop the indentation level. 1071 parser.indent = parser.indents[len(parser.indents)-1] 1072 parser.indents = parser.indents[:len(parser.indents)-1] 1073 } 1074 return true 1075} 1076 1077// Initialize the scanner and produce the STREAM-START token. 1078func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { 1079 1080 // Set the initial indentation. 1081 parser.indent = -1 1082 1083 // Initialize the simple key stack. 1084 parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) 1085 1086 parser.simple_keys_by_tok = make(map[int]int) 1087 1088 // A simple key is allowed at the beginning of the stream. 1089 parser.simple_key_allowed = true 1090 1091 // We have started. 1092 parser.stream_start_produced = true 1093 1094 // Create the STREAM-START token and append it to the queue. 1095 token := yaml_token_t{ 1096 typ: yaml_STREAM_START_TOKEN, 1097 start_mark: parser.mark, 1098 end_mark: parser.mark, 1099 encoding: parser.encoding, 1100 } 1101 yaml_insert_token(parser, -1, &token) 1102 return true 1103} 1104 1105// Produce the STREAM-END token and shut down the scanner. 1106func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { 1107 1108 // Force new line. 1109 if parser.mark.column != 0 { 1110 parser.mark.column = 0 1111 parser.mark.line++ 1112 } 1113 1114 // Reset the indentation level. 1115 if !yaml_parser_unroll_indent(parser, -1, parser.mark) { 1116 return false 1117 } 1118 1119 // Reset simple keys. 1120 if !yaml_parser_remove_simple_key(parser) { 1121 return false 1122 } 1123 1124 parser.simple_key_allowed = false 1125 1126 // Create the STREAM-END token and append it to the queue. 
1127 token := yaml_token_t{ 1128 typ: yaml_STREAM_END_TOKEN, 1129 start_mark: parser.mark, 1130 end_mark: parser.mark, 1131 } 1132 yaml_insert_token(parser, -1, &token) 1133 return true 1134} 1135 1136// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. 1137func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { 1138 // Reset the indentation level. 1139 if !yaml_parser_unroll_indent(parser, -1, parser.mark) { 1140 return false 1141 } 1142 1143 // Reset simple keys. 1144 if !yaml_parser_remove_simple_key(parser) { 1145 return false 1146 } 1147 1148 parser.simple_key_allowed = false 1149 1150 // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. 1151 token := yaml_token_t{} 1152 if !yaml_parser_scan_directive(parser, &token) { 1153 return false 1154 } 1155 // Append the token to the queue. 1156 yaml_insert_token(parser, -1, &token) 1157 return true 1158} 1159 1160// Produce the DOCUMENT-START or DOCUMENT-END token. 1161func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1162 // Reset the indentation level. 1163 if !yaml_parser_unroll_indent(parser, -1, parser.mark) { 1164 return false 1165 } 1166 1167 // Reset simple keys. 1168 if !yaml_parser_remove_simple_key(parser) { 1169 return false 1170 } 1171 1172 parser.simple_key_allowed = false 1173 1174 // Consume the token. 1175 start_mark := parser.mark 1176 1177 skip(parser) 1178 skip(parser) 1179 skip(parser) 1180 1181 end_mark := parser.mark 1182 1183 // Create the DOCUMENT-START or DOCUMENT-END token. 1184 token := yaml_token_t{ 1185 typ: typ, 1186 start_mark: start_mark, 1187 end_mark: end_mark, 1188 } 1189 // Append the token to the queue. 1190 yaml_insert_token(parser, -1, &token) 1191 return true 1192} 1193 1194// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. 1195func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1196 1197 // The indicators '[' and '{' may start a simple key. 
1198 if !yaml_parser_save_simple_key(parser) { 1199 return false 1200 } 1201 1202 // Increase the flow level. 1203 if !yaml_parser_increase_flow_level(parser) { 1204 return false 1205 } 1206 1207 // A simple key may follow the indicators '[' and '{'. 1208 parser.simple_key_allowed = true 1209 1210 // Consume the token. 1211 start_mark := parser.mark 1212 skip(parser) 1213 end_mark := parser.mark 1214 1215 // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. 1216 token := yaml_token_t{ 1217 typ: typ, 1218 start_mark: start_mark, 1219 end_mark: end_mark, 1220 } 1221 // Append the token to the queue. 1222 yaml_insert_token(parser, -1, &token) 1223 return true 1224} 1225 1226// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. 1227func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1228 // Reset any potential simple key on the current flow level. 1229 if !yaml_parser_remove_simple_key(parser) { 1230 return false 1231 } 1232 1233 // Decrease the flow level. 1234 if !yaml_parser_decrease_flow_level(parser) { 1235 return false 1236 } 1237 1238 // No simple keys after the indicators ']' and '}'. 1239 parser.simple_key_allowed = false 1240 1241 // Consume the token. 1242 1243 start_mark := parser.mark 1244 skip(parser) 1245 end_mark := parser.mark 1246 1247 // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. 1248 token := yaml_token_t{ 1249 typ: typ, 1250 start_mark: start_mark, 1251 end_mark: end_mark, 1252 } 1253 // Append the token to the queue. 1254 yaml_insert_token(parser, -1, &token) 1255 return true 1256} 1257 1258// Produce the FLOW-ENTRY token. 1259func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { 1260 // Reset any potential simple keys on the current flow level. 1261 if !yaml_parser_remove_simple_key(parser) { 1262 return false 1263 } 1264 1265 // Simple keys are allowed after ','. 1266 parser.simple_key_allowed = true 1267 1268 // Consume the token. 
1269 start_mark := parser.mark 1270 skip(parser) 1271 end_mark := parser.mark 1272 1273 // Create the FLOW-ENTRY token and append it to the queue. 1274 token := yaml_token_t{ 1275 typ: yaml_FLOW_ENTRY_TOKEN, 1276 start_mark: start_mark, 1277 end_mark: end_mark, 1278 } 1279 yaml_insert_token(parser, -1, &token) 1280 return true 1281} 1282 1283// Produce the BLOCK-ENTRY token. 1284func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { 1285 // Check if the scanner is in the block context. 1286 if parser.flow_level == 0 { 1287 // Check if we are allowed to start a new entry. 1288 if !parser.simple_key_allowed { 1289 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1290 "block sequence entries are not allowed in this context") 1291 } 1292 // Add the BLOCK-SEQUENCE-START token if needed. 1293 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { 1294 return false 1295 } 1296 } else { 1297 // It is an error for the '-' indicator to occur in the flow context, 1298 // but we let the Parser detect and report about it because the Parser 1299 // is able to point to the context. 1300 } 1301 1302 // Reset any potential simple keys on the current flow level. 1303 if !yaml_parser_remove_simple_key(parser) { 1304 return false 1305 } 1306 1307 // Simple keys are allowed after '-'. 1308 parser.simple_key_allowed = true 1309 1310 // Consume the token. 1311 start_mark := parser.mark 1312 skip(parser) 1313 end_mark := parser.mark 1314 1315 // Create the BLOCK-ENTRY token and append it to the queue. 1316 token := yaml_token_t{ 1317 typ: yaml_BLOCK_ENTRY_TOKEN, 1318 start_mark: start_mark, 1319 end_mark: end_mark, 1320 } 1321 yaml_insert_token(parser, -1, &token) 1322 return true 1323} 1324 1325// Produce the KEY token. 1326func yaml_parser_fetch_key(parser *yaml_parser_t) bool { 1327 1328 // In the block context, additional checks are required. 
1329 if parser.flow_level == 0 { 1330 // Check if we are allowed to start a new key (not nessesary simple). 1331 if !parser.simple_key_allowed { 1332 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1333 "mapping keys are not allowed in this context") 1334 } 1335 // Add the BLOCK-MAPPING-START token if needed. 1336 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1337 return false 1338 } 1339 } 1340 1341 // Reset any potential simple keys on the current flow level. 1342 if !yaml_parser_remove_simple_key(parser) { 1343 return false 1344 } 1345 1346 // Simple keys are allowed after '?' in the block context. 1347 parser.simple_key_allowed = parser.flow_level == 0 1348 1349 // Consume the token. 1350 start_mark := parser.mark 1351 skip(parser) 1352 end_mark := parser.mark 1353 1354 // Create the KEY token and append it to the queue. 1355 token := yaml_token_t{ 1356 typ: yaml_KEY_TOKEN, 1357 start_mark: start_mark, 1358 end_mark: end_mark, 1359 } 1360 yaml_insert_token(parser, -1, &token) 1361 return true 1362} 1363 1364// Produce the VALUE token. 1365func yaml_parser_fetch_value(parser *yaml_parser_t) bool { 1366 1367 simple_key := &parser.simple_keys[len(parser.simple_keys)-1] 1368 1369 // Have we found a simple key? 1370 if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { 1371 return false 1372 1373 } else if valid { 1374 1375 // Create the KEY token and insert it into the queue. 1376 token := yaml_token_t{ 1377 typ: yaml_KEY_TOKEN, 1378 start_mark: simple_key.mark, 1379 end_mark: simple_key.mark, 1380 } 1381 yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) 1382 1383 // In the block context, we may need to add the BLOCK-MAPPING-START token. 1384 if !yaml_parser_roll_indent(parser, simple_key.mark.column, 1385 simple_key.token_number, 1386 yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { 1387 return false 1388 } 1389 1390 // Remove the simple key. 
1391 simple_key.possible = false 1392 delete(parser.simple_keys_by_tok, simple_key.token_number) 1393 1394 // A simple key cannot follow another simple key. 1395 parser.simple_key_allowed = false 1396 1397 } else { 1398 // The ':' indicator follows a complex key. 1399 1400 // In the block context, extra checks are required. 1401 if parser.flow_level == 0 { 1402 1403 // Check if we are allowed to start a complex value. 1404 if !parser.simple_key_allowed { 1405 return yaml_parser_set_scanner_error(parser, "", parser.mark, 1406 "mapping values are not allowed in this context") 1407 } 1408 1409 // Add the BLOCK-MAPPING-START token if needed. 1410 if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { 1411 return false 1412 } 1413 } 1414 1415 // Simple keys after ':' are allowed in the block context. 1416 parser.simple_key_allowed = parser.flow_level == 0 1417 } 1418 1419 // Consume the token. 1420 start_mark := parser.mark 1421 skip(parser) 1422 end_mark := parser.mark 1423 1424 // Create the VALUE token and append it to the queue. 1425 token := yaml_token_t{ 1426 typ: yaml_VALUE_TOKEN, 1427 start_mark: start_mark, 1428 end_mark: end_mark, 1429 } 1430 yaml_insert_token(parser, -1, &token) 1431 return true 1432} 1433 1434// Produce the ALIAS or ANCHOR token. 1435func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { 1436 // An anchor or an alias could be a simple key. 1437 if !yaml_parser_save_simple_key(parser) { 1438 return false 1439 } 1440 1441 // A simple key cannot follow an anchor or an alias. 1442 parser.simple_key_allowed = false 1443 1444 // Create the ALIAS or ANCHOR token and append it to the queue. 1445 var token yaml_token_t 1446 if !yaml_parser_scan_anchor(parser, &token, typ) { 1447 return false 1448 } 1449 yaml_insert_token(parser, -1, &token) 1450 return true 1451} 1452 1453// Produce the TAG token. 
1454func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { 1455 // A tag could be a simple key. 1456 if !yaml_parser_save_simple_key(parser) { 1457 return false 1458 } 1459 1460 // A simple key cannot follow a tag. 1461 parser.simple_key_allowed = false 1462 1463 // Create the TAG token and append it to the queue. 1464 var token yaml_token_t 1465 if !yaml_parser_scan_tag(parser, &token) { 1466 return false 1467 } 1468 yaml_insert_token(parser, -1, &token) 1469 return true 1470} 1471 1472// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. 1473func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { 1474 // Remove any potential simple keys. 1475 if !yaml_parser_remove_simple_key(parser) { 1476 return false 1477 } 1478 1479 // A simple key may follow a block scalar. 1480 parser.simple_key_allowed = true 1481 1482 // Create the SCALAR token and append it to the queue. 1483 var token yaml_token_t 1484 if !yaml_parser_scan_block_scalar(parser, &token, literal) { 1485 return false 1486 } 1487 yaml_insert_token(parser, -1, &token) 1488 return true 1489} 1490 1491// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. 1492func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { 1493 // A plain scalar could be a simple key. 1494 if !yaml_parser_save_simple_key(parser) { 1495 return false 1496 } 1497 1498 // A simple key cannot follow a flow scalar. 1499 parser.simple_key_allowed = false 1500 1501 // Create the SCALAR token and append it to the queue. 1502 var token yaml_token_t 1503 if !yaml_parser_scan_flow_scalar(parser, &token, single) { 1504 return false 1505 } 1506 yaml_insert_token(parser, -1, &token) 1507 return true 1508} 1509 1510// Produce the SCALAR(...,plain) token. 1511func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { 1512 // A plain scalar could be a simple key. 
1513 if !yaml_parser_save_simple_key(parser) { 1514 return false 1515 } 1516 1517 // A simple key cannot follow a flow scalar. 1518 parser.simple_key_allowed = false 1519 1520 // Create the SCALAR token and append it to the queue. 1521 var token yaml_token_t 1522 if !yaml_parser_scan_plain_scalar(parser, &token) { 1523 return false 1524 } 1525 yaml_insert_token(parser, -1, &token) 1526 return true 1527} 1528 1529// Eat whitespaces and comments until the next token is found. 1530func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { 1531 1532 scan_mark := parser.mark 1533 1534 // Until the next token is not found. 1535 for { 1536 // Allow the BOM mark to start a line. 1537 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1538 return false 1539 } 1540 if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { 1541 skip(parser) 1542 } 1543 1544 // Eat whitespaces. 1545 // Tabs are allowed: 1546 // - in the flow context 1547 // - in the block context, but not at the beginning of the line or 1548 // after '-', '?', or ':' (complex value). 1549 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1550 return false 1551 } 1552 1553 for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { 1554 skip(parser) 1555 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1556 return false 1557 } 1558 } 1559 1560 // Check if we just had a line comment under a sequence entry that 1561 // looks more like a header to the following content. Similar to this: 1562 // 1563 // - # The comment 1564 // - Some data 1565 // 1566 // If so, transform the line comment to a head comment and reposition. 
1567 if len(parser.comments) > 0 && len(parser.tokens) > 1 { 1568 tokenA := parser.tokens[len(parser.tokens)-2] 1569 tokenB := parser.tokens[len(parser.tokens)-1] 1570 comment := &parser.comments[len(parser.comments)-1] 1571 if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) { 1572 // If it was in the prior line, reposition so it becomes a 1573 // header of the follow up token. Otherwise, keep it in place 1574 // so it becomes a header of the former. 1575 comment.head = comment.line 1576 comment.line = nil 1577 if comment.start_mark.line == parser.mark.line-1 { 1578 comment.token_mark = parser.mark 1579 } 1580 } 1581 } 1582 1583 // Eat a comment until a line break. 1584 if parser.buffer[parser.buffer_pos] == '#' { 1585 if !yaml_parser_scan_comments(parser, scan_mark) { 1586 return false 1587 } 1588 } 1589 1590 // If it is a line break, eat it. 1591 if is_break(parser.buffer, parser.buffer_pos) { 1592 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1593 return false 1594 } 1595 skip_line(parser) 1596 1597 // In the block context, a new line may start a simple key. 1598 if parser.flow_level == 0 { 1599 parser.simple_key_allowed = true 1600 } 1601 } else { 1602 break // We have found a token. 1603 } 1604 } 1605 1606 return true 1607} 1608 1609// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 1610// 1611// Scope: 1612// %YAML 1.1 # a comment \n 1613// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1614// %TAG !yaml! tag:yaml.org,2002: \n 1615// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1616// 1617func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { 1618 // Eat '%'. 1619 start_mark := parser.mark 1620 skip(parser) 1621 1622 // Scan the directive name. 1623 var name []byte 1624 if !yaml_parser_scan_directive_name(parser, start_mark, &name) { 1625 return false 1626 } 1627 1628 // Is it a YAML directive? 
1629 if bytes.Equal(name, []byte("YAML")) { 1630 // Scan the VERSION directive value. 1631 var major, minor int8 1632 if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { 1633 return false 1634 } 1635 end_mark := parser.mark 1636 1637 // Create a VERSION-DIRECTIVE token. 1638 *token = yaml_token_t{ 1639 typ: yaml_VERSION_DIRECTIVE_TOKEN, 1640 start_mark: start_mark, 1641 end_mark: end_mark, 1642 major: major, 1643 minor: minor, 1644 } 1645 1646 // Is it a TAG directive? 1647 } else if bytes.Equal(name, []byte("TAG")) { 1648 // Scan the TAG directive value. 1649 var handle, prefix []byte 1650 if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { 1651 return false 1652 } 1653 end_mark := parser.mark 1654 1655 // Create a TAG-DIRECTIVE token. 1656 *token = yaml_token_t{ 1657 typ: yaml_TAG_DIRECTIVE_TOKEN, 1658 start_mark: start_mark, 1659 end_mark: end_mark, 1660 value: handle, 1661 prefix: prefix, 1662 } 1663 1664 // Unknown directive. 1665 } else { 1666 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1667 start_mark, "found unknown directive name") 1668 return false 1669 } 1670 1671 // Eat the rest of the line including any comments. 1672 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1673 return false 1674 } 1675 1676 for is_blank(parser.buffer, parser.buffer_pos) { 1677 skip(parser) 1678 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1679 return false 1680 } 1681 } 1682 1683 if parser.buffer[parser.buffer_pos] == '#' { 1684 // [Go] Discard this inline comment for the time being. 1685 //if !yaml_parser_scan_line_comment(parser, start_mark) { 1686 // return false 1687 //} 1688 for !is_breakz(parser.buffer, parser.buffer_pos) { 1689 skip(parser) 1690 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1691 return false 1692 } 1693 } 1694 } 1695 1696 // Check if we are at the end of the line. 
1697 if !is_breakz(parser.buffer, parser.buffer_pos) { 1698 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1699 start_mark, "did not find expected comment or line break") 1700 return false 1701 } 1702 1703 // Eat a line break. 1704 if is_break(parser.buffer, parser.buffer_pos) { 1705 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1706 return false 1707 } 1708 skip_line(parser) 1709 } 1710 1711 return true 1712} 1713 1714// Scan the directive name. 1715// 1716// Scope: 1717// %YAML 1.1 # a comment \n 1718// ^^^^ 1719// %TAG !yaml! tag:yaml.org,2002: \n 1720// ^^^ 1721// 1722func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { 1723 // Consume the directive name. 1724 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1725 return false 1726 } 1727 1728 var s []byte 1729 for is_alpha(parser.buffer, parser.buffer_pos) { 1730 s = read(parser, s) 1731 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1732 return false 1733 } 1734 } 1735 1736 // Check if the name is empty. 1737 if len(s) == 0 { 1738 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1739 start_mark, "could not find expected directive name") 1740 return false 1741 } 1742 1743 // Check for an blank character after the name. 1744 if !is_blankz(parser.buffer, parser.buffer_pos) { 1745 yaml_parser_set_scanner_error(parser, "while scanning a directive", 1746 start_mark, "found unexpected non-alphabetical character") 1747 return false 1748 } 1749 *name = s 1750 return true 1751} 1752 1753// Scan the value of VERSION-DIRECTIVE. 1754// 1755// Scope: 1756// %YAML 1.1 # a comment \n 1757// ^^^^^^ 1758func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { 1759 // Eat whitespaces. 
1760 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1761 return false 1762 } 1763 for is_blank(parser.buffer, parser.buffer_pos) { 1764 skip(parser) 1765 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1766 return false 1767 } 1768 } 1769 1770 // Consume the major version number. 1771 if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { 1772 return false 1773 } 1774 1775 // Eat '.'. 1776 if parser.buffer[parser.buffer_pos] != '.' { 1777 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1778 start_mark, "did not find expected digit or '.' character") 1779 } 1780 1781 skip(parser) 1782 1783 // Consume the minor version number. 1784 if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { 1785 return false 1786 } 1787 return true 1788} 1789 1790const max_number_length = 2 1791 1792// Scan the version number of VERSION-DIRECTIVE. 1793// 1794// Scope: 1795// %YAML 1.1 # a comment \n 1796// ^ 1797// %YAML 1.1 # a comment \n 1798// ^ 1799func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { 1800 1801 // Repeat while the next character is digit. 1802 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1803 return false 1804 } 1805 var value, length int8 1806 for is_digit(parser.buffer, parser.buffer_pos) { 1807 // Check if the number is too long. 1808 length++ 1809 if length > max_number_length { 1810 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1811 start_mark, "found extremely long version number") 1812 } 1813 value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) 1814 skip(parser) 1815 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1816 return false 1817 } 1818 } 1819 1820 // Check if the number was present. 
1821 if length == 0 { 1822 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 1823 start_mark, "did not find expected version number") 1824 } 1825 *number = value 1826 return true 1827} 1828 1829// Scan the value of a TAG-DIRECTIVE token. 1830// 1831// Scope: 1832// %TAG !yaml! tag:yaml.org,2002: \n 1833// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1834// 1835func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { 1836 var handle_value, prefix_value []byte 1837 1838 // Eat whitespaces. 1839 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1840 return false 1841 } 1842 1843 for is_blank(parser.buffer, parser.buffer_pos) { 1844 skip(parser) 1845 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1846 return false 1847 } 1848 } 1849 1850 // Scan a handle. 1851 if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { 1852 return false 1853 } 1854 1855 // Expect a whitespace. 1856 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1857 return false 1858 } 1859 if !is_blank(parser.buffer, parser.buffer_pos) { 1860 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1861 start_mark, "did not find expected whitespace") 1862 return false 1863 } 1864 1865 // Eat whitespaces. 1866 for is_blank(parser.buffer, parser.buffer_pos) { 1867 skip(parser) 1868 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1869 return false 1870 } 1871 } 1872 1873 // Scan a prefix. 1874 if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { 1875 return false 1876 } 1877 1878 // Expect a whitespace or line break. 
1879 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1880 return false 1881 } 1882 if !is_blankz(parser.buffer, parser.buffer_pos) { 1883 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 1884 start_mark, "did not find expected whitespace or line break") 1885 return false 1886 } 1887 1888 *handle = handle_value 1889 *prefix = prefix_value 1890 return true 1891} 1892 1893func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { 1894 var s []byte 1895 1896 // Eat the indicator character. 1897 start_mark := parser.mark 1898 skip(parser) 1899 1900 // Consume the value. 1901 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1902 return false 1903 } 1904 1905 for is_alpha(parser.buffer, parser.buffer_pos) { 1906 s = read(parser, s) 1907 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 1908 return false 1909 } 1910 } 1911 1912 end_mark := parser.mark 1913 1914 /* 1915 * Check if length of the anchor is greater than 0 and it is followed by 1916 * a whitespace character or one of the indicators: 1917 * 1918 * '?', ':', ',', ']', '}', '%', '@', '`'. 1919 */ 1920 1921 if len(s) == 0 || 1922 !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || 1923 parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || 1924 parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || 1925 parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || 1926 parser.buffer[parser.buffer_pos] == '`') { 1927 context := "while scanning an alias" 1928 if typ == yaml_ANCHOR_TOKEN { 1929 context = "while scanning an anchor" 1930 } 1931 yaml_parser_set_scanner_error(parser, context, start_mark, 1932 "did not find expected alphabetic or numeric character") 1933 return false 1934 } 1935 1936 // Create a token. 
1937 *token = yaml_token_t{ 1938 typ: typ, 1939 start_mark: start_mark, 1940 end_mark: end_mark, 1941 value: s, 1942 } 1943 1944 return true 1945} 1946 1947/* 1948 * Scan a TAG token. 1949 */ 1950 1951func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { 1952 var handle, suffix []byte 1953 1954 start_mark := parser.mark 1955 1956 // Check if the tag is in the canonical form. 1957 if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { 1958 return false 1959 } 1960 1961 if parser.buffer[parser.buffer_pos+1] == '<' { 1962 // Keep the handle as '' 1963 1964 // Eat '!<' 1965 skip(parser) 1966 skip(parser) 1967 1968 // Consume the tag value. 1969 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1970 return false 1971 } 1972 1973 // Check for '>' and eat it. 1974 if parser.buffer[parser.buffer_pos] != '>' { 1975 yaml_parser_set_scanner_error(parser, "while scanning a tag", 1976 start_mark, "did not find the expected '>'") 1977 return false 1978 } 1979 1980 skip(parser) 1981 } else { 1982 // The tag has either the '!suffix' or the '!handle!suffix' form. 1983 1984 // First, try to scan a handle. 1985 if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { 1986 return false 1987 } 1988 1989 // Check if it is, indeed, handle. 1990 if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { 1991 // Scan the suffix now. 1992 if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { 1993 return false 1994 } 1995 } else { 1996 // It wasn't a handle after all. Scan the rest of the tag. 1997 if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { 1998 return false 1999 } 2000 2001 // Set the handle to '!'. 2002 handle = []byte{'!'} 2003 2004 // A special case: the '!' tag. Set the handle to '' and the 2005 // suffix to '!'. 2006 if len(suffix) == 0 { 2007 handle, suffix = suffix, handle 2008 } 2009 } 2010 } 2011 2012 // Check the character which ends the tag. 
2013 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2014 return false 2015 } 2016 if !is_blankz(parser.buffer, parser.buffer_pos) { 2017 yaml_parser_set_scanner_error(parser, "while scanning a tag", 2018 start_mark, "did not find expected whitespace or line break") 2019 return false 2020 } 2021 2022 end_mark := parser.mark 2023 2024 // Create a token. 2025 *token = yaml_token_t{ 2026 typ: yaml_TAG_TOKEN, 2027 start_mark: start_mark, 2028 end_mark: end_mark, 2029 value: handle, 2030 suffix: suffix, 2031 } 2032 return true 2033} 2034 2035// Scan a tag handle. 2036func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { 2037 // Check the initial '!' character. 2038 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2039 return false 2040 } 2041 if parser.buffer[parser.buffer_pos] != '!' { 2042 yaml_parser_set_scanner_tag_error(parser, directive, 2043 start_mark, "did not find expected '!'") 2044 return false 2045 } 2046 2047 var s []byte 2048 2049 // Copy the '!' character. 2050 s = read(parser, s) 2051 2052 // Copy all subsequent alphabetical and numerical characters. 2053 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2054 return false 2055 } 2056 for is_alpha(parser.buffer, parser.buffer_pos) { 2057 s = read(parser, s) 2058 if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { 2059 return false 2060 } 2061 } 2062 2063 // Check if the trailing character is '!' and copy it. 2064 if parser.buffer[parser.buffer_pos] == '!' { 2065 s = read(parser, s) 2066 } else { 2067 // It's either the '!' tag or not really a tag handle. If it's a %TAG 2068 // directive, it's an error. If it's a tag token, it must be a part of URI. 2069 if directive && string(s) != "!" 
{
			yaml_parser_set_scanner_tag_error(parser, directive,
				start_mark, "did not find expected '!'")
			return false
		}
	}

	*handle = s
	return true
}

// yaml_parser_scan_tag_uri scans the URI part of a tag and stores it in *uri.
//
// When head is non-empty, everything after its first byte (the leading '!')
// is used as the beginning of the result. Input is then consumed for as long
// as the current byte satisfies is_alpha or is accepted by is_tag_uri_punct;
// a '%' starts an escape sequence that is decoded by
// yaml_parser_scan_uri_escapes.
//
// It returns false if the buffer cannot be refilled, if an escape sequence is
// rejected, or if head is empty and no URI character was consumed (reported
// as "did not find expected tag URI"). directive and start_mark are only
// forwarded to the error reporter and to the escape decoder.
func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {
	var s []byte
	hasTag := len(head) > 0

	// Copy the head if needed.
	//
	// Note that we don't copy the leading '!' character.
	if len(head) > 1 {
		s = append(s, head[1:]...)
	}

	// Scan the tag.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// The set of characters that may appear in URI is as follows:
	//
	//      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
	//      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
	//      '%'.
	//
	// The alphanumerics, '_' and '-' are expected to be covered by is_alpha
	// (defined elsewhere); the remaining punctuation by is_tag_uri_punct.
	for is_alpha(parser.buffer, parser.buffer_pos) || is_tag_uri_punct(parser.buffer[parser.buffer_pos]) {
		// Check if it is a URI-escape sequence.
		if parser.buffer[parser.buffer_pos] == '%' {
			if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {
				return false
			}
		} else {
			s = read(parser, s)
		}
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		hasTag = true
	}

	if !hasTag {
		yaml_parser_set_scanner_tag_error(parser, directive,
			start_mark, "did not find expected tag URI")
		return false
	}
	*uri = s
	return true
}

// is_tag_uri_punct reports whether c is one of the punctuation characters
// that yaml_parser_scan_tag_uri accepts inside a tag URI, in addition to the
// characters accepted by is_alpha.
func is_tag_uri_punct(c byte) bool {
	switch c {
	case ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '.', '!', '~',
		'*', '\'', '(', ')', '[', ']', '%':
		return true
	}
	return false
}

// yaml_parser_scan_uri_escapes decodes a run of "%XX" escapes that together
// form a single UTF-8 character and appends the decoded octets to *s.
//
// The first octet determines, via width, how many octets the character
// occupies; every following octet must have the bit pattern 10xxxxxx. Each
// decoded octet consumes three input characters. On malformed input the
// result of yaml_parser_set_scanner_tag_error is returned; false is returned
// when the buffer cannot be refilled.
func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {

	// Decode the required number of characters.
	//
	// w counts the octets still to be decoded. 1024 is a sentinel meaning
	// "the leading octet has not been seen yet"; it is replaced by the real
	// sequence length once the first octet is decoded.
	w := 1024
	for w > 0 {
		// Check for a URI-escaped octet.
		if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
			return false
		}

		if !(parser.buffer[parser.buffer_pos] == '%' &&
			is_hex(parser.buffer, parser.buffer_pos+1) &&
			is_hex(parser.buffer, parser.buffer_pos+2)) {
			return yaml_parser_set_scanner_tag_error(parser, directive,
				start_mark, "did not find URI escaped octet")
		}

		// Get the octet.
		octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))

		// If it is the leading octet, determine the length of the UTF-8 sequence.
		if w == 1024 {
			w = width(octet)
			if w == 0 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect leading UTF-8 octet")
			}
		} else {
			// Check if the trailing octet is correct.
			if octet&0xC0 != 0x80 {
				return yaml_parser_set_scanner_tag_error(parser, directive,
					start_mark, "found an incorrect trailing UTF-8 octet")
			}
		}

		// Copy the octet and move the pointers.
		*s = append(*s, octet)
		skip(parser)
		skip(parser)
		skip(parser)
		w--
	}
	return true
}

// yaml_parser_scan_block_scalar scans a block scalar and stores the resulting
// SCALAR token in *token.
//
// The parser must be positioned at the '|' or '>' indicator. The header may
// carry a chomping indicator ('+' or '-') and an indentation indicator (a
// non-zero digit) in either order, followed by optional blanks and an optional
// comment up to the end of the line. When literal is false the token style is
// yaml_FOLDED_SCALAR_STYLE and single '\n' breaks between non-blank-led lines
// are folded; otherwise the style is yaml_LITERAL_SCALAR_STYLE and breaks are
// kept.
//
// It returns false after a scanner error has been reported or when the
// buffer cannot be refilled.
func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
	// Eat the indicator '|' or '>'.
	start_mark := parser.mark
	skip(parser)

	// Scan the additional block scalar indicators.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}

	// Check for a chomping indicator.
	//
	// chomping is +1 for '+', -1 for '-' and 0 when absent; increment is the
	// explicit indentation indicator, or 0 when absent.
	var chomping, increment int
	if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
		// Set the chomping method and eat the indicator.
		if parser.buffer[parser.buffer_pos] == '+' {
			chomping = +1
		} else {
			chomping = -1
		}
		skip(parser)

		// Check for an indentation indicator.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if is_digit(parser.buffer, parser.buffer_pos) {
			// Check that the indentation is greater than 0.
			if parser.buffer[parser.buffer_pos] == '0' {
				yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
					start_mark, "found an indentation indicator equal to 0")
				return false
			}

			// Get the indentation level and eat the indicator.
			increment = as_digit(parser.buffer, parser.buffer_pos)
			skip(parser)
		}

	} else if is_digit(parser.buffer, parser.buffer_pos) {
		// Do the same as above, but in the opposite order.

		if parser.buffer[parser.buffer_pos] == '0' {
			yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found an indentation indicator equal to 0")
			return false
		}
		increment = as_digit(parser.buffer, parser.buffer_pos)
		skip(parser)

		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
			if parser.buffer[parser.buffer_pos] == '+' {
				chomping = +1
			} else {
				chomping = -1
			}
			skip(parser)
		}
	}

	// Eat whitespaces and comments to the end of the line.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	for is_blank(parser.buffer, parser.buffer_pos) {
		skip(parser)
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
	}
	if parser.buffer[parser.buffer_pos] == '#' {
		// TODO Test this and then re-enable it.
		//if !yaml_parser_scan_line_comment(parser, start_mark) {
		//	return false
		//}
		// Until then the comment text is simply discarded.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			skip(parser)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
	}

	// Check if we are at the end of the line.
	if !is_breakz(parser.buffer, parser.buffer_pos) {
		yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
			start_mark, "did not find expected comment or line break")
		return false
	}

	// Eat a line break.
	if is_break(parser.buffer, parser.buffer_pos) {
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		skip_line(parser)
	}

	end_mark := parser.mark

	// Set the indentation level if it was specified.
	//
	// indent stays 0 when no indicator was given; in that case
	// yaml_parser_scan_block_scalar_breaks determines it from the content.
	var indent int
	if increment > 0 {
		if parser.indent >= 0 {
			indent = parser.indent + increment
		} else {
			indent = increment
		}
	}

	// Scan the leading line breaks and determine the indentation level if needed.
	var s, leading_break, trailing_breaks []byte
	if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
		return false
	}

	// Scan the block scalar content.
	if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
		return false
	}
	var leading_blank, trailing_blank bool
	for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
		// We are at the beginning of a non-empty line.

		// Is it a trailing whitespace?
		trailing_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Check if we need to fold the leading line break.
		if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
			// Do we need to join the lines by space?
			if len(trailing_breaks) == 0 {
				s = append(s, ' ')
			}
		} else {
			s = append(s, leading_break...)
		}
		leading_break = leading_break[:0]

		// Append the remaining line breaks.
		s = append(s, trailing_breaks...)
		trailing_breaks = trailing_breaks[:0]

		// Is it a leading whitespace?
		leading_blank = is_blank(parser.buffer, parser.buffer_pos)

		// Consume the current line.
		for !is_breakz(parser.buffer, parser.buffer_pos) {
			s = read(parser, s)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}

		leading_break = read_line(parser, leading_break)

		// Eat the following indentation spaces and line breaks.
		if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
			return false
		}
	}

	// Chomp the tail.
	//
	// '-' drops the final break, the default keeps it, and '+' additionally
	// keeps the trailing empty lines.
	if chomping != -1 {
		s = append(s, leading_break...)
	}
	if chomping == 1 {
		s = append(s, trailing_breaks...)
	}

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_LITERAL_SCALAR_STYLE,
	}
	if !literal {
		token.style = yaml_FOLDED_SCALAR_STYLE
	}
	return true
}

// yaml_parser_scan_block_scalar_breaks scans indentation spaces and line
// breaks for a block scalar, and determines the indentation level if needed.
//
// Line breaks that are consumed are appended to *breaks, and *end_mark is
// updated to the parser position after each of them. When *indent is 0 on
// entry it is set to the largest column reached while skipping spaces, but
// never less than parser.indent+1 and never less than 1. A tab found where an
// indentation space is expected is reported as a scanner error.
func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
	*end_mark = parser.mark

	// Eat the indentation spaces and line breaks.
	max_indent := 0
	for {
		// Eat the indentation spaces.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}
		for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
			skip(parser)
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}
		if parser.mark.column > max_indent {
			max_indent = parser.mark.column
		}

		// Check for a tab character messing the indentation.
		if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
			return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
				start_mark, "found a tab character where an indentation space is expected")
		}

		// Have we found a non-empty line?
		if !is_break(parser.buffer, parser.buffer_pos) {
			break
		}

		// Consume the line break.
		if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
			return false
		}
		// [Go] Should really be returning breaks instead.
		*breaks = read_line(parser, *breaks)
		*end_mark = parser.mark
	}

	// Determine the indentation level if needed.
	if *indent == 0 {
		*indent = max_indent
		if *indent < parser.indent+1 {
			*indent = parser.indent + 1
		}
		if *indent < 1 {
			*indent = 1
		}
	}
	return true
}

// yaml_parser_scan_flow_scalar scans a quoted scalar and stores the resulting
// SCALAR token in *token.
//
// The parser must be positioned at the opening quote. single selects
// single-quoted rules (where '' is an escaped quote) instead of double-quoted
// rules (where backslash escapes are decoded). Line breaks inside the scalar
// are folded: a lone '\n' becomes a space, and additional breaks are kept.
//
// It returns false after reporting a scanner error (document indicator or end
// of stream inside the scalar, unknown or malformed escape) or when the
// buffer cannot be refilled.
func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {
	// Eat the left quote.
	start_mark := parser.mark
	skip(parser)

	// Consume the content of the quoted scalar.
	var s, leading_break, trailing_breaks, whitespaces []byte
	for {
		// Check that there are no document indicators at the beginning of the line.
		if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
			return false
		}

		if parser.mark.column == 0 &&
			((parser.buffer[parser.buffer_pos+0] == '-' &&
				parser.buffer[parser.buffer_pos+1] == '-' &&
				parser.buffer[parser.buffer_pos+2] == '-') ||
				(parser.buffer[parser.buffer_pos+0] == '.' &&
					parser.buffer[parser.buffer_pos+1] == '.' &&
					parser.buffer[parser.buffer_pos+2] == '.')) &&
			is_blankz(parser.buffer, parser.buffer_pos+3) {
			yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
				start_mark, "found unexpected document indicator")
			return false
		}

		// Check for EOF.
		if is_z(parser.buffer, parser.buffer_pos) {
			yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
				start_mark, "found unexpected end of stream")
			return false
		}

		// Consume non-blank characters.
		leading_blanks := false
		for !is_blankz(parser.buffer, parser.buffer_pos) {
			if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {
				// It is an escaped single quote.
				s = append(s, '\'')
				skip(parser)
				skip(parser)

			} else if single && parser.buffer[parser.buffer_pos] == '\'' {
				// It is a right single quote.
				break
			} else if !single && parser.buffer[parser.buffer_pos] == '"' {
				// It is a right double quote.
				break

			} else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {
				// It is an escaped line break.
				if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
					return false
				}
				skip(parser)
				skip_line(parser)
				leading_blanks = true
				break

			} else if !single && parser.buffer[parser.buffer_pos] == '\\' {
				// It is an escape sequence.
				//
				// code_length is the number of hex digits that follow the
				// escape character, or 0 for single-character escapes.
				code_length := 0

				// Check the escape character.
				switch parser.buffer[parser.buffer_pos+1] {
				case '0':
					s = append(s, 0)
				case 'a':
					s = append(s, '\x07')
				case 'b':
					s = append(s, '\x08')
				case 't', '\t':
					s = append(s, '\x09')
				case 'n':
					s = append(s, '\x0A')
				case 'v':
					s = append(s, '\x0B')
				case 'f':
					s = append(s, '\x0C')
				case 'r':
					s = append(s, '\x0D')
				case 'e':
					s = append(s, '\x1B')
				case ' ':
					s = append(s, '\x20')
				case '"':
					s = append(s, '"')
				case '\'':
					s = append(s, '\'')
				case '\\':
					s = append(s, '\\')
				case 'N': // NEL (#x85)
					s = append(s, '\xC2')
					s = append(s, '\x85')
				case '_': // #xA0
					s = append(s, '\xC2')
					s = append(s, '\xA0')
				case 'L': // LS (#x2028)
					s = append(s, '\xE2')
					s = append(s, '\x80')
					s = append(s, '\xA8')
				case 'P': // PS (#x2029)
					s = append(s, '\xE2')
					s = append(s, '\x80')
					s = append(s, '\xA9')
				case 'x':
					code_length = 2
				case 'u':
					code_length = 4
				case 'U':
					code_length = 8
				default:
					yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
						start_mark, "found unknown escape character")
					return false
				}

				skip(parser)
				skip(parser)

				// Consume an arbitrary escape code.
				if code_length > 0 {
					// value is 64 bits wide so that eight hex digits (\U)
					// cannot wrap around where int is only 32 bits; a
					// wrapped negative value would slip past the range
					// check below.
					var value int64

					// Scan the character value.
					if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {
						return false
					}
					for k := 0; k < code_length; k++ {
						if !is_hex(parser.buffer, parser.buffer_pos+k) {
							yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
								start_mark, "did not find expected hexdecimal number")
							return false
						}
						value = (value << 4) + int64(as_hex(parser.buffer, parser.buffer_pos+k))
					}

					// Check the value and write the character.
					//
					// Surrogates and values beyond U+10FFFF are rejected;
					// everything else is encoded as UTF-8.
					if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {
						yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
							start_mark, "found invalid Unicode character escape code")
						return false
					}
					if value <= 0x7F {
						s = append(s, byte(value))
					} else if value <= 0x7FF {
						s = append(s, byte(0xC0+(value>>6)))
						s = append(s, byte(0x80+(value&0x3F)))
					} else if value <= 0xFFFF {
						s = append(s, byte(0xE0+(value>>12)))
						s = append(s, byte(0x80+((value>>6)&0x3F)))
						s = append(s, byte(0x80+(value&0x3F)))
					} else {
						s = append(s, byte(0xF0+(value>>18)))
						s = append(s, byte(0x80+((value>>12)&0x3F)))
						s = append(s, byte(0x80+((value>>6)&0x3F)))
						s = append(s, byte(0x80+(value&0x3F)))
					}

					// Advance the pointer.
					for k := 0; k < code_length; k++ {
						skip(parser)
					}
				}
			} else {
				// It is a non-escaped non-blank character.
				s = read(parser, s)
			}
			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
				return false
			}
		}

		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}

		// Check if we are at the end of the scalar.
		if single {
			if parser.buffer[parser.buffer_pos] == '\'' {
				break
			}
		} else {
			if parser.buffer[parser.buffer_pos] == '"' {
				break
			}
		}

		// Consume blank characters.
		for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
			if is_blank(parser.buffer, parser.buffer_pos) {
				// Consume a space or a tab character.
				if !leading_blanks {
					whitespaces = read(parser, whitespaces)
				} else {
					skip(parser)
				}
			} else {
				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
					return false
				}

				// Check if it is a first line break.
				if !leading_blanks {
					whitespaces = whitespaces[:0]
					leading_break = read_line(parser, leading_break)
					leading_blanks = true
				} else {
					trailing_breaks = read_line(parser, trailing_breaks)
				}
			}
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Join the whitespaces or fold line breaks.
		if leading_blanks {
			// Do we need to fold line breaks?
			//
			// leading_break may be empty here when leading_blanks was set
			// by an escaped line break, hence the length check.
			if len(leading_break) > 0 && leading_break[0] == '\n' {
				if len(trailing_breaks) == 0 {
					s = append(s, ' ')
				} else {
					s = append(s, trailing_breaks...)
				}
			} else {
				s = append(s, leading_break...)
				s = append(s, trailing_breaks...)
			}
			trailing_breaks = trailing_breaks[:0]
			leading_break = leading_break[:0]
		} else {
			s = append(s, whitespaces...)
			whitespaces = whitespaces[:0]
		}
	}

	// Eat the right quote.
	skip(parser)
	end_mark := parser.mark

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_SINGLE_QUOTED_SCALAR_STYLE,
	}
	if !single {
		token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
	}
	return true
}

// yaml_parser_scan_plain_scalar scans a plain (unquoted) scalar and stores
// the resulting SCALAR token in *token.
//
// Scanning stops at a document indicator in column 0, at a '#', at a ':'
// followed by a blank, break or end of input, at a flow indicator
// (',', '?', '[', ']', '{', '}') while inside a flow collection, or when a
// continuation line is indented less than parser.indent+1 in block context.
// Line breaks inside the scalar are folded as for quoted scalars. If the
// scalar was followed by a line break, parser.simple_key_allowed is set.
//
// It returns false when a tab violates the indentation or when the buffer
// cannot be refilled.
func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {

	var s, leading_break, trailing_breaks, whitespaces []byte
	var leading_blanks bool
	var indent = parser.indent + 1

	start_mark := parser.mark
	end_mark := parser.mark

	// Consume the content of the plain scalar.
	for {
		// Check for a document indicator.
		if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
			return false
		}
		if parser.mark.column == 0 &&
			((parser.buffer[parser.buffer_pos+0] == '-' &&
				parser.buffer[parser.buffer_pos+1] == '-' &&
				parser.buffer[parser.buffer_pos+2] == '-') ||
				(parser.buffer[parser.buffer_pos+0] == '.' &&
					parser.buffer[parser.buffer_pos+1] == '.' &&
					parser.buffer[parser.buffer_pos+2] == '.')) &&
			is_blankz(parser.buffer, parser.buffer_pos+3) {
			break
		}

		// Check for a comment.
		if parser.buffer[parser.buffer_pos] == '#' {
			break
		}

		// Consume non-blank characters.
		for !is_blankz(parser.buffer, parser.buffer_pos) {

			// Check for indicators that may end a plain scalar.
			if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||
				(parser.flow_level > 0 &&
					(parser.buffer[parser.buffer_pos] == ',' ||
						parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||
						parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
						parser.buffer[parser.buffer_pos] == '}')) {
				break
			}

			// Check if we need to join whitespaces and breaks.
			//
			// Pending blanks and breaks are only committed to s once more
			// content follows, so trailing whitespace is never included.
			if leading_blanks || len(whitespaces) > 0 {
				if leading_blanks {
					// Do we need to fold line breaks?
					if leading_break[0] == '\n' {
						if len(trailing_breaks) == 0 {
							s = append(s, ' ')
						} else {
							s = append(s, trailing_breaks...)
						}
					} else {
						s = append(s, leading_break...)
						s = append(s, trailing_breaks...)
					}
					trailing_breaks = trailing_breaks[:0]
					leading_break = leading_break[:0]
					leading_blanks = false
				} else {
					s = append(s, whitespaces...)
					whitespaces = whitespaces[:0]
				}
			}

			// Copy the character.
			s = read(parser, s)

			end_mark = parser.mark
			if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
				return false
			}
		}

		// Is it the end?
		if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {
			break
		}

		// Consume blank characters.
		if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
			return false
		}

		for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
			if is_blank(parser.buffer, parser.buffer_pos) {

				// Check for tab characters that abuse indentation.
				if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {
					yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
						start_mark, "found a tab character that violates indentation")
					return false
				}

				// Consume a space or a tab character.
				if !leading_blanks {
					whitespaces = read(parser, whitespaces)
				} else {
					skip(parser)
				}
			} else {
				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
					return false
				}

				// Check if it is a first line break.
				if !leading_blanks {
					whitespaces = whitespaces[:0]
					leading_break = read_line(parser, leading_break)
					leading_blanks = true
				} else {
					trailing_breaks = read_line(parser, trailing_breaks)
				}
			}
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
		}

		// Check indentation level.
		if parser.flow_level == 0 && parser.mark.column < indent {
			break
		}
	}

	// Create a token.
	*token = yaml_token_t{
		typ:        yaml_SCALAR_TOKEN,
		start_mark: start_mark,
		end_mark:   end_mark,
		value:      s,
		style:      yaml_PLAIN_SCALAR_STYLE,
	}

	// Note that we change the 'simple_key_allowed' flag.
	if leading_blanks {
		parser.simple_key_allowed = true
	}
	return true
}

// yaml_parser_scan_line_comment scans a comment that follows content on the
// current line and records it in parser.comments as a line comment attached
// to token_mark.
//
// Nothing is done when parser.newlines is greater than zero. Otherwise up to
// 512 bytes are examined, skipping blanks; if the first non-blank byte is
// '#', the input is consumed up to (but not including) the end of the line
// and the bytes from '#' onwards become the comment text.
//
// It returns false only when the buffer cannot be refilled while the comment
// is being consumed; a refill failure during the look-ahead just ends the
// look-ahead.
func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {
	if parser.newlines > 0 {
		return true
	}

	var start_mark yaml_mark_t
	var text []byte

	for peek := 0; peek < 512; peek++ {
		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
			break
		}
		if is_blank(parser.buffer, parser.buffer_pos+peek) {
			continue
		}
		if parser.buffer[parser.buffer_pos+peek] == '#' {
			// seen is the stream index of the '#'; everything before it is
			// skipped without being recorded.
			seen := parser.mark.index + peek
			for {
				if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
					return false
				}
				if is_breakz(parser.buffer, parser.buffer_pos) {
					if parser.mark.index >= seen {
						break
					}
					if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
						return false
					}
					skip_line(parser)
				} else {
					if parser.mark.index >= seen {
						if len(text) == 0 {
							start_mark = parser.mark
						}
						text = append(text, parser.buffer[parser.buffer_pos])
					}
					skip(parser)
				}
			}
		}
		// The first non-blank byte decides; there is nothing more to scan.
		break
	}
	if len(text) > 0 {
		parser.comments = append(parser.comments, yaml_comment_t{
			token_mark: token_mark,
			start_mark: start_mark,
			line:       text,
		})
	}
	return true
}

// yaml_parser_scan_comments scans the comment lines that follow the current
// position and records them in parser.comments, either as foot comments of
// the preceding content or as a head comment of the content that follows.
//
// The comments are associated with the most recently queued token, or the
// one before it when the most recent token is a FLOW-ENTRY. parser.tokens
// must not be empty. Comment lines are consumed from the input; blank lines
// and indentation relative to parser.indent decide where a run of comment
// lines is split into foot and head parts.
//
// It returns false only when the buffer cannot be refilled while a comment
// line is being consumed.
func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
	token := parser.tokens[len(parser.tokens)-1]

	if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
		token = parser.tokens[len(parser.tokens)-2]
	}

	var token_mark = token.start_mark
	var start_mark yaml_mark_t

	// recent_empty records that the line just examined was empty;
	// first_empty stays true until the first empty line has been passed.
	var recent_empty = false
	var first_empty = parser.newlines <= 1

	// line and column track the position of the byte being peeked at.
	var line = parser.mark.line
	var column = parser.mark.column

	var text []byte

	// The foot line is the place where a comment must start to
	// still be considered as a foot of the prior content.
	// If there's some content in the currently parsed line, then
	// the foot is the line below it.
	var foot_line = -1
	if scan_mark.line > 0 {
		foot_line = parser.mark.line - parser.newlines + 1
		if parser.newlines == 0 && parser.mark.column > 1 {
			foot_line++
		}
	}

	var peek = 0
	for ; peek < 512; peek++ {
		if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
			break
		}
		column++
		if is_blank(parser.buffer, parser.buffer_pos+peek) {
			continue
		}
		c := parser.buffer[parser.buffer_pos+peek]
		if is_breakz(parser.buffer, parser.buffer_pos+peek) || parser.flow_level > 0 && (c == ']' || c == '}') {
			// Got line break or terminator.
			if !recent_empty {
				if first_empty && (start_mark.line == foot_line || start_mark.column-1 < parser.indent) {
					// This is the first empty line and there were no empty lines before,
					// so this initial part of the comment is a foot of the prior token
					// instead of being a head for the following one. Split it up.
					if len(text) > 0 {
						if start_mark.column-1 < parser.indent {
							// If dedented it's unrelated to the prior token.
							token_mark = start_mark
						}
						parser.comments = append(parser.comments, yaml_comment_t{
							scan_mark:  scan_mark,
							token_mark: token_mark,
							start_mark: start_mark,
							end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
							foot:       text,
						})
						scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
						token_mark = scan_mark
						text = nil
					}
				} else {
					if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
						text = append(text, '\n')
					}
				}
			}
			if !is_break(parser.buffer, parser.buffer_pos+peek) {
				break
			}
			first_empty = false
			recent_empty = true
			column = 0
			line++
			continue
		}

		if len(text) > 0 && column < parser.indent+1 && column != start_mark.column {
			// The comment at the different indentation is a foot of the
			// preceding data rather than a head of the upcoming one.
			parser.comments = append(parser.comments, yaml_comment_t{
				scan_mark:  scan_mark,
				token_mark: token_mark,
				start_mark: start_mark,
				end_mark:   yaml_mark_t{parser.mark.index + peek, line, column},
				foot:       text,
			})
			scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
			token_mark = scan_mark
			text = nil
		}

		if parser.buffer[parser.buffer_pos+peek] != '#' {
			break
		}

		if len(text) == 0 {
			start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
		} else {
			text = append(text, '\n')
		}

		recent_empty = false

		// Consume until after the consumed comment line.
		seen := parser.mark.index + peek
		for {
			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
				return false
			}
			if is_breakz(parser.buffer, parser.buffer_pos) {
				if parser.mark.index >= seen {
					break
				}
				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
					return false
				}
				skip_line(parser)
			} else {
				if parser.mark.index >= seen {
					text = append(text, parser.buffer[parser.buffer_pos])
				}
				skip(parser)
			}
		}

		// Restart the look-ahead from the new parser position. The loop's
		// peek++ then steps over the byte that ended the comment line.
		peek = 0
		column = 0
		line = parser.mark.line
	}

	if len(text) > 0 {
		parser.comments = append(parser.comments, yaml_comment_t{
			scan_mark:  scan_mark,
			token_mark: start_mark,
			start_mark: start_mark,
			end_mark:   yaml_mark_t{parser.mark.index + peek - 1, line, column},
			head:       text,
		})
	}
	return true
}