1%- 2% Copyright (c) 2012-2014 Yakaz 3% Copyright (c) 2016-2018 Jean-Sébastien Pédron <jean-sebastien.pedron@dumbbell.fr> 4% All rights reserved. 5% 6% Redistribution and use in source and binary forms, with or without 7% modification, are permitted provided that the following conditions 8% are met: 9% 1. Redistributions of source code must retain the above copyright 10% notice, this list of conditions and the following disclaimer. 11% 2. Redistributions in binary form must reproduce the above copyright 12% notice, this list of conditions and the following disclaimer in the 13% documentation and/or other materials provided with the distribution. 14% 15% THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16% ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18% ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19% FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21% OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22% HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23% LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24% OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25% SUCH DAMAGE. 26 27%% @author Jean-Sébastien Pédron <jean-sebastien.pedron@dumbbell.fr> 28%% @copyright 29%% 2012-2014 Yakaz, 30%% 2016-2018 Jean-Sébastien Pédron <jean-sebastien.pedron@dumbbell.fr> 31%% 32%% @doc {@module} implements a YAML parser. It is not meant to be used 33%% directly. Instead, you should use {@link yamerl_constr}. 34%% 35%% The `yamerl' application must be started to use the parser. 36 37-module(yamerl_parser). 38 39-include("yamerl_errors.hrl"). 40-include("yamerl_tokens.hrl"). 41-include("internal/yamerl_parser.hrl"). 42 43%% Public API. 44-export([ 45 new/1, 46 new/2, 47 string/1, 48 string/2, 49 file/1, 50 file/2, 51 next_chunk/2, 52 next_chunk/3, 53 last_chunk/2, 54 get_token_fun/1, 55 set_token_fun/2, 56 option_names/0 57 ]). 58 59%% ------------------------------------------------------------------- 60%% Exported types. 61%% ------------------------------------------------------------------- 62 63%% FIXME: 64%% This type should be "-opaque". However, up-to Erlang R15B03, an issue 65%% with either this code or Dialyzer prevents us from declaring it 66%% properly: Dialyzer reports warning regarding the stream_state_fun() 67%% type and several guard expression which will never match. 68-type yamerl_parser() :: #yamerl_parser{}. 69 70-export_type([ 71 yamerl_parser/0, 72 yamerl_parser_option/0, 73 yamerl_parser_token_fun/0 74 ]). 75 76%% ------------------------------------------------------------------- 77%% Secondary records to store the scanner state. 78%% ------------------------------------------------------------------- 79 80-record(directive_ctx, { 81 line = 1 :: position(), 82 col = 1 :: position(), 83 name = "" :: string() 84 }). 85 86-record(yaml_directive_ctx, { 87 line = 1 :: position(), 88 col = 1 :: position(), 89 major :: non_neg_integer() | undefined, 90 minor :: non_neg_integer() | undefined 91 }). 92 93-record(tag_directive_ctx, { 94 line = 1 :: position(), 95 col = 1 :: position(), 96 handle :: tag_handle() | [] | undefined, 97 prefix :: tag_prefix() | [] | undefined 98 }). 99 100-record(reserved_directive_ctx, { 101 line = 1 :: position(), 102 col = 1 :: position(), 103 name = "" :: string(), 104 current :: string() | undefined, 105 args = [] :: [string()], 106 args_count = 0 :: non_neg_integer() 107 }). 108 109-type whitespace() :: [9 | 10 | 32]. 110 111-record(block_scalar_hd_ctx, { 112 style = literal :: literal | folded, 113 line = 1 :: position(), %% Line where the token starts. 114 col = 1 :: position(), %% Column where the token starts. 115 chomp :: strip | keep | undefined, %% Chomping indicator. 116 indent :: pos_integer() %% Indentation indicator. 117 | {tmp, pos_integer()} | undefined, 118 in_comment = false :: boolean() %% Trailing comment. 119 }). 120 121-record(block_scalar_ctx, { 122 style = literal :: literal | folded, 123 line = 1 :: position(), %% Line where the token starts. 124 col = 1 :: position(), %% Column where the token starts. 125 endline = 1 :: position(), %% Line where the token ends. 126 endcol = 1 :: position(), %% Column where the token ends. 127 chomp = clip :: strip | keep | clip, %% Chomping method. 128 indent :: pos_integer() | undefined, %% Block indent. 129 longest_empty = 0 :: non_neg_integer(), %% Longest leading empty line. 130 newline = false :: boolean(), %% Met a newline character. 131 spaces = "" :: whitespace(), %% Last white spaces seen. 132 more_indent = false :: boolean(), %% Last line is more indented. 133 output = "" :: string() %% Already parsed characters. 134 }). 135 136-record(flow_scalar_ctx, { 137 style = plain :: double_quoted | single_quoted | plain, 138 line = 1 :: position(), %% Line where the token starts. 139 col = 1 :: position(), %% Column where the token starts. 140 endline = 1 :: position(), %% Line where the token ends. 141 endcol = 1 :: position(), %% Column where the token ends. 142 surrogate :: 16#d800..16#dbff | undefined, %% High surrogate. 143 newline = false :: boolean(), %% Met a newline character. 144 spaces = "" :: whitespace(), %% Last white spaces seen. 145 output = "" :: string() %% Already parsed characters. 146 }). 147 148-record(anchor_ctx, { 149 type :: anchor | alias, 150 line :: position(), 151 col :: position(), 152 output = "" :: string() 153 }). 154 155-record(tag_ctx, { 156 line :: position(), 157 col :: position(), 158 prefix :: string() | undefined, 159 suffix :: string() | tag_uri() 160 }). 161 162-define(IO_BLOCKSIZE, 4096). %% Common filesystem blocksize. 163 164-define(FAKE_IMPL_KEY, #impl_key{}). 165 166-define(IN_BLOCK_CTX(P), (is_record(P#yamerl_parser.cur_coll, bcoll))). 167-define(IN_FLOW_CTX(P), (is_record(P#yamerl_parser.cur_coll, fcoll))). 168 169-define(IS_SPACE(C), (C == $\s orelse C == $\t)). 170-define(IS_NEWLINE(C), (C == $\n orelse C == $\r)). 171-define(IS_NEWLINE_11(C), 172 (C == 16#85 orelse C == 16#2028 orelse C == 16#2029)). 173 174-define(IS_FLOW_INDICATOR(C), ( 175 C == $[ orelse C == $] orelse 176 C == ${ orelse C == $} orelse 177 C == $,)). 178 179-define(IS_HEXADECIMAL(C), ( 180 (O1 >= $0 andalso O1 =< $9) orelse 181 (O1 >= $a andalso O1 =< $f) orelse 182 (O1 >= $A andalso O1 =< $F) 183 )). 184 185-define(IS_URI_CHAR(C), 186 ( 187 (C >= $a andalso C =< $z) orelse 188 (C >= $A andalso C =< $Z) orelse 189 (C >= $0 andalso C =< $9) orelse 190 C == $- orelse 191 C == $% orelse C == $# orelse C == $; orelse C == $/ orelse C == $? orelse 192 C == $: orelse C == $@ orelse C == $& orelse C == $= orelse C == $+ orelse 193 C == $$ orelse C == $, orelse C == $_ orelse C == $. orelse C == $! orelse 194 C == $~ orelse C == $* orelse C == $' orelse C == $( orelse C == $) orelse 195 C == $[ orelse C == $] 196 )). 197 198-define(IS_BOM(C), (C == 16#feff)). 199-define(IS_HIGH_SURROGATE(C), (C >= 16#d800 andalso C =< 16#dbff)). 200-define(IS_LOW_SURROGATE(C), (C >= 16#dc00 andalso C =< 16#dfff)). 201 202-define(MISSING_ENTRY(S), ( 203 S#yamerl_parser.pending_entry andalso 204 S#yamerl_parser.last_tag == undefined 205 )). 206-define(MISSING_KVPAIR(S), ( 207 S#yamerl_parser.waiting_for_kvpair andalso 208 not element(#impl_key.possible, hd(S#yamerl_parser.ik_stack)) 209 )). 210 211-define(IS_JSON_LIKE(T), ( 212 (is_record(T, yamerl_scalar) andalso 213 (T#yamerl_scalar.substyle == single_quoted orelse 214 T#yamerl_scalar.substyle == double_quoted)) orelse 215 (is_record(T, yamerl_collection_end) andalso 216 T#yamerl_collection_end.style == flow) 217 )). 218 219-define(DEFAULT_TAG(U, L, C), 220 #yamerl_tag{ 221 uri = U, 222 line = L, 223 column = C 224 }). 225 226-define(NEXT_COL(Co, De, Count), 227 {Co + Count, De + Count}). 228 229-define(NEXT_LINE(Ch, Li, De, P), 230 case Ch of 231 [$\r, $\n | R] -> {R, Li + 1, 1, De + 2}; 232 [_ | R] -> {R, Li + 1, 1, De + 1} 233 end). 234 235%% 236%% We use macros instead of functions for a few #yamerl_parser updates 237%% to take advantage of the optimization described in §3.5 in the 238%% Efficiency Guide. 239%% 240 241-define(PUSH_FAKE_IMPL_KEY(P), 242 P#yamerl_parser{ik_stack = [?FAKE_IMPL_KEY | P#yamerl_parser.ik_stack]}). 243 244-define(POP_IMPL_KEY(P), 245 P#yamerl_parser{ik_stack = tl(P#yamerl_parser.ik_stack)}). 246 247-define(ALLOW_IMPL_KEY(P, F), 248 P#yamerl_parser{ik_allowed = F}). 249 250-define(FLUSH_TO_PARSER(Ch, Li, Co, De, P), 251 P#yamerl_parser{ 252 chars = Ch, 253 chars_len = P#yamerl_parser.chars_len - De, 254 chars_idx = P#yamerl_parser.chars_idx + De, 255 line = Li, 256 col = Co 257 }). 258 259-define(WARN_IF_NON_ASCII_LINE_BREAK(Ch, Li, Co, P), 260 case Ch of 261 [NL | _] when ?IS_NEWLINE_11(NL) -> 262 %% Non-ASCII line break in a YAML 1.2 document. 263 Err = #yamerl_parsing_error{ 264 type = warning, 265 name = non_ascii_line_break, 266 line = Li, 267 column = Co 268 }, 269 add_error(P, Err, 270 "Use of non-ASCII line break is not supported anymore starting " 271 "with YAML 1.2; treated as non-break character", []); 272 _ -> 273 P 274 end). 275 276-define(BLOCK_SCALAR_DEFAULT_TAG(L, C), 277 ?DEFAULT_TAG({non_specific, "!"}, L, C)). 278-define(PLAIN_SCALAR_DEFAULT_TAG(L, C), 279 ?DEFAULT_TAG({non_specific, "?"}, L, C)). 280-define(FLOW_SCALAR_DEFAULT_TAG(L, C), 281 ?DEFAULT_TAG({non_specific, "!"}, L, C)). 282-define(COLL_SCALAR_DEFAULT_TAG(L, C), 283 ?DEFAULT_TAG({non_specific, "?"}, L, C)). 284 285%% ------------------------------------------------------------------- 286%% Public API: chunked stream scanning. 287%% ------------------------------------------------------------------- 288 289%% @equiv new(Source, []) 290 291-spec new(Source) -> 292 Parser | no_return() when 293 Source :: term(), 294 Parser :: yamerl_parser(). 295 296new(Source) -> 297 new(Source, []). 298 299%% @doc Creates and returns a new YAML parser state. 300 301-spec new(Source, Options) -> 302 Parser | no_return() when 303 Source :: term(), 304 Options :: [yamerl_parser_option()], 305 Parser :: yamerl_parser(). 306 307new(Source, Options) -> 308 Options0 = proplists:unfold(Options), 309 check_options(Options0), 310 #yamerl_parser{ 311 source = Source, 312 options = Options0, 313 stream_state = fun start_stream/5, 314 token_fun = proplists:get_value(token_fun, Options0, acc) 315 }. 316 317%% @equiv next_chunk(Parser, Chunk, false) 318 319-spec next_chunk(Parser, Chunk) -> 320 Ret | no_return() when 321 Parser :: yamerl_parser(), 322 Chunk :: unicode_binary(), 323 Ret :: {continue, New_Parser}, 324 New_Parser :: yamerl_parser(). 325 326next_chunk(Parser, Chunk) -> 327 next_chunk(Parser, Chunk, false). 328 329%% @doc Feeds the parser with the next chunk from the YAML stream. 330 331-spec next_chunk(Parser, Chunk, Last_Chunk) -> 332 Ret | no_return() when 333 Parser :: yamerl_parser(), 334 Chunk :: unicode_binary(), 335 Last_Chunk :: boolean(), 336 Ret :: {continue, New_Parser} | New_Parser, 337 New_Parser :: yamerl_parser(). 338 339next_chunk(Parser, <<>>, false) -> 340 %% No need to proceed further without any data. 341 do_return(Parser); 342next_chunk(#yamerl_parser{raw_data = Data} = Parser, Chunk, EOS) -> 343 %% Append new data to the remaining data. Those data must then be 344 %% decoded to Unicode characters. 345 New_Data = list_to_binary([Data, Chunk]), 346 Parser1 = Parser#yamerl_parser{ 347 raw_data = New_Data, 348 raw_eos = EOS 349 }, 350 decode_unicode(Parser1). 351 352%% @equiv next_chunk(Parser, Chunk, true) 353 354-spec last_chunk(Parser, Chunk) -> 355 Ret | no_return() when 356 Parser :: yamerl_parser(), 357 Chunk :: unicode_binary(), 358 Ret :: {continue, New_Parser} | New_Parser, 359 New_Parser :: yamerl_parser(). 360 361last_chunk(Parser, Chunk) -> 362 next_chunk(Parser, Chunk, true). 363 364%% ------------------------------------------------------------------- 365%% Public API: common stream sources. 366%% ------------------------------------------------------------------- 367 368%% @equiv string(String, []) 369 370-spec string(String) -> 371 Parser | no_return() when 372 String :: unicode_data(), 373 Parser :: yamerl_parser(). 374 375string(String) -> 376 string(String, []). 377 378%% @doc Parses a YAML document from an in-memory YAML string. 379 380-spec string(String, Options) -> 381 Parser | no_return() when 382 String :: unicode_data(), 383 Options :: [yamerl_parser_option()], 384 Parser :: yamerl_parser(). 385 386string(String, Options) when is_binary(String) -> 387 Parser = new(string, Options), 388 next_chunk(Parser, String, true); 389string(String, Options) when is_list(String) -> 390 string(unicode:characters_to_binary(String), Options). 391 392%% @equiv file(Filename, []) 393 394-spec file(Filename) -> 395 Parser | no_return() when 396 Filename :: string(), 397 Parser :: yamerl_parser(). 398 399file(Filename) -> 400 file(Filename, []). 401 402%% @doc Parses a YAML document from a regular file. 403 404-spec file(Filename, Options) -> 405 Parser | no_return() when 406 Filename :: string(), 407 Options :: [yamerl_parser_option()], 408 Parser :: yamerl_parser(). 409 410file(Filename, Options) -> 411 Parser = new({file, Filename}, Options), 412 Blocksize = proplists:get_value(io_blocksize, Options, ?IO_BLOCKSIZE), 413 case file:open(Filename, [read, binary]) of 414 {ok, FD} -> 415 %% The file is read in binary mode. The scanner is 416 %% responsible for determining the encoding and converting 417 %% the stream accordingly. 418 file2(Parser, FD, Blocksize); 419 {error, Reason} -> 420 Error2 = #yamerl_parsing_error{ 421 name = file_open_failure, 422 extra = [{error, Reason}] 423 }, 424 Parser2 = add_error(Parser, Error2, 425 "Failed to open file \"~s\": ~s", 426 [Filename, file:format_error(Reason)]), 427 do_return(Parser2) 428 end. 429 430file2(#yamerl_parser{source = {file, Filename}} = Parser, FD, Blocksize) -> 431 case file:read(FD, Blocksize) of 432 {ok, Data} -> 433 %% If the chunk is smaller than the requested size, we 434 %% reached EOS. 435 EOS = byte_size(Data) < Blocksize, 436 if 437 EOS -> file:close(FD); 438 true -> ok 439 end, 440 try 441 case next_chunk(Parser, Data, EOS) of 442 {continue, Parser1} -> 443 file2(Parser1, FD, Blocksize); 444 Parser1 -> 445 Parser1 446 end 447 catch 448 throw:{yamerl_parser, _} = Exception -> 449 %% Close the file and throw the exception again. 450 file:close(FD), 451 throw(Exception) 452 end; 453 eof -> 454 file:close(FD), 455 next_chunk(Parser, <<>>, true); 456 {error, Reason} -> 457 Error = #yamerl_parsing_error{ 458 name = file_read_failure, 459 extra = [{error, Reason}] 460 }, 461 Parser1 = add_error(Parser, Error, 462 "Failed to read file \"~s\": ~s", 463 [Filename, file:format_error(Reason)]), 464 do_return(Parser1) 465 end. 466 467%% ------------------------------------------------------------------- 468%% Public API: get/set the token function. 469%% ------------------------------------------------------------------- 470 471%% @doc Returns the constructor callback function 472 473get_token_fun(#yamerl_parser{token_fun = Fun}) -> 474 Fun. 475 476%% @doc Sets the constructor callback function 477 478set_token_fun(Parser, Fun) when is_function(Fun, 1) -> 479 Parser#yamerl_parser{token_fun = Fun}. 480 481%% ------------------------------------------------------------------- 482%% Determine encoding and decode Unicode. 483%% ------------------------------------------------------------------- 484 485decode_unicode(#yamerl_parser{stream_state = State, 486 encoding = Encoding, raw_data = Data, raw_idx = Raw_Index, 487 chars = Chars, chars_len = Chars_Count} = Parser) 488 when Encoding /= undefined -> 489 %% We have previously determined the encoding of the stream. We can 490 %% decode the Unicode characters from the raw data. 491 Ret = unicode:characters_to_list(Data, Encoding), 492 {Parser2, Chars2} = case Ret of 493 {Reason, New_Chars, Remaining_Data} -> 494 %% Ok, we have more characters to scan! 495 Raw_Index1 = Raw_Index + 496 (byte_size(Data) - byte_size(Remaining_Data)), 497 Parser1 = Parser#yamerl_parser{ 498 raw_data = Remaining_Data, 499 raw_idx = Raw_Index1, 500 chars_len = Chars_Count + length(New_Chars) 501 }, 502 Chars1 = Chars ++ New_Chars, 503 case Reason of 504 incomplete -> 505 {Parser1, Chars1}; 506 error -> 507 Error = #yamerl_parsing_error{ 508 name = invalid_unicode, 509 extra = [{byte, Raw_Index1 + 1}] 510 }, 511 { 512 add_error(Parser1, Error, 513 "Invalid Unicode character at byte #~b", 514 [Raw_Index1 + 1]), 515 Chars1 516 } 517 end; 518 New_Chars -> 519 %% Ok, we have more characters to scan! 520 Raw_Index1 = Raw_Index + byte_size(Data), 521 Parser1 = Parser#yamerl_parser{ 522 raw_data = <<>>, 523 raw_idx = Raw_Index1, 524 chars_len = Chars_Count + length(New_Chars) 525 }, 526 Chars1 = Chars ++ New_Chars, 527 {Parser1, Chars1} 528 end, 529 State(Chars2, Parser2#yamerl_parser.line, Parser2#yamerl_parser.col, 0, 530 Parser2); 531decode_unicode(#yamerl_parser{raw_data = Data, raw_eos = EOS} = Parser) 532 when ((EOS == false andalso byte_size(Data) >= 4) orelse EOS == true) -> 533 %% We have enough (maybe even all) data to determine the encoding. 534 %% Let's check if the stream starts with a BOM. 535 {Encoding, Length} = get_encoding(Data), 536 %% The stream may start with a BOM: remove it. 537 <<_:Length/binary, New_Data/binary>> = Data, 538 Parser1 = Parser#yamerl_parser{ 539 encoding = Encoding, 540 raw_data = New_Data, 541 raw_idx = Length, 542 chars_idx = 1 543 }, 544 decode_unicode(Parser1); 545decode_unicode(Parser) -> 546 %% We don't have enough data to determine the encoding. We ask for 547 %% more data. 548 do_return(Parser). 549 550get_encoding(<<16#00, 16#00, 16#fe, 16#ff, _/binary>>) -> {{utf32, big}, 4}; 551get_encoding(<<16#00, 16#00, 16#00, _, _/binary>>) -> {{utf32, big}, 0}; 552get_encoding(<<16#ff, 16#fe, 16#00, 16#00, _/binary>>) -> {{utf32, little}, 4}; 553get_encoding(<<_, 16#00, 16#00, 16#00, _/binary>>) -> {{utf32, little}, 0}; 554get_encoding(<<16#fe, 16#ff, _, _, _/binary>>) -> {{utf16, big}, 2}; 555get_encoding(<<16#00, _, _, _, _/binary>>) -> {{utf16, big}, 0}; 556get_encoding(<<16#ff, 16#fe, _, _, _/binary>>) -> {{utf16, little}, 2}; 557get_encoding(<<_, 16#00, _, _, _/binary>>) -> {{utf16, little}, 0}; 558get_encoding(<<16#ef, 16#bb, 16#bf, _, _/binary>>) -> {utf8, 3}; 559get_encoding(_) -> {utf8, 0}. 560 561%% ------------------------------------------------------------------- 562%% Scan characters and emit tokens. 563%% ------------------------------------------------------------------- 564 565%% 566%% Stream start/end. 567%% 568 569start_stream(Chars, Line, Col, Delta, 570 #yamerl_parser{encoding = Encoding} = Parser) -> 571 %% The very first token to emit is the stream start. The stream 572 %% encoding is provided as an attribute. The encoding may appear at 573 %% the start of each document but can't be changed: all documents 574 %% must have the same encoding! 575 Parser1 = ?PUSH_FAKE_IMPL_KEY(Parser), 576 Parser2 = ?ALLOW_IMPL_KEY(Parser1, true), 577 Parser3 = setup_default_tags(Parser2), 578 Token = #yamerl_stream_start{ 579 encoding = Encoding, 580 line = Line, 581 column = Col 582 }, 583 Parser4 = queue_token(Parser3, Token), 584 find_next_token(Chars, Line, Col, Delta, Parser4). 585 586end_stream(Chars, Line, Col, Delta, 587 #yamerl_parser{last_token_endline = Last_Line, 588 last_token_endcol = Last_Col} = Parser) -> 589 %% Reset cursor on column 0 to close all opened block collections. 590 Parser1 = check_for_closed_block_collections(Chars, Line, Col, Delta, 591 Parser, 0), 592 Parser2 = remove_impl_key_pos(Parser1), 593 Parser3 = ?ALLOW_IMPL_KEY(Parser2, false), 594 %% Set the line and column number to the last token endline/endcol 595 %% number. This is useful when parsing a file: the last line is 596 %% often terminated by a newline character. Thanks to this, the 597 %% stream_end token will be on the last token line. 598 Token = #yamerl_stream_end{ 599 line = Last_Line, 600 column = Last_Col 601 }, 602 Parser4 = queue_token(Parser3, Token), 603 return(Chars, Line, Col, Delta, Parser4). 604 605%% 606%% Next token. 607%% 608 609find_next_token(Chars, Line, Col, Delta, 610 #yamerl_parser{endpos_set_by_token = true} = Parser) -> 611 %% The line and column numbers where the last token ends was already 612 %% set during token parsing. 613 Parser1 = Parser#yamerl_parser{ 614 endpos_set_by_token = false 615 }, 616 do_find_next_token(Chars, Line, Col, Delta, Parser1); 617find_next_token(Chars, Line, Col, Delta, Parser) -> 618 %% Record the line and columns numbers where the last token ends. 619 %% It's used to determine if an implicit key would span several 620 %% lines and therefore would be unacceptable. 621 Parser1 = Parser#yamerl_parser{ 622 endpos_set_by_token = false, 623 last_token_endline = Line, 624 last_token_endcol = Col 625 }, 626 do_find_next_token(Chars, Line, Col, Delta, Parser1). 627 628%% Skip spaces. 629do_find_next_token([$\s | Rest], Line, Col, Delta, Parser) -> 630 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 631 do_find_next_token(Rest, Line, Col1, Delta1, Parser); 632 633%% Skip tabs only when they're separation spaces, not indentation. 634do_find_next_token([$\t | Rest], Line, Col, Delta, 635 #yamerl_parser{ik_allowed = IK_Allowed} = Parser) 636 when ?IN_FLOW_CTX(Parser) orelse not IK_Allowed -> 637 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 638 do_find_next_token(Rest, Line, Col1, Delta1, Parser); 639 640%% Skip comments. 641do_find_next_token([$# | _] = Chars, Line, Col, Delta, Parser) -> 642 parse_comment(Chars, Line, Col, Delta, Parser); 643 644%% Continue with next line. 645do_find_next_token(Chars, Line, Col, Delta, 646 #yamerl_parser{missed_nl = true} = Parser) -> 647 Parser1 = Parser#yamerl_parser{ 648 missed_nl = false 649 }, 650 Parser2 = if 651 ?IN_BLOCK_CTX(Parser1) -> ?ALLOW_IMPL_KEY(Parser1, true); 652 true -> Parser1 653 end, 654 do_find_next_token(Chars, Line, Col, Delta, Parser2); 655 656do_find_next_token([$\r] = Chars, Line, Col, Delta, 657 #yamerl_parser{raw_eos = false} = Parser) -> 658 %% Can't be sure it's a newline. It may be followed by a LF. 659 suspend_parsing(Chars, Line, Col, Delta, Parser, fun do_find_next_token/5); 660do_find_next_token([C | _] = Chars, Line, _, Delta, 661 #yamerl_parser{doc_version = Version} = Parser) 662 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 663 {Chars1, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 664 Parser1 = if 665 ?IN_BLOCK_CTX(Parser) -> ?ALLOW_IMPL_KEY(Parser, true); 666 true -> Parser 667 end, 668 do_find_next_token(Chars1, Line1, Col1, Delta1, Parser1); 669 670%% End-of-stream reached. 671do_find_next_token([] = Chars, Line, Col, Delta, 672 #yamerl_parser{raw_eos = true} = Parser) -> 673 end_stream(Chars, Line, Col, Delta, Parser); 674 675%% Wait for more data. 676do_find_next_token([] = Chars, Line, Col, Delta, Parser) -> 677 suspend_parsing(Chars, Line, Col, Delta, Parser, fun do_find_next_token/5); 678 679%% Next token found! 680do_find_next_token(Chars, Line, Col, Delta, Parser) -> 681 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 682 Parser2 = check_for_closed_block_collections(Chars, Line, Col, Delta, 683 Parser1, Col), 684 determine_token_type(Chars, Line, Col, Delta, Parser2). 685 686%% 687%% Token type. 688%% 689 690%% Not enough data to determine the token type. 691determine_token_type(Chars, Line, Col, Delta, 692 #yamerl_parser{chars_len = Len, raw_eos = false} = Parser) 693 when (Len - Delta) < 4 -> 694 suspend_parsing(Chars, Line, Col, Delta, Parser, 695 fun determine_token_type/5); 696 697%% BOM, before a document only! 698determine_token_type([C | Rest], Line, Col, Delta, 699 #yamerl_parser{doc_started = false} = Parser) 700 when ?IS_BOM(C) -> 701 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 702 find_next_token(Rest, Line, Col1, Delta1, Parser); 703determine_token_type([C | Rest], Line, Col, Delta, 704 #yamerl_parser{doc_started = true} = Parser) 705 when ?IS_BOM(C) -> 706 %% A BOM is forbidden after the document start. Because it's not 707 %% fatal during parsing, we only add a warning. Note that the YAML 708 %% specification considers this to be an error. 709 Error = #yamerl_parsing_error{ 710 type = warning, 711 name = bom_after_doc_start, 712 line = Line, 713 column = Col 714 }, 715 Parser1 = add_error(Parser, Error, 716 "A BOM must not appear inside a document", []), 717 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 718 find_next_token(Rest, Line, Col1, Delta1, Parser1); 719 720%% Directives end indicator. 721determine_token_type([$-, $-, $-, C | _] = Chars, Line, 1 = Col, Delta, 722 #yamerl_parser{doc_version = Version} = Parser) 723 when ?IS_NEWLINE(C) orelse ?IS_SPACE(C) orelse 724 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 725 parse_document_sep(Chars, Line, Col, Delta, Parser, directives_end); 726determine_token_type([$-, $-, $-] = Chars, Line, 1 = Col, Delta, 727 #yamerl_parser{raw_eos = true} = Parser) -> 728 parse_document_sep(Chars, Line, Col, Delta, Parser, directives_end); 729 730%% Document end indicator. 731determine_token_type([$., $., $., C | _] = Chars, Line, 1 = Col, Delta, 732 #yamerl_parser{doc_version = Version} = Parser) 733 when ?IS_NEWLINE(C) orelse ?IS_SPACE(C) orelse 734 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 735 parse_document_sep(Chars, Line, Col, Delta, Parser, document_end); 736determine_token_type([$., $., $.] = Chars, Line, 1 = Col, Delta, 737 #yamerl_parser{raw_eos = true} = Parser) -> 738 parse_document_sep(Chars, Line, Col, Delta, Parser, document_end); 739 740%% Directive indicator. 741determine_token_type([$% | _] = Chars, Line, 1 = Col, Delta, 742 #yamerl_parser{doc_started = false} = Parser) -> 743 parse_directive(Chars, Line, Col, Delta, Parser); 744 745%% Flow sequence indicators. 746determine_token_type([$[ | _] = Chars, Line, Col, Delta, Parser) -> 747 parse_flow_collection_start(Chars, Line, Col, Delta, Parser, sequence); 748determine_token_type([$] | _] = Chars, Line, Col, Delta, Parser) -> 749 parse_flow_collection_end(Chars, Line, Col, Delta, Parser, sequence); 750 751%% Flow mapping indicators. 752determine_token_type([${ | _] = Chars, Line, Col, Delta, Parser) -> 753 parse_flow_collection_start(Chars, Line, Col, Delta, Parser, mapping); 754determine_token_type([$} | _] = Chars, Line, Col, Delta, Parser) -> 755 parse_flow_collection_end(Chars, Line, Col, Delta, Parser, mapping); 756 757%% Flow collection entry indicator. 758determine_token_type([$, | _] = Chars, Line, Col, Delta, Parser) -> 759 parse_flow_entry(Chars, Line, Col, Delta, Parser); 760 761%% Block collection entry indicator. 762determine_token_type([$-, C | _] = Chars, Line, Col, Delta, 763 #yamerl_parser{doc_version = Version} = Parser) 764 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 765 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 766 parse_block_entry(Chars, Line, Col, Delta, Parser); 767 768%% Mapping key indicator. 769determine_token_type([$?, C | _] = Chars, Line, Col, Delta, 770 #yamerl_parser{doc_version = Version} = Parser) 771 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 772 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 773 parse_mapping_key(Chars, Line, Col, Delta, Parser); 774 775%% Mapping value indicator. 776determine_token_type([$:, C | _] = Chars, Line, Col, Delta, 777 #yamerl_parser{doc_version = Version} = Parser) 778 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse ?IS_FLOW_INDICATOR(C) orelse 779 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 780 parse_mapping_value(Chars, Line, Col, Delta, Parser); 781determine_token_type([$: | _] = Chars, Line, Col, Delta, 782 #yamerl_parser{last_is_json_like = true} = Parser) 783 when ?IN_FLOW_CTX(Parser) -> 784 %% This is a key: value pair indicator only when the last token is 785 %% JSON-like and we're in flow context. 786 parse_mapping_value(Chars, Line, Col, Delta, Parser); 787determine_token_type([$:] = Chars, Line, Col, Delta, 788 #yamerl_parser{raw_eos = true} = Parser) 789 when ?IN_BLOCK_CTX(Parser) -> 790 parse_mapping_value(Chars, Line, Col, Delta, Parser); 791 792%% Anchor and alias indicator. 793determine_token_type([$& | _] = Chars, Line, Col, Delta, Parser) -> 794 parse_anchor_or_alias(Chars, Line, Col, Delta, Parser, anchor); 795determine_token_type([$* | _] = Chars, Line, Col, Delta, Parser) -> 796 parse_anchor_or_alias(Chars, Line, Col, Delta, Parser, alias); 797 798%% Tag indicator. 799determine_token_type([$! | _] = Chars, Line, Col, Delta, Parser) -> 800 parse_tag(Chars, Line, Col, Delta, Parser); 801 802%% Block scalar. 803determine_token_type([$| | _] = Chars, Line, Col, Delta, Parser) -> 804 parse_block_scalar(Chars, Line, Col, Delta, Parser, literal); 805determine_token_type([$> | _] = Chars, Line, Col, Delta, Parser) -> 806 parse_block_scalar(Chars, Line, Col, Delta, Parser, folded); 807 808%% Single-quoted flow scalar. 809determine_token_type([$' | _] = Chars, Line, Col, Delta, Parser) -> 810 parse_flow_scalar(Chars, Line, Col, Delta, Parser, single_quoted); 811 812%% Double-quoted flow scalar. 813determine_token_type([$" | _] = Chars, Line, Col, Delta, Parser) -> 814 parse_flow_scalar(Chars, Line, Col, Delta, Parser, double_quoted); 815 816%% Reserved indicators. 817%% We add a warning and parse it as a plain scalar. 818determine_token_type([C | _] = Chars, Line, Col, Delta, Parser) 819 when C == $@ orelse C == $` -> 820 Error = #yamerl_parsing_error{ 821 name = reserved_indicator, 822 type = warning, 823 line = Line, 824 column = Col 825 }, 826 Parser1 = add_error(Parser, Error, 827 "The reserved indicator \"~c\" is not allowed at the " 828 "beginning of a plain scalar", [C]), 829 parse_flow_scalar(Chars, Line, Col, Delta, Parser1, plain); 830 831%% Plain flow scalar. 832determine_token_type(Chars, Line, Col, Delta, Parser) -> 833 parse_flow_scalar(Chars, Line, Col, Delta, Parser, plain). 834 835%% ------------------------------------------------------------------- 836%% Directives and document ends. 837%% ------------------------------------------------------------------- 838 839parse_document_sep([_, _, _ | Rest] = Chars, Line, Col, Delta, Parser, Type) -> 840 %% Reset cursor on column 0 to close all opened block collections. 841 Parser1 = check_for_closed_block_collections(Chars, Line, Col, Delta, 842 Parser, 0), 843 Parser2 = remove_impl_key_pos(Parser1), 844 Parser3 = ?ALLOW_IMPL_KEY(Parser2, false), 845 Parser4 = case Type of 846 directives_end -> start_doc(Parser3, Line, Col, tail); 847 document_end -> end_doc(Parser3, Line, Col, tail) 848 end, 849 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 3), 850 find_next_token(Rest, Line, Col1, Delta1, Parser4). 851 852start_doc(#yamerl_parser{doc_started = true} = Parser, 853 Line, Col, Insert_At) -> 854 %% A document is already opened: we must close it before starting a 855 %% new one. 856 Parser1 = end_doc(Parser, Line, Col, Insert_At), 857 start_doc(Parser1, Line, Col, next_insert_at(Insert_At, 1)); 858start_doc( 859 #yamerl_parser{options = Options, doc_version = Version, 860 tags = Tags} = Parser, 861 Line, Col, Insert_At) -> 862 %% When a document starts, we set the version to 863 %% ?IMPLICIT_DOC_VERSION if no YAML directive were specified. 864 Forced = proplists:get_value(doc_version, Options), 865 Version1 = case Version of 866 _ when Forced /= undefined -> Forced; 867 undefined -> ?IMPLICIT_YAML_VERSION; 868 _ -> Version 869 end, 870 Token = #yamerl_doc_start{ 871 version = Version1, 872 tags = Tags, 873 line = Line, 874 column = Col 875 }, 876 Parser1 = case Version1 of 877 {Major, Minor} when Major < ?MIN_YAML_MAJOR_VERSION_SUPPORTED orelse 878 (Major == ?MIN_YAML_MAJOR_VERSION_SUPPORTED andalso 879 Minor < ?MIN_YAML_MINOR_VERSION_SUPPORTED) -> 880 %% The document's version is not supported at all (below 881 %% minimum supported version). 882 Error = #yamerl_parsing_error{ 883 name = version_not_supported, 884 token = Token, 885 line = Line, 886 column = Col 887 }, 888 Parser0 = add_error(Parser, Error, 889 "Version ~b.~b not supported (minimum version ~b.~b)", 890 [ 891 Major, Minor, 892 ?MIN_YAML_MAJOR_VERSION_SUPPORTED, 893 ?MIN_YAML_MINOR_VERSION_SUPPORTED 894 ]), 895 %% Caution: Chars/Line/Col/Delta aren't flushed to Parser. 896 do_return(Parser0); 897 {Major, Minor} when 898 Major < ?MAX_YAML_MAJOR_VERSION_SUPPORTED orelse 899 (Major == ?MAX_YAML_MAJOR_VERSION_SUPPORTED andalso 900 Minor =< ?MAX_YAML_MINOR_VERSION_SUPPORTED) -> 901 %% Version supported. 902 Parser; 903 {Major, Minor} when Major > ?MAX_YAML_MAJOR_VERSION_SUPPORTED -> 904 %% The document's version is not supported at all (major 905 %% above maximum supported major). 906 Error = #yamerl_parsing_error{ 907 name = version_not_supported, 908 token = Token, 909 line = Line, 910 column = Col 911 }, 912 Parser0 = add_error(Parser, Error, 913 "Version ~b.~b not supported (maximum version ~b.~b)", 914 [ 915 Major, Minor, 916 ?MAX_YAML_MAJOR_VERSION_SUPPORTED, 917 ?MAX_YAML_MINOR_VERSION_SUPPORTED 918 ]), 919 %% Caution: Chars/Line/Col/Delta aren't flushed to Parser. 920 do_return(Parser0); 921 {Major, Minor} when Minor > ?MAX_YAML_MINOR_VERSION_SUPPORTED -> 922 %% The document's minor version is greater than the 923 %% supported version. Add a warning and continue anyway. 924 Error = #yamerl_parsing_error{ 925 name = version_not_supported, 926 type = warning, 927 token = Token, 928 line = Line, 929 column = Col 930 }, 931 Parser0 = add_error(Parser, Error, 932 "Version ~b.~b not supported (maximum version ~b.~b); " 933 "parsing may fail", 934 [ 935 Major, Minor, 936 ?MAX_YAML_MAJOR_VERSION_SUPPORTED, 937 ?MAX_YAML_MINOR_VERSION_SUPPORTED 938 ]), 939 Parser0 940 end, 941 Parser2 = Parser1#yamerl_parser{ 942 doc_started = true, 943 doc_version = Version1 944 }, 945 %% Emit a token with the determined version and the tags table. 946 queue_token(Parser2, Token, Insert_At). 947 948end_doc(#yamerl_parser{doc_started = false} = Parser, _, _, _) -> 949 %% No document to end. 950 Parser; 951end_doc(Parser, Line, Col, Insert_At) -> 952 %% At the end of the document, we reset the version and the tags 953 %% table. 954 Parser1 = Parser#yamerl_parser{ 955 doc_started = false, 956 doc_version = undefined 957 }, 958 Parser2 = setup_default_tags(Parser1), 959 Token = #yamerl_doc_end{ 960 line = Line, 961 column = Col 962 }, 963 queue_token(Parser2, Token, Insert_At). 964 965%% ------------------------------------------------------------------- 966%% Directives. 967%% ------------------------------------------------------------------- 968 969parse_directive([_ | Rest] = Chars, Line, Col, Delta, Parser) -> 970 Ctx = #directive_ctx{ 971 line = Line, 972 col = Col 973 }, 974 %% Reset cursor on column 0 to close all opened block collections. 975 Parser1 = check_for_closed_block_collections(Chars, Line, Col, Delta, 976 Parser, 0), 977 Parser2 = remove_impl_key_pos(Parser1), 978 Parser3 = ?ALLOW_IMPL_KEY(Parser2, false), 979 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 980 do_parse_directive(Rest, Line, Col1, Delta1, Parser3, Ctx). 981 982do_parse_directive([C | _] = Chars, Line, Col, Delta, 983 #yamerl_parser{doc_version = Version} = Parser, Ctx) 984 when ?IS_NEWLINE(C) orelse ?IS_SPACE(C) orelse 985 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 986 parse_directive2(Chars, Line, Col, Delta, Parser, Ctx); 987 988do_parse_directive([C | Rest] = Chars, Line, Col, Delta, Parser, Ctx) -> 989 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 990 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 991 Ctx1 = Ctx#directive_ctx{ 992 name = [C | Ctx#directive_ctx.name] 993 }, 994 do_parse_directive(Rest, Line, Col1, Delta1, Parser1, Ctx1); 995 996do_parse_directive([] = Chars, Line, Col, Delta, 997 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 998 parse_directive2(Chars, Line, Col, Delta, Parser, Ctx); 999do_parse_directive([] = Chars, Line, Col, Delta, 1000 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1001 suspend_parsing(Chars, Line, Col, Delta, Parser, 1002 fun do_parse_directive/6, Ctx). 1003 1004parse_directive2(Chars, Line, Col, Delta, Parser, Ctx) -> 1005 Name = lists:reverse(Ctx#directive_ctx.name), 1006 Ctx1 = Ctx#directive_ctx{ 1007 name = Name 1008 }, 1009 case Name of 1010 "YAML" -> 1011 parse_yaml_directive(Chars, Line, Col, Delta, Parser, Ctx1); 1012 "TAG" -> 1013 parse_tag_directive(Chars, Line, Col, Delta, Parser, Ctx1); 1014 _ -> 1015 parse_reserved_directive(Chars, Line, Col, Delta, Parser, Ctx1) 1016 end. 1017 1018skip_directive_trailing_ws([C | Rest], Line, Col, Delta, Parser) 1019 when ?IS_SPACE(C) -> 1020 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1021 skip_directive_trailing_ws(Rest, Line, Col1, Delta1, Parser); 1022skip_directive_trailing_ws([C | _] = Chars, Line, Col, Delta, 1023 #yamerl_parser{doc_version = Version} = Parser) 1024 when ?IS_NEWLINE(C) orelse C == $# orelse 1025 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 1026 find_next_token(Chars, Line, Col, Delta, Parser); 1027skip_directive_trailing_ws([] = Chars, Line, Col, Delta, Parser) -> 1028 suspend_parsing(Chars, Line, Col, Delta, Parser, 1029 fun skip_directive_trailing_ws/5); 1030 1031skip_directive_trailing_ws([_ | _] = Chars, Line, Col, Delta, Parser) -> 1032 Error = #yamerl_parsing_error{ 1033 name = unexpected_directive_extra_params, 1034 line = Line, 1035 column = Col 1036 }, 1037 Parser1 = add_error(Parser, Error, 1038 "Unexpected directive extra parameters", []), 1039 return(Chars, Line, Col, Delta, Parser1). 1040 1041%% 1042%% YAML directive. 1043%% 1044 1045parse_yaml_directive(Chars, Line, Col, Delta, Parser, 1046 #directive_ctx{line = Dir_Line, col = Dir_Col}) -> 1047 Ctx = #yaml_directive_ctx{ 1048 line = Dir_Line, 1049 col = Dir_Col 1050 }, 1051 parse_yaml_directive_major(Chars, Line, Col, Delta, Parser, Ctx). 1052 1053%% Major version number. 1054parse_yaml_directive_major([C | Rest], Line, Col, Delta, Parser, 1055 #yaml_directive_ctx{major = Major} = Ctx) when C >= $0 andalso C =< $9 -> 1056 Major1 = case Major of 1057 undefined -> C - $0; 1058 _ -> Major * 10 + C - $0 1059 end, 1060 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1061 Ctx1 = Ctx#yaml_directive_ctx{ 1062 major = Major1 1063 }, 1064 parse_yaml_directive_major(Rest, Line, Col1, Delta1, Parser, Ctx1); 1065parse_yaml_directive_major([$. | Rest], Line, Col, Delta, Parser, 1066 #yaml_directive_ctx{major = Major} = Ctx) when is_integer(Major) -> 1067 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1068 %% End of the major part. Continue with the minor version number. 1069 parse_yaml_directive_minor(Rest, Line, Col1, Delta1, Parser, Ctx); 1070parse_yaml_directive_major([C | Rest], Line, Col, Delta, Parser, 1071 #yaml_directive_ctx{major = undefined} = Ctx) when ?IS_SPACE(C) -> 1072 %% Skip leading white spaces. 1073 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1074 parse_yaml_directive_major(Rest, Line, Col1, Delta1, Parser, Ctx); 1075parse_yaml_directive_major([] = Chars, Line, Col, Delta, 1076 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1077 suspend_parsing(Chars, Line, Col, Delta, Parser, 1078 fun parse_yaml_directive_major/6, Ctx); 1079 1080parse_yaml_directive_major([_ | _] = Chars, Line, Col, Delta, Parser, 1081 #yaml_directive_ctx{line = Dir_Line, col = Dir_Col}) -> 1082 %% Invalid character (or end of directive) while parsing major 1083 %% version number. 1084 Token = #yamerl_yaml_directive{ 1085 line = Dir_Line, 1086 column = Dir_Col 1087 }, 1088 Error = #yamerl_parsing_error{ 1089 name = invalid_yaml_directive, 1090 token = Token, 1091 line = Line, 1092 column = Col 1093 }, 1094 Parser1 = add_error(Parser, Error, 1095 "Invalid major version number in YAML directive", []), 1096 return(Chars, Line, Col, Delta, Parser1); 1097parse_yaml_directive_major([] = Chars, Line, Col, Delta, 1098 #yamerl_parser{raw_eos = true} = Parser, 1099 #yaml_directive_ctx{line = Dir_Line, col = Dir_Col}) -> 1100 %% Invalid end-of-stream while parsing major version number. 1101 Token = #yamerl_yaml_directive{ 1102 line = Dir_Line, 1103 column = Dir_Col 1104 }, 1105 Error = #yamerl_parsing_error{ 1106 name = invalid_yaml_directive, 1107 token = Token, 1108 line = Line, 1109 column = Col 1110 }, 1111 Parser1 = add_error(Parser, Error, 1112 "Unexpected end-of-stream while parsing YAML directive", []), 1113 return(Chars, Line, Col, Delta, Parser1). 1114 1115%% Minor version number. 1116parse_yaml_directive_minor([C | Rest], Line, Col, Delta, Parser, 1117 #yaml_directive_ctx{minor = Minor} = Ctx) when C >= $0 andalso C =< $9 -> 1118 Minor1 = case Minor of 1119 undefined -> C - $0; 1120 _ -> Minor * 10 + C - $0 1121 end, 1122 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1123 Ctx1 = Ctx#yaml_directive_ctx{ 1124 minor = Minor1 1125 }, 1126 parse_yaml_directive_minor(Rest, Line, Col1, Delta1, Parser, Ctx1); 1127parse_yaml_directive_minor([C | _] = Chars, Line, Col, Delta, 1128 #yamerl_parser{doc_version = Version} = Parser, 1129 #yaml_directive_ctx{minor = Minor} = Ctx) 1130 when is_integer(Minor) andalso 1131 (?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 1132 (Version == {1,1} andalso ?IS_NEWLINE_11(C))) -> 1133 %% Directive end. 1134 queue_yaml_directive(Chars, Line, Col, Delta, Parser, Ctx); 1135parse_yaml_directive_minor([] = Chars, Line, Col, Delta, 1136 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 1137 %% Directive end. 1138 queue_yaml_directive(Chars, Line, Col, Delta, Parser, Ctx); 1139parse_yaml_directive_minor([] = Chars, Line, Col, Delta, 1140 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1141 suspend_parsing(Chars, Line, Col, Delta, Parser, 1142 fun parse_yaml_directive_minor/6, Ctx); 1143 1144parse_yaml_directive_minor([_ | _] = Chars, Line, Col, Delta, Parser, 1145 #yaml_directive_ctx{line = Dir_Line, col = Dir_Col}) -> 1146 %% Invalid character while parsing minor version number. 1147 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 1148 Token = #yamerl_yaml_directive{ 1149 line = Dir_Line, 1150 column = Dir_Col 1151 }, 1152 Error = #yamerl_parsing_error{ 1153 name = invalid_yaml_directive, 1154 token = Token, 1155 line = Line, 1156 column = Col 1157 }, 1158 Parser2 = add_error(Parser1, Error, 1159 "Invalid minor version number in YAML directive", []), 1160 return(Chars, Line, Col, Delta, Parser2). 1161 1162%% Queue token. 1163queue_yaml_directive(Chars, Line, Col, Delta, 1164 #yamerl_parser{doc_version = undefined} = Parser, 1165 #yaml_directive_ctx{major = Major, minor = Minor, 1166 line = Dir_Line, col = Dir_Col}) -> 1167 Version = {Major, Minor}, 1168 Token = #yamerl_yaml_directive{ 1169 version = Version, 1170 line = Dir_Line, 1171 column = Dir_Col 1172 }, 1173 Parser1 = queue_token(Parser, Token), 1174 Parser2 = Parser1#yamerl_parser{ 1175 doc_version = Version 1176 }, 1177 skip_directive_trailing_ws(Chars, Line, Col, Delta, Parser2); 1178queue_yaml_directive(Chars, Line, Col, Delta, Parser, 1179 #yaml_directive_ctx{major = Major, minor = Minor, 1180 line = Dir_Line, col = Dir_Col} = Ctx) -> 1181 %% Warning: repeated YAML directive. 1182 Version = {Major, Minor}, 1183 Token = #yamerl_yaml_directive{ 1184 version = Version, 1185 line = Dir_Line, 1186 column = Dir_Col 1187 }, 1188 Error = #yamerl_parsing_error{ 1189 type = warning, 1190 name = multiple_yaml_directives, 1191 token = Token, 1192 line = Dir_Line, 1193 column = Dir_Col 1194 }, 1195 Parser1 = add_error(Parser, Error, 1196 "Multiple YAML directives found: the last one will be used", []), 1197 Parser2 = Parser1#yamerl_parser{ 1198 doc_version = undefined 1199 }, 1200 queue_yaml_directive(Chars, Line, Col, Delta, Parser2, Ctx). 1201 1202%% 1203%% TAG directive. 1204%% 1205 1206parse_tag_directive(Chars, Line, Col, Delta, Parser, 1207 #directive_ctx{line = Dir_Line, col = Dir_Col}) -> 1208 Ctx = #tag_directive_ctx{ 1209 line = Dir_Line, 1210 col = Dir_Col 1211 }, 1212 parse_tag_directive_handle(Chars, Line, Col, Delta, Parser, Ctx). 1213 1214%% Tag handle. 1215parse_tag_directive_handle([$! | Rest], Line, Col, Delta, Parser, 1216 #tag_directive_ctx{handle = undefined} = Ctx) -> 1217 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1218 Ctx1 = Ctx#tag_directive_ctx{ 1219 handle = "!" 1220 }, 1221 parse_tag_directive_handle(Rest, Line, Col1, Delta1, Parser, Ctx1); 1222parse_tag_directive_handle([C | Rest], Line, Col, Delta, Parser, 1223 #tag_directive_ctx{handle = Handle} = Ctx) 1224 when is_list(Handle) andalso 1225 ((C >= $a andalso C =< $z) orelse 1226 (C >= $A andalso C =< $Z) orelse 1227 (C >= $0 andalso C =< $9) orelse 1228 (C == $-)) -> 1229 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1230 Ctx1 = Ctx#tag_directive_ctx{ 1231 handle = [C | Handle] 1232 }, 1233 parse_tag_directive_handle(Rest, Line, Col1, Delta1, Parser, Ctx1); 1234parse_tag_directive_handle([$! | Rest], Line, Col, Delta, Parser, 1235 #tag_directive_ctx{handle = Handle} = Ctx) when is_list(Handle) -> 1236 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1237 Ctx1 = Ctx#tag_directive_ctx{ 1238 handle = [$! | Handle] 1239 }, 1240 parse_tag_directive_prefix(Rest, Line, Col1, Delta1, Parser, Ctx1); 1241parse_tag_directive_handle([C | _] = Chars, Line, Col, Delta, Parser, 1242 #tag_directive_ctx{handle = "!"} = Ctx) when ?IS_SPACE(C) -> 1243 parse_tag_directive_prefix(Chars, Line, Col, Delta, Parser, Ctx); 1244parse_tag_directive_handle([C | Rest], Line, Col, Delta, Parser, 1245 #tag_directive_ctx{handle = undefined} = Ctx) when ?IS_SPACE(C) -> 1246 %% Skip leading white spaces. 1247 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1248 parse_tag_directive_handle(Rest, Line, Col1, Delta1, Parser, Ctx); 1249parse_tag_directive_handle([] = Chars, Line, Col, Delta, 1250 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1251 suspend_parsing(Chars, Line, Col, Delta, Parser, 1252 fun parse_tag_directive_handle/6, Ctx); 1253 1254parse_tag_directive_handle([_ | _] = Chars, Line, Col, Delta, Parser, 1255 #tag_directive_ctx{handle = Handle, line = Dir_Line, col = Dir_Col}) -> 1256 %% Invalid character (or end of directive) while parsing tag handle. 1257 Handle1 = case Handle of 1258 undefined -> Handle; 1259 _ -> lists:reverse(Handle) 1260 end, 1261 Token = #yamerl_tag_directive{ 1262 handle = Handle1, 1263 line = Dir_Line, 1264 column = Dir_Col 1265 }, 1266 Error = #yamerl_parsing_error{ 1267 name = invalid_tag_directive, 1268 token = Token, 1269 line = Line, 1270 column = Col 1271 }, 1272 Parser1 = add_error(Parser, Error, 1273 "Invalid tag handle in TAG directive", []), 1274 return(Chars, Line, Col, Delta, Parser1); 1275parse_tag_directive_handle([] = Chars, Line, Col, Delta, 1276 #yamerl_parser{raw_eos = true} = Parser, 1277 #tag_directive_ctx{handle = Handle, line = Dir_Line, col = Dir_Col}) -> 1278 %% Invalid end-of-stream while parsing major version number. 1279 Handle1 = case Handle of 1280 undefined -> Handle; 1281 _ -> lists:reverse(Handle) 1282 end, 1283 Token = #yamerl_tag_directive{ 1284 handle = Handle1, 1285 line = Dir_Line, 1286 column = Dir_Col 1287 }, 1288 Error = #yamerl_parsing_error{ 1289 name = invalid_tag_directive, 1290 token = Token, 1291 line = Line, 1292 column = Col 1293 }, 1294 Parser1 = add_error(Parser, Error, 1295 "Unexpected end-of-stream while parsing TAG directive", []), 1296 return(Chars, Line, Col, Delta, Parser1). 1297 1298%% Tag prefix. 1299parse_tag_directive_prefix([C | Rest], Line, Col, Delta, Parser, 1300 #tag_directive_ctx{prefix = Prefix} = Ctx) 1301 when is_list(Prefix) andalso ?IS_URI_CHAR(C) -> 1302 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1303 Ctx1 = Ctx#tag_directive_ctx{ 1304 prefix = [C | Prefix] 1305 }, 1306 parse_tag_directive_prefix(Rest, Line, Col1, Delta1, Parser, Ctx1); 1307parse_tag_directive_prefix([C | Rest], Line, Col, Delta, Parser, 1308 #tag_directive_ctx{prefix = undefined} = Ctx) when ?IS_SPACE(C) -> 1309 %% Skip leading white spaces. 1310 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1311 Ctx1 = Ctx#tag_directive_ctx{ 1312 prefix = "" 1313 }, 1314 parse_tag_directive_prefix(Rest, Line, Col1, Delta1, Parser, Ctx1); 1315parse_tag_directive_prefix([C | Rest], Line, Col, Delta, Parser, 1316 #tag_directive_ctx{prefix = ""} = Ctx) when ?IS_SPACE(C) -> 1317 %% Skip leading white spaces. 1318 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1319 parse_tag_directive_prefix(Rest, Line, Col1, Delta1, Parser, Ctx); 1320parse_tag_directive_prefix([C | _] = Chars, Line, Col, Delta, 1321 #yamerl_parser{doc_version = Version} = Parser, 1322 #tag_directive_ctx{prefix = Prefix} = Ctx) 1323 when is_list(Prefix) andalso Prefix /= "" andalso 1324 (?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 1325 (Version == {1,1} andalso ?IS_NEWLINE_11(C))) -> 1326 queue_tag_directive(Chars, Line, Col, Delta, Parser, Ctx); 1327parse_tag_directive_prefix([] = Chars, Line, Col, Delta, 1328 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 1329 queue_tag_directive(Chars, Line, Col, Delta, Parser, Ctx); 1330parse_tag_directive_prefix([] = Chars, Line, Col, Delta, 1331 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1332 suspend_parsing(Chars, Line, Col, Delta, Parser, 1333 fun parse_tag_directive_prefix/6, Ctx); 1334 1335parse_tag_directive_prefix([_ | _] = Chars, Line, Col, Delta, Parser, 1336 #tag_directive_ctx{handle = Handle, prefix = Prefix, 1337 line = Dir_Line, col = Dir_Col}) -> 1338 %% Invalid character while parsing tag prefix. 1339 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 1340 Token = #yamerl_tag_directive{ 1341 handle = lists:reverse(Handle), 1342 prefix = lists:reverse(Prefix), 1343 line = Dir_Line, 1344 column = Dir_Col 1345 }, 1346 Error = #yamerl_parsing_error{ 1347 name = invalid_tag_directive, 1348 token = Token, 1349 line = Line, 1350 column = Col 1351 }, 1352 Parser2 = add_error(Parser1, Error, 1353 "Invalid tag prefix in TAG directive", []), 1354 return(Chars, Line, Col, Delta, Parser2). 1355 1356%% Queue token. 1357queue_tag_directive(Chars, Line, Col, Delta, 1358 #yamerl_parser{tags = Tags} = Parser, 1359 #tag_directive_ctx{handle = Handle, prefix = Prefix, 1360 line = Dir_Line, col = Dir_Col}) -> 1361 Handle1 = lists:reverse(Handle), 1362 Prefix1 = lists:reverse(Prefix), 1363 Token = #yamerl_tag_directive{ 1364 handle = Handle1, 1365 prefix = Prefix1, 1366 line = Dir_Line, 1367 column = Dir_Col 1368 }, 1369 Parser1 = is_uri_valid(Parser, Token), 1370 Parser2 = case dict:is_key(Handle1, Tags) of 1371 false -> 1372 Parser1; 1373 true -> 1374 Error = #yamerl_parsing_error{ 1375 type = warning, 1376 name = multiple_tag_handle_declarations, 1377 token = Token, 1378 line = Dir_Line, 1379 column = Dir_Col 1380 }, 1381 add_error(Parser1, Error, 1382 "Multiple declarations of the same handle found: " 1383 "the last one will be used", []) 1384 end, 1385 Parser3 = queue_token(Parser2, Token), 1386 Tags1 = dict:store(Handle1, Prefix1, Tags), 1387 Parser4 = Parser3#yamerl_parser{ 1388 tags = Tags1 1389 }, 1390 skip_directive_trailing_ws(Chars, Line, Col, Delta, Parser4). 1391 1392%% 1393%% Reserved directive. 1394%% 1395 1396parse_reserved_directive(Chars, Line, Col, Delta, Parser, 1397 #directive_ctx{name = Name, line = Dir_Line, col = Dir_Col}) -> 1398 Ctx = #reserved_directive_ctx{ 1399 name = Name, 1400 line = Dir_Line, 1401 col = Dir_Col 1402 }, 1403 parse_reserved_directive_arg(Chars, Line, Col, Delta, Parser, Ctx). 1404 1405parse_reserved_directive_arg([C | Rest], Line, Col, Delta, Parser, 1406 #reserved_directive_ctx{current = undefined} = Ctx) 1407 when ?IS_SPACE(C) -> 1408 %% Skip leading white spaces. 1409 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1410 parse_reserved_directive_arg(Rest, Line, Col1, Delta1, Parser, Ctx); 1411parse_reserved_directive_arg([C | _] = Chars, Line, Col, Delta, 1412 #yamerl_parser{doc_version = Version} = Parser, 1413 #reserved_directive_ctx{current = undefined} = Ctx) 1414 when ?IS_NEWLINE(C) orelse C == $# orelse 1415 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 1416 %% End of directive. 1417 queue_reserved_directive(Chars, Line, Col, Delta, Parser, Ctx); 1418parse_reserved_directive_arg([C | _] = Chars, Line, Col, Delta, 1419 #yamerl_parser{doc_version = Version} = Parser, 1420 #reserved_directive_ctx{current = Current, 1421 args = Args, args_count = Count} = Ctx) 1422 when is_list(Current) andalso 1423 (?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 1424 (Version == {1,1} andalso ?IS_NEWLINE_11(C))) -> 1425 %% Current argument finished. 1426 Current1 = lists:reverse(Current), 1427 Ctx1 = Ctx#reserved_directive_ctx{ 1428 current = undefined, 1429 args = [Current1 | Args], 1430 args_count = Count + 1 1431 }, 1432 parse_reserved_directive_arg(Chars, Line, Col, Delta, Parser, Ctx1); 1433parse_reserved_directive_arg([C | Rest] = Chars, Line, Col, Delta, Parser, 1434 #reserved_directive_ctx{current = Current} = Ctx) -> 1435 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 1436 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1437 Current1 = case Current of 1438 undefined -> [C]; 1439 _ -> [C | Current] 1440 end, 1441 Ctx1 = Ctx#reserved_directive_ctx{ 1442 current = Current1 1443 }, 1444 parse_reserved_directive_arg(Rest, Line, Col1, Delta1, Parser1, Ctx1); 1445parse_reserved_directive_arg([] = Chars, Line, Col, Delta, 1446 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 1447 %% End of directive. 1448 queue_reserved_directive(Chars, Line, Col, Delta, Parser, Ctx); 1449parse_reserved_directive_arg([] = Chars, Line, Col, Delta, 1450 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1451 suspend_parsing(Chars, Line, Col, Delta, Parser, 1452 fun parse_reserved_directive_arg/6, Ctx). 1453 1454queue_reserved_directive(Chars, Line, Col, Delta, Parser, 1455 #reserved_directive_ctx{name = Name, current = Current, 1456 args = Args, args_count = Count, line = Dir_Line, col = Dir_Col}) -> 1457 {Args1, Count1} = case Current of 1458 undefined -> {Args, Count}; 1459 _ -> {[lists:reverse(Current) | Args], Count + 1} 1460 end, 1461 Token = #yamerl_reserved_directive{ 1462 name = Name, 1463 args = lists:reverse(Args1), 1464 args_count = Count1, 1465 line = Dir_Line, 1466 column = Dir_Col 1467 }, 1468 Error = #yamerl_parsing_error{ 1469 type = warning, 1470 name = reserved_directive, 1471 token = Token, 1472 line = Dir_Line, 1473 column = Dir_Col 1474 }, 1475 Parser1 = add_error(Parser, Error, 1476 "Reserved directive \"~s\" ignored", [Name]), 1477 Parser2 = queue_token(Parser1, Token), 1478 skip_directive_trailing_ws(Chars, Line, Col, Delta, Parser2). 1479 1480%% ------------------------------------------------------------------- 1481%% Block sequences. 1482%% ------------------------------------------------------------------- 1483 1484%% We found a new block sequence entry. 1485parse_block_entry(Chars, Line, Col, Delta, 1486 #yamerl_parser{ik_allowed = true} = Parser) 1487 when ?IN_BLOCK_CTX(Parser) -> 1488 queue_block_sequence_entry_token(Chars, Line, Col, Delta, Parser); 1489parse_block_entry(Chars, Line, Col, Delta, Parser) when ?IN_BLOCK_CTX(Parser) -> 1490 Error = #yamerl_parsing_error{ 1491 name = block_sequence_entry_not_allowed, 1492 line = Line, 1493 column = Col 1494 }, 1495 Parser1 = add_error(Parser, Error, 1496 "Block sequence entry not allowed here", []), 1497 return(Chars, Line, Col, Delta, Parser1); 1498parse_block_entry(Chars, Line, Col, Delta, Parser) when ?IN_FLOW_CTX(Parser) -> 1499 Error = #yamerl_parsing_error{ 1500 name = block_collection_in_flow_context, 1501 line = Line, 1502 column = Col 1503 }, 1504 Parser1 = add_error(Parser, Error, 1505 "Block collection not allowed inside flow collection", []), 1506 return(Chars, Line, Col, Delta, Parser1). 1507 1508queue_block_sequence_entry_token([_ | Rest], Line, Col, Delta, Parser) 1509 when ?IN_BLOCK_CTX(Parser) -> 1510 Parser1 = remove_impl_key_pos(Parser), 1511 Parser2 = ?ALLOW_IMPL_KEY(Parser1, true), 1512 Token = #yamerl_sequence_entry{ 1513 line = Line, 1514 column = Col 1515 }, 1516 Parser3 = queue_token(Parser2, Token), 1517 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1518 find_next_token(Rest, Line, Col1, Delta1, Parser3). 1519 1520%% ------------------------------------------------------------------- 1521%% Flow collections. 1522%% ------------------------------------------------------------------- 1523 1524parse_flow_collection_start([_ | Rest] = Chars, Line, Col, Delta, 1525 #yamerl_parser{cur_coll = Cur_Coll, parent_colls = Colls} = Parser, 1526 Kind) -> 1527 Parser1 = save_impl_key_pos(Chars, Line, Col, Delta, Parser), 1528 Parser2 = ?PUSH_FAKE_IMPL_KEY(Parser1), 1529 Parser3 = ?ALLOW_IMPL_KEY(Parser2, true), 1530 Token = #yamerl_collection_start{ 1531 style = flow, 1532 kind = Kind, 1533 line = Line, 1534 column = Col, 1535 tag = ?COLL_SCALAR_DEFAULT_TAG(Line, Col) 1536 }, 1537 Parser4 = queue_token(Parser3, Token), 1538 New_Coll = #fcoll{kind = Kind}, 1539 Parser5 = case Kind of 1540 sequence -> 1541 Parser4#yamerl_parser{ 1542 cur_coll = New_Coll, 1543 parent_colls = [Cur_Coll | Colls], 1544 pending_entry = true 1545 }; 1546 mapping -> 1547 Parser4#yamerl_parser{ 1548 cur_coll = New_Coll, 1549 parent_colls = [Cur_Coll | Colls], 1550 waiting_for_kvpair = true 1551 } 1552 end, 1553 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1554 find_next_token(Rest, Line, Col1, Delta1, Parser5). 1555 1556parse_flow_collection_end(Chars, Line, Col, Delta, 1557 #yamerl_parser{cur_coll = #fcoll{kind = single_mapping}} = Parser, 1558 Kind) -> 1559 Parser1 = finish_incomplete_flow_entries(Line, Col, Delta, Parser), 1560 parse_flow_collection_end(Chars, Line, Col, Delta, Parser1, Kind); 1561parse_flow_collection_end([_ | Rest], Line, Col, Delta, 1562 #yamerl_parser{ 1563 cur_coll = #fcoll{kind = Kind}, parent_colls = [Coll | Colls]} = Parser, 1564 Kind) -> 1565 Parser1 = finish_incomplete_flow_entries(Line, Col, Delta, Parser), 1566 Parser2 = remove_impl_key_pos(Parser1), 1567 Parser3 = ?POP_IMPL_KEY(Parser2), 1568 Parser4 = Parser3#yamerl_parser{ 1569 cur_coll = Coll, 1570 parent_colls = Colls, 1571 pending_entry = false, 1572 waiting_for_kvpair = false 1573 }, 1574 Parser5 = ?ALLOW_IMPL_KEY(Parser4, false), 1575 Token = #yamerl_collection_end{ 1576 style = flow, 1577 kind = Kind, 1578 line = Line, 1579 column = Col 1580 }, 1581 Parser6 = queue_token(Parser5, Token), 1582 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1583 find_next_token(Rest, Line, Col1, Delta1, Parser6); 1584parse_flow_collection_end(Chars, Line, Col, Delta, 1585 #yamerl_parser{cur_coll = #fcoll{kind = Expected}} = Parser, Kind) -> 1586 %% Closing a different-type collection. 1587 Error = #yamerl_parsing_error{ 1588 name = closing_non_matching_flow_collection_type, 1589 line = Line, 1590 column = Col 1591 }, 1592 Parser1 = add_error(Parser, Error, 1593 "A ~s closing character is used to close a ~s collection", 1594 [Kind, Expected]), 1595 parse_flow_collection_end(Chars, Line, Col, Delta, Parser1, Expected); 1596parse_flow_collection_end([_ | Rest], Line, Col, Delta, 1597 #yamerl_parser{cur_coll = #bcoll{}} = Parser, Kind) -> 1598 %% Closing a never-opened collection. 1599 Error = #yamerl_parsing_error{ 1600 name = closing_never_opened_flow_collection, 1601 line = Line, 1602 column = Col 1603 }, 1604 Parser1 = add_error(Parser, Error, 1605 "The ~s closing character doesn't match any opening character", 1606 [Kind]), 1607 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1608 find_next_token(Rest, Line, Col1, Delta1, Parser1). 1609 1610parse_flow_entry(Chars, Line, Col, Delta, 1611 #yamerl_parser{cur_coll = #fcoll{kind = single_mapping}} = Parser) -> 1612 Parser1 = finish_incomplete_flow_entries(Line, Col, Delta, Parser), 1613 parse_flow_entry(Chars, Line, Col, Delta, Parser1); 1614parse_flow_entry([_ | Rest], Line, Col, Delta, 1615 #yamerl_parser{cur_coll = #fcoll{kind = Kind}} = Parser) when 1616 (Kind == sequence andalso ?MISSING_ENTRY(Parser)) orelse 1617 (Kind == mapping andalso ?MISSING_KVPAIR(Parser)) -> 1618 %% In a flow collection, the "," entry indicator immediatly follows a 1619 %% collection-start or a previous entry indicator. 1620 Error = #yamerl_parsing_error{ 1621 name = flow_collection_entry_not_allowed, 1622 line = Line, 1623 column = Col 1624 }, 1625 Parser1 = add_error(Parser, Error, 1626 "Empty flow collection entry not allowed", []), 1627 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1628 find_next_token(Rest, Line, Col1, Delta1, Parser1); 1629parse_flow_entry([_ | Rest], Line, Col, Delta, 1630 #yamerl_parser{cur_coll = #fcoll{kind = Kind}} = Parser) -> 1631 Parser1 = finish_incomplete_flow_entries(Line, Col, Delta, Parser), 1632 Parser2 = remove_impl_key_pos(Parser1), 1633 Parser3 = ?ALLOW_IMPL_KEY(Parser2, true), 1634 Parser4 = case Kind of 1635 sequence -> 1636 Parser3#yamerl_parser{ 1637 pending_entry = true 1638 }; 1639 mapping -> 1640 Parser3#yamerl_parser{ 1641 waiting_for_kvpair = true 1642 } 1643 end, 1644 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1645 find_next_token(Rest, Line, Col1, Delta1, Parser4); 1646parse_flow_entry(Chars, Line, Col, Delta, Parser) when ?IN_BLOCK_CTX(Parser) -> 1647 Error = #yamerl_parsing_error{ 1648 name = flow_collection_entry_not_allowed, 1649 line = Line, 1650 column = Col 1651 }, 1652 Parser1 = add_error(Parser, Error, 1653 "Flow collection entry not allowed outside any flow collection", []), 1654 return(Chars, Line, Col, Delta, Parser1). 1655 1656%% ------------------------------------------------------------------- 1657%% Block or flow mapping key. 1658%% ------------------------------------------------------------------- 1659 1660parse_mapping_key(Chars, Line, Col, Delta, 1661 #yamerl_parser{ik_allowed = true} = Parser) 1662 when ?IN_BLOCK_CTX(Parser) -> 1663 %% A mapping key is allowed here. 1664 Parser1 = finish_incomplete_block_entries(Line, Col, Parser), 1665 queue_mapping_key_token(Chars, Line, Col, Delta, Parser1); 1666parse_mapping_key(Chars, Line, Col, Delta, Parser) 1667 when ?IN_FLOW_CTX(Parser) -> 1668 %% A mapping key is always allowed in flow context. 1669 queue_mapping_key_token(Chars, Line, Col, Delta, Parser); 1670parse_mapping_key(Chars, Line, Col, Delta, Parser) -> 1671 %% A mapping key is NOT allowed here. 1672 Error = #yamerl_parsing_error{ 1673 name = block_mapping_key_not_allowed, 1674 line = Line, 1675 column = Col 1676 }, 1677 Parser1 = add_error(Parser, Error, 1678 "Block mapping key not allowed here", []), 1679 return(Chars, Line, Col, Delta, Parser1). 1680 1681queue_mapping_key_token([_ | Rest], Line, Col, Delta, Parser) -> 1682 Parser1 = remove_impl_key_pos(Parser), 1683 Parser2 = ?ALLOW_IMPL_KEY(Parser1, ?IN_BLOCK_CTX(Parser1)), 1684 Token = #yamerl_mapping_key{ 1685 line = Line, 1686 column = Col 1687 }, 1688 Parser3 = queue_token(Parser2, Token), 1689 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1690 find_next_token(Rest, Line, Col1, Delta1, Parser3). 1691 1692%% ------------------------------------------------------------------- 1693%% Block or flow mapping value. 1694%% ------------------------------------------------------------------- 1695 1696%% We found a new block mapping value. We must check if an implicit key 1697%% is pending. 1698parse_mapping_value(Chars, Line, Col, Delta, 1699 #yamerl_parser{ik_stack = [#impl_key{possible = true, line = Impl_Line} | _], 1700 last_token_endline = Endline} = Parser) 1701 when ?IN_BLOCK_CTX(Parser) andalso 1702 Impl_Line < Endline andalso Endline == Line -> 1703 %% The key of this mapping is an implicit key spanning over several lines. 1704 %% This will raise a warning. 1705 Parser1 = queue_impl_key(Delta, Parser), 1706 queue_mapping_value_token(Chars, Line, Col, Delta, Parser1); 1707parse_mapping_value(Chars, Line, Col, Delta, 1708 #yamerl_parser{ik_stack = 1709 [#impl_key{possible = true, line = Impl_Line} = Impl_Key | Rest]} = Parser) 1710 when ?IN_BLOCK_CTX(Parser) andalso 1711 Impl_Line < Line -> 1712 %% This is not an implicit key. 1713 Impl_Key1 = Impl_Key#impl_key{possible = false}, 1714 Parser1 = Parser#yamerl_parser{ 1715 ik_stack = [Impl_Key1 | Rest] 1716 }, 1717 parse_mapping_value(Chars, Line, Col, Delta, Parser1); 1718parse_mapping_value(Chars, Line, Col, Delta, 1719 #yamerl_parser{ik_stack = [#impl_key{possible = true} | _]} = Parser) -> 1720 %% The key of this mapping is an implicit key. 1721 Parser1 = queue_impl_key(Delta, Parser), 1722 queue_mapping_value_token(Chars, Line, Col, Delta, Parser1); 1723 1724parse_mapping_value(Chars, Line, Col, Delta, 1725 #yamerl_parser{ik_allowed = true, 1726 cur_coll = #bcoll{kind = mapping, kidx = KIdx, vidx = VIdx}} = Parser) 1727 when ?IN_BLOCK_CTX(Parser) andalso KIdx > VIdx -> 1728 %% The key of this mapping is an explicit key, already queued. 1729 %% In block context, an implicit key may follow. 1730 Parser1 = ?ALLOW_IMPL_KEY(Parser, true), 1731 queue_mapping_value_token(Chars, Line, Col, Delta, Parser1); 1732parse_mapping_value(Chars, Line, Col, Delta, 1733 #yamerl_parser{ik_allowed = true, 1734 cur_coll = #bcoll{kind = mapping, kidx = KIdx, vidx = VIdx}} = Parser) 1735 when ?IN_BLOCK_CTX(Parser) andalso KIdx =< VIdx -> 1736 %% The key of this mapping is an empty node. We queue a mapping-key 1737 %% token followed; the empty scalar will be automatically queued. 1738 Token = #yamerl_mapping_key{ 1739 line = Line, 1740 column = Col 1741 }, 1742 Parser1 = queue_token(Parser, Token), 1743 %% In block context, an implicit key may follow. 1744 Parser2 = ?ALLOW_IMPL_KEY(Parser1, true), 1745 queue_mapping_value_token(Chars, Line, Col, Delta, Parser2); 1746parse_mapping_value(Chars, Line, Col, Delta, 1747 #yamerl_parser{ik_allowed = false} = Parser) when ?IN_BLOCK_CTX(Parser) -> 1748 Error = #yamerl_parsing_error{ 1749 name = block_mapping_value_not_allowed, 1750 line = Line, 1751 column = Col 1752 }, 1753 Parser1 = add_error(Parser, Error, 1754 "Block mapping value not allowed here", []), 1755 return(Chars, Line, Col, Delta, Parser1); 1756 1757parse_mapping_value(Chars, Line, Col, Delta, 1758 #yamerl_parser{cur_coll = 1759 #fcoll{kind = Kind, kidx = KIdx, vidx = VIdx}} = Parser) 1760 when ?IN_FLOW_CTX(Parser) andalso 1761 (Kind == mapping orelse Kind == single_mapping) andalso KIdx > VIdx -> 1762 %% The key of this mapping is an explicit key, already queued. 1763 %% In flow context, an implicit key may not follow. 1764 Parser1 = ?ALLOW_IMPL_KEY(Parser, false), 1765 queue_mapping_value_token(Chars, Line, Col, Delta, Parser1); 1766parse_mapping_value(Chars, Line, Col, Delta, 1767 #yamerl_parser{cur_coll = 1768 #fcoll{kind = Kind, kidx = KIdx, vidx = VIdx}} = Parser) 1769 when ?IN_FLOW_CTX(Parser) andalso 1770 (((Kind == mapping orelse Kind == single_mapping) andalso KIdx =< VIdx) orelse 1771 Kind == sequence) -> 1772 %% The key of this mapping is an empty node. 1773 Token = #yamerl_mapping_key{ 1774 line = Line, 1775 column = Col 1776 }, 1777 Parser1 = queue_token(Parser, Token), 1778 %% In flow context, an implicit key may not follow. 1779 Parser2 = ?ALLOW_IMPL_KEY(Parser1, false), 1780 queue_mapping_value_token(Chars, Line, Col, Delta, Parser2). 1781 1782queue_mapping_value_token(Chars, Line, Col, Delta, 1783 #yamerl_parser{last_token_endline = Endline, 1784 cur_coll = #bcoll{indent = Indent}} = Parser) 1785 when ?IN_BLOCK_CTX(Parser) andalso Line > Endline andalso Col > Indent -> 1786 Token = #yamerl_mapping_value{ 1787 line = Line, 1788 column = Col 1789 }, 1790 Error = #yamerl_parsing_error{ 1791 name = invalid_block_mapping_value_indentation, 1792 token = Token, 1793 line = Line, 1794 column = Col 1795 }, 1796 Parser1 = add_error(Parser, Error, 1797 "Block mapping value's indentation (column #~b) " 1798 "greater than expected (column #~b)", 1799 [Col, Indent]), 1800 queue_mapping_value_token2(Chars, Line, Col, Delta, Parser1); 1801queue_mapping_value_token(Chars, Line, Col, Delta, Parser) -> 1802 queue_mapping_value_token2(Chars, Line, Col, Delta, Parser). 1803 1804queue_mapping_value_token2([_ | Rest], Line, Col, Delta, Parser) -> 1805 Token = #yamerl_mapping_value{ 1806 line = Line, 1807 column = Col 1808 }, 1809 Parser1 = queue_token(Parser, Token), 1810 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1811 find_next_token(Rest, Line, Col1, Delta1, Parser1). 1812 1813finish_incomplete_block_entries(Line, Col, 1814 #yamerl_parser{cur_coll = 1815 #bcoll{kind = mapping, kidx = KIdx, vidx = VIdx}} = Parser) 1816 when KIdx > VIdx -> 1817 %% The last block mapping key has an empty node as value (and the 1818 %% ":" value indicator was never used). Queue a value token. The 1819 %% empty scalar will be automatically queued. 1820 Token = #yamerl_mapping_value{ 1821 line = Line, 1822 column = Col 1823 }, 1824 queue_token(Parser, Token); 1825finish_incomplete_block_entries(_, _, Parser) -> 1826 Parser. 1827 1828finish_incomplete_flow_entries(Line, Col, _, 1829 #yamerl_parser{cur_coll = #fcoll{kind = single_mapping}, 1830 parent_colls = [Coll | Colls]} = Parser) -> 1831 %% Close single key: value pair. 1832 Token = #yamerl_collection_end{ 1833 style = flow, 1834 kind = mapping, 1835 line = Line, 1836 column = Col 1837 }, 1838 Parser1 = queue_token(Parser, Token), 1839 Parser1#yamerl_parser{ 1840 cur_coll = Coll, 1841 parent_colls = Colls 1842 }; 1843finish_incomplete_flow_entries(Line, Col, Delta, 1844 #yamerl_parser{cur_coll = #fcoll{kind = mapping}, 1845 ik_stack = [#impl_key{possible = true} | _]} = Parser) -> 1846 %% Queue implicit key token. 1847 Parser1 = queue_impl_key(Delta, Parser), 1848 finish_incomplete_flow_entries(Line, Col, Delta, Parser1); 1849finish_incomplete_flow_entries(Line, Col, Delta, 1850 #yamerl_parser{ 1851 cur_coll = #fcoll{kind = Kind, kidx = KIdx, vidx = VIdx}} = Parser) 1852 when (Kind == mapping orelse Kind == single_mapping) andalso KIdx > VIdx -> 1853 %% In a flow mapping, the last entry was a key without the ":" value 1854 %% indicator. Queue the value token and the implicit empty node. If 1855 %% the key was an empty node (ie. only a tag node property), it'll 1856 %% be added automatically by queue_token/2. 1857 Token = #yamerl_mapping_value{ 1858 line = Line, 1859 column = Col 1860 }, 1861 Parser1 = queue_token(Parser, Token), 1862 finish_incomplete_flow_entries(Line, Col, Delta, Parser1); 1863finish_incomplete_flow_entries(_, _, _, Parser) -> 1864 Parser. 1865 1866%% ------------------------------------------------------------------- 1867%% Anchors and aliases. 1868%% ------------------------------------------------------------------- 1869 1870parse_anchor_or_alias([_ | Rest] = Chars, Line, Col, Delta, Parser, Type) -> 1871 Ctx = #anchor_ctx{ 1872 type = Type, 1873 line = Line, 1874 col = Col 1875 }, 1876 Parser1 = save_impl_key_pos(Chars, Line, Col, Delta, Parser), 1877 Parser2 = ?ALLOW_IMPL_KEY(Parser1, false), 1878 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1879 do_parse_anchor_or_alias(Rest, Line, Col1, Delta1, Parser2, Ctx). 1880 1881%% White spaces and flow indicators are forbidden inside anchor or alias 1882%% names. 1883do_parse_anchor_or_alias([C | _] = Chars, Line, Col, Delta, 1884 #yamerl_parser{doc_version = Version} = Parser, Ctx) when 1885 ?IS_NEWLINE(C) orelse ?IS_SPACE(C) orelse ?IS_FLOW_INDICATOR(C) orelse 1886 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 1887 queue_anchor_or_alias_token(Chars, Line, Col, Delta, Parser, Ctx); 1888 1889do_parse_anchor_or_alias([C | Rest] = Chars, Line, Col, Delta, Parser, Ctx) -> 1890 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 1891 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1892 Ctx1 = Ctx#anchor_ctx{ 1893 output = [C | Ctx#anchor_ctx.output] 1894 }, 1895 do_parse_anchor_or_alias(Rest, Line, Col1, Delta1, Parser1, Ctx1); 1896 1897do_parse_anchor_or_alias([] = Chars, Line, Col, Delta, 1898 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 1899 queue_anchor_or_alias_token(Chars, Line, Col, Delta, Parser, Ctx); 1900do_parse_anchor_or_alias([] = Chars, Line, Col, Delta, 1901 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1902 suspend_parsing(Chars, Line, Col, Delta, Parser, 1903 fun do_parse_anchor_or_alias/6, Ctx). 1904 1905queue_anchor_or_alias_token(Chars, Line, Col, Delta, Parser, 1906 #anchor_ctx{type = Type, line = Anc_Line, col = Anc_Col, output = Name}) -> 1907 Token = case Type of 1908 anchor -> 1909 #yamerl_anchor{ 1910 name = lists:reverse(Name), 1911 line = Anc_Line, 1912 column = Anc_Col 1913 }; 1914 alias -> 1915 #yamerl_alias{ 1916 name = lists:reverse(Name), 1917 line = Anc_Line, 1918 column = Anc_Col 1919 } 1920 end, 1921 Parser1 = queue_token(Parser, Token), 1922 find_next_token(Chars, Line, Col, Delta, Parser1). 1923 1924%% ------------------------------------------------------------------- 1925%% Tags. 1926%% ------------------------------------------------------------------- 1927 1928parse_tag([_ | Rest] = Chars, Line, Col, Delta, 1929 #yamerl_parser{last_tag = undefined} = Parser) -> 1930 Ctx = #tag_ctx{ 1931 line = Line, 1932 col = Col, 1933 prefix = "!", 1934 suffix = "" 1935 }, 1936 Parser1 = save_impl_key_pos(Chars, Line, Col, Delta, Parser), 1937 Parser2 = ?ALLOW_IMPL_KEY(Parser1, false), 1938 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1939 determine_tag_type(Rest, Line, Col1, Delta1, Parser2, Ctx); 1940parse_tag(Chars, Line, Col, Delta, #yamerl_parser{last_tag = Tag} = Parser) 1941 when Tag /= undefined -> 1942 Error = #yamerl_parsing_error{ 1943 name = multiple_tag_properties, 1944 line = Line, 1945 column = Col 1946 }, 1947 Parser1 = add_error(Parser, Error, 1948 "Multiple tag properties attached to one node: " 1949 "the last one will be used", []), 1950 Parser2 = Parser1#yamerl_parser{ 1951 last_tag = undefined 1952 }, 1953 parse_tag(Chars, Line, Col, Delta, Parser2). 1954 1955%% Determine token type: verbatim tag or tag shorthand. 1956determine_tag_type([$< | Rest], Line, Col, Delta, Parser, Ctx) -> 1957 %% Verbatim tag. 1958 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1959 Ctx1 = Ctx#tag_ctx{ 1960 prefix = undefined 1961 }, 1962 parse_verbatim_tag(Rest, Line, Col1, Delta1, Parser, Ctx1); 1963determine_tag_type([_ | _] = Chars, Line, Col, Delta, Parser, Ctx) -> 1964 parse_tag_shorthand(Chars, Line, Col, Delta, Parser, Ctx); 1965determine_tag_type([] = Chars, Line, Col, Delta, 1966 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 1967 expand_tag(Chars, Line, Col, Delta, Parser, Ctx); 1968determine_tag_type([] = Chars, Line, Col, Delta, 1969 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 1970 suspend_parsing(Chars, Line, Col, Delta, Parser, 1971 fun determine_tag_type/6, Ctx). 1972 1973%% 1974%% Verbatim tag 1975%% 1976 1977parse_verbatim_tag([$> | Rest], Line, Col, Delta, Parser, Ctx) -> 1978 %% End of the verbatim tag. 1979 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1980 expand_tag(Rest, Line, Col1, Delta1, Parser, Ctx); 1981parse_verbatim_tag([$! | Rest], Line, Col, Delta, Parser, 1982 #tag_ctx{suffix = ""} = Ctx) -> 1983 %% Local tag. 1984 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 1985 Ctx1 = Ctx#tag_ctx{ 1986 suffix = "!" 1987 }, 1988 parse_verbatim_tag(Rest, Line, Col1, Delta1, Parser, Ctx1); 1989parse_verbatim_tag([$! | Rest], Line, Col, Delta, Parser, 1990 #tag_ctx{suffix = Suffix, line = Tag_Line, col = Tag_Col} = Ctx) -> 1991 %% "!" forbidden in verbatim tag. 1992 Token = #yamerl_tag{ 1993 uri = lists:reverse(Suffix), 1994 line = Tag_Line, 1995 column = Tag_Col 1996 }, 1997 Error = #yamerl_parsing_error{ 1998 name = invalid_tag_handle, 1999 token = Token, 2000 line = Line, 2001 column = Col 2002 }, 2003 Parser1 = add_error(Parser, Error, 2004 "Invalid character in tag handle", []), 2005 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2006 parse_verbatim_tag(Rest, Line, Col1, Delta1, Parser1, Ctx); 2007parse_verbatim_tag([C | Rest], Line, Col, Delta, Parser, Ctx) 2008 when ?IS_URI_CHAR(C) -> 2009 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2010 Ctx1 = Ctx#tag_ctx{ 2011 suffix = [C | Ctx#tag_ctx.suffix] 2012 }, 2013 parse_verbatim_tag(Rest, Line, Col1, Delta1, Parser, Ctx1); 2014parse_verbatim_tag([_ | Rest], Line, Col, Delta, Parser, 2015 #tag_ctx{suffix = Suffix, line = Tag_Line, col = Tag_Col} = Ctx) -> 2016 %% Character not allowed in a URI. 2017 Token = #yamerl_tag{ 2018 uri = lists:reverse(Suffix), 2019 line = Tag_Line, 2020 column = Tag_Col 2021 }, 2022 Error = #yamerl_parsing_error{ 2023 name = invalid_tag_handle, 2024 token = Token, 2025 line = Line, 2026 column = Col 2027 }, 2028 Parser1 = add_error(Parser, Error, 2029 "Invalid character in tag handle", []), 2030 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2031 parse_verbatim_tag(Rest, Line, Col1, Delta1, Parser1, Ctx); 2032parse_verbatim_tag([] = Chars, Line, Col, Delta, 2033 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 2034 suspend_parsing(Chars, Line, Col, Delta, Parser, 2035 fun parse_verbatim_tag/6, Ctx); 2036parse_verbatim_tag([] = Chars, Line, Col, Delta, 2037 #yamerl_parser{raw_eos = true} = Parser, 2038 #tag_ctx{suffix = Suffix, line = Tag_Line, col = Tag_Col}) -> 2039 %% Unexpected end-of-stream 2040 Token = #yamerl_tag{ 2041 uri = lists:reverse(Suffix), 2042 line = Tag_Line, 2043 column = Tag_Col 2044 }, 2045 Error = #yamerl_parsing_error{ 2046 name = unexpected_eos, 2047 token = Token, 2048 line = Line, 2049 column = Col 2050 }, 2051 Parser1 = add_error(Parser, Error, 2052 "Unexpected end-of-stream while parsing tag handle", []), 2053 return(Chars, Line, Col, Delta, Parser1). 2054 2055%% 2056%% Tag shorthand. 2057%% 2058 2059%% Tag prefix. 2060parse_tag_shorthand([$! | Rest], Line, Col, Delta, Parser, 2061 #tag_ctx{prefix = "!", suffix = Suffix} = Ctx) -> 2062 %% Separator between the prefix and the suffix. 2063 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2064 Ctx1 = Ctx#tag_ctx{ 2065 prefix = "!" ++ lists:reverse(Suffix) ++ "!", 2066 suffix = "" 2067 }, 2068 parse_tag_shorthand(Rest, Line, Col1, Delta1, Parser, Ctx1); 2069parse_tag_shorthand([$! | Rest], Line, Col, Delta, Parser, 2070 #tag_ctx{prefix = Prefix, suffix = Suffix, 2071 line = Tag_Line, col = Tag_Col} = Ctx) -> 2072 %% "!" forbidden in tag. 2073 Token = #yamerl_tag{ 2074 uri = Prefix ++ lists:reverse(Suffix), 2075 line = Tag_Line, 2076 column = Tag_Col 2077 }, 2078 Error = #yamerl_parsing_error{ 2079 name = invalid_tag_handle, 2080 token = Token, 2081 line = Line, 2082 column = Col 2083 }, 2084 Parser1 = add_error(Parser, Error, 2085 "Invalid character in tag handle", []), 2086 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2087 parse_tag_shorthand(Rest, Line, Col1, Delta1, Parser1, Ctx); 2088 2089%% Tag suffix. 2090parse_tag_shorthand([C | _] = Chars, Line, Col, Delta, Parser, Ctx) 2091 when ?IS_FLOW_INDICATOR(C) -> 2092 %% The next character starts another token. 2093 expand_tag(Chars, Line, Col, Delta, Parser, Ctx); 2094parse_tag_shorthand([C | _] = Chars, Line, Col, Delta, 2095 #yamerl_parser{doc_version = Version} = Parser, Ctx) 2096 when ?IS_NEWLINE(C) orelse ?IS_SPACE(C) orelse 2097 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2098 expand_tag(Chars, Line, Col, Delta, Parser, Ctx); 2099 2100parse_tag_shorthand([C | Rest], Line, Col, Delta, Parser, 2101 #tag_ctx{suffix = Suffix} = Ctx) when ?IS_URI_CHAR(C) -> 2102 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2103 Ctx1 = Ctx#tag_ctx{ 2104 suffix = [C | Suffix] 2105 }, 2106 parse_tag_shorthand(Rest, Line, Col1, Delta1, Parser, Ctx1); 2107 2108parse_tag_shorthand([_ | Rest] = Chars, Line, Col, Delta, Parser, 2109 #tag_ctx{prefix = Prefix, suffix = Suffix, 2110 line = Tag_Line, col = Tag_Col} = Ctx) -> 2111 %% Character not allowed in a URI. 2112 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 2113 Token = #yamerl_tag{ 2114 uri = Prefix ++ lists:reverse(Suffix), 2115 line = Tag_Line, 2116 column = Tag_Col 2117 }, 2118 Error = #yamerl_parsing_error{ 2119 name = invalid_tag_handle, 2120 token = Token, 2121 line = Line, 2122 column = Col 2123 }, 2124 Parser2 = add_error(Parser1, Error, 2125 "Invalid character in tag handle", []), 2126 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2127 parse_tag_shorthand(Rest, Line, Col1, Delta1, Parser2, Ctx); 2128 2129parse_tag_shorthand([] = Chars, Line, Col, Delta, 2130 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 2131 expand_tag(Chars, Line, Col, Delta, Parser, Ctx); 2132parse_tag_shorthand([] = Chars, Line, Col, Delta, 2133 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 2134 suspend_parsing(Chars, Line, Col, Delta, Parser, 2135 fun parse_tag_shorthand/6, Ctx). 2136 2137%% Verbatim tag. 2138expand_tag(Chars, Line, Col, Delta, Parser, 2139 #tag_ctx{prefix = undefined, suffix = Suffix} = Ctx) -> 2140 Ctx1 = Ctx#tag_ctx{ 2141 suffix = lists:reverse(Suffix) 2142 }, 2143 queue_tag_token(Chars, Line, Col, Delta, Parser, Ctx1); 2144 2145%% Non-specific tag. 2146expand_tag(Chars, Line, Col, Delta, Parser, 2147 #tag_ctx{prefix = "!", suffix = ""} = Ctx) -> 2148 Ctx1 = Ctx#tag_ctx{ 2149 prefix = undefined, 2150 suffix = {non_specific, "!"} 2151 }, 2152 queue_tag_token(Chars, Line, Col, Delta, Parser, Ctx1); 2153 2154%% Tag shorthand. 2155expand_tag(Chars, Line, Col, Delta, Parser, 2156 #tag_ctx{line = Tag_Line, col = Tag_Col, 2157 prefix = Prefix, suffix = ""} = Ctx) -> 2158 Token = #yamerl_tag{ 2159 uri = Prefix, 2160 line = Tag_Line, 2161 column = Tag_Col 2162 }, 2163 Error1 = #yamerl_parsing_error{ 2164 name = invalid_tag_handle, 2165 token = Token, 2166 line = Line, 2167 column = Col 2168 }, 2169 Parser1 = add_error(Parser, Error1, 2170 "Tag suffix mandatory", []), 2171 expand_tag2(Chars, Line, Col, Delta, Parser1, Ctx); 2172expand_tag(Chars, Line, Col, Delta, Parser, Ctx) -> 2173 expand_tag2(Chars, Line, Col, Delta, Parser, Ctx). 2174 2175expand_tag2(Chars, Line, Col, Delta, #yamerl_parser{tags = Tags} = Parser, 2176 #tag_ctx{prefix = Prefix, suffix = Suffix, 2177 line = Tag_Line, col = Tag_Col} = Ctx) -> 2178 Suffix1 = lists:reverse(Suffix), 2179 {Parser1, URI} = try 2180 case dict:is_key(Prefix, Tags) of 2181 true -> {Parser, dict:fetch(Prefix, Tags) ++ Suffix1}; 2182 false -> {Parser, dict:fetch({default, Prefix}, Tags) ++ Suffix1} 2183 end 2184 catch 2185 _:_ -> 2186 Bad_URI = Prefix ++ Suffix1, 2187 Token = #yamerl_tag{ 2188 uri = Bad_URI, 2189 line = Tag_Line, 2190 column = Tag_Col 2191 }, 2192 Error = #yamerl_parsing_error{ 2193 name = undeclared_tag_handle, 2194 token = Token, 2195 line = Line, 2196 column = Col 2197 }, 2198 Parser0 = add_error(Parser, Error, 2199 "Tag handle \"~s\" never declared", [Prefix]), 2200 {Parser0, Bad_URI} 2201 end, 2202 Ctx1 = Ctx#tag_ctx{ 2203 prefix = undefined, 2204 suffix = URI 2205 }, 2206 queue_tag_token(Chars, Line, Col, Delta, Parser1, Ctx1). 2207 2208queue_tag_token(Chars, Line, Col, Delta, Parser, 2209 #tag_ctx{suffix = "!", line = Tag_Line, col = Tag_Col} = Ctx) -> 2210 Token = #yamerl_tag{ 2211 uri = "!", 2212 line = Tag_Line, 2213 column = Tag_Col 2214 }, 2215 Error = #yamerl_parsing_error{ 2216 name = invalid_tag_handle, 2217 token = Token, 2218 line = Line, 2219 column = Col 2220 }, 2221 Parser1 = add_error(Parser, Error, 2222 "Local tag suffix mandatory", []), 2223 queue_tag_token2(Chars, Line, Col, Delta, Parser1, Ctx); 2224queue_tag_token(Chars, Line, Col, Delta, Parser, Ctx) -> 2225 queue_tag_token2(Chars, Line, Col, Delta, Parser, Ctx). 2226 2227queue_tag_token2(Chars, Line, Col, Delta, Parser, 2228 #tag_ctx{suffix = URI, line = Tag_Line, col = Tag_Col}) -> 2229 Token = #yamerl_tag{ 2230 uri = URI, 2231 line = Tag_Line, 2232 column = Tag_Col 2233 }, 2234 Parser1 = is_uri_valid(Parser, Token), 2235 Parser2 = queue_token(Parser1, Token), 2236 find_next_token(Chars, Line, Col, Delta, Parser2). 2237 2238%% ------------------------------------------------------------------- 2239%% Block scalars. 2240%% ------------------------------------------------------------------- 2241 2242parse_block_scalar([_ | Rest], Line, Col, Delta, Parser, 2243 Style) -> 2244 Ctx = #block_scalar_hd_ctx{ 2245 style = Style, 2246 line = Line, 2247 col = Col 2248 }, 2249 Parser1 = remove_impl_key_pos(Parser), 2250 Parser2 = ?ALLOW_IMPL_KEY(Parser1, true), 2251 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2252 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser2, Ctx). 2253 2254%% 2255%% Header parsing. 2256%% 2257 2258%% Newline, header termination. 2259do_parse_block_scalar_header([$\r] = Chars, Line, Col, Delta, 2260 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 2261 %% Can't be sure it's a newline. It may be followed by a LF. 2262 suspend_parsing(Chars, Line, Col, Delta, Parser, 2263 fun do_parse_block_scalar_header/6, Ctx); 2264 2265do_parse_block_scalar_header([C | _] = Chars, Line, _, Delta, 2266 #yamerl_parser{doc_version = Version} = Parser, Ctx) 2267 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2268 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2269 prepare_parse_block_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx); 2270 2271%% Comments. 2272do_parse_block_scalar_header([$# | Rest], Line, Col, Delta, Parser, Ctx) -> 2273 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2274 Ctx1 = Ctx#block_scalar_hd_ctx{ 2275 in_comment = true 2276 }, 2277 Ctx2 = final_indent(Parser, Ctx1), 2278 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser, Ctx2); 2279do_parse_block_scalar_header([_ | Rest], Line, Col, Delta, Parser, 2280 #block_scalar_hd_ctx{in_comment = true} = Ctx) -> 2281 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2282 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser, Ctx); 2283 2284%% Chomping indicator. 2285do_parse_block_scalar_header([C | Rest], Line, Col, Delta, Parser, 2286 #block_scalar_hd_ctx{chomp = undefined} = Ctx) 2287 when C == $- orelse C == $+ -> 2288 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2289 Chomp = case C of 2290 $- -> strip; 2291 $+ -> keep 2292 end, 2293 Ctx1 = Ctx#block_scalar_hd_ctx{ 2294 chomp = Chomp 2295 }, 2296 Ctx2 = final_indent(Parser, Ctx1), 2297 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser, Ctx2); 2298do_parse_block_scalar_header([C | Rest], Line, Col, Delta, Parser, 2299 #block_scalar_hd_ctx{style = Style, line = Sc_Line, col = Sc_Col} = Ctx) 2300 when C == $- orelse C == $+ -> 2301 Token = #yamerl_scalar{ 2302 style = block, 2303 substyle = Style, 2304 text = "", 2305 line = Sc_Line, 2306 column = Sc_Col, 2307 tag = ?BLOCK_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 2308 }, 2309 Error = #yamerl_parsing_error{ 2310 name = multiple_chomping_indicators, 2311 type = warning, 2312 token = Token, 2313 line = Line, 2314 column = Col 2315 }, 2316 Parser1 = add_error(Parser, Error, 2317 "Multiple chomping indicators specified: the last one will be used", 2318 []), 2319 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2320 Chomp = case C of 2321 $- -> strip; 2322 $+ -> keep 2323 end, 2324 Ctx1 = Ctx#block_scalar_hd_ctx{ 2325 chomp = Chomp 2326 }, 2327 Ctx2 = final_indent(Parser1, Ctx1), 2328 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser1, Ctx2); 2329 2330%% Explicit indentation indicator. 2331do_parse_block_scalar_header([C | Rest], Line, Col, Delta, Parser, 2332 #block_scalar_hd_ctx{indent = undefined} = Ctx) 2333 when C >= $1 andalso C =< $9 -> 2334 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2335 Ctx1 = Ctx#block_scalar_hd_ctx{ 2336 indent = {tmp, C - $0} 2337 }, 2338 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser, Ctx1); 2339do_parse_block_scalar_header([C | Rest], Line, Col, Delta, Parser, 2340 #block_scalar_hd_ctx{indent = {tmp, Indent}} = Ctx) 2341 when C >= $1 andalso C =< $9 -> 2342 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2343 Ctx1 = Ctx#block_scalar_hd_ctx{ 2344 indent = {tmp, Indent * 10 + C - $0} 2345 }, 2346 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser, Ctx1); 2347do_parse_block_scalar_header([C | Rest], Line, Col, Delta, Parser, 2348 #block_scalar_hd_ctx{style = Style, line = Sc_Line, col = Sc_Col} = Ctx) 2349 when C >= $1 andalso C =< $9 -> 2350 Token = #yamerl_scalar{ 2351 style = block, 2352 substyle = Style, 2353 text = "", 2354 line = Sc_Line, 2355 column = Sc_Col, 2356 tag = ?BLOCK_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 2357 }, 2358 Error = #yamerl_parsing_error{ 2359 name = multiple_indent_indicators, 2360 type = warning, 2361 token = Token, 2362 line = Line, 2363 column = Col 2364 }, 2365 Parser1 = add_error(Parser, Error, 2366 "Multiple indent indicators specified: the last one will be used", 2367 []), 2368 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2369 Ctx1 = Ctx#block_scalar_hd_ctx{ 2370 indent = {tmp, C - $0} 2371 }, 2372 Ctx2 = final_indent(Parser1, Ctx1), 2373 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser1, Ctx2); 2374 2375%% Trailing spaces. 2376do_parse_block_scalar_header([C | Rest], Line, Col, Delta, Parser, Ctx) 2377 when ?IS_SPACE(C) -> 2378 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2379 Ctx1 = final_indent(Parser, Ctx), 2380 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser, Ctx1); 2381 2382%% Invalid characters. 2383do_parse_block_scalar_header([_ | Rest] = Chars, Line, Col, Delta, Parser, 2384 #block_scalar_hd_ctx{style = Style, line = Sc_Line, col = Sc_Col} = Ctx) -> 2385 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 2386 Token = #yamerl_scalar{ 2387 style = block, 2388 substyle = Style, 2389 text = "", 2390 line = Sc_Line, 2391 column = Sc_Col, 2392 tag = ?BLOCK_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 2393 }, 2394 Error = #yamerl_parsing_error{ 2395 name = invalid_block_scalar_header, 2396 token = Token, 2397 line = Line, 2398 column = Col 2399 }, 2400 Parser2 = add_error(Parser1, Error, 2401 "Invalid character in block scalar header", []), 2402 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2403 Ctx1 = final_indent(Parser2, Ctx), 2404 do_parse_block_scalar_header(Rest, Line, Col1, Delta1, Parser2, Ctx1); 2405 2406do_parse_block_scalar_header([] = Chars, Line, Col, Delta, 2407 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 2408 %% End-of-stream reached while parsing block scalar header. Assume 2409 %% an empty string. 2410 prepare_parse_block_scalar(Chars, Line, Col, Delta, Parser, Ctx); 2411do_parse_block_scalar_header([] = Chars, Line, Col, Delta, Parser, Ctx) -> 2412 suspend_parsing(Chars, Line, Col, Delta, Parser, 2413 fun do_parse_block_scalar_header/6, Ctx). 2414 2415final_indent( 2416 #yamerl_parser{cur_coll = #bcoll{kind = root}}, 2417 #block_scalar_hd_ctx{indent = {tmp, Add_Indent}} = Ctx) -> 2418 Ctx#block_scalar_hd_ctx{indent = 1 + Add_Indent}; 2419final_indent( 2420 #yamerl_parser{cur_coll = #bcoll{indent = Indent}}, 2421 #block_scalar_hd_ctx{indent = {tmp, Add_Indent}} = Ctx) -> 2422 Ctx#block_scalar_hd_ctx{indent = Indent + Add_Indent}; 2423final_indent(_, Ctx) -> 2424 Ctx. 2425 2426prepare_parse_block_scalar(Chars, Line, Col, Delta, Parser, Ctx) -> 2427 Ctx1 = final_indent(Parser, Ctx), 2428 Chomp = case Ctx1#block_scalar_hd_ctx.chomp of 2429 undefined -> clip; 2430 C -> C 2431 end, 2432 Next_Ctx = #block_scalar_ctx{ 2433 style = Ctx1#block_scalar_hd_ctx.style, 2434 line = Ctx1#block_scalar_hd_ctx.line, 2435 col = Ctx1#block_scalar_hd_ctx.col, 2436 endline = Line, 2437 endcol = Col, 2438 chomp = Chomp, 2439 indent = Ctx1#block_scalar_hd_ctx.indent, 2440 newline = Ctx1#block_scalar_hd_ctx.indent /= undefined 2441 }, 2442 do_parse_block_scalar(Chars, Line, Col, Delta, Parser, Next_Ctx). 2443 2444%% 2445%% Newlines. 2446%% 2447 2448%% Can't be sure it's a newline. It may be followed by a LF. 2449do_parse_block_scalar([$\r] = Chars, Line, Col, Delta, 2450 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 2451 suspend_parsing(Chars, Line, Col, Delta, Parser, 2452 fun do_parse_block_scalar/6, Ctx); 2453 2454%% This is an empty line just after the header. 2455do_parse_block_scalar([C | _] = Chars, Line, _, Delta, 2456 #yamerl_parser{doc_version = Version} = Parser, 2457 #block_scalar_ctx{newline = false, spaces = Spaces, output = ""} = Ctx) 2458 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2459 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2460 Ctx1 = Ctx#block_scalar_ctx{ 2461 newline = true, 2462 spaces = [$\n | Spaces] 2463 }, 2464 do_parse_block_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 2465 2466%% Literal style: no line folding. 2467do_parse_block_scalar([C | _] = Chars, Line, _, Delta, 2468 #yamerl_parser{doc_version = Version} = Parser, 2469 #block_scalar_ctx{spaces = Spaces, style = literal} = Ctx) 2470 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2471 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2472 Ctx1 = Ctx#block_scalar_ctx{ 2473 newline = true, 2474 spaces = [$\n | Spaces] 2475 }, 2476 do_parse_block_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 2477 2478%% Folded style: a newline at the end of a normal-indented line. 2479do_parse_block_scalar([C | _] = Chars, Line, _, Delta, 2480 #yamerl_parser{doc_version = Version} = Parser, 2481 #block_scalar_ctx{spaces = Spaces, newline = false, 2482 more_indent = false} = Ctx) 2483 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2484 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2485 Ctx1 = Ctx#block_scalar_ctx{ 2486 newline = true, 2487 spaces = [$\s | Spaces] 2488 }, 2489 do_parse_block_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 2490 2491%% Folded style: an empty line after a normal-indented line. 2492do_parse_block_scalar([C | _] = Chars, Line, _, Delta, 2493 #yamerl_parser{doc_version = Version} = Parser, 2494 #block_scalar_ctx{spaces = Spaces, newline = true, 2495 more_indent = false} = Ctx) 2496 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2497 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2498 Spaces1 = case Spaces of 2499 [$\s | S] -> S; 2500 _ -> Spaces 2501 end, 2502 Ctx1 = Ctx#block_scalar_ctx{ 2503 newline = true, 2504 spaces = [$\n | Spaces1] 2505 }, 2506 do_parse_block_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 2507 2508%% Folded style: a newline in a more-indented paragraph. 2509do_parse_block_scalar([C | _] = Chars, Line, _, Delta, 2510 #yamerl_parser{doc_version = Version} = Parser, 2511 #block_scalar_ctx{spaces = Spaces, more_indent = true} = Ctx) 2512 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2513 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2514 Ctx1 = Ctx#block_scalar_ctx{ 2515 newline = true, 2516 spaces = [$\n | Spaces] 2517 }, 2518 do_parse_block_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 2519 2520%% 2521%% Indentation. 2522%% 2523 2524%% First non-space character: set indentation. 2525do_parse_block_scalar([C | _] = Chars, Line, Col, Delta, Parser, 2526 #block_scalar_ctx{indent = undefined, longest_empty = Longest} = Ctx) 2527 when C /= $\s andalso Longest < Col -> 2528 Ctx1 = Ctx#block_scalar_ctx{ 2529 indent = Col, 2530 newline = true 2531 }, 2532 do_parse_block_scalar(Chars, Line, Col, Delta, Parser, Ctx1); 2533do_parse_block_scalar([C | _] = Chars, Line, Col, Delta, Parser, 2534 #block_scalar_ctx{indent = undefined, longest_empty = Longest, 2535 style = Style, line = Sc_Line, col = Sc_Col} = Ctx) 2536 when C /= $\s andalso Longest >= Col -> 2537 Token = #yamerl_scalar{ 2538 style = block, 2539 substyle = Style, 2540 line = Sc_Line, 2541 column = Sc_Col, 2542 tag = ?BLOCK_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 2543 }, 2544 Error = #yamerl_parsing_error{ 2545 type = warning, 2546 name = leading_empty_lines_too_long, 2547 token = Token, 2548 line = Line, 2549 column = Col 2550 }, 2551 Parser1 = add_error(Parser, Error, 2552 "A leading all-space line has too many spaces (~b) " 2553 "compared to detected indentation (~b)", [Longest, Col - 1]), 2554 Ctx1 = Ctx#block_scalar_ctx{ 2555 longest_empty = 0 2556 }, 2557 do_parse_block_scalar(Chars, Line, Col, Delta, Parser1, Ctx1); 2558do_parse_block_scalar([$\s | Rest], Line, Col, Delta, Parser, 2559 #block_scalar_ctx{indent = undefined, longest_empty = Longest} = Ctx) -> 2560 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2561 Ctx1 = if 2562 Col > Longest -> Ctx#block_scalar_ctx{longest_empty = Col}; 2563 true -> Ctx 2564 end, 2565 do_parse_block_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 2566 2567%% Skip indentation spaces. 2568do_parse_block_scalar([$\s | Rest], Line, Col, Delta, Parser, 2569 #block_scalar_ctx{indent = Indent, newline = true} = Ctx) 2570 when Indent == undefined orelse Col < Indent -> 2571 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2572 do_parse_block_scalar(Rest, Line, Col1, Delta1, Parser, Ctx); 2573 2574%% The next line is less indented than the block scalar: end it. 2575do_parse_block_scalar([C | _] = Chars, Line, Col, Delta, 2576 #yamerl_parser{cur_coll = #bcoll{indent = Indent}} = Parser, Ctx) 2577 when C /= $\s andalso Col =< Indent -> 2578 queue_block_scalar_token(Chars, Line, Col, Delta, Parser, Ctx); 2579 2580%% The next line is less indented than the block scalar, but more than 2581%% the parent node. However, it's a comment, so we end the scalar. 2582do_parse_block_scalar([$# | _] = Chars, Line, Col, Delta, Parser, 2583 #block_scalar_ctx{indent = Indent} = Ctx) 2584 when Col < Indent -> 2585 queue_block_scalar_token(Chars, Line, Col, Delta, Parser, Ctx); 2586 2587%% The next line is less indented than the block scalar, but more than 2588%% the parent node: it's an error. 2589do_parse_block_scalar([C | _] = Chars, Line, Col, Delta, Parser, 2590 #block_scalar_ctx{indent = Indent, style = Style, 2591 line = Token_Line, col = Token_Col, output = Output}) 2592 when C /= $\s andalso Col < Indent -> 2593 Token = #yamerl_scalar{ 2594 style = block, 2595 substyle = Style, 2596 text = lists:reverse(Output), 2597 line = Token_Line, 2598 column = Token_Col, 2599 tag = ?BLOCK_SCALAR_DEFAULT_TAG(Token_Line, Token_Col) 2600 }, 2601 Error = #yamerl_parsing_error{ 2602 name = invalid_block_scalar_indentation, 2603 token = Token, 2604 line = Line, 2605 column = Col 2606 }, 2607 Parser1 = add_error(Parser, Error, 2608 "Invalid block scalar indentation", []), 2609 return(Chars, Line, Col, Delta, Parser1); 2610 2611%% The next line has a directives end or document end marker: end the 2612%% scalar. 2613do_parse_block_scalar([C | _] = Chars, Line, 1 = Col, Delta, 2614 #yamerl_parser{chars_len = Len, raw_eos = false} = Parser, 2615 Ctx) when (C == $- orelse C == $.) andalso (Len - Delta) < 4 -> 2616 %% We don't have enough data to determine if it's the end of the 2617 %% plain scalar. 2618 suspend_parsing(Chars, Line, Col, Delta, Parser, 2619 fun do_parse_block_scalar/6, Ctx); 2620do_parse_block_scalar([$-, $-, $-, C | _] = Chars, Line, 1 = Col, Delta, 2621 #yamerl_parser{doc_version = Version} = Parser, Ctx) 2622 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 2623 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2624 queue_block_scalar_token(Chars, Line, Col, Delta, Parser, Ctx); 2625do_parse_block_scalar([$., $., $., C | _] = Chars, Line, 1 = Col, Delta, 2626 #yamerl_parser{doc_version = Version} = Parser, Ctx) 2627 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 2628 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2629 queue_block_scalar_token(Chars, Line, Col, Delta, Parser, Ctx); 2630do_parse_block_scalar([$-, $-, $-] = Chars, Line, 1 = Col, Delta, 2631 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 2632 queue_block_scalar_token(Chars, Line, Col, Delta, Parser, Ctx); 2633do_parse_block_scalar([$., $., $.] = Chars, Line, 1 = Col, Delta, 2634 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 2635 queue_block_scalar_token(Chars, Line, Col, Delta, Parser, Ctx); 2636 2637%% 2638%% Content. 2639%% 2640 2641%% Literal style: everything after the indentation spaces is kept. 2642do_parse_block_scalar([C | Rest] = Chars, Line, Col, Delta, Parser, 2643 #block_scalar_ctx{style = literal, spaces = Spaces} = Ctx) -> 2644 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 2645 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2646 Output1 = case Spaces of 2647 "" -> [C | Ctx#block_scalar_ctx.output]; 2648 _ -> [C | Spaces ++ Ctx#block_scalar_ctx.output] 2649 end, 2650 Ctx1 = Ctx#block_scalar_ctx{ 2651 spaces = "", 2652 newline = false, 2653 output = Output1, 2654 endline = Line, 2655 endcol = Col1 2656 }, 2657 do_parse_block_scalar(Rest, Line, Col1, Delta1, Parser1, Ctx1); 2658 2659%% Folded style: a normal-indented line. 2660do_parse_block_scalar([C | _] = Chars, Line, Col, Delta, Parser, 2661 #block_scalar_ctx{style = folded, more_indent = true, indent = Indent} = Ctx) 2662 when not ?IS_SPACE(C) andalso Col == Indent -> 2663 %% This line uses the default indentation: end the more indented 2664 %% paragraph. 2665 Ctx1 = Ctx#block_scalar_ctx{ 2666 more_indent = false 2667 }, 2668 do_parse_block_scalar(Chars, Line, Col, Delta, Parser, Ctx1); 2669do_parse_block_scalar([C | Rest], Line, Col, Delta, Parser, 2670 #block_scalar_ctx{style = folded, newline = Newline, spaces = Spaces, 2671 output = Output} = Ctx) 2672 when not ?IS_SPACE(C) orelse 2673 (?IS_SPACE(C) andalso (not Newline orelse Output == "")) -> 2674 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2675 Output1 = case Spaces of 2676 "" -> [C | Ctx#block_scalar_ctx.output]; 2677 _ -> [C | Spaces ++ Ctx#block_scalar_ctx.output] 2678 end, 2679 Ctx1 = Ctx#block_scalar_ctx{ 2680 spaces = "", 2681 newline = false, 2682 output = Output1, 2683 endline = Line, 2684 endcol = Col1 2685 }, 2686 do_parse_block_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 2687 2688%% Folded style: a more-indented line. 2689do_parse_block_scalar([C | Rest], Line, Col, Delta, Parser, 2690 #block_scalar_ctx{style = folded, newline = true, spaces = Spaces, 2691 more_indent = More_Indented} = Ctx) 2692 when ?IS_SPACE(C) -> 2693 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2694 Spaces1 = case Spaces of 2695 [$\s | S] -> [$\n | S]; 2696 _ when More_Indented -> Spaces; 2697 _ -> [$\n | Spaces] 2698 end, 2699 Ctx1 = Ctx#block_scalar_ctx{ 2700 spaces = "", 2701 newline = false, 2702 more_indent = true, 2703 output = [C | Spaces1 ++ Ctx#block_scalar_ctx.output], 2704 endline = Line, 2705 endcol = Col1 2706 }, 2707 do_parse_block_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 2708 2709do_parse_block_scalar([] = Chars, Line, Col, Delta, 2710 #yamerl_parser{raw_eos = true} = Parser, Ctx) -> 2711 %% End-of-stream reached. 2712 queue_block_scalar_token(Chars, Line, Col, Delta, Parser, Ctx); 2713do_parse_block_scalar([] = Chars, Line, Col, Delta, Parser, Ctx) -> 2714 suspend_parsing(Chars, Line, Col, Delta, Parser, 2715 fun do_parse_block_scalar/6, Ctx). 2716 2717queue_block_scalar_token(Chars, Line, Col, Delta, Parser, 2718 #block_scalar_ctx{style = Style, output = Output, spaces = Spaces, 2719 chomp = Chomp, newline = Newline, line = Sc_Line, col = Sc_Col, 2720 endline = Endline, endcol = Endcol}) -> 2721 {Text, Endline1, Endcol1} = case Chomp of 2722 strip -> {Output, Endline, Endcol}; 2723 clip when Output == "" -> {Output, Endline, Endcol}; 2724 clip when Spaces == "" -> {Output, Endline, Endcol}; 2725 clip -> {[$\n | Output], Endline + 1, 1}; 2726 keep -> {Spaces ++ Output, Line, 1} 2727 end, 2728 Token = #yamerl_scalar{ 2729 style = block, 2730 substyle = Style, 2731 text = lists:reverse(Text), 2732 line = Sc_Line, 2733 column = Sc_Col, 2734 tag = ?BLOCK_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 2735 }, 2736 Parser1 = queue_token(Parser, Token), 2737 Parser2 = Parser1#yamerl_parser{ 2738 endpos_set_by_token = true, 2739 last_token_endline = Endline1, 2740 last_token_endcol = Endcol1, 2741 missed_nl = Newline 2742 }, 2743 find_next_token(Chars, Line, Col, Delta, Parser2). 2744 2745%% ------------------------------------------------------------------- 2746%% Flow scalars. 2747%% ------------------------------------------------------------------- 2748 2749parse_flow_scalar([C | Rest] = Chars, Line, Col, Delta, Parser, Style) -> 2750 %% We start a flow scalar parsing: initialize the context. 2751 Ctx = #flow_scalar_ctx{ 2752 style = Style, 2753 line = Line, 2754 col = Col 2755 }, 2756 Parser1 = save_impl_key_pos(Chars, Line, Col, Delta, Parser), 2757 Parser2 = ?ALLOW_IMPL_KEY(Parser1, false), 2758 {Rest1, {Col1, Delta1}} = case C of 2759 $' -> {Rest, ?NEXT_COL(Col, Delta, 1)}; 2760 $" -> {Rest, ?NEXT_COL(Col, Delta, 1)}; 2761 _ -> {Chars, {Col, Delta}} 2762 end, 2763 do_parse_flow_scalar(Rest1, Line, Col1, Delta1, Parser2, Ctx). 2764 2765do_parse_flow_scalar([$\r] = Chars, Line, Col, Delta, 2766 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 2767 %% Can't be sure it's a newline. It may be followed by a LF. 2768 suspend_parsing(Chars, Line, Col, Delta, Parser, 2769 fun do_parse_flow_scalar/6, Ctx); 2770 2771%% 2772%% Leading white spaces (plain scalar). 2773%% 2774 2775do_parse_flow_scalar([C | Rest], Line, Col, Delta, Parser, 2776 #flow_scalar_ctx{style = plain, output = "", surrogate = undefined} = Ctx) 2777 when ?IS_SPACE(C) -> 2778 %% Skip leading white spaces in a plain scalar. We must update the 2779 %% position of beginning of the scalar and thus the implicit key. 2780 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2781 Parser1 = save_impl_key_pos(Rest, Line, Col1, Delta1, Parser), 2782 Ctx1 = Ctx#flow_scalar_ctx{ 2783 line = Line, 2784 col = Col1 2785 }, 2786 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser1, Ctx1); 2787 2788%% 2789%% Escaped characters [62]. 2790%% Only supported by double-quoted strings. 2791%% 2792 2793%% The next character is escaped. 2794do_parse_flow_scalar([$\\ | Rest], Line, Col, Delta, Parser, 2795 #flow_scalar_ctx{style = double_quoted, 2796 spaces = Spaces} = Ctx) -> 2797 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2798 Output1 = case Spaces of 2799 "" -> Ctx#flow_scalar_ctx.output; 2800 _ -> Spaces ++ Ctx#flow_scalar_ctx.output 2801 end, 2802 Ctx1 = Ctx#flow_scalar_ctx{ 2803 spaces = "", 2804 newline = false, 2805 output = Output1 2806 }, 2807 do_parse_flow_scalar_escaped(Rest, Line, Col1, Delta1, Parser, Ctx1); 2808 2809%% Invalid surrogate pair. 2810do_parse_flow_scalar([_ | _] = Chars, Line, Col, Delta, Parser, 2811 #flow_scalar_ctx{surrogate = High, style = Style, 2812 line = Sc_Line, col = Sc_Col, output = Output} = Ctx) 2813 when High /= undefined -> 2814 %% The next character isn't the expected low surrogate. 2815 Token = #yamerl_scalar{ 2816 style = flow, 2817 substyle = Style, 2818 text = lists:reverse(Output), 2819 line = Sc_Line, 2820 column = Sc_Col, 2821 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 2822 }, 2823 Error = #yamerl_parsing_error{ 2824 name = invalid_surrogate_pair, 2825 token = Token, 2826 line = Line, 2827 column = Col 2828 }, 2829 Parser1 = add_error(Parser, Error, 2830 "Invalid UTF-16 surrogate pair", []), 2831 Ctx1 = Ctx#flow_scalar_ctx{ 2832 surrogate = undefined 2833 }, 2834 do_parse_flow_scalar(Chars, Line, Col, Delta, Parser1, Ctx1); 2835 2836%% In a single-quoted string, a single quote is escaped by doubling 2837%% it. 2838do_parse_flow_scalar([$'] = Chars, Line, Col, Delta, 2839 #yamerl_parser{raw_eos = false} = Parser, 2840 #flow_scalar_ctx{style = single_quoted} = Ctx) -> 2841 %% Can't be sure it's an escaped single quote. 2842 suspend_parsing(Chars, Line, Col, Delta, Parser, 2843 fun do_parse_flow_scalar/6, Ctx); 2844do_parse_flow_scalar([$', $' | Rest], Line, Col, Delta, Parser, 2845 #flow_scalar_ctx{style = single_quoted, spaces = Spaces} = Ctx) -> 2846 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 2), 2847 Output1 = case Spaces of 2848 "" -> Ctx#flow_scalar_ctx.output; 2849 _ -> Spaces ++ Ctx#flow_scalar_ctx.output 2850 end, 2851 Ctx1 = Ctx#flow_scalar_ctx{ 2852 spaces = "", 2853 newline = false, 2854 output = [$' | Output1] 2855 }, 2856 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 2857 2858%% 2859%% Line folding. 2860%% Leading and trailing white spaces are dropped, except when the 2861%% newline character is escaped, a previous white spaces is escaped or 2862%% at the beginning of the first line. 2863%% 2864 2865do_parse_flow_scalar([C | _] = Chars, Line, _, Delta, 2866 #yamerl_parser{doc_version = Version} = Parser, 2867 #flow_scalar_ctx{newline = false} = Ctx) 2868 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2869 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2870 Ctx1 = Ctx#flow_scalar_ctx{ 2871 spaces = " ", 2872 newline = true 2873 }, 2874 do_parse_flow_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 2875 2876do_parse_flow_scalar([C | _] = Chars, Line, _, Delta, 2877 #yamerl_parser{doc_version = Version} = Parser, 2878 #flow_scalar_ctx{newline = true, spaces = Spaces} = Ctx) 2879 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2880 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 2881 Spaces1 = case Spaces of 2882 [$\s | S] -> S; 2883 _ -> Spaces 2884 end, 2885 Ctx1 = Ctx#flow_scalar_ctx{ 2886 spaces = [$\n | Spaces1] 2887 }, 2888 do_parse_flow_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 2889 2890do_parse_flow_scalar([C | Rest], Line, Col, Delta, Parser, 2891 #flow_scalar_ctx{spaces = Spaces, newline = false} = Ctx) 2892 when ?IS_SPACE(C) -> 2893 %% Keep white spaces in a separate buffer. If we find content later, 2894 %% this buffer will be merged with the result buffer. Otherwise, the 2895 %% white spaces buffer may be trimmed or dropped. 2896 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2897 Ctx1 = Ctx#flow_scalar_ctx{ 2898 spaces = [C | Spaces] 2899 }, 2900 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 2901 2902do_parse_flow_scalar([C | Rest], Line, Col, Delta, Parser, 2903 #flow_scalar_ctx{newline = true} = Ctx) 2904 when ?IS_SPACE(C) -> 2905 %% Drop leading white spaces when not on the first line. 2906 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2907 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser, Ctx); 2908 2909%% 2910%% Flow scalar end character. 2911%% 2912 2913do_parse_flow_scalar([C | Rest], Line, Col, Delta, Parser, 2914 #flow_scalar_ctx{style = Style} = Ctx) when 2915 (Style == double_quoted andalso C == $") orelse 2916 (Style == single_quoted andalso C == $') -> 2917 %% Found the end of this flow scalar. Next step: find the next 2918 %% token. 2919 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 2920 Ctx1 = Ctx#flow_scalar_ctx{ 2921 endline = Line, 2922 endcol = Col1 2923 }, 2924 queue_flow_scalar_token(Rest, Line, Col1, Delta1, Parser, Ctx1); 2925 2926do_parse_flow_scalar([$# | _] = Chars, Line, Col, Delta, Parser, 2927 #flow_scalar_ctx{style = plain, spaces = Spaces} = Ctx) when Spaces /= [] -> 2928 %% A '#' character preceeded by white spaces is a comment. The plain 2929 %% scalar terminates with the first white spaces because trailing 2930 %% white spaces are ignored. [130] 2931 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 2932 Ctx#flow_scalar_ctx{spaces = ""}); 2933 2934do_parse_flow_scalar([$:] = Chars, Line, Col, Delta, 2935 #yamerl_parser{raw_eos = true} = Parser, 2936 #flow_scalar_ctx{style = plain} = Ctx) -> 2937 %% We consider the end-of-stream as a "white space" and use the ':' 2938 %% character as the end character for this plain scalar. [130] 2939 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 2940 Ctx#flow_scalar_ctx{spaces = ""}); 2941do_parse_flow_scalar([$:] = Chars, Line, Col, Delta, 2942 #yamerl_parser{raw_eos = false} = Parser, 2943 #flow_scalar_ctx{style = plain} = Ctx) -> 2944 %% We don't have enough data to determine if it's the end of the 2945 %% plain scalar. 2946 suspend_parsing(Chars, Line, Col, Delta, Parser, 2947 fun do_parse_flow_scalar/6, Ctx); 2948do_parse_flow_scalar([$:, C | _] = Chars, Line, Col, Delta, 2949 #yamerl_parser{doc_version = Version} = Parser, 2950 #flow_scalar_ctx{style = plain} = Ctx) 2951 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 2952 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2953 %% A ':' character followed by white spaces is not allowed in a 2954 %% plain scalar: end it. Only one character is available but it's 2955 %% enough to take a decision. The next state will handle the newline 2956 %% properly. [130] 2957 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 2958 Ctx#flow_scalar_ctx{spaces = ""}); 2959do_parse_flow_scalar([$:, C | _] = Chars, Line, Col, Delta, Parser, 2960 #flow_scalar_ctx{style = plain} = Ctx) 2961 when ?IN_FLOW_CTX(Parser) andalso ?IS_FLOW_INDICATOR(C) -> 2962 %% A ':' character followed by an flow indicator character in flow 2963 %% context ends the plain scalar. 2964 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 2965 Ctx#flow_scalar_ctx{spaces = ""}); 2966 2967do_parse_flow_scalar([C | _] = Chars, Line, Col, Delta, Parser, 2968 #flow_scalar_ctx{style = plain} = Ctx) 2969 when ?IN_FLOW_CTX(Parser) andalso ?IS_FLOW_INDICATOR(C) -> 2970 %% The characters '[', ']', '{', '}' and ',' are forbidden in plain 2971 %% scalar because they are used as flow collection separation 2972 %% characters. [129] 2973 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 2974 Ctx#flow_scalar_ctx{spaces = ""}); 2975 2976do_parse_flow_scalar([C | _] = Chars, Line, 1 = Col, Delta, 2977 #yamerl_parser{chars_len = Len, raw_eos = false} = Parser, 2978 #flow_scalar_ctx{style = plain} = Ctx) 2979 when (C == $- orelse C == $.) andalso (Len - Delta) < 4 -> 2980 %% We don't have enough data to determine if it's the end of the 2981 %% plain scalar. 2982 suspend_parsing(Chars, Line, Col, Delta, Parser, 2983 fun do_parse_flow_scalar/6, Ctx); 2984do_parse_flow_scalar([$-, $-, $-, C | _] = Chars, Line, 1 = Col, Delta, 2985 #yamerl_parser{doc_version = Version} = Parser, 2986 #flow_scalar_ctx{style = plain} = Ctx) 2987 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 2988 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2989 %% A directives end indicator puts an end to the plain scalar. 2990 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 2991 Ctx#flow_scalar_ctx{spaces = ""}); 2992do_parse_flow_scalar([$., $., $., C | _] = Chars, Line, 1 = Col, Delta, 2993 #yamerl_parser{doc_version = Version} = Parser, 2994 #flow_scalar_ctx{style = plain} = Ctx) 2995 when ?IS_SPACE(C) orelse ?IS_NEWLINE(C) orelse 2996 (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 2997 %% A document end indicator puts an end to the plain scalar. 2998 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 2999 Ctx#flow_scalar_ctx{spaces = ""}); 3000do_parse_flow_scalar([$-, $-, $-] = Chars, Line, 1 = Col, Delta, 3001 #yamerl_parser{raw_eos = true} = Parser, 3002 #flow_scalar_ctx{style = plain} = Ctx) -> 3003 %% A directives end indicator puts an end to the plain scalar. 3004 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 3005 Ctx#flow_scalar_ctx{spaces = ""}); 3006do_parse_flow_scalar([$., $., $.] = Chars, Line, 1 = Col, Delta, 3007 #yamerl_parser{raw_eos = true} = Parser, 3008 #flow_scalar_ctx{style = plain} = Ctx) -> 3009 %% A document end indicator puts an end to the plain scalar. 3010 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 3011 Ctx#flow_scalar_ctx{spaces = ""}); 3012 3013do_parse_flow_scalar([_ | _] = Chars, Line, Col, Delta, 3014 #yamerl_parser{cur_coll = #bcoll{indent = Indent}} = Parser, 3015 #flow_scalar_ctx{style = plain, newline = true} = Ctx) 3016 when ?IN_BLOCK_CTX(Parser) andalso Col =< Indent -> 3017 %% The continuation line is as or less indented than the current 3018 %% block collection. Therefore, it's not a continuation line and we 3019 %% end the flow scalar. 3020 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 3021 Ctx#flow_scalar_ctx{spaces = ""}); 3022 3023do_parse_flow_scalar([] = Chars, Line, Col, Delta, 3024 #yamerl_parser{raw_eos = true} = Parser, 3025 #flow_scalar_ctx{style = plain} = Ctx) -> 3026 %% End of stream = end of plain scalar. 3027 queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 3028 Ctx#flow_scalar_ctx{spaces = ""}); 3029 3030%% 3031%% JSON acceptable characters range [2]. 3032%% 3033 3034do_parse_flow_scalar([C | Rest] = Chars, Line, Col, Delta, Parser, 3035 #flow_scalar_ctx{spaces = Spaces} = Ctx) 3036 when C == 16#9 orelse (C >= 16#20 andalso C =< 16#10FFFF) -> 3037 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 3038 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 3039 Output1 = case Spaces of 3040 "" -> [C | Ctx#flow_scalar_ctx.output]; 3041 _ -> [C | Spaces ++ Ctx#flow_scalar_ctx.output] 3042 end, 3043 Ctx1 = Ctx#flow_scalar_ctx{ 3044 spaces = "", 3045 newline = false, 3046 output = Output1, 3047 endline = Line, 3048 endcol = Col1 3049 }, 3050 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser1, Ctx1); 3051 3052do_parse_flow_scalar([] = Chars, Line, Col, Delta, 3053 #yamerl_parser{raw_eos = true} = Parser, 3054 #flow_scalar_ctx{style = Style, line = Sc_Line, col = Sc_Col, 3055 output = Output}) -> 3056 Tag = case Style of 3057 plain -> ?PLAIN_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col); 3058 _ -> ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3059 end, 3060 Token = #yamerl_scalar{ 3061 style = flow, 3062 substyle = Style, 3063 text = lists:reverse(Output), 3064 line = Sc_Line, 3065 column = Sc_Col, 3066 tag = Tag 3067 }, 3068 Error = #yamerl_parsing_error{ 3069 name = unexpected_eos, 3070 token = Token, 3071 line = Line, 3072 column = Col 3073 }, 3074 Parser1 = add_error(Parser, Error, 3075 "Unexpected end-of-stream while parsing flow scalar", []), 3076 return(Chars, Line, Col, Delta, Parser1); 3077do_parse_flow_scalar([] = Chars, Line, Col, Delta, Parser, Ctx) -> 3078 suspend_parsing(Chars, Line, Col, Delta, Parser, 3079 fun do_parse_flow_scalar/6, Ctx). 3080 3081%% 3082%% Escaped characters [62]. 3083%% Only supported by double-quoted strings. 3084%% 3085 3086%% Escaped 16-bit Unicode character, \uFFFF [60]. 3087do_parse_flow_scalar_escaped([$u | _] = Chars, Line, Col, Delta, 3088 #yamerl_parser{chars_len = Len, raw_eos = true} = Parser, 3089 #flow_scalar_ctx{style = Style, line = Sc_Line, col = Sc_Col, 3090 output = Output}) 3091 when (Len - Delta) < 5 -> 3092 %% Unexpected enf-of-stream. 3093 Token = #yamerl_scalar{ 3094 style = flow, 3095 substyle = Style, 3096 text = lists:reverse(Output), 3097 line = Sc_Line, 3098 column = Sc_Col, 3099 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3100 }, 3101 Error = #yamerl_parsing_error{ 3102 name = unexpected_eos, 3103 token = Token, 3104 line = Line, 3105 column = Col 3106 }, 3107 Parser1 = add_error(Parser, Error, 3108 "Unexpected end-of-stream while parsing flow scalar", []), 3109 return(Chars, Line, Col, Delta, Parser1); 3110do_parse_flow_scalar_escaped([$u | _] = Chars, Line, Col, Delta, 3111 #yamerl_parser{chars_len = Len} = Parser, Ctx) 3112 when (Len - Delta) < 5 -> 3113 %% Can't be sure it's an escaped Unicode character. 3114 suspend_parsing(Chars, Line, Col, Delta, Parser, 3115 fun do_parse_flow_scalar_escaped/6, Ctx); 3116do_parse_flow_scalar_escaped([$u, O1, O2, O3, O4 | Rest], Line, Col, Delta, 3117 Parser, #flow_scalar_ctx{surrogate = High} = Ctx) when 3118 ?IS_HEXADECIMAL(O1) andalso ?IS_HEXADECIMAL(O2) andalso 3119 ?IS_HEXADECIMAL(O3) andalso ?IS_HEXADECIMAL(O4) -> 3120 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 5), 3121 C = hex_to_dec([O1, O2, O3, O4], 0), 3122 Ctx1 = case High of 3123 undefined -> 3124 if 3125 ?IS_HIGH_SURROGATE(C) -> 3126 %% This is the high part of a UTF-16 surrogate pair. 3127 Ctx#flow_scalar_ctx{ 3128 surrogate = C 3129 }; 3130 true -> 3131 %% Normal character. 3132 Ctx#flow_scalar_ctx{ 3133 output = [C | Ctx#flow_scalar_ctx.output] 3134 } 3135 end; 3136 _ -> 3137 if 3138 ?IS_LOW_SURROGATE(C) -> 3139 %% This is the low part of a UTF-16 surrogate pair. 3140 C1 = 16#10000 + (High - 16#d800) * 16#400 + (C - 16#dc00), 3141 Ctx#flow_scalar_ctx{ 3142 output = [C1 | Ctx#flow_scalar_ctx.output], 3143 surrogate = undefined 3144 }; 3145 true -> 3146 %% Error: high surrogate without a low surrogate. 3147 %% The error is generated by the next clause. 3148 Ctx 3149 end 3150 end, 3151 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 3152do_parse_flow_scalar_escaped([$u | _] = Chars, Line, Col, Delta, Parser, 3153 #flow_scalar_ctx{line = Sc_Line, col = Sc_Col, style = Style, 3154 output = Output} = Ctx) -> 3155 %% Invalid escaped character. 3156 Token = #yamerl_scalar{ 3157 style = flow, 3158 substyle = Style, 3159 text = lists:reverse(Output), 3160 line = Sc_Line, 3161 column = Sc_Col, 3162 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3163 }, 3164 Error = #yamerl_parsing_error{ 3165 name = invalid_escaped_character, 3166 type = warning, 3167 token = Token, 3168 line = Line, 3169 column = Col - 1 3170 }, 3171 Parser1 = add_error(Parser, Error, 3172 "Invalid escaped character", []), 3173 do_parse_flow_scalar(Chars, Line, Col, Delta, Parser1, Ctx); 3174 3175%% Escaped 32-bit Unicode character, \UFFFFFFFF [61]. 3176do_parse_flow_scalar_escaped([$U | _] = Chars, Line, Col, Delta, 3177 #yamerl_parser{chars_len = Len, raw_eos = true} = Parser, 3178 #flow_scalar_ctx{style = Style, line = Sc_Line, col = Sc_Col, 3179 output = Output}) 3180 when (Len - Delta) < 9 -> 3181 %% Unexpected enf-of-stream. 3182 Token = #yamerl_scalar{ 3183 style = flow, 3184 substyle = Style, 3185 text = lists:reverse(Output), 3186 line = Sc_Line, 3187 column = Sc_Col, 3188 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3189 }, 3190 Error = #yamerl_parsing_error{ 3191 name = unexpected_eos, 3192 token = Token, 3193 line = Line, 3194 column = Col 3195 }, 3196 Parser1 = add_error(Parser, Error, 3197 "Unexpected end-of-stream while parsing flow scalar", []), 3198 return(Chars, Line, Col, Delta, Parser1); 3199do_parse_flow_scalar_escaped([$U | _] = Chars, Line, Col, Delta, 3200 #yamerl_parser{chars_len = Len} = Parser, Ctx) 3201 when (Len - Delta) < 9 -> 3202 %% Can't be sure it's an escaped Unicode character. 3203 suspend_parsing(Chars, Line, Col, Delta, Parser, 3204 fun do_parse_flow_scalar_escaped/6, Ctx); 3205do_parse_flow_scalar_escaped([$U, O1, O2, O3, O4, O5, O6, O7, O8 | Rest], 3206 Line, Col, Delta, Parser, #flow_scalar_ctx{surrogate = High} = Ctx) when 3207 ?IS_HEXADECIMAL(O1) andalso ?IS_HEXADECIMAL(O2) andalso 3208 ?IS_HEXADECIMAL(O3) andalso ?IS_HEXADECIMAL(O4) andalso 3209 ?IS_HEXADECIMAL(O5) andalso ?IS_HEXADECIMAL(O6) andalso 3210 ?IS_HEXADECIMAL(O7) andalso ?IS_HEXADECIMAL(O8) -> 3211 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 9), 3212 C = hex_to_dec([O1, O2, O3, O4, O5, O6, O7, O8], 0), 3213 Ctx1 = case High of 3214 undefined -> 3215 if 3216 ?IS_HIGH_SURROGATE(C) -> 3217 %% This is the high part of a UTF-16 surrogate pair. 3218 Ctx#flow_scalar_ctx{ 3219 surrogate = C 3220 }; 3221 true -> 3222 %% Normal character. 3223 Ctx#flow_scalar_ctx{ 3224 output = [C | Ctx#flow_scalar_ctx.output] 3225 } 3226 end; 3227 _ -> 3228 if 3229 ?IS_LOW_SURROGATE(C) -> 3230 %% This is the low part of a UTF-16 surrogate pair. 3231 C1 = 16#10000 + (High - 16#d800) * 16#400 + (C - 16#dc00), 3232 Ctx#flow_scalar_ctx{ 3233 output = [C1 | Ctx#flow_scalar_ctx.output], 3234 surrogate = undefined 3235 }; 3236 true -> 3237 %% Error: high surrogate without a low surrogate. 3238 %% The error is generated by the next clause. 3239 Ctx 3240 end 3241 end, 3242 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 3243do_parse_flow_scalar_escaped([$U | _] = Chars, Line, Col, Delta, Parser, 3244 #flow_scalar_ctx{line = Sc_Line, col = Sc_Col, 3245 style = Style, output = Output} = Ctx) -> 3246 %% Invalid escaped character. 3247 Token = #yamerl_scalar{ 3248 style = flow, 3249 substyle = Style, 3250 text = lists:reverse(Output), 3251 line = Sc_Line, 3252 column = Sc_Col, 3253 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3254 }, 3255 Error = #yamerl_parsing_error{ 3256 name = invalid_escaped_character, 3257 type = warning, 3258 token = Token, 3259 line = Line, 3260 column = Col - 1 3261 }, 3262 Parser1 = add_error(Parser, Error, 3263 "Invalid escaped character", []), 3264 do_parse_flow_scalar(Chars, Line, Col, Delta, Parser1, Ctx); 3265 3266%% Escaped 8-bit Unicode character, \xFF [59]. 3267do_parse_flow_scalar_escaped([$x | _] = Chars, Line, Col, Delta, 3268 #yamerl_parser{chars_len = Len, raw_eos = true} = Parser, 3269 #flow_scalar_ctx{style = Style, line = Sc_Line, col = Sc_Col, 3270 output = Output}) 3271 when (Len - Delta) < 3 -> 3272 %% Unexpected enf-of-stream. 3273 Token = #yamerl_scalar{ 3274 style = flow, 3275 substyle = Style, 3276 text = lists:reverse(Output), 3277 line = Sc_Line, 3278 column = Sc_Col, 3279 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3280 }, 3281 Error = #yamerl_parsing_error{ 3282 name = unexpected_eos, 3283 token = Token, 3284 line = Line, 3285 column = Col 3286 }, 3287 Parser1 = add_error(Parser, Error, 3288 "Unexpected end-of-stream while parsing flow scalar", []), 3289 return(Chars, Line, Col, Delta, Parser1); 3290do_parse_flow_scalar_escaped([$x | _] = Chars, Line, Col, Delta, 3291 #yamerl_parser{chars_len = Len} = Parser, Ctx) 3292 when (Len - Delta) < 3 -> 3293 %% Can't be sure it's an escaped Unicode character. 3294 suspend_parsing(Chars, Line, Col, Delta, Parser, 3295 fun do_parse_flow_scalar_escaped/6, Ctx); 3296do_parse_flow_scalar_escaped([$x, O1, O2 | Rest], Line, Col, Delta, Parser, Ctx) 3297 when ?IS_HEXADECIMAL(O1) andalso ?IS_HEXADECIMAL(O2) -> 3298 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 3), 3299 C = hex_to_dec([O1, O2], 0), 3300 Ctx1 = Ctx#flow_scalar_ctx{ 3301 output = [C | Ctx#flow_scalar_ctx.output] 3302 }, 3303 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser, Ctx1); 3304do_parse_flow_scalar_escaped([$x | _] = Chars, Line, Col, Delta, Parser, 3305 #flow_scalar_ctx{line = Sc_Line, col = Sc_Col, style = Style, 3306 output = Output} = Ctx) -> 3307 %% Invalid escaped character. 3308 Token = #yamerl_scalar{ 3309 style = flow, 3310 substyle = Style, 3311 text = lists:reverse(Output), 3312 line = Sc_Line, 3313 column = Sc_Col, 3314 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3315 }, 3316 Error = #yamerl_parsing_error{ 3317 name = invalid_escaped_character, 3318 type = warning, 3319 token = Token, 3320 line = Line, 3321 column = Col - 1 3322 }, 3323 Parser1 = add_error(Parser, Error, 3324 "Invalid escaped character", []), 3325 do_parse_flow_scalar(Chars, Line, Col, Delta, Parser1, Ctx); 3326 3327%% Escaped newline. 3328%% All trailing whitespaces are kept as content before an escaped 3329%% newline: this is handled in the $\\ clause above. 3330do_parse_flow_scalar_escaped([$\r] = Chars, Line, Col, Delta, 3331 #yamerl_parser{raw_eos = false} = Parser, Ctx) -> 3332 %% Can't be sure it's a newline. It may be followed by a LF. 3333 suspend_parsing(Chars, Line, Col, Delta, Parser, 3334 fun do_parse_flow_scalar_escaped/6, Ctx); 3335do_parse_flow_scalar_escaped([C | _] = Chars, Line, _, Delta, 3336 #yamerl_parser{doc_version = Version} = Parser, 3337 #flow_scalar_ctx{style = double_quoted} = Ctx) 3338 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 3339 {Rest, Line1, Col1, Delta1} = ?NEXT_LINE(Chars, Line, Delta, Parser), 3340 Ctx1 = Ctx#flow_scalar_ctx{ 3341 newline = true 3342 }, 3343 do_parse_flow_scalar(Rest, Line1, Col1, Delta1, Parser, Ctx1); 3344 3345%% Other escaped characters. 3346do_parse_flow_scalar_escaped([C | Rest] = Chars, Line, Col, Delta, Parser, 3347 #flow_scalar_ctx{line = Sc_Line, col = Sc_Col, style = Style, 3348 output = Output} = Ctx) -> 3349 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 3350 case unescape_char(C) of 3351 undefined -> 3352 %% Invalid escaped character. 3353 Token = #yamerl_scalar{ 3354 style = flow, 3355 substyle = Style, 3356 text = lists:reverse(Output), 3357 line = Sc_Line, 3358 column = Sc_Col, 3359 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3360 }, 3361 Error = #yamerl_parsing_error{ 3362 name = invalid_escaped_character, 3363 type = warning, 3364 token = Token, 3365 line = Line, 3366 column = Col - 1 3367 }, 3368 Parser2 = add_error(Parser1, Error, 3369 "Invalid escaped character", []), 3370 do_parse_flow_scalar(Chars, Line, Col, Delta, Parser2, Ctx); 3371 C1 -> 3372 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 3373 Ctx1 = Ctx#flow_scalar_ctx{ 3374 output = [C1 | Ctx#flow_scalar_ctx.output] 3375 }, 3376 do_parse_flow_scalar(Rest, Line, Col1, Delta1, Parser1, Ctx1) 3377 end; 3378 3379do_parse_flow_scalar_escaped([] = Chars, Line, Col, Delta, 3380 #yamerl_parser{raw_eos = true} = Parser, 3381 #flow_scalar_ctx{style = Style, line = Sc_Line, col = Sc_Col, 3382 output = Output}) -> 3383 Token = #yamerl_scalar{ 3384 style = flow, 3385 substyle = Style, 3386 text = lists:reverse(Output), 3387 line = Sc_Line, 3388 column = Sc_Col, 3389 tag = ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3390 }, 3391 Error = #yamerl_parsing_error{ 3392 name = unexpected_eos, 3393 token = Token, 3394 line = Line, 3395 column = Col 3396 }, 3397 Parser1 = add_error(Parser, Error, 3398 "Unexpected end-of-stream while parsing flow scalar", []), 3399 return(Chars, Line, Col, Delta, Parser1); 3400do_parse_flow_scalar_escaped([] = Chars, Line, Col, Delta, Parser, Ctx) -> 3401 suspend_parsing(Chars, Line, Col, Delta, Parser, 3402 fun do_parse_flow_scalar_escaped/6, Ctx). 3403 3404queue_flow_scalar_token(Chars, Line, Col, Delta, Parser, 3405 #flow_scalar_ctx{style = Style, output = Output, spaces = Spaces, 3406 newline = Newline, line = Sc_Line, col = Sc_Col, 3407 endline = Endline, endcol = Endcol}) -> 3408 Output1 = case Spaces of 3409 "" -> Output; 3410 _ -> Spaces ++ Output 3411 end, 3412 Tag = case Style of 3413 plain -> ?PLAIN_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col); 3414 _ -> ?FLOW_SCALAR_DEFAULT_TAG(Sc_Line, Sc_Col) 3415 end, 3416 Token = #yamerl_scalar{ 3417 style = flow, 3418 substyle = Style, 3419 text = lists:reverse(Output1), 3420 line = Sc_Line, 3421 column = Sc_Col, 3422 tag = Tag 3423 }, 3424 Parser1 = queue_token(Parser, Token), 3425 Parser2 = Parser1#yamerl_parser{ 3426 endpos_set_by_token = true, 3427 last_token_endline = Endline, 3428 last_token_endcol = Endcol, 3429 missed_nl = (Style == plain andalso Newline) 3430 }, 3431 find_next_token(Chars, Line, Col, Delta, Parser2). 3432 3433unescape_char($0) -> 16#0; %% \0 = NUL [42] 3434unescape_char($a) -> 16#7; %% \a = BELL [43] 3435unescape_char($b) -> $\b; %% \b = BS [44] 3436unescape_char($t) -> $\t; %% \t = TAB [45] 3437unescape_char($n) -> $\n; %% \n = LF [46] 3438unescape_char($v) -> $\v; %% \v = VT [47] 3439unescape_char($f) -> $\f; %% \f = FF [48] 3440unescape_char($r) -> $\r; %% \r = CR [49] 3441unescape_char($e) -> $\e; %% \e = ESC [50] 3442unescape_char($N) -> 16#85; %% \N = Unicode next line [55] 3443unescape_char($_) -> 16#A0; %% \_ = Unicode non-breaking space [56] 3444unescape_char($L) -> 16#2028; %% \L = Unicode line sep. [57] 3445unescape_char($P) -> 16#2029; %% \P = Unicode paragraph sep. [58] 3446unescape_char($\s) -> $\s; %% \ = SPC [51] 3447unescape_char($") -> $"; %% \" = " [52] 3448unescape_char($/) -> $/; %% \/ = / [53] 3449unescape_char($\\) -> $\\; %% \\ = \ [54] 3450unescape_char(_) -> undefined. 3451 3452hex_to_dec([C | Rest], Number) -> 3453 C_Dec = if 3454 C >= $0, C =< $9 -> C - $0; 3455 C >= $a, C =< $f -> C - $a + 10; 3456 C >= $A, C =< $F -> C - $A + 10 3457 end, 3458 hex_to_dec(Rest, Number * 16 + C_Dec); 3459hex_to_dec([], Number) -> 3460 Number. 3461 3462%% ------------------------------------------------------------------- 3463%% Comments. 3464%% ------------------------------------------------------------------- 3465 3466parse_comment([C | _] = Chars, Line, Col, Delta, 3467 #yamerl_parser{doc_version = Version} = Parser) 3468 when ?IS_NEWLINE(C) orelse (Version == {1,1} andalso ?IS_NEWLINE_11(C)) -> 3469 %% A comment ends at the end of the line. 3470 %% This clause also takes care of DOS newline (even if the buffer 3471 %% contains only \r and not \n yet). It doesn't matter because we 3472 %% let the next state handle the newline properly; the cursor is not 3473 %% moved forward. 3474 find_next_token(Chars, Line, Col, Delta, Parser); 3475parse_comment([_ | Rest] = Chars, Line, Col, Delta, Parser) -> 3476 Parser1 = ?WARN_IF_NON_ASCII_LINE_BREAK(Chars, Line, Col, Parser), 3477 {Col1, Delta1} = ?NEXT_COL(Col, Delta, 1), 3478 parse_comment(Rest, Line, Col1, Delta1, Parser1); 3479parse_comment([] = Chars, Line, Col, Delta, 3480 #yamerl_parser{raw_eos = false} = Parser) -> 3481 suspend_parsing(Chars, Line, Col, Delta, Parser, fun parse_comment/5); 3482parse_comment([] = Chars, Line, Col, Delta, 3483 #yamerl_parser{raw_eos = true} = Parser) -> 3484 find_next_token(Chars, Line, Col, Delta, Parser). 3485 3486%% ------------------------------------------------------------------- 3487%% Implicit key handling. 3488%% ------------------------------------------------------------------- 3489 3490save_impl_key_pos(Chars, Line, Col, Delta, 3491 #yamerl_parser{chars_idx = Chars_Index, 3492 tks_first_idx = First, tks_queued = Queued, 3493 cur_coll = Cur_Coll, ik_stack = [_ | Rest]} = Parser) -> 3494 Required = ?IN_BLOCK_CTX(Parser) andalso Cur_Coll#bcoll.indent == Col, 3495 if 3496 Parser#yamerl_parser.ik_allowed -> 3497 Impl_Key = #impl_key{ 3498 possible = true, 3499 required = Required, 3500 line = Line, 3501 col = Col, 3502 chars_idx = Chars_Index + Delta, 3503 token_idx = First + Queued 3504 }, 3505 Parser#yamerl_parser{ik_stack = [Impl_Key | Rest]}; 3506 Required -> 3507 Error = #yamerl_parsing_error{ 3508 name = required_implicit_key_not_allowed, 3509 line = Line, 3510 column = Col 3511 }, 3512 Parser1 = add_error(Parser, Error, 3513 "Required implicit key not allowed here", []), 3514 return(Chars, Line, Col, Delta, Parser1); 3515 true -> 3516 Parser 3517 end. 3518 3519queue_impl_key(_, #yamerl_parser{last_token_endline = Line, 3520 ik_stack = [#impl_key{line = Impl_Line} = Impl_Key | _]} = Parser) 3521 when Line > Impl_Line andalso ?IN_BLOCK_CTX(Parser) -> 3522 %% An implicit key must not span several lines. 3523 Error = #yamerl_parsing_error{ 3524 name = invalid_implicit_key, 3525 type = warning, 3526 line = Impl_Key#impl_key.line, 3527 column = Impl_Key#impl_key.col 3528 }, 3529 Parser1 = add_error(Parser, Error, 3530 "An implicit key must not span several lines", []), 3531 queue_impl_key2(Parser1); 3532queue_impl_key(_, #yamerl_parser{last_token_endline = Line, 3533 ik_stack = [#impl_key{line = Impl_Line} = Impl_Key | _]} = Parser) 3534 when Line > Impl_Line andalso ?IN_FLOW_CTX(Parser) -> 3535 %% An implicit key must not span several lines. 3536 Error = #yamerl_parsing_error{ 3537 name = invalid_implicit_key, 3538 type = warning, 3539 line = Impl_Key#impl_key.line, 3540 column = Impl_Key#impl_key.col 3541 }, 3542 Parser1 = add_error(Parser, Error, 3543 "An implicit key must not span several lines", []), 3544 queue_impl_key2(Parser1); 3545queue_impl_key(Delta, #yamerl_parser{chars_idx = Index, 3546 ik_stack = [#impl_key{chars_idx = Impl_Index} = Impl_Key | _]} = Parser) 3547 when (Index + Delta) > Impl_Index + 1024 -> 3548 %% An implicit key must not take more than 1024 characters. 3549 Error = #yamerl_parsing_error{ 3550 name = invalid_implicit_key, 3551 type = warning, 3552 line = Impl_Key#impl_key.line, 3553 column = Impl_Key#impl_key.col 3554 }, 3555 Parser1 = add_error(Parser, Error, 3556 "An implicit key must not take more than 1024 characters", []), 3557 queue_impl_key2(Parser1); 3558queue_impl_key(_, Parser) -> 3559 queue_impl_key2(Parser). 3560 3561queue_impl_key2( 3562 #yamerl_parser{ik_stack = [Impl_Key | Rest]} = Parser) -> 3563 Token = #yamerl_mapping_key{ 3564 line = Impl_Key#impl_key.line, 3565 column = Impl_Key#impl_key.col 3566 }, 3567 Parser1 = queue_token(Parser, Token, Impl_Key#impl_key.token_idx), 3568 Parser1#yamerl_parser{ 3569 ik_stack = [?FAKE_IMPL_KEY | Rest], 3570 ik_allowed = false 3571 }. 3572 3573remove_impl_key_pos( 3574 #yamerl_parser{ik_stack = [ 3575 #impl_key{required = true, line = Line, col = Col} | _]} = Parser) -> 3576 %% This error is raised with the following examples: 3577 %% 3578 %% - entry 3579 %% unexpected-scalar 3580 %% 3581 %% ? key 3582 %% unexpected-scalar 3583 Error = #yamerl_parsing_error{ 3584 name = expected_sequence_entry_or_mapping_key_not_found, 3585 line = Line, 3586 column = Col 3587 }, 3588 Parser1 = add_error(Parser, Error, 3589 "Expected sequence entry or mapping implicit key not found", []), 3590 do_return(Parser1); 3591remove_impl_key_pos( 3592 #yamerl_parser{ik_stack = [_ | Rest]} = Parser) -> 3593 Parser#yamerl_parser{ik_stack = [?FAKE_IMPL_KEY | Rest]}. 3594 3595%% ------------------------------------------------------------------- 3596%% Tokens queueing. 3597%% ------------------------------------------------------------------- 3598 3599-spec queue_token(Parser, Token) -> 3600 New_Parser when 3601 Parser :: yamerl_parser(), 3602 Token :: yamerl_token(), 3603 New_Parser :: yamerl_parser(). 3604 3605queue_token(Parser, Token) -> 3606 queue_token_check_doc(Parser, Token, tail). 3607 3608-spec queue_token(Parser, Token, Insert_At) -> 3609 New_Parser when 3610 Parser :: yamerl_parser(), 3611 Token :: yamerl_token(), 3612 Insert_At :: position() | tail, 3613 New_Parser :: yamerl_parser(). 3614 3615queue_token(Parser, Token, Insert_At) -> 3616 queue_token_check_doc(Parser, Token, Insert_At). 3617 3618%% 3619%% Handle document start/end. 3620%% 3621 3622queue_token_check_doc( 3623 #yamerl_parser{doc_started = false} = Parser, Token, Insert_At) 3624 when is_record(Token, yamerl_stream_start) orelse 3625 is_record(Token, yamerl_stream_end) orelse 3626 is_record(Token, yamerl_doc_end) orelse 3627 is_record(Token, yamerl_yaml_directive) orelse 3628 is_record(Token, yamerl_tag_directive) orelse 3629 is_record(Token, yamerl_reserved_directive) -> 3630 %% Directives token outside a document are perfectly allowed. 3631 queue_token_pending_entry(Parser, Token, Insert_At); 3632queue_token_check_doc( 3633 #yamerl_parser{doc_started = false, last_tag = Tag} = Parser, 3634 Token, Insert_At) -> 3635 %% Other tokens starts the document automatically. 3636 {Line, Col} = case Tag of 3637 #yamerl_tag{line = L, column = C} -> 3638 %% A tag is pending: use its position instead of the 3639 %% position of the token about to be queued. 3640 {L, C}; 3641 _ -> 3642 {?TOKEN_LINE(Token), ?TOKEN_COLUMN(Token)} 3643 end, 3644 %% We may need to shift the implicit key's index. 3645 Parser1 = update_impl_key_index(Parser, Line, Col), 3646 Parser2 = start_doc(Parser1, Line, Col, Insert_At), 3647 queue_token_pending_entry(Parser2, Token, next_insert_at(Insert_At, 1)); 3648queue_token_check_doc( 3649 #yamerl_parser{doc_started = true} = Parser, Token, Insert_At) 3650 when is_record(Token, yamerl_stream_end) -> 3651 %% A document is automatically ended when we reach the end of the stream. 3652 Parser1 = end_doc(Parser, ?TOKEN_LINE(Token), ?TOKEN_COLUMN(Token), 3653 Insert_At), 3654 queue_token_check_doc(Parser1, Token, Insert_At); 3655queue_token_check_doc( 3656 #yamerl_parser{doc_started = true} = Parser, Token, Insert_At) -> 3657 %% Document already started. 3658 queue_token_pending_entry(Parser, Token, Insert_At). 3659 3660%% 3661%% Pending entries. 3662%% 3663 3664queue_token_pending_entry( 3665 #yamerl_parser{pending_entry = false} = Parser, 3666 Token, Insert_At) -> 3667 queue_token_check_collection_start(Parser, Token, Insert_At); 3668queue_token_pending_entry( 3669 #yamerl_parser{pending_entry = true, last_tag = Tag} = Parser, 3670 Token, Insert_At) -> 3671 %% There's a pending entry: queue it now. 3672 Parser1 = Parser#yamerl_parser{ 3673 pending_entry = false 3674 }, 3675 {Line, Col} = case Tag of 3676 #yamerl_tag{line = L, column = C} -> 3677 %% A tag is pending: use its position instead of the 3678 %% position of the token about to be queued. 3679 {L, C}; 3680 _ -> 3681 {?TOKEN_LINE(Token), ?TOKEN_COLUMN(Token)} 3682 end, 3683 Entry = #yamerl_sequence_entry{ 3684 line = Line, 3685 column = Col 3686 }, 3687 %% We may need to shift the implicit key's index. 3688 Parser2 = update_impl_key_index(Parser1, 3689 ?TOKEN_LINE(Token), ?TOKEN_COLUMN(Token)), 3690 Parser3 = queue_token(Parser2, Entry, Insert_At), 3691 queue_token_check_collection_start(Parser3, Token, 3692 next_insert_at(Insert_At, 1)). 3693 3694%% 3695%% Handle collection start. 3696%% 3697 3698queue_token_check_collection_start( 3699 #yamerl_parser{ 3700 cur_coll = #bcoll{kind = Kind, indent = Indent} = Cur_Coll, 3701 parent_colls = Colls} = Parser, 3702 #yamerl_sequence_entry{line = Line, column = Col} = Token, Insert_At) 3703 when ?IN_BLOCK_CTX(Parser) andalso 3704 (Col > Indent orelse (Kind == mapping andalso Col == Indent)) -> 3705 %% This is the first entry of a block sequence collection. Queue a 3706 %% collection-start token. 3707 Collection_Start = #yamerl_collection_start{ 3708 style = block, 3709 kind = sequence, 3710 line = Line, 3711 column = Col, 3712 tag = ?COLL_SCALAR_DEFAULT_TAG(Line, Col) 3713 }, 3714 Parser1 = queue_token(Parser, Collection_Start, Insert_At), 3715 %% Record the new block indent. 3716 New_Coll = #bcoll{kind = sequence, indent = Col}, 3717 Parser2 = Parser1#yamerl_parser{ 3718 cur_coll = New_Coll, 3719 parent_colls = [Cur_Coll | Colls] 3720 }, 3721 queue_token_keep_last_pos(Parser2, Token, next_insert_at(Insert_At, 1)); 3722queue_token_check_collection_start( 3723 #yamerl_parser{ 3724 cur_coll = #bcoll{indent = Indent} = Cur_Coll, 3725 parent_colls = Colls} = Parser, 3726 #yamerl_mapping_key{line = Line, column = Col} = Token, Insert_At) 3727 when ?IN_BLOCK_CTX(Parser) andalso Col > Indent -> 3728 %% This is the first key: value pair of a block mapping collection. Queue 3729 %% a collection-start token. 3730 Collection_Start = #yamerl_collection_start{ 3731 style = block, 3732 kind = mapping, 3733 line = Line, 3734 column = Col, 3735 tag = ?COLL_SCALAR_DEFAULT_TAG(Line, Col) 3736 }, 3737 Parser1 = queue_token(Parser, Collection_Start, Insert_At), 3738 %% Record the new block indent. 3739 New_Coll = #bcoll{kind = mapping, indent = Col}, 3740 Parser2 = Parser1#yamerl_parser{ 3741 cur_coll = New_Coll, 3742 parent_colls = [Cur_Coll | Colls] 3743 }, 3744 queue_token_keep_last_pos(Parser2, Token, next_insert_at(Insert_At, 1)); 3745queue_token_check_collection_start( 3746 #yamerl_parser{ 3747 cur_coll = #fcoll{kind = sequence} = Cur_Coll, 3748 parent_colls = Colls} = Parser, 3749 #yamerl_mapping_key{line = Line, column = Col} = Token, Insert_At) 3750 when ?IN_FLOW_CTX(Parser) -> 3751 %% This is a single key: value pair inside a flow sequence. Queue 3752 %% a collection-start token. 3753 Collection_Start = #yamerl_collection_start{ 3754 style = flow, 3755 kind = mapping, 3756 line = Line, 3757 column = Col, 3758 tag = ?COLL_SCALAR_DEFAULT_TAG(Line, Col) 3759 }, 3760 Parser1 = queue_token(Parser, Collection_Start, Insert_At), 3761 %% Flag this mapping as single pair inside flow sequence. 3762 New_Coll = #fcoll{kind = single_mapping}, 3763 Parser2 = Parser1#yamerl_parser{ 3764 cur_coll = New_Coll, 3765 parent_colls = [Cur_Coll | Colls] 3766 }, 3767 queue_token_keep_last_pos(Parser2, Token, next_insert_at(Insert_At, 1)); 3768queue_token_check_collection_start(Parser, Token, Insert_At) -> 3769 queue_token_keep_last_pos(Parser, Token, Insert_At). 3770 3771%% 3772%% Remember last sequence entry, mapping key and mapping value 3773%% positions. 3774%% 3775 3776queue_token_keep_last_pos( 3777 #yamerl_parser{cur_coll = Coll, 3778 tks_first_idx = First, tks_queued = Queued} = Parser, 3779 #yamerl_sequence_entry{line = Line, column = Col} = Token, Insert_At) -> 3780 Index = case Insert_At of 3781 tail -> First + Queued; 3782 _ -> Insert_At + 1 3783 end, 3784 Coll1 = if 3785 ?IN_BLOCK_CTX(Parser) -> 3786 Coll#bcoll{kidx = Index, kline = Line, kcol = Col}; 3787 ?IN_FLOW_CTX(Parser) -> 3788 Coll#fcoll{kidx = Index, kline = Line, kcol = Col} 3789 end, 3790 Parser1 = Parser#yamerl_parser{ 3791 cur_coll = Coll1 3792 }, 3793 queue_token_json_like(Parser1, Token, Insert_At); 3794queue_token_keep_last_pos( 3795 #yamerl_parser{cur_coll = Coll, 3796 tks_first_idx = First, tks_queued = Queued} = Parser, 3797 #yamerl_mapping_key{line = Line, column = Col} = Token, Insert_At) -> 3798 %% While we're handling a mapping key, tell that we're not waiting 3799 %% for a key: value pair anymore. 3800 Index = case Insert_At of 3801 tail -> First + Queued; 3802 _ -> Insert_At + 1 3803 end, 3804 Coll1 = if 3805 ?IN_BLOCK_CTX(Parser) -> 3806 Coll#bcoll{kidx = Index, kline = Line, kcol = Col}; 3807 ?IN_FLOW_CTX(Parser) -> 3808 Coll#fcoll{kidx = Index, kline = Line, kcol = Col} 3809 end, 3810 Parser1 = Parser#yamerl_parser{ 3811 cur_coll = Coll1, 3812 waiting_for_kvpair = false 3813 }, 3814 queue_token_json_like(Parser1, Token, Insert_At); 3815queue_token_keep_last_pos( 3816 #yamerl_parser{cur_coll = Coll, 3817 tks_first_idx = First, tks_queued = Queued} = Parser, 3818 #yamerl_mapping_value{line = Line, column = Col} = Token, Insert_At) -> 3819 Index = case Insert_At of 3820 tail -> First + Queued; 3821 _ -> Insert_At + 1 3822 end, 3823 Coll1 = if 3824 ?IN_BLOCK_CTX(Parser) -> 3825 Coll#bcoll{vidx = Index, vline = Line, vcol = Col}; 3826 ?IN_FLOW_CTX(Parser) -> 3827 Coll#fcoll{vidx = Index, vline = Line, vcol = Col} 3828 end, 3829 Parser1 =Parser#yamerl_parser{ 3830 cur_coll = Coll1 3831 }, 3832 queue_token_json_like(Parser1, Token, Insert_At); 3833queue_token_keep_last_pos(Parser, Token, Insert_At) -> 3834 queue_token_json_like(Parser, Token, Insert_At). 3835 3836%% 3837%% JSON-like tokens. 3838%% 3839 3840queue_token_json_like(Parser, Token, tail) 3841 when ?IS_JSON_LIKE(Token) -> 3842 Parser1 = Parser#yamerl_parser{ 3843 last_is_json_like = true 3844 }, 3845 do_queue_token(Parser1, Token, tail); 3846queue_token_json_like(Parser, Token, tail) -> 3847 Parser1 = Parser#yamerl_parser{ 3848 last_is_json_like = false 3849 }, 3850 do_queue_token(Parser1, Token, tail); 3851queue_token_json_like(Parser, Token, Insert_At) -> 3852 do_queue_token(Parser, Token, Insert_At). 3853 3854%% 3855%% Insert the token at the end of the queue or a given index. 3856%% 3857 3858do_queue_token(#yamerl_parser{tokens = Tokens, tks_queued = Queued} = Parser, 3859 Token, tail) -> 3860 Tokens1 = [Token | Tokens], 3861 Parser1 = Parser#yamerl_parser{ 3862 tokens = Tokens1, 3863 tks_queued = Queued + 1 3864 }, 3865 emit_tokens(Parser1); 3866do_queue_token(#yamerl_parser{tokens = Tokens, tks_queued = Queued, 3867 tks_first_idx = First} = Parser, 3868 Token, Insert_At) -> 3869 Split = Queued - (Insert_At - First), 3870 {Head, Tail} = lists:split(Split, Tokens), 3871 Parser1 = Parser#yamerl_parser{ 3872 tokens = Head ++ [Token] ++ Tail, 3873 tks_queued = Queued + 1 3874 }, 3875 emit_tokens(Parser1). 3876 3877%% 3878%% Emit tokens which are ready using the callback function. 3879%% At this point, all tokens coming in are in the final order. We may 3880%% only do some checks, add empty nodes and set nodes' tag property. 3881%% 3882 3883emit_tokens( 3884 #yamerl_parser{tokens = Tokens, tks_first_idx = First} = Parser) -> 3885 Max = max_token_idx_ready(Parser), 3886 emit_tokens2(Parser, lists:reverse(Tokens), First, Max). 3887 3888emit_tokens2( 3889 #yamerl_parser{last_tag = undefined, 3890 tks_queued = Queued, tks_first_idx = First} = Parser, 3891 [#yamerl_tag{} = Tag | Rest], Idx, Max) 3892 when Idx =< Max -> 3893 %% Keep the tag outside of the token queue. It'll be attached to a 3894 %% following node. 3895 Parser1 = Parser#yamerl_parser{ 3896 tks_queued = Queued - 1, 3897 tks_first_idx = First + 1, 3898 last_tag = Tag 3899 }, 3900 emit_tokens2(Parser1, Rest, Idx + 1, Max); 3901emit_tokens2( 3902 #yamerl_parser{tks_queued = Queued, tks_first_idx = First} = Parser, 3903 [#yamerl_tag{line = Line, column = Col} = Tag | Rest], Idx, Max) 3904 when Idx =< Max -> 3905 %% Error: several tags for the same node. 3906 Error = #yamerl_parsing_error{ 3907 name = multiple_tag_properties, 3908 line = Line, 3909 column = Col 3910 }, 3911 Parser1 = add_error(Parser, Error, 3912 "Multiple tag properties attached to one node: " 3913 "the last one will be used", []), 3914 Parser2 = Parser1#yamerl_parser{ 3915 tks_queued = Queued - 1, 3916 tks_first_idx = First + 1, 3917 last_tag = Tag 3918 }, 3919 emit_tokens2(Parser2, Rest, Idx + 1, Max); 3920 3921emit_tokens2(#yamerl_parser{last_anchor = undefined, 3922 tks_queued = Queued, tks_first_idx = First} = Parser, 3923 [#yamerl_anchor{} = Anchor | Rest], Idx, Max) 3924 when Idx =< Max -> 3925 %% Keep the anchor outside of the token queue. It'll be emitted with 3926 %% its attached node. We also use this to check if multiple anchors 3927 %% are attached to the same node. 3928 Parser1 = Parser#yamerl_parser{ 3929 tks_queued = Queued - 1, 3930 tks_first_idx = First + 1, 3931 last_anchor = Anchor 3932 }, 3933 emit_tokens2(Parser1, Rest, Idx + 1, Max); 3934emit_tokens2( 3935 #yamerl_parser{tks_queued = Queued, tks_first_idx = First} = Parser, 3936 [#yamerl_anchor{line = Line, column = Col} = Anchor | Rest], Idx, Max) 3937 when Idx =< Max -> 3938 %% Error: several tags for the same node. 3939 Error = #yamerl_parsing_error{ 3940 name = multiple_anchor_properties, 3941 line = Line, 3942 column = Col 3943 }, 3944 Parser1 = add_error(Parser, Error, 3945 "Multiple anchor properties attached to one node: " 3946 "the last one will be used", []), 3947 Parser2 = Parser1#yamerl_parser{ 3948 tks_queued = Queued - 1, 3949 tks_first_idx = First + 1, 3950 last_anchor = Anchor 3951 }, 3952 emit_tokens2(Parser2, Rest, Idx + 1, Max); 3953 3954emit_tokens2(#yamerl_parser{last_token = Last} = Parser, 3955 [Token | Rest], Idx , Max) 3956 when Idx =< Max -> 3957 %% Run some checks: 3958 %% o Can "Last" and "Token" be in a raw? 3959 %% o Do we need to insert an empty scalar? 3960 Parser1 = check_tokens_in_a_row(Parser, Last, Token), 3961 %% Handle properties and execute the specified callback function (or 3962 %% queue the token). 3963 Parser2 = handle_anchor_property(Parser1, Token), 3964 emit_tokens2(Parser2, Rest, Idx + 1, Max); 3965 3966emit_tokens2(Parser, Tokens, _, _) -> 3967 Tokens1 = lists:reverse(Tokens), 3968 Parser#yamerl_parser{ 3969 tokens = Tokens1 3970 }. 3971 3972%% Check if a token can follow another and insert empty node if 3973%% necessary. 3974check_tokens_in_a_row( 3975 #yamerl_parser{last_tag = Tag, last_anchor = Anchor, 3976 tks_queued = Queued} = Parser, 3977 Token1, Token2) when 3978 %% Empty sequence entry. 3979 (is_record(Token1, yamerl_sequence_entry) andalso 3980 (?IN_BLOCK_CTX(Parser) orelse 3981 Tag /= undefined orelse 3982 Anchor /= undefined) andalso 3983 (is_record(Token2, yamerl_sequence_entry) orelse 3984 is_record(Token2, yamerl_collection_end))) orelse 3985 %% Empty mapping key. 3986 (is_record(Token1, yamerl_mapping_key) andalso 3987 (is_record(Token2, yamerl_mapping_value) orelse 3988 is_record(Token2, yamerl_collection_end))) orelse 3989 %% Empty mapping value. 3990 (is_record(Token1, yamerl_mapping_value) andalso 3991 (is_record(Token2, yamerl_mapping_key) orelse 3992 is_record(Token2, yamerl_collection_end))) orelse 3993 %% Empty mapping value. 3994 (is_record(Token1, yamerl_mapping_value) andalso 3995 (is_record(Token2, yamerl_mapping_key) orelse 3996 is_record(Token2, yamerl_collection_end))) orelse 3997 %% Empty document. 3998 (is_record(Token1, yamerl_doc_start) andalso 3999 is_record(Token2, yamerl_doc_end)) orelse 4000 %% Anchor alone. 4001 (is_record(Token1, yamerl_anchor) andalso 4002 (is_record(Token2, yamerl_mapping_value) orelse 4003 is_record(Token2, yamerl_collection_end) orelse 4004 is_record(Token2, yamerl_doc_end))) -> 4005 %% Token1 is followed by an empty scalar. 4006 {Line, Col} = case Tag of 4007 #yamerl_tag{line = L, column = C} -> 4008 %% A tag is pending: use its position instead of the 4009 %% position of the token about to be queued. 4010 {L, C}; 4011 undefined -> 4012 case Token1 of 4013 #yamerl_doc_start{} -> 4014 {?TOKEN_LINE(Token2), ?TOKEN_COLUMN(Token2)}; 4015 _ -> 4016 {?TOKEN_LINE(Token1), ?TOKEN_COLUMN(Token1)} 4017 end 4018 end, 4019 Empty = empty_scalar(Line, Col), 4020 Parser1 = Parser#yamerl_parser{ 4021 tks_queued = Queued + 1 4022 }, 4023 handle_anchor_property(Parser1, Empty); 4024check_tokens_in_a_row(Parser, Token1, Token2) when 4025 (is_record(Token1, yamerl_scalar) orelse 4026 is_record(Token1, yamerl_collection_end)) andalso 4027 (is_record(Token2, yamerl_scalar) orelse 4028 is_record(Token2, yamerl_collection_start)) -> 4029 %% Token2 can't follow Token1. 4030 Error = #yamerl_parsing_error{ 4031 name = unexpected_token, 4032 token = Token2, 4033 line = ?TOKEN_LINE(Token2), 4034 column = ?TOKEN_COLUMN(Token2) 4035 }, 4036 Parser1 = add_error(Parser, Error, 4037 "Unexpected \"~s\" token following a \"~s\" token", 4038 [?TOKEN_NAME(Token2), ?TOKEN_NAME(Token1)]), 4039 do_return(Parser1); 4040check_tokens_in_a_row(Parser, _, _) -> 4041 Parser. 4042 4043handle_anchor_property( 4044 #yamerl_parser{last_anchor = undefined} = Parser, Token) -> 4045 handle_tag_property(Parser, Token); 4046handle_anchor_property( 4047 #yamerl_parser{last_anchor = Anchor} = Parser, Token) when 4048 (is_record(Token, yamerl_collection_start) andalso 4049 Anchor#yamerl_anchor.line < Token#yamerl_collection_start.line) orelse 4050 (is_record(Token, yamerl_scalar) andalso 4051 (Anchor#yamerl_anchor.line < Token#yamerl_scalar.line orelse 4052 (Anchor#yamerl_anchor.line == Token#yamerl_scalar.line andalso 4053 Anchor#yamerl_anchor.column =< Token#yamerl_scalar.column))) -> 4054 Parser1 = do_emit_token(Parser, Anchor), 4055 Parser2 = Parser1#yamerl_parser{ 4056 last_anchor = undefined 4057 }, 4058 handle_tag_property(Parser2, Token); 4059handle_anchor_property(Parser, Token) -> 4060 handle_tag_property(Parser, Token). 4061 4062handle_tag_property( 4063 #yamerl_parser{last_tag = undefined} = Parser, Token) -> 4064 do_emit_token(Parser, Token); 4065handle_tag_property( 4066 #yamerl_parser{last_tag = Tag} = Parser, Token) when 4067 (is_record(Token, yamerl_collection_start) andalso 4068 Tag#yamerl_tag.line < Token#yamerl_collection_start.line) orelse 4069 (is_record(Token, yamerl_scalar) andalso 4070 (Tag#yamerl_tag.line < Token#yamerl_scalar.line orelse 4071 (Tag#yamerl_tag.line == Token#yamerl_scalar.line andalso 4072 Tag#yamerl_tag.column =< Token#yamerl_scalar.column))) -> 4073 %% The tag property is attached to this token. 4074 Token1 = case Token of 4075 #yamerl_scalar{} -> 4076 Token#yamerl_scalar{ 4077 tag = Tag 4078 }; 4079 #yamerl_collection_start{} -> 4080 Token#yamerl_collection_start{ 4081 tag = Tag 4082 } 4083 end, 4084 %% Clear the pending tag property. 4085 Parser1 = Parser#yamerl_parser{ 4086 last_tag = undefined 4087 }, 4088 do_emit_token(Parser1, Token1); 4089handle_tag_property(Parser, Token) -> 4090 do_emit_token(Parser, Token). 4091 4092do_emit_token( 4093 #yamerl_parser{token_fun = Not_Fun, 4094 tks_queued = Queued, tks_first_idx = First, 4095 tks_emitted = Emitted, tks_ready = Ready} = Parser, 4096 Token) when Not_Fun == acc orelse Not_Fun == drop -> 4097 %% The anchor was already counted when first removed from the queue. 4098 {Queued1, First1} = case ?TOKEN_NAME(Token) of 4099 yamerl_anchor -> {Queued, First}; 4100 _ -> {Queued - 1, First + 1} 4101 end, 4102 Ready1 = case Not_Fun of 4103 acc -> [Token | Ready]; 4104 drop -> Ready 4105 end, 4106 Parser#yamerl_parser{ 4107 tks_queued = Queued1, 4108 tks_first_idx = First1, 4109 tks_emitted = Emitted + 1, 4110 last_token = Token, 4111 tks_ready = Ready1 4112 }; 4113do_emit_token( 4114 #yamerl_parser{token_fun = Fun, 4115 tks_queued = Queued, tks_first_idx = First, tks_emitted = Emitted} = Parser, 4116 Token) -> 4117 %% The anchor was already counted when first removed from the queue. 4118 {Queued1, First1} = case ?TOKEN_NAME(Token) of 4119 yamerl_anchor -> {Queued, First}; 4120 _ -> {Queued - 1, First + 1} 4121 end, 4122 try 4123 Fun1 = case Fun(Token) of 4124 ok -> Fun; 4125 {ok, F1} -> F1 4126 end, 4127 Parser#yamerl_parser{ 4128 token_fun = Fun1, 4129 tks_queued = Queued1, 4130 tks_first_idx = First1, 4131 tks_emitted = Emitted + 1, 4132 last_token = Token 4133 } 4134 catch 4135 throw:Error when is_record(Error, yamerl_parsing_error) -> 4136 Parser1 = add_error(Parser, Error), 4137 Parser2 = Parser1#yamerl_parser{ 4138 tks_queued = Queued1, 4139 tks_first_idx = First1, 4140 tks_emitted = Emitted + 1, 4141 last_token = Token 4142 }, 4143 if 4144 Error#yamerl_parsing_error.type == error -> do_return(Parser2); 4145 true -> Parser2 4146 end; 4147 throw:{Fun2, Error} when is_record(Error, yamerl_parsing_error) -> 4148 Parser1 = add_error(Parser, Error), 4149 Parser2 = Parser1#yamerl_parser{ 4150 token_fun = Fun2, 4151 tks_queued = Queued1, 4152 tks_first_idx = First1, 4153 tks_emitted = Emitted + 1, 4154 last_token = Token 4155 }, 4156 if 4157 Error#yamerl_parsing_error.type == error -> do_return(Parser2); 4158 true -> Parser2 4159 end 4160 end. 4161 4162next_insert_at(tail, _) -> tail; 4163next_insert_at(Insert_At, N) -> Insert_At + N. 4164 4165max_token_idx_ready(#yamerl_parser{ik_stack = Stack, 4166 tks_first_idx = First, tks_queued = Queued}) -> 4167 max_token_idx_ready2(First + Queued - 1, lists:reverse(Stack)). 4168 4169max_token_idx_ready2(_, [#impl_key{possible = true, token_idx = Idx} | _]) -> 4170 Idx - 1; 4171max_token_idx_ready2(All, [_ | Rest]) -> 4172 max_token_idx_ready2(All, Rest); 4173max_token_idx_ready2(All, []) -> 4174 All. 4175 4176%% A token was inserted before the potential implicit key: move the 4177%% key's index. 4178update_impl_key_index(#yamerl_parser{ik_stack = Stack} = Parser, Line, Col) -> 4179 update_impl_key_index2(Parser, Stack, Line, Col, []). 4180 4181update_impl_key_index2(Parser, 4182 [#impl_key{token_idx = Index, line = Key_L, col = Key_C} = Impl_Key | Rest], 4183 Line, Col, Result) 4184 when is_integer(Key_L) andalso is_integer(Key_C) andalso 4185 (Line < Key_L orelse (Line == Key_L andalso Col =< Key_C)) -> 4186 Impl_Key1 = Impl_Key#impl_key{ 4187 token_idx = Index + 1 4188 }, 4189 Result1 = [Impl_Key1 | Result], 4190 update_impl_key_index2(Parser, Rest, Line, Col, Result1); 4191update_impl_key_index2(Parser, 4192 [#impl_key{line = Key_L, col = Key_C} | _] = Rest, 4193 Line, Col, Result) 4194 when is_integer(Key_L) andalso is_integer(Key_C) andalso 4195 (Line > Key_L orelse (Line == Key_L andalso Col > Key_C)) -> 4196 Parser#yamerl_parser{ 4197 ik_stack = lists:reverse(Result) ++ Rest 4198 }; 4199update_impl_key_index2(Parser, 4200 [Impl_Key | Rest], Line, Col, Result) -> 4201 Result1 = [Impl_Key | Result], 4202 update_impl_key_index2(Parser, Rest, Line, Col, Result1); 4203update_impl_key_index2(Parser, [], _, _, Result) -> 4204 Parser#yamerl_parser{ 4205 ik_stack = lists:reverse(Result) 4206 }. 4207 4208%% ------------------------------------------------------------------- 4209%% Tag resolution. 4210%% ------------------------------------------------------------------- 4211 4212setup_default_tags(#yamerl_parser{options = Options} = Parser) -> 4213 Tags = dict:new(), 4214 %% By default, "!" is resolved as "!" and the tag is considered 4215 %% local. 4216 Tags1 = dict:store({default, "!"}, "!", Tags), 4217 %% By default, "!!" is resolved as "tag:yaml.org,2002:" and is used 4218 %% by the YAML tags repository. 4219 Tags2 = dict:store({default, "!!"}, "tag:yaml.org,2002:", Tags1), 4220 %% Non-specific tags are associated to nodes which don't have an 4221 %% explicit tag or to those with the "!" explicit non-specific tag. 4222 %% The non-specific tags are resolved using a schema. 4223 Tags3 = dict:store({default, {non_specific, "!"}}, "tag:yaml.org,2002:", 4224 Tags2), 4225 Tags4 = dict:store({default, {non_specific, "?"}}, "tag:yaml.org,2002:", 4226 Tags3), 4227 Tags5 = case proplists:get_value(default_tags, Options) of 4228 undefined -> 4229 Tags4; 4230 List -> 4231 Fun = fun({Prefix, Value}, T) -> 4232 dict:store({default, Prefix}, Value, T) 4233 end, 4234 lists:foldl(Fun, Tags4, List) 4235 end, 4236 Parser#yamerl_parser{ 4237 tags = Tags5 4238 }. 4239 4240%% ------------------------------------------------------------------- 4241%% Internal functions. 4242%% ------------------------------------------------------------------- 4243 4244%% @private 4245 4246option_names() -> 4247 [ 4248 default_tags, 4249 doc_version, 4250 io_blocksize, 4251 token_fun 4252 ]. 4253 4254check_options([Option | Rest]) -> 4255 case is_option_valid(Option) of 4256 true -> check_options(Rest); 4257 false -> invalid_option(Option) 4258 end; 4259check_options([]) -> 4260 ok. 4261 4262is_option_valid({default_tags, List}) when is_list(List) -> 4263 %% This fun() returns true for any invalid entries, to keep only 4264 %% those. 4265 Fun = fun 4266 ({{non_specific, A}, B}) -> 4267 not (io_lib:char_list(A) andalso io_lib:char_list(B)); 4268 ({A, B}) -> 4269 not (io_lib:char_list(A) andalso io_lib:char_list(B)); 4270 (_) -> 4271 true 4272 end, 4273 case lists:filter(Fun, List) of 4274 [] -> true; 4275 _ -> false 4276 end; 4277is_option_valid({doc_version, {Major, Minor}}) when 4278 is_integer(Major) andalso Major >= 0 andalso 4279 is_integer(Minor) andalso Minor >= 0 -> 4280 true; 4281is_option_valid({io_blocksize, BS}) 4282 when is_integer(BS) andalso BS >= 1 -> 4283 true; 4284is_option_valid({token_fun, acc}) -> 4285 true; 4286is_option_valid({token_fun, drop}) -> 4287 true; 4288is_option_valid({token_fun, Fun}) 4289 when is_function(Fun, 1) -> 4290 true; 4291is_option_valid(_) -> 4292 false. 4293 4294invalid_option(Option) -> 4295 Error = #yamerl_invalid_option{ 4296 option = Option 4297 }, 4298 Error1 = case Option of 4299 {default_tags, _} -> 4300 Error#yamerl_invalid_option{ 4301 text = "Invalid value for option \"default_tags\": " 4302 "it must be a list of {Prefix, Prefix_Value}" 4303 }; 4304 {doc_version, _} -> 4305 Error#yamerl_invalid_option{ 4306 text = "Invalid value for option \"doc_version\": " 4307 "it must be a tuple of the form {Major, Minor} " 4308 "where Major and Minor are positive integers" 4309 }; 4310 {io_blocksize, _} -> 4311 Error#yamerl_invalid_option{ 4312 text = "Invalid value for option \"io_blocksize\": " 4313 "it must be a positive interger, expressed in bytes" 4314 }; 4315 {token_fun, _} -> 4316 Error#yamerl_invalid_option{ 4317 text = "Invalid value for option \"token_fun\": " 4318 "it must be a function taking the next token as " 4319 "its sole argument, or the atom 'acc' or 'drop'" 4320 }; 4321 _ -> 4322 yamerl_errors:format(Error, "Unknown option \"~w\"", [Option]) 4323 end, 4324 yamerl_errors:throw(Error1). 4325 4326empty_scalar(Line, Col) -> 4327 #yamerl_scalar{ 4328 style = flow, 4329 substyle = plain, 4330 text = "", 4331 line = Line, 4332 column = Col, 4333 tag = ?PLAIN_SCALAR_DEFAULT_TAG(Line, Col) 4334 }. 4335 4336check_for_closed_block_collections([C | _] = Chars, Line, Col, Delta, 4337 #yamerl_parser{cur_coll = #bcoll{kind = sequence, indent = At_Col}, 4338 parent_colls = [#bcoll{kind = mapping, indent = At_Col} = Parent_Coll | 4339 Colls]} = Parser, At_Col) when C /= $- -> 4340 %% The sequence has the same indentation level than its parent 4341 %% mapping. The next token has this same indentation but is not a 4342 %% sequence entry (denoted by the '-' character). Let's close it but 4343 %% not the parent mapping. 4344 Token = #yamerl_collection_end{ 4345 style = block, 4346 kind = sequence, 4347 line = Parser#yamerl_parser.last_token_endline, 4348 column = Parser#yamerl_parser.last_token_endcol 4349 }, 4350 Parser1 = queue_token(Parser, Token), 4351 %% Remove its indentation from the stack. 4352 Parser2 = Parser1#yamerl_parser{ 4353 cur_coll = Parent_Coll, 4354 parent_colls = Colls 4355 }, 4356 check_for_closed_block_collections(Chars, Line, Col, Delta, Parser2, 4357 At_Col); 4358check_for_closed_block_collections(Chars, Line, Col, Delta, 4359 #yamerl_parser{cur_coll = #bcoll{kind = Kind, indent = Indent}, 4360 parent_colls = [Parent_Coll | Colls]} = Parser, At_Col) 4361 when At_Col < Indent -> 4362 Parser1 = finish_incomplete_block_entries(Line, Col, Parser), 4363 %% Emit a token to signal the end of the block collection. 4364 Token = #yamerl_collection_end{ 4365 style = block, 4366 kind = Kind, 4367 line = Parser1#yamerl_parser.last_token_endline, 4368 column = Parser1#yamerl_parser.last_token_endcol 4369 }, 4370 Parser2 = queue_token(Parser1, Token), 4371 %% Remove its indentation from the stack. 4372 Parser3 = Parser2#yamerl_parser{ 4373 cur_coll = Parent_Coll, 4374 parent_colls = Colls 4375 }, 4376 check_for_closed_block_collections(Chars, Line, Col, Delta, Parser3, 4377 At_Col); 4378check_for_closed_block_collections(_, _, _, _, Parser, _) -> 4379 Parser. 4380 4381is_uri_valid(Parser, #yamerl_tag{uri = {non_specific, _}}) -> 4382 Parser; 4383is_uri_valid(Parser, #yamerl_tag{uri = [$! | _]}) -> 4384 Parser; 4385is_uri_valid(Parser, #yamerl_tag{uri = URI} = Tag) -> 4386 is_uri_scheme_valid1(Parser, Tag, URI); 4387is_uri_valid(Parser, #yamerl_tag_directive{prefix = [$! | _]}) -> 4388 Parser; 4389is_uri_valid(Parser, #yamerl_tag_directive{prefix = URI} = Directive) -> 4390 is_uri_scheme_valid1(Parser, Directive, URI). 4391 4392is_uri_scheme_valid1(Parser, Token, [C | Rest]) when 4393 (C >= $a andalso C =< $z) orelse 4394 (C >= $A andalso C =< $Z) -> 4395 is_uri_scheme_valid2(Parser, Token, Rest); 4396is_uri_scheme_valid1(Parser, Token, [_ | _]) -> 4397 Error = #yamerl_parsing_error{ 4398 name = invalid_uri, 4399 type = warning, 4400 token = Token, 4401 line = ?TOKEN_LINE(Token), 4402 column = ?TOKEN_COLUMN(Token) 4403 }, 4404 add_error(Parser, Error, "Invalid character in URI scheme", []); 4405is_uri_scheme_valid1(Parser, Token, []) -> 4406 Error = #yamerl_parsing_error{ 4407 name = invalid_uri, 4408 type = warning, 4409 token = Token, 4410 line = ?TOKEN_LINE(Token), 4411 column = ?TOKEN_COLUMN(Token) 4412 }, 4413 add_error(Parser, Error, "Unexpected end of URI", []). 4414 4415is_uri_scheme_valid2(Parser, Token, [C | Rest]) when 4416 (C >= $a andalso C =< $z) orelse 4417 (C >= $A andalso C =< $Z) orelse 4418 (C >= $0 andalso C =< $9) orelse 4419 C == $+ orelse C == $. orelse C == $- -> 4420 is_uri_scheme_valid2(Parser, Token, Rest); 4421is_uri_scheme_valid2(Parser, Token, [$: | Rest]) -> 4422 is_uri_hier_part_valid(Parser, Token, Rest); 4423is_uri_scheme_valid2(Parser, Token, [_ | _]) -> 4424 Error = #yamerl_parsing_error{ 4425 name = invalid_uri, 4426 type = warning, 4427 token = Token, 4428 line = ?TOKEN_LINE(Token), 4429 column = ?TOKEN_COLUMN(Token) 4430 }, 4431 add_error(Parser, Error, "Invalid character in URI scheme", []); 4432is_uri_scheme_valid2(Parser, Token, []) -> 4433 Error = #yamerl_parsing_error{ 4434 name = invalid_uri, 4435 type = warning, 4436 token = Token, 4437 line = ?TOKEN_LINE(Token), 4438 column = ?TOKEN_COLUMN(Token) 4439 }, 4440 add_error(Parser, Error, "Unexpected end of URI", []). 4441 4442is_uri_hier_part_valid(Parser, Token, [C | Rest]) when ?IS_URI_CHAR(C) -> 4443 is_uri_hier_part_valid(Parser, Token, Rest); 4444is_uri_hier_part_valid(Parser, _, []) -> 4445 Parser; 4446is_uri_hier_part_valid(Parser, Token, [_ | _]) -> 4447 Error = #yamerl_parsing_error{ 4448 name = invalid_uri, 4449 type = warning, 4450 token = Token, 4451 line = ?TOKEN_LINE(Token), 4452 column = ?TOKEN_COLUMN(Token) 4453 }, 4454 add_error(Parser, Error, "Invalid character in URI scheme", []). 4455 4456add_error(Parser, Error, Format, Args) -> 4457 %% Format error message. 4458 Error1 = yamerl_errors:format(Error, Format, Args), 4459 add_error(Parser, Error1). 4460 4461add_error( 4462 #yamerl_parser{has_errors = Has_Errors, errors = Errors} = Parser, Error) -> 4463 %% Update has_errors flag. 4464 Has_Errors1 = if 4465 Has_Errors -> Has_Errors; 4466 true -> Error#yamerl_parsing_error.type == error 4467 end, 4468 Parser#yamerl_parser{ 4469 has_errors = Has_Errors1, 4470 errors = [Error | Errors] 4471 }. 4472 4473-spec suspend_parsing(Chars, Line, Col, Delta, Parser, Fun) -> 4474 Ret | no_return() when 4475 Chars :: unicode_string(), 4476 Line :: position(), 4477 Col :: position(), 4478 Delta :: non_neg_integer(), 4479 Parser :: yamerl_parser(), 4480 Fun :: stream_state_fun(), 4481 Ret :: {continue, New_Parser} | Parser, 4482 New_Parser :: yamerl_parser(). 4483 4484suspend_parsing(Chars, Line, Col, Delta, Parser, Fun) -> 4485 Parser1 = ?FLUSH_TO_PARSER(Chars, Line, Col, Delta, Parser), 4486 Parser2 = Parser1#yamerl_parser{ 4487 stream_state = Fun 4488 }, 4489 do_return(Parser2). 4490 4491suspend_parsing(Chars, Line, Col, Delta, Parser, Fun, Ctx) -> 4492 Fun1 = fun(Ch, Li, Co, De, P) -> Fun(Ch, Li, Co, De, P, Ctx) end, 4493 suspend_parsing(Chars, Line, Col, Delta, Parser, Fun1). 4494 4495return(Chars, Line, Col, Delta, Parser) -> 4496 Parser1 = ?FLUSH_TO_PARSER(Chars, Line, Col, Delta, Parser), 4497 do_return(Parser1). 4498 4499-spec do_return(Parser) -> {continue, Parser} | Parser | no_return() when 4500 Parser :: yamerl_parser(). 4501 4502do_return(#yamerl_parser{has_errors = true, errors = Errors}) -> 4503 yamerl_errors:throw(Errors); 4504do_return(#yamerl_parser{raw_eos = true, chars_len = 0} = Parser) -> 4505 Parser; 4506do_return(Parser) -> 4507 {continue, Parser}. 4508