1-module(neotoma_parse). 2-export([parse/1,file/1]). 3-define(p_anything,true). 4-define(p_charclass,true). 5-define(p_choose,true). 6-define(p_label,true). 7-define(p_not,true). 8-define(p_one_or_more,true). 9-define(p_optional,true). 10-define(p_scan,true). 11-define(p_seq,true). 12-define(p_string,true). 13-define(p_zero_or_more,true). 14 15 16 17% insert escapes into a string 18-spec escape_string(string()) -> string(). 19escape_string(String) -> escape_string(String, []). 20 21-spec escape_string(string(), string()) -> string(). 22escape_string([], Output) -> 23 lists:reverse(Output); 24escape_string([H|T], Output) -> 25 escape_string(T, 26 case H of 27 $/ -> [$/,$\\|Output]; 28 $\" -> [$\",$\\|Output]; % " comment inserted to help some editors with highlighting the generated parser 29 $\' -> [$\',$\\|Output]; % ' comment inserted to help some editors with highlighting the generated parser 30 $\b -> [$b,$\\|Output]; 31 $\d -> [$d,$\\|Output]; 32 $\e -> [$e,$\\|Output]; 33 $\f -> [$f,$\\|Output]; 34 $\n -> [$n,$\\|Output]; 35 $\r -> [$r,$\\|Output]; 36 $\s -> [$s,$\\|Output]; 37 $\t -> [$t,$\\|Output]; 38 $\v -> [$v,$\\|Output]; 39 _ -> [H|Output] 40 end). 41 42-spec add_lhs(binary(), index()) -> true. 43add_lhs(Symbol, Index) -> 44 case ets:lookup(memo_table_name(), lhs) of 45 [] -> 46 ets:insert(memo_table_name(), {lhs, [{Symbol,Index}]}); 47 [{lhs, L}] when is_list(L) -> 48 ets:insert(memo_table_name(), {lhs, [{Symbol,Index}|L]}) 49 end. 50 51-spec add_nt(binary(), index()) -> true | ok. 52add_nt(Symbol, Index) -> 53 case ets:lookup(memo_table_name(), nts) of 54 [] -> 55 ets:insert(memo_table_name(), {nts, [{Symbol,Index}]}); 56 [{nts, L}] when is_list(L) -> 57 case proplists:is_defined(Symbol, L) of 58 true -> 59 ok; 60 _ -> 61 ets:insert(memo_table_name(), {nts, [{Symbol,Index}|L]}) 62 end 63 end. 64 65-spec verify_rules() -> ok | no_return(). 66verify_rules() -> 67 [{lhs, LHS}] = ets:lookup(memo_table_name(), lhs), 68 [{nts, NTs}] = ets:lookup(memo_table_name(), nts), 69 [Root|NonRoots] = lists:reverse(LHS), 70 lists:foreach(fun({Sym,Idx}) -> 71 case proplists:is_defined(Sym, NTs) of 72 true -> 73 ok; 74 _ -> 75 io:format("neotoma warning: rule '~s' is unused. ~p~n", [Sym,Idx]) 76 end 77 end, NonRoots), 78 lists:foreach(fun({S,I}) -> 79 case proplists:is_defined(S, LHS) of 80 true -> 81 ok; 82 _ -> 83 io:format("neotoma error: nonterminal '~s' has no reduction. (found at ~p) No parser will be generated!~n", [S,I]), 84 exit({neotoma, {no_reduction, list_to_atom(binary_to_list(S))}}) 85 end 86 end, NTs), 87 Root. 88 89-spec used_combinator(atom()) -> true. 90used_combinator(C) -> 91 case ets:lookup(memo_table_name(), combinators) of 92 [] -> 93 ets:insert(memo_table_name(), {combinators, ordsets:from_list([C])}); 94 [{combinators, Cs}] -> 95 ets:insert(memo_table_name(), {combinators, ordsets:add_element(C, Cs)}) 96 end. 97 98-spec used_transform_variables(binary()) -> [ 'Node' | 'Idx' ]. 99used_transform_variables(Transform) -> 100 Code = unicode:characters_to_list(Transform), 101 {ok, Tokens, _} = erl_scan:string(Code), 102 used_transform_variables(Tokens, []). 103 104used_transform_variables([{var, _, Name}|Tokens], Acc) -> 105 used_transform_variables(Tokens, case Name of 106 'Node' -> [Name | Acc]; 107 'Idx' -> [Name | Acc]; 108 _ -> Acc 109 end); 110used_transform_variables([_|Tokens], Acc) -> 111 used_transform_variables(Tokens, Acc); 112used_transform_variables([], Acc) -> 113 lists:usort(Acc). 114 115-spec file(file:name()) -> any(). 116file(Filename) -> case file:read_file(Filename) of {ok,Bin} -> parse(Bin); Err -> Err end. 117 118-spec parse(binary() | list()) -> any(). 119parse(List) when is_list(List) -> parse(unicode:characters_to_binary(List)); 120parse(Input) when is_binary(Input) -> 121 setup_memo(), 122 Result = case 'rules'(Input,{{line,1},{column,1}}) of 123 {AST, <<>>, _Index} -> AST; 124 Any -> Any 125 end, 126 release_memo(), Result. 127 128-spec 'rules'(input(), index()) -> parse_result(). 129'rules'(Input, Index) -> 130 p(Input, Index, 'rules', fun(I,D) -> (p_seq([p_optional(fun 'space'/2), fun 'declaration_sequence'/2, p_optional(fun 'space'/2), p_optional(fun 'code_block'/2), p_optional(fun 'space'/2)]))(I,D) end, fun(Node, _Idx) -> 131 RootRule = verify_rules(), 132 Rules = iolist_to_binary(lists:map(fun(R) -> [R, "\n\n"] end, lists:nth(2, Node))), 133 Code = case lists:nth(4, Node) of 134 {code, Block} -> Block; 135 _ -> [] 136 end, 137 [{rules, Rules}, 138 {code, Code}, 139 {root, RootRule}, 140 {transform, ets:lookup(memo_table_name(),gen_transform)}, 141 {combinators, ets:lookup_element(memo_table_name(), combinators, 2)}] 142 143 end). 144 145-spec 'declaration_sequence'(input(), index()) -> parse_result(). 146'declaration_sequence'(Input, Index) -> 147 p(Input, Index, 'declaration_sequence', fun(I,D) -> (p_seq([p_label('head', fun 'declaration'/2), p_label('tail', p_zero_or_more(p_seq([fun 'space'/2, fun 'declaration'/2])))]))(I,D) end, fun(Node, _Idx) -> 148 FirstRule = proplists:get_value(head, Node), 149 OtherRules = [I || [_,I] <- proplists:get_value(tail, Node, [])], 150 [FirstRule|OtherRules] 151 end). 152 153-spec 'declaration'(input(), index()) -> parse_result(). 154'declaration'(Input, Index) -> 155 p(Input, Index, 'declaration', fun(I,D) -> (p_seq([fun 'nonterminal'/2, p_zero_or_more(fun 'space'/2), p_string(<<"<-">>), p_zero_or_more(fun 'space'/2), fun 'parsing_expression'/2, p_optional(fun 'space'/2), p_optional(fun 'code_block'/2), p_optional(fun 'space'/2), p_string(<<";">>)]))(I,D) end, fun(Node, _Idx) -> 156 [{nonterminal,Symbol}|Tail] = Node, 157 add_lhs(Symbol, Index), 158 Transform = case lists:nth(6,Tail) of 159 {code, CodeBlock} -> CodeBlock; 160 _ -> 161 ets:insert_new(memo_table_name(),{gen_transform, true}), 162 ["transform('",Symbol,"', Node, Idx)"] 163 end, 164 TransformArgs = case used_transform_variables(Transform) of 165 [] -> "_Node, _Idx"; 166 ['Idx'] -> "_Node, Idx"; 167 ['Node'] -> "Node, _Idx"; 168 ['Idx', 'Node'] -> "Node, Idx" 169 end, 170 ["-spec '", Symbol, "'(input(), index()) -> parse_result().\n", 171 "'",Symbol,"'","(Input, Index) ->\n ", 172 "p(Input, Index, '",Symbol,"', fun(I,D) -> (", 173 lists:nth(4, Tail), 174 ")(I,D) end, fun(", TransformArgs, ") ->",Transform," end)."] 175 end). 176 177-spec 'parsing_expression'(input(), index()) -> parse_result(). 178'parsing_expression'(Input, Index) -> 179 p(Input, Index, 'parsing_expression', fun(I,D) -> (p_choose([fun 'choice'/2, fun 'sequence'/2, fun 'primary'/2]))(I,D) end, fun(Node, _Idx) ->Node end). 180 181-spec 'choice'(input(), index()) -> parse_result(). 182'choice'(Input, Index) -> 183 p(Input, Index, 'choice', fun(I,D) -> (p_seq([p_label('head', fun 'alternative'/2), p_label('tail', p_one_or_more(p_seq([fun 'space'/2, p_string(<<"\/">>), fun 'space'/2, fun 'alternative'/2])))]))(I,D) end, fun(Node, _Idx) -> 184 Tail = [lists:last(S) || S <- proplists:get_value(tail, Node)], 185 Head = proplists:get_value(head, Node), 186 Statements = [[", ", TS] || TS <- Tail], 187 used_combinator(p_choose), 188 ["p_choose([", Head, Statements, "])"] 189 end). 190 191-spec 'alternative'(input(), index()) -> parse_result(). 192'alternative'(Input, Index) -> 193 p(Input, Index, 'alternative', fun(I,D) -> (p_choose([fun 'sequence'/2, fun 'labeled_primary'/2]))(I,D) end, fun(Node, _Idx) ->Node end). 194 195-spec 'primary'(input(), index()) -> parse_result(). 196'primary'(Input, Index) -> 197 p(Input, Index, 'primary', fun(I,D) -> (p_choose([p_seq([fun 'prefix'/2, fun 'atomic'/2]), p_seq([fun 'atomic'/2, fun 'suffix'/2]), fun 'atomic'/2]))(I,D) end, fun(Node, _Idx) -> 198case Node of 199 [Atomic, one_or_more] -> 200 used_combinator(p_one_or_more), 201 used_combinator(p_scan), 202 ["p_one_or_more(", Atomic, ")"]; 203 [Atomic, zero_or_more] -> 204 used_combinator(p_zero_or_more), 205 used_combinator(p_scan), 206 ["p_zero_or_more(", Atomic, ")"]; 207 [Atomic, optional] -> 208 used_combinator(p_optional), 209 ["p_optional(", Atomic, ")"]; 210 [assert, Atomic] -> 211 used_combinator(p_assert), 212 ["p_assert(", Atomic, ")"]; 213 [not_, Atomic] -> 214 used_combinator(p_not), 215 ["p_not(", Atomic, ")"]; 216 _ -> Node 217end 218 end). 219 220-spec 'sequence'(input(), index()) -> parse_result(). 221'sequence'(Input, Index) -> 222 p(Input, Index, 'sequence', fun(I,D) -> (p_seq([p_label('head', fun 'labeled_primary'/2), p_label('tail', p_one_or_more(p_seq([fun 'space'/2, fun 'labeled_primary'/2])))]))(I,D) end, fun(Node, _Idx) -> 223 Tail = [lists:nth(2, S) || S <- proplists:get_value(tail, Node)], 224 Head = proplists:get_value(head, Node), 225 Statements = [[", ", TS] || TS <- Tail], 226 used_combinator(p_seq), 227 ["p_seq([", Head, Statements, "])"] 228 end). 229 230-spec 'labeled_primary'(input(), index()) -> parse_result(). 231'labeled_primary'(Input, Index) -> 232 p(Input, Index, 'labeled_primary', fun(I,D) -> (p_seq([p_optional(fun 'label'/2), fun 'primary'/2]))(I,D) end, fun(Node, _Idx) -> 233 case hd(Node) of 234 [] -> lists:nth(2, Node); 235 Label -> 236 used_combinator(p_label), 237 ["p_label('", Label, "', ", lists:nth(2, Node), ")"] 238 end 239 end). 240 241-spec 'label'(input(), index()) -> parse_result(). 242'label'(Input, Index) -> 243 p(Input, Index, 'label', fun(I,D) -> (p_seq([fun 'alpha_char'/2, p_zero_or_more(fun 'alphanumeric_char'/2), p_string(<<":">>)]))(I,D) end, fun(Node, _Idx) -> 244 lists:sublist(Node, length(Node)-1) 245 end). 246 247-spec 'suffix'(input(), index()) -> parse_result(). 248'suffix'(Input, Index) -> 249 p(Input, Index, 'suffix', fun(I,D) -> (p_choose([fun 'repetition_suffix'/2, fun 'optional_suffix'/2]))(I,D) end, fun(Node, _Idx) -> 250 case Node of 251 <<"*">> -> zero_or_more; 252 <<"+">> -> one_or_more; 253 <<"?">> -> optional 254 end 255 end). 256 257-spec 'optional_suffix'(input(), index()) -> parse_result(). 258'optional_suffix'(Input, Index) -> 259 p(Input, Index, 'optional_suffix', fun(I,D) -> (p_string(<<"?">>))(I,D) end, fun(Node, _Idx) ->Node end). 260 261-spec 'repetition_suffix'(input(), index()) -> parse_result(). 262'repetition_suffix'(Input, Index) -> 263 p(Input, Index, 'repetition_suffix', fun(I,D) -> (p_choose([p_string(<<"+">>), p_string(<<"*">>)]))(I,D) end, fun(Node, _Idx) ->Node end). 264 265-spec 'prefix'(input(), index()) -> parse_result(). 266'prefix'(Input, Index) -> 267 p(Input, Index, 'prefix', fun(I,D) -> (p_choose([p_string(<<"&">>), p_string(<<"!">>)]))(I,D) end, fun(Node, _Idx) -> 268 case Node of 269 <<"&">> -> assert; 270 <<"!">> -> not_ 271 end 272 end). 273 274-spec 'atomic'(input(), index()) -> parse_result(). 275'atomic'(Input, Index) -> 276 p(Input, Index, 'atomic', fun(I,D) -> (p_choose([fun 'terminal'/2, fun 'nonterminal'/2, fun 'parenthesized_expression'/2]))(I,D) end, fun(Node, _Idx) -> 277case Node of 278 {nonterminal, Symbol} -> 279 [<<"fun '">>, Symbol, <<"'/2">>]; 280 _ -> Node 281end 282 end). 283 284-spec 'parenthesized_expression'(input(), index()) -> parse_result(). 285'parenthesized_expression'(Input, Index) -> 286 p(Input, Index, 'parenthesized_expression', fun(I,D) -> (p_seq([p_string(<<"(">>), p_optional(fun 'space'/2), fun 'parsing_expression'/2, p_optional(fun 'space'/2), p_string(<<")">>)]))(I,D) end, fun(Node, _Idx) ->lists:nth(3, Node) end). 287 288-spec 'nonterminal'(input(), index()) -> parse_result(). 289'nonterminal'(Input, Index) -> 290 p(Input, Index, 'nonterminal', fun(I,D) -> (p_seq([fun 'alpha_char'/2, p_zero_or_more(fun 'alphanumeric_char'/2)]))(I,D) end, fun(Node, Idx) -> 291 Symbol = iolist_to_binary(Node), 292 add_nt(Symbol, Idx), 293 {nonterminal, Symbol} 294 end). 295 296-spec 'terminal'(input(), index()) -> parse_result(). 297'terminal'(Input, Index) -> 298 p(Input, Index, 'terminal', fun(I,D) -> (p_choose([fun 'regexp_string'/2, fun 'quoted_string'/2, fun 'character_class'/2, fun 'anything_symbol'/2]))(I,D) end, fun(Node, _Idx) ->Node end). 299 300-spec 'regexp_string'(input(), index()) -> parse_result(). 301'regexp_string'(Input, Index) -> 302 p(Input, Index, 'regexp_string', fun(I,D) -> (p_seq([p_string(<<"#">>), p_label('string', p_one_or_more(p_seq([p_not(p_string(<<"#">>)), p_choose([p_string(<<"\\#">>), p_anything()])]))), p_string(<<"#">>)]))(I,D) end, fun(Node, _Idx) -> 303 used_combinator(p_regexp), 304 ["p_regexp(<<\"", 305 % Escape \ and " as they are used in erlang string. Other sumbol stay as is. 306 % \ -> \\ 307 % " -> \" 308 re:replace(proplists:get_value(string, Node), "\"|\\\\", "\\\\&", [{return, binary}, global]), 309 "\">>)"] 310 end). 311 312-spec 'quoted_string'(input(), index()) -> parse_result(). 313'quoted_string'(Input, Index) -> 314 p(Input, Index, 'quoted_string', fun(I,D) -> (p_choose([fun 'single_quoted_string'/2, fun 'double_quoted_string'/2]))(I,D) end, fun(Node, _Idx) -> 315 used_combinator(p_string), 316 lists:flatten(["p_string(<<\"", 317 escape_string(binary_to_list(iolist_to_binary(proplists:get_value(string, Node)))), 318 "\">>)"]) 319 end). 320 321-spec 'double_quoted_string'(input(), index()) -> parse_result(). 322'double_quoted_string'(Input, Index) -> 323 p(Input, Index, 'double_quoted_string', fun(I,D) -> (p_seq([p_string(<<"\"">>), p_label('string', p_zero_or_more(p_seq([p_not(p_string(<<"\"">>)), p_choose([p_string(<<"\\\\">>), p_string(<<"\\\"">>), p_anything()])]))), p_string(<<"\"">>)]))(I,D) end, fun(Node, _Idx) ->Node end). 324 325-spec 'single_quoted_string'(input(), index()) -> parse_result(). 326'single_quoted_string'(Input, Index) -> 327 p(Input, Index, 'single_quoted_string', fun(I,D) -> (p_seq([p_string(<<"\'">>), p_label('string', p_zero_or_more(p_seq([p_not(p_string(<<"\'">>)), p_choose([p_string(<<"\\\\">>), p_string(<<"\\\'">>), p_anything()])]))), p_string(<<"\'">>)]))(I,D) end, fun(Node, _Idx) ->Node end). 328 329-spec 'character_class'(input(), index()) -> parse_result(). 330'character_class'(Input, Index) -> 331 p(Input, Index, 'character_class', fun(I,D) -> (p_seq([p_string(<<"[">>), p_label('characters', p_one_or_more(p_seq([p_not(p_string(<<"]">>)), p_choose([p_seq([p_string(<<"\\\\">>), p_anything()]), p_seq([p_not(p_string(<<"\\\\">>)), p_anything()])])]))), p_string(<<"]">>)]))(I,D) end, fun(Node, _Idx) -> 332 used_combinator(p_charclass), 333 ["p_charclass(<<\"[", 334 escape_string(binary_to_list(iolist_to_binary(proplists:get_value(characters, Node)))), 335 "]\">>)"] 336 end). 337 338-spec 'anything_symbol'(input(), index()) -> parse_result(). 339'anything_symbol'(Input, Index) -> 340 p(Input, Index, 'anything_symbol', fun(I,D) -> (p_string(<<".">>))(I,D) end, fun(_Node, _Idx) -> used_combinator(p_anything), <<"p_anything()">> end). 341 342-spec 'alpha_char'(input(), index()) -> parse_result(). 343'alpha_char'(Input, Index) -> 344 p(Input, Index, 'alpha_char', fun(I,D) -> (p_charclass(<<"[A-Za-z_]">>))(I,D) end, fun(Node, _Idx) ->Node end). 345 346-spec 'alphanumeric_char'(input(), index()) -> parse_result(). 347'alphanumeric_char'(Input, Index) -> 348 p(Input, Index, 'alphanumeric_char', fun(I,D) -> (p_choose([fun 'alpha_char'/2, p_charclass(<<"[0-9]">>)]))(I,D) end, fun(Node, _Idx) ->Node end). 349 350-spec 'space'(input(), index()) -> parse_result(). 351'space'(Input, Index) -> 352 p(Input, Index, 'space', fun(I,D) -> (p_one_or_more(p_choose([fun 'white'/2, fun 'comment_to_eol'/2])))(I,D) end, fun(Node, _Idx) ->Node end). 353 354-spec 'comment_to_eol'(input(), index()) -> parse_result(). 355'comment_to_eol'(Input, Index) -> 356 p(Input, Index, 'comment_to_eol', fun(I,D) -> (p_seq([p_not(p_string(<<"%{">>)), p_string(<<"%">>), p_zero_or_more(p_seq([p_not(p_string(<<"\n">>)), p_anything()]))]))(I,D) end, fun(Node, _Idx) ->Node end). 357 358-spec 'white'(input(), index()) -> parse_result(). 359'white'(Input, Index) -> 360 p(Input, Index, 'white', fun(I,D) -> (p_charclass(<<"[\s\t\n\r]">>))(I,D) end, fun(Node, _Idx) ->Node end). 361 362-spec 'code_block'(input(), index()) -> parse_result(). 363'code_block'(Input, Index) -> 364 p(Input, Index, 'code_block', fun(I,D) -> (p_choose([p_seq([p_string(<<"%{">>), p_label('code', p_one_or_more(p_choose([p_string(<<"\\%">>), p_string(<<"$%">>), p_seq([p_not(p_string(<<"%}">>)), p_anything()])]))), p_string(<<"%}">>)]), p_seq([p_string(<<"`">>), p_label('code', p_one_or_more(p_choose([p_string(<<"\\`">>), p_string(<<"$`">>), p_seq([p_not(p_string(<<"`">>)), p_anything()])]))), p_string(<<"`">>)]), p_string(<<"~">>)]))(I,D) end, fun(Node, _Idx) -> 365 case Node of 366 <<"~">> -> {code, <<"Node">>}; 367 _ -> {code, proplists:get_value('code', Node)} 368 end 369 end). 370 371 372 373-file("peg_includes.hrl", 1). 374-type index() :: {{line, pos_integer()}, {column, pos_integer()}}. 375-type input() :: binary(). 376-type parse_failure() :: {fail, term()}. 377-type parse_success() :: {term(), input(), index()}. 378-type parse_result() :: parse_failure() | parse_success(). 379-type parse_fun() :: fun((input(), index()) -> parse_result()). 380-type xform_fun() :: fun((input(), index()) -> term()). 381 382-spec p(input(), index(), atom(), parse_fun(), xform_fun()) -> parse_result(). 383p(Inp, StartIndex, Name, ParseFun, TransformFun) -> 384 case get_memo(StartIndex, Name) of % See if the current reduction is memoized 385 {ok, Memo} -> %Memo; % If it is, return the stored result 386 Memo; 387 _ -> % If not, attempt to parse 388 Result = case ParseFun(Inp, StartIndex) of 389 {fail,_} = Failure -> % If it fails, memoize the failure 390 Failure; 391 {Match, InpRem, NewIndex} -> % If it passes, transform and memoize the result. 392 Transformed = TransformFun(Match, StartIndex), 393 {Transformed, InpRem, NewIndex} 394 end, 395 memoize(StartIndex, Name, Result), 396 Result 397 end. 398 399-spec setup_memo() -> ets:tid(). 400setup_memo() -> 401 put({parse_memo_table, ?MODULE}, ets:new(?MODULE, [set])). 402 403-spec release_memo() -> true. 404release_memo() -> 405 ets:delete(memo_table_name()). 406 407-spec memoize(index(), atom(), parse_result()) -> true. 408memoize(Index, Name, Result) -> 409 Memo = case ets:lookup(memo_table_name(), Index) of 410 [] -> []; 411 [{Index, Plist}] -> Plist 412 end, 413 ets:insert(memo_table_name(), {Index, [{Name, Result}|Memo]}). 414 415-spec get_memo(index(), atom()) -> {ok, term()} | {error, not_found}. 416get_memo(Index, Name) -> 417 case ets:lookup(memo_table_name(), Index) of 418 [] -> {error, not_found}; 419 [{Index, Plist}] -> 420 case proplists:lookup(Name, Plist) of 421 {Name, Result} -> {ok, Result}; 422 _ -> {error, not_found} 423 end 424 end. 425 426-spec memo_table_name() -> ets:tid(). 427memo_table_name() -> 428 get({parse_memo_table, ?MODULE}). 429 430-ifdef(p_eof). 431-spec p_eof() -> parse_fun(). 432p_eof() -> 433 fun(<<>>, Index) -> {eof, [], Index}; 434 (_, Index) -> {fail, {expected, eof, Index}} end. 435-endif. 436 437-ifdef(p_optional). 438-spec p_optional(parse_fun()) -> parse_fun(). 439p_optional(P) -> 440 fun(Input, Index) -> 441 case P(Input, Index) of 442 {fail,_} -> {[], Input, Index}; 443 {_, _, _} = Success -> Success 444 end 445 end. 446-endif. 447 448-ifdef(p_not). 449-spec p_not(parse_fun()) -> parse_fun(). 450p_not(P) -> 451 fun(Input, Index)-> 452 case P(Input,Index) of 453 {fail,_} -> 454 {[], Input, Index}; 455 {Result, _, _} -> {fail, {expected, {no_match, Result},Index}} 456 end 457 end. 458-endif. 459 460-ifdef(p_assert). 461-spec p_assert(parse_fun()) -> parse_fun(). 462p_assert(P) -> 463 fun(Input,Index) -> 464 case P(Input,Index) of 465 {fail,_} = Failure-> Failure; 466 _ -> {[], Input, Index} 467 end 468 end. 469-endif. 470 471-ifdef(p_seq). 472-spec p_seq([parse_fun()]) -> parse_fun(). 473p_seq(P) -> 474 fun(Input, Index) -> 475 p_all(P, Input, Index, []) 476 end. 477 478-spec p_all([parse_fun()], input(), index(), [term()]) -> parse_result(). 479p_all([], Inp, Index, Accum ) -> {lists:reverse( Accum ), Inp, Index}; 480p_all([P|Parsers], Inp, Index, Accum) -> 481 case P(Inp, Index) of 482 {fail, _} = Failure -> Failure; 483 {Result, InpRem, NewIndex} -> p_all(Parsers, InpRem, NewIndex, [Result|Accum]) 484 end. 485-endif. 486 487-ifdef(p_choose). 488-spec p_choose([parse_fun()]) -> parse_fun(). 489p_choose(Parsers) -> 490 fun(Input, Index) -> 491 p_attempt(Parsers, Input, Index, none) 492 end. 493 494-spec p_attempt([parse_fun()], input(), index(), none | parse_failure()) -> parse_result(). 495p_attempt([], _Input, _Index, Failure) -> Failure; 496p_attempt([P|Parsers], Input, Index, FirstFailure)-> 497 case P(Input, Index) of 498 {fail, _} = Failure -> 499 case FirstFailure of 500 none -> p_attempt(Parsers, Input, Index, Failure); 501 _ -> p_attempt(Parsers, Input, Index, FirstFailure) 502 end; 503 Result -> Result 504 end. 505-endif. 506 507-ifdef(p_zero_or_more). 508-spec p_zero_or_more(parse_fun()) -> parse_fun(). 509p_zero_or_more(P) -> 510 fun(Input, Index) -> 511 p_scan(P, Input, Index, []) 512 end. 513-endif. 514 515-ifdef(p_one_or_more). 516-spec p_one_or_more(parse_fun()) -> parse_fun(). 517p_one_or_more(P) -> 518 fun(Input, Index)-> 519 Result = p_scan(P, Input, Index, []), 520 case Result of 521 {[_|_], _, _} -> 522 Result; 523 _ -> 524 {fail, {expected, Failure, _}} = P(Input,Index), 525 {fail, {expected, {at_least_one, Failure}, Index}} 526 end 527 end. 528-endif. 529 530-ifdef(p_label). 531-spec p_label(atom(), parse_fun()) -> parse_fun(). 532p_label(Tag, P) -> 533 fun(Input, Index) -> 534 case P(Input, Index) of 535 {fail,_} = Failure -> 536 Failure; 537 {Result, InpRem, NewIndex} -> 538 {{Tag, Result}, InpRem, NewIndex} 539 end 540 end. 541-endif. 542 543-ifdef(p_scan). 544-spec p_scan(parse_fun(), input(), index(), [term()]) -> {[term()], input(), index()}. 545p_scan(_, <<>>, Index, Accum) -> {lists:reverse(Accum), <<>>, Index}; 546p_scan(P, Inp, Index, Accum) -> 547 case P(Inp, Index) of 548 {fail,_} -> {lists:reverse(Accum), Inp, Index}; 549 {Result, InpRem, NewIndex} -> p_scan(P, InpRem, NewIndex, [Result | Accum]) 550 end. 551-endif. 552 553-ifdef(p_string). 554-spec p_string(binary()) -> parse_fun(). 555p_string(S) -> 556 Length = erlang:byte_size(S), 557 fun(Input, Index) -> 558 try 559 <<S:Length/binary, Rest/binary>> = Input, 560 {S, Rest, p_advance_index(S, Index)} 561 catch 562 error:{badmatch,_} -> {fail, {expected, {string, S}, Index}} 563 end 564 end. 565-endif. 566 567-ifdef(p_anything). 568-spec p_anything() -> parse_fun(). 569p_anything() -> 570 fun(<<>>, Index) -> {fail, {expected, any_character, Index}}; 571 (Input, Index) when is_binary(Input) -> 572 <<C/utf8, Rest/binary>> = Input, 573 {<<C/utf8>>, Rest, p_advance_index(<<C/utf8>>, Index)} 574 end. 575-endif. 576 577-ifdef(p_charclass). 578-spec p_charclass(string() | binary()) -> parse_fun(). 579p_charclass(Class) -> 580 {ok, RE} = re:compile(Class, [unicode, dotall]), 581 fun(Inp, Index) -> 582 case re:run(Inp, RE, [anchored]) of 583 {match, [{0, Length}|_]} -> 584 {Head, Tail} = erlang:split_binary(Inp, Length), 585 {Head, Tail, p_advance_index(Head, Index)}; 586 _ -> {fail, {expected, {character_class, binary_to_list(Class)}, Index}} 587 end 588 end. 589-endif. 590 591-ifdef(p_regexp). 592-spec p_regexp(binary()) -> parse_fun(). 593p_regexp(Regexp) -> 594 {ok, RE} = re:compile(Regexp, [unicode, dotall, anchored]), 595 fun(Inp, Index) -> 596 case re:run(Inp, RE) of 597 {match, [{0, Length}|_]} -> 598 {Head, Tail} = erlang:split_binary(Inp, Length), 599 {Head, Tail, p_advance_index(Head, Index)}; 600 _ -> {fail, {expected, {regexp, binary_to_list(Regexp)}, Index}} 601 end 602 end. 603-endif. 604 605-ifdef(line). 606-spec line(index() | term()) -> pos_integer() | undefined. 607line({{line,L},_}) -> L; 608line(_) -> undefined. 609-endif. 610 611-ifdef(column). 612-spec column(index() | term()) -> pos_integer() | undefined. 613column({_,{column,C}}) -> C; 614column(_) -> undefined. 615-endif. 616 617-spec p_advance_index(input() | unicode:charlist() | pos_integer(), index()) -> index(). 618p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput)-> % strings 619 lists:foldl(fun p_advance_index/2, Index, unicode:characters_to_list(MatchedInput)); 620p_advance_index(MatchedInput, Index) when is_integer(MatchedInput) -> % single characters 621 {{line, Line}, {column, Col}} = Index, 622 case MatchedInput of 623 $\n -> {{line, Line+1}, {column, 1}}; 624 _ -> {{line, Line}, {column, Col+1}} 625 end. 626