1-module(neotoma_parse).
2-export([parse/1,file/1]).
3-define(p_anything,true).
4-define(p_charclass,true).
5-define(p_choose,true).
6-define(p_label,true).
7-define(p_not,true).
8-define(p_one_or_more,true).
9-define(p_optional,true).
10-define(p_scan,true).
11-define(p_seq,true).
12-define(p_string,true).
13-define(p_zero_or_more,true).
14
15
16
17% insert escapes into a string
18-spec escape_string(string()) -> string().
19escape_string(String) -> escape_string(String, []).
20
21-spec escape_string(string(), string()) -> string().
22escape_string([], Output) ->
23  lists:reverse(Output);
24escape_string([H|T], Output) ->
25  escape_string(T,
26    case H of
27        $/  -> [$/,$\\|Output];
28        $\" -> [$\",$\\|Output];     % " comment inserted to help some editors with highlighting the generated parser
29        $\' -> [$\',$\\|Output];     % ' comment inserted to help some editors with highlighting the generated parser
30        $\b -> [$b,$\\|Output];
31        $\d -> [$d,$\\|Output];
32        $\e -> [$e,$\\|Output];
33        $\f -> [$f,$\\|Output];
34        $\n -> [$n,$\\|Output];
35        $\r -> [$r,$\\|Output];
36        $\s -> [$s,$\\|Output];
37        $\t -> [$t,$\\|Output];
38        $\v -> [$v,$\\|Output];
39        _   -> [H|Output]
40    end).
41
42-spec add_lhs(binary(), index()) -> true.
43add_lhs(Symbol, Index) ->
44  case ets:lookup(memo_table_name(), lhs) of
45    [] ->
46      ets:insert(memo_table_name(), {lhs, [{Symbol,Index}]});
47    [{lhs, L}] when is_list(L) ->
48      ets:insert(memo_table_name(), {lhs, [{Symbol,Index}|L]})
49  end.
50
51-spec add_nt(binary(), index()) -> true | ok.
52add_nt(Symbol, Index) ->
53  case ets:lookup(memo_table_name(), nts) of
54    [] ->
55      ets:insert(memo_table_name(), {nts, [{Symbol,Index}]});
56    [{nts, L}] when is_list(L) ->
57      case proplists:is_defined(Symbol, L) of
58        true ->
59          ok;
60        _ ->
61          ets:insert(memo_table_name(), {nts, [{Symbol,Index}|L]})
62      end
63  end.
64
65-spec verify_rules() -> ok | no_return().
66verify_rules() ->
67  [{lhs, LHS}] = ets:lookup(memo_table_name(), lhs),
68  [{nts, NTs}] = ets:lookup(memo_table_name(), nts),
69  [Root|NonRoots] = lists:reverse(LHS),
70  lists:foreach(fun({Sym,Idx}) ->
71                    case proplists:is_defined(Sym, NTs) of
72                      true ->
73                        ok;
74                      _ ->
75                        io:format("neotoma warning: rule '~s' is unused. ~p~n", [Sym,Idx])
76                    end
77                end, NonRoots),
78  lists:foreach(fun({S,I}) ->
79                    case proplists:is_defined(S, LHS) of
80                      true ->
81                        ok;
82                      _ ->
83                        io:format("neotoma error: nonterminal '~s' has no reduction. (found at ~p) No parser will be generated!~n", [S,I]),
84                        exit({neotoma, {no_reduction, list_to_atom(binary_to_list(S))}})
85                    end
86                end, NTs),
87    Root.
88
89-spec used_combinator(atom()) -> true.
90used_combinator(C) ->
91    case ets:lookup(memo_table_name(), combinators) of
92        [] ->
93            ets:insert(memo_table_name(), {combinators, ordsets:from_list([C])});
94        [{combinators, Cs}] ->
95            ets:insert(memo_table_name(), {combinators, ordsets:add_element(C, Cs)})
96    end.
97
98-spec used_transform_variables(binary()) -> [ 'Node' | 'Idx' ].
99used_transform_variables(Transform) ->
100  Code = unicode:characters_to_list(Transform),
101  {ok, Tokens, _} = erl_scan:string(Code),
102  used_transform_variables(Tokens, []).
103
104used_transform_variables([{var, _, Name}|Tokens], Acc) ->
105  used_transform_variables(Tokens, case Name of
106                                    'Node' -> [Name | Acc];
107                                    'Idx'  -> [Name | Acc];
108                                    _      -> Acc
109                                  end);
110used_transform_variables([_|Tokens], Acc) ->
111  used_transform_variables(Tokens, Acc);
112used_transform_variables([], Acc) ->
113  lists:usort(Acc).
114
115-spec file(file:name()) -> any().
116file(Filename) -> case file:read_file(Filename) of {ok,Bin} -> parse(Bin); Err -> Err end.
117
118-spec parse(binary() | list()) -> any().
119parse(List) when is_list(List) -> parse(unicode:characters_to_binary(List));
120parse(Input) when is_binary(Input) ->
121  setup_memo(),
122  Result = case 'rules'(Input,{{line,1},{column,1}}) of
123             {AST, <<>>, _Index} -> AST;
124             Any -> Any
125           end,
126  release_memo(), Result.
127
128-spec 'rules'(input(), index()) -> parse_result().
129'rules'(Input, Index) ->
130  p(Input, Index, 'rules', fun(I,D) -> (p_seq([p_optional(fun 'space'/2), fun 'declaration_sequence'/2, p_optional(fun 'space'/2), p_optional(fun 'code_block'/2), p_optional(fun 'space'/2)]))(I,D) end, fun(Node, _Idx) ->
131  RootRule = verify_rules(),
132  Rules = iolist_to_binary(lists:map(fun(R) -> [R, "\n\n"] end, lists:nth(2, Node))),
133  Code = case lists:nth(4, Node) of
134             {code, Block} -> Block;
135             _ -> []
136         end,
137  [{rules, Rules},
138   {code, Code},
139   {root, RootRule},
140   {transform, ets:lookup(memo_table_name(),gen_transform)},
141   {combinators, ets:lookup_element(memo_table_name(), combinators, 2)}]
142
143 end).
144
145-spec 'declaration_sequence'(input(), index()) -> parse_result().
146'declaration_sequence'(Input, Index) ->
147  p(Input, Index, 'declaration_sequence', fun(I,D) -> (p_seq([p_label('head', fun 'declaration'/2), p_label('tail', p_zero_or_more(p_seq([fun 'space'/2, fun 'declaration'/2])))]))(I,D) end, fun(Node, _Idx) ->
148  FirstRule = proplists:get_value(head, Node),
149  OtherRules =  [I || [_,I] <- proplists:get_value(tail, Node, [])],
150  [FirstRule|OtherRules]
151 end).
152
153-spec 'declaration'(input(), index()) -> parse_result().
154'declaration'(Input, Index) ->
155  p(Input, Index, 'declaration', fun(I,D) -> (p_seq([fun 'nonterminal'/2, p_zero_or_more(fun 'space'/2), p_string(<<"<-">>), p_zero_or_more(fun 'space'/2), fun 'parsing_expression'/2, p_optional(fun 'space'/2), p_optional(fun 'code_block'/2), p_optional(fun 'space'/2), p_string(<<";">>)]))(I,D) end, fun(Node, _Idx) ->
156  [{nonterminal,Symbol}|Tail] = Node,
157  add_lhs(Symbol, Index),
158  Transform = case lists:nth(6,Tail) of
159                  {code, CodeBlock} -> CodeBlock;
160                  _ ->
161                      ets:insert_new(memo_table_name(),{gen_transform, true}),
162                      ["transform('",Symbol,"', Node, Idx)"]
163                  end,
164  TransformArgs = case used_transform_variables(Transform) of
165    []              -> "_Node, _Idx";
166    ['Idx']         -> "_Node, Idx";
167    ['Node']        -> "Node, _Idx";
168    ['Idx', 'Node'] -> "Node, Idx"
169  end,
170  ["-spec '", Symbol, "'(input(), index()) -> parse_result().\n",
171   "'",Symbol,"'","(Input, Index) ->\n  ",
172        "p(Input, Index, '",Symbol,"', fun(I,D) -> (",
173        lists:nth(4, Tail),
174        ")(I,D) end, fun(", TransformArgs, ") ->",Transform," end)."]
175 end).
176
177-spec 'parsing_expression'(input(), index()) -> parse_result().
178'parsing_expression'(Input, Index) ->
179  p(Input, Index, 'parsing_expression', fun(I,D) -> (p_choose([fun 'choice'/2, fun 'sequence'/2, fun 'primary'/2]))(I,D) end, fun(Node, _Idx) ->Node end).
180
181-spec 'choice'(input(), index()) -> parse_result().
182'choice'(Input, Index) ->
183  p(Input, Index, 'choice', fun(I,D) -> (p_seq([p_label('head', fun 'alternative'/2), p_label('tail', p_one_or_more(p_seq([fun 'space'/2, p_string(<<"\/">>), fun 'space'/2, fun 'alternative'/2])))]))(I,D) end, fun(Node, _Idx) ->
184  Tail = [lists:last(S) || S <- proplists:get_value(tail, Node)],
185  Head = proplists:get_value(head, Node),
186  Statements = [[", ", TS] ||  TS <- Tail],
187  used_combinator(p_choose),
188  ["p_choose([", Head, Statements, "])"]
189 end).
190
191-spec 'alternative'(input(), index()) -> parse_result().
192'alternative'(Input, Index) ->
193  p(Input, Index, 'alternative', fun(I,D) -> (p_choose([fun 'sequence'/2, fun 'labeled_primary'/2]))(I,D) end, fun(Node, _Idx) ->Node end).
194
195-spec 'primary'(input(), index()) -> parse_result().
196'primary'(Input, Index) ->
197  p(Input, Index, 'primary', fun(I,D) -> (p_choose([p_seq([fun 'prefix'/2, fun 'atomic'/2]), p_seq([fun 'atomic'/2, fun 'suffix'/2]), fun 'atomic'/2]))(I,D) end, fun(Node, _Idx) ->
198case Node of
199  [Atomic, one_or_more] ->
200        used_combinator(p_one_or_more),
201        used_combinator(p_scan),
202        ["p_one_or_more(", Atomic, ")"];
203  [Atomic, zero_or_more] ->
204        used_combinator(p_zero_or_more),
205        used_combinator(p_scan),
206        ["p_zero_or_more(", Atomic, ")"];
207  [Atomic, optional] ->
208        used_combinator(p_optional),
209        ["p_optional(", Atomic, ")"];
210  [assert, Atomic] ->
211        used_combinator(p_assert),
212        ["p_assert(", Atomic, ")"];
213  [not_, Atomic] ->
214        used_combinator(p_not),
215        ["p_not(", Atomic, ")"];
216  _ -> Node
217end
218 end).
219
220-spec 'sequence'(input(), index()) -> parse_result().
221'sequence'(Input, Index) ->
222  p(Input, Index, 'sequence', fun(I,D) -> (p_seq([p_label('head', fun 'labeled_primary'/2), p_label('tail', p_one_or_more(p_seq([fun 'space'/2, fun 'labeled_primary'/2])))]))(I,D) end, fun(Node, _Idx) ->
223  Tail = [lists:nth(2, S) || S <- proplists:get_value(tail, Node)],
224  Head = proplists:get_value(head, Node),
225  Statements = [[", ", TS] || TS <- Tail],
226  used_combinator(p_seq),
227  ["p_seq([", Head, Statements, "])"]
228 end).
229
230-spec 'labeled_primary'(input(), index()) -> parse_result().
231'labeled_primary'(Input, Index) ->
232  p(Input, Index, 'labeled_primary', fun(I,D) -> (p_seq([p_optional(fun 'label'/2), fun 'primary'/2]))(I,D) end, fun(Node, _Idx) ->
233  case hd(Node) of
234    [] -> lists:nth(2, Node);
235    Label ->
236          used_combinator(p_label),
237          ["p_label('",  Label, "', ", lists:nth(2, Node), ")"]
238  end
239 end).
240
241-spec 'label'(input(), index()) -> parse_result().
242'label'(Input, Index) ->
243  p(Input, Index, 'label', fun(I,D) -> (p_seq([fun 'alpha_char'/2, p_zero_or_more(fun 'alphanumeric_char'/2), p_string(<<":">>)]))(I,D) end, fun(Node, _Idx) ->
244  lists:sublist(Node, length(Node)-1)
245 end).
246
247-spec 'suffix'(input(), index()) -> parse_result().
248'suffix'(Input, Index) ->
249  p(Input, Index, 'suffix', fun(I,D) -> (p_choose([fun 'repetition_suffix'/2, fun 'optional_suffix'/2]))(I,D) end, fun(Node, _Idx) ->
250  case Node of
251    <<"*">> -> zero_or_more;
252    <<"+">> -> one_or_more;
253    <<"?">> -> optional
254  end
255 end).
256
257-spec 'optional_suffix'(input(), index()) -> parse_result().
258'optional_suffix'(Input, Index) ->
259  p(Input, Index, 'optional_suffix', fun(I,D) -> (p_string(<<"?">>))(I,D) end, fun(Node, _Idx) ->Node end).
260
261-spec 'repetition_suffix'(input(), index()) -> parse_result().
262'repetition_suffix'(Input, Index) ->
263  p(Input, Index, 'repetition_suffix', fun(I,D) -> (p_choose([p_string(<<"+">>), p_string(<<"*">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
264
265-spec 'prefix'(input(), index()) -> parse_result().
266'prefix'(Input, Index) ->
267  p(Input, Index, 'prefix', fun(I,D) -> (p_choose([p_string(<<"&">>), p_string(<<"!">>)]))(I,D) end, fun(Node, _Idx) ->
268  case Node of
269    <<"&">> -> assert;
270    <<"!">> -> not_
271  end
272 end).
273
274-spec 'atomic'(input(), index()) -> parse_result().
275'atomic'(Input, Index) ->
276  p(Input, Index, 'atomic', fun(I,D) -> (p_choose([fun 'terminal'/2, fun 'nonterminal'/2, fun 'parenthesized_expression'/2]))(I,D) end, fun(Node, _Idx) ->
277case Node of
278  {nonterminal, Symbol} ->
279                [<<"fun '">>, Symbol, <<"'/2">>];
280  _ -> Node
281end
282 end).
283
284-spec 'parenthesized_expression'(input(), index()) -> parse_result().
285'parenthesized_expression'(Input, Index) ->
286  p(Input, Index, 'parenthesized_expression', fun(I,D) -> (p_seq([p_string(<<"(">>), p_optional(fun 'space'/2), fun 'parsing_expression'/2, p_optional(fun 'space'/2), p_string(<<")">>)]))(I,D) end, fun(Node, _Idx) ->lists:nth(3, Node) end).
287
288-spec 'nonterminal'(input(), index()) -> parse_result().
289'nonterminal'(Input, Index) ->
290  p(Input, Index, 'nonterminal', fun(I,D) -> (p_seq([fun 'alpha_char'/2, p_zero_or_more(fun 'alphanumeric_char'/2)]))(I,D) end, fun(Node, Idx) ->
291  Symbol = iolist_to_binary(Node),
292  add_nt(Symbol, Idx),
293  {nonterminal, Symbol}
294 end).
295
296-spec 'terminal'(input(), index()) -> parse_result().
297'terminal'(Input, Index) ->
298  p(Input, Index, 'terminal', fun(I,D) -> (p_choose([fun 'regexp_string'/2, fun 'quoted_string'/2, fun 'character_class'/2, fun 'anything_symbol'/2]))(I,D) end, fun(Node, _Idx) ->Node end).
299
300-spec 'regexp_string'(input(), index()) -> parse_result().
301'regexp_string'(Input, Index) ->
302  p(Input, Index, 'regexp_string', fun(I,D) -> (p_seq([p_string(<<"#">>), p_label('string', p_one_or_more(p_seq([p_not(p_string(<<"#">>)), p_choose([p_string(<<"\\#">>), p_anything()])]))), p_string(<<"#">>)]))(I,D) end, fun(Node, _Idx) ->
303  used_combinator(p_regexp),
304  ["p_regexp(<<\"",
305	% Escape \ and " as they are used in erlang string. Other sumbol stay as is.
306	%  \ -> \\
307	%  " -> \"
308   re:replace(proplists:get_value(string, Node), "\"|\\\\", "\\\\&", [{return, binary}, global]),
309   "\">>)"]
310 end).
311
312-spec 'quoted_string'(input(), index()) -> parse_result().
313'quoted_string'(Input, Index) ->
314  p(Input, Index, 'quoted_string', fun(I,D) -> (p_choose([fun 'single_quoted_string'/2, fun 'double_quoted_string'/2]))(I,D) end, fun(Node, _Idx) ->
315  used_combinator(p_string),
316  lists:flatten(["p_string(<<\"",
317   escape_string(binary_to_list(iolist_to_binary(proplists:get_value(string, Node)))),
318   "\">>)"])
319 end).
320
321-spec 'double_quoted_string'(input(), index()) -> parse_result().
322'double_quoted_string'(Input, Index) ->
323  p(Input, Index, 'double_quoted_string', fun(I,D) -> (p_seq([p_string(<<"\"">>), p_label('string', p_zero_or_more(p_seq([p_not(p_string(<<"\"">>)), p_choose([p_string(<<"\\\\">>), p_string(<<"\\\"">>), p_anything()])]))), p_string(<<"\"">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
324
325-spec 'single_quoted_string'(input(), index()) -> parse_result().
326'single_quoted_string'(Input, Index) ->
327  p(Input, Index, 'single_quoted_string', fun(I,D) -> (p_seq([p_string(<<"\'">>), p_label('string', p_zero_or_more(p_seq([p_not(p_string(<<"\'">>)), p_choose([p_string(<<"\\\\">>), p_string(<<"\\\'">>), p_anything()])]))), p_string(<<"\'">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
328
329-spec 'character_class'(input(), index()) -> parse_result().
330'character_class'(Input, Index) ->
331  p(Input, Index, 'character_class', fun(I,D) -> (p_seq([p_string(<<"[">>), p_label('characters', p_one_or_more(p_seq([p_not(p_string(<<"]">>)), p_choose([p_seq([p_string(<<"\\\\">>), p_anything()]), p_seq([p_not(p_string(<<"\\\\">>)), p_anything()])])]))), p_string(<<"]">>)]))(I,D) end, fun(Node, _Idx) ->
332  used_combinator(p_charclass),
333  ["p_charclass(<<\"[",
334   escape_string(binary_to_list(iolist_to_binary(proplists:get_value(characters, Node)))),
335   "]\">>)"]
336 end).
337
338-spec 'anything_symbol'(input(), index()) -> parse_result().
339'anything_symbol'(Input, Index) ->
340  p(Input, Index, 'anything_symbol', fun(I,D) -> (p_string(<<".">>))(I,D) end, fun(_Node, _Idx) -> used_combinator(p_anything), <<"p_anything()">>  end).
341
342-spec 'alpha_char'(input(), index()) -> parse_result().
343'alpha_char'(Input, Index) ->
344  p(Input, Index, 'alpha_char', fun(I,D) -> (p_charclass(<<"[A-Za-z_]">>))(I,D) end, fun(Node, _Idx) ->Node end).
345
346-spec 'alphanumeric_char'(input(), index()) -> parse_result().
347'alphanumeric_char'(Input, Index) ->
348  p(Input, Index, 'alphanumeric_char', fun(I,D) -> (p_choose([fun 'alpha_char'/2, p_charclass(<<"[0-9]">>)]))(I,D) end, fun(Node, _Idx) ->Node end).
349
350-spec 'space'(input(), index()) -> parse_result().
351'space'(Input, Index) ->
352  p(Input, Index, 'space', fun(I,D) -> (p_one_or_more(p_choose([fun 'white'/2, fun 'comment_to_eol'/2])))(I,D) end, fun(Node, _Idx) ->Node end).
353
354-spec 'comment_to_eol'(input(), index()) -> parse_result().
355'comment_to_eol'(Input, Index) ->
356  p(Input, Index, 'comment_to_eol', fun(I,D) -> (p_seq([p_not(p_string(<<"%{">>)), p_string(<<"%">>), p_zero_or_more(p_seq([p_not(p_string(<<"\n">>)), p_anything()]))]))(I,D) end, fun(Node, _Idx) ->Node end).
357
358-spec 'white'(input(), index()) -> parse_result().
359'white'(Input, Index) ->
360  p(Input, Index, 'white', fun(I,D) -> (p_charclass(<<"[\s\t\n\r]">>))(I,D) end, fun(Node, _Idx) ->Node end).
361
362-spec 'code_block'(input(), index()) -> parse_result().
363'code_block'(Input, Index) ->
364  p(Input, Index, 'code_block', fun(I,D) -> (p_choose([p_seq([p_string(<<"%{">>), p_label('code', p_one_or_more(p_choose([p_string(<<"\\%">>), p_string(<<"$%">>), p_seq([p_not(p_string(<<"%}">>)), p_anything()])]))), p_string(<<"%}">>)]), p_seq([p_string(<<"`">>), p_label('code', p_one_or_more(p_choose([p_string(<<"\\`">>), p_string(<<"$`">>), p_seq([p_not(p_string(<<"`">>)), p_anything()])]))), p_string(<<"`">>)]), p_string(<<"~">>)]))(I,D) end, fun(Node, _Idx) ->
365   case Node of
366       <<"~">> -> {code, <<"Node">>};
367       _   -> {code, proplists:get_value('code', Node)}
368   end
369 end).
370
371
372
373-file("peg_includes.hrl", 1).
374-type index() :: {{line, pos_integer()}, {column, pos_integer()}}.
375-type input() :: binary().
376-type parse_failure() :: {fail, term()}.
377-type parse_success() :: {term(), input(), index()}.
378-type parse_result() :: parse_failure() | parse_success().
379-type parse_fun() :: fun((input(), index()) -> parse_result()).
380-type xform_fun() :: fun((input(), index()) -> term()).
381
382-spec p(input(), index(), atom(), parse_fun(), xform_fun()) -> parse_result().
383p(Inp, StartIndex, Name, ParseFun, TransformFun) ->
384  case get_memo(StartIndex, Name) of      % See if the current reduction is memoized
385    {ok, Memo} -> %Memo;                     % If it is, return the stored result
386      Memo;
387    _ ->                                        % If not, attempt to parse
388      Result = case ParseFun(Inp, StartIndex) of
389        {fail,_} = Failure ->                       % If it fails, memoize the failure
390          Failure;
391        {Match, InpRem, NewIndex} ->               % If it passes, transform and memoize the result.
392          Transformed = TransformFun(Match, StartIndex),
393          {Transformed, InpRem, NewIndex}
394      end,
395      memoize(StartIndex, Name, Result),
396      Result
397  end.
398
399-spec setup_memo() -> ets:tid().
400setup_memo() ->
401  put({parse_memo_table, ?MODULE}, ets:new(?MODULE, [set])).
402
403-spec release_memo() -> true.
404release_memo() ->
405  ets:delete(memo_table_name()).
406
407-spec memoize(index(), atom(), parse_result()) -> true.
408memoize(Index, Name, Result) ->
409  Memo = case ets:lookup(memo_table_name(), Index) of
410              [] -> [];
411              [{Index, Plist}] -> Plist
412         end,
413  ets:insert(memo_table_name(), {Index, [{Name, Result}|Memo]}).
414
415-spec get_memo(index(), atom()) -> {ok, term()} | {error, not_found}.
416get_memo(Index, Name) ->
417  case ets:lookup(memo_table_name(), Index) of
418    [] -> {error, not_found};
419    [{Index, Plist}] ->
420      case proplists:lookup(Name, Plist) of
421        {Name, Result}  -> {ok, Result};
422        _  -> {error, not_found}
423      end
424    end.
425
426-spec memo_table_name() -> ets:tid().
427memo_table_name() ->
428    get({parse_memo_table, ?MODULE}).
429
430-ifdef(p_eof).
431-spec p_eof() -> parse_fun().
432p_eof() ->
433  fun(<<>>, Index) -> {eof, [], Index};
434     (_, Index) -> {fail, {expected, eof, Index}} end.
435-endif.
436
437-ifdef(p_optional).
438-spec p_optional(parse_fun()) -> parse_fun().
439p_optional(P) ->
440  fun(Input, Index) ->
441      case P(Input, Index) of
442        {fail,_} -> {[], Input, Index};
443        {_, _, _} = Success -> Success
444      end
445  end.
446-endif.
447
448-ifdef(p_not).
449-spec p_not(parse_fun()) -> parse_fun().
450p_not(P) ->
451  fun(Input, Index)->
452      case P(Input,Index) of
453        {fail,_} ->
454          {[], Input, Index};
455        {Result, _, _} -> {fail, {expected, {no_match, Result},Index}}
456      end
457  end.
458-endif.
459
460-ifdef(p_assert).
461-spec p_assert(parse_fun()) -> parse_fun().
462p_assert(P) ->
463  fun(Input,Index) ->
464      case P(Input,Index) of
465        {fail,_} = Failure-> Failure;
466        _ -> {[], Input, Index}
467      end
468  end.
469-endif.
470
471-ifdef(p_seq).
472-spec p_seq([parse_fun()]) -> parse_fun().
473p_seq(P) ->
474  fun(Input, Index) ->
475      p_all(P, Input, Index, [])
476  end.
477
478-spec p_all([parse_fun()], input(), index(), [term()]) -> parse_result().
479p_all([], Inp, Index, Accum ) -> {lists:reverse( Accum ), Inp, Index};
480p_all([P|Parsers], Inp, Index, Accum) ->
481  case P(Inp, Index) of
482    {fail, _} = Failure -> Failure;
483    {Result, InpRem, NewIndex} -> p_all(Parsers, InpRem, NewIndex, [Result|Accum])
484  end.
485-endif.
486
487-ifdef(p_choose).
488-spec p_choose([parse_fun()]) -> parse_fun().
489p_choose(Parsers) ->
490  fun(Input, Index) ->
491      p_attempt(Parsers, Input, Index, none)
492  end.
493
494-spec p_attempt([parse_fun()], input(), index(), none | parse_failure()) -> parse_result().
495p_attempt([], _Input, _Index, Failure) -> Failure;
496p_attempt([P|Parsers], Input, Index, FirstFailure)->
497  case P(Input, Index) of
498    {fail, _} = Failure ->
499      case FirstFailure of
500        none -> p_attempt(Parsers, Input, Index, Failure);
501        _ -> p_attempt(Parsers, Input, Index, FirstFailure)
502      end;
503    Result -> Result
504  end.
505-endif.
506
507-ifdef(p_zero_or_more).
508-spec p_zero_or_more(parse_fun()) -> parse_fun().
509p_zero_or_more(P) ->
510  fun(Input, Index) ->
511      p_scan(P, Input, Index, [])
512  end.
513-endif.
514
515-ifdef(p_one_or_more).
516-spec p_one_or_more(parse_fun()) -> parse_fun().
517p_one_or_more(P) ->
518  fun(Input, Index)->
519      Result = p_scan(P, Input, Index, []),
520      case Result of
521        {[_|_], _, _} ->
522          Result;
523        _ ->
524          {fail, {expected, Failure, _}} = P(Input,Index),
525          {fail, {expected, {at_least_one, Failure}, Index}}
526      end
527  end.
528-endif.
529
530-ifdef(p_label).
531-spec p_label(atom(), parse_fun()) -> parse_fun().
532p_label(Tag, P) ->
533  fun(Input, Index) ->
534      case P(Input, Index) of
535        {fail,_} = Failure ->
536           Failure;
537        {Result, InpRem, NewIndex} ->
538          {{Tag, Result}, InpRem, NewIndex}
539      end
540  end.
541-endif.
542
543-ifdef(p_scan).
544-spec p_scan(parse_fun(), input(), index(), [term()]) -> {[term()], input(), index()}.
545p_scan(_, <<>>, Index, Accum) -> {lists:reverse(Accum), <<>>, Index};
546p_scan(P, Inp, Index, Accum) ->
547  case P(Inp, Index) of
548    {fail,_} -> {lists:reverse(Accum), Inp, Index};
549    {Result, InpRem, NewIndex} -> p_scan(P, InpRem, NewIndex, [Result | Accum])
550  end.
551-endif.
552
553-ifdef(p_string).
554-spec p_string(binary()) -> parse_fun().
555p_string(S) ->
556    Length = erlang:byte_size(S),
557    fun(Input, Index) ->
558      try
559          <<S:Length/binary, Rest/binary>> = Input,
560          {S, Rest, p_advance_index(S, Index)}
561      catch
562          error:{badmatch,_} -> {fail, {expected, {string, S}, Index}}
563      end
564    end.
565-endif.
566
567-ifdef(p_anything).
568-spec p_anything() -> parse_fun().
569p_anything() ->
570  fun(<<>>, Index) -> {fail, {expected, any_character, Index}};
571     (Input, Index) when is_binary(Input) ->
572          <<C/utf8, Rest/binary>> = Input,
573          {<<C/utf8>>, Rest, p_advance_index(<<C/utf8>>, Index)}
574  end.
575-endif.
576
577-ifdef(p_charclass).
578-spec p_charclass(string() | binary()) -> parse_fun().
579p_charclass(Class) ->
580    {ok, RE} = re:compile(Class, [unicode, dotall]),
581    fun(Inp, Index) ->
582            case re:run(Inp, RE, [anchored]) of
583                {match, [{0, Length}|_]} ->
584                    {Head, Tail} = erlang:split_binary(Inp, Length),
585                    {Head, Tail, p_advance_index(Head, Index)};
586                _ -> {fail, {expected, {character_class, binary_to_list(Class)}, Index}}
587            end
588    end.
589-endif.
590
591-ifdef(p_regexp).
592-spec p_regexp(binary()) -> parse_fun().
593p_regexp(Regexp) ->
594    {ok, RE} = re:compile(Regexp, [unicode, dotall, anchored]),
595    fun(Inp, Index) ->
596        case re:run(Inp, RE) of
597            {match, [{0, Length}|_]} ->
598                {Head, Tail} = erlang:split_binary(Inp, Length),
599                {Head, Tail, p_advance_index(Head, Index)};
600            _ -> {fail, {expected, {regexp, binary_to_list(Regexp)}, Index}}
601        end
602    end.
603-endif.
604
605-ifdef(line).
606-spec line(index() | term()) -> pos_integer() | undefined.
607line({{line,L},_}) -> L;
608line(_) -> undefined.
609-endif.
610
611-ifdef(column).
612-spec column(index() | term()) -> pos_integer() | undefined.
613column({_,{column,C}}) -> C;
614column(_) -> undefined.
615-endif.
616
617-spec p_advance_index(input() | unicode:charlist() | pos_integer(), index()) -> index().
618p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput)-> % strings
619  lists:foldl(fun p_advance_index/2, Index, unicode:characters_to_list(MatchedInput));
620p_advance_index(MatchedInput, Index) when is_integer(MatchedInput) -> % single characters
621  {{line, Line}, {column, Col}} = Index,
622  case MatchedInput of
623    $\n -> {{line, Line+1}, {column, 1}};
624    _ -> {{line, Line}, {column, Col+1}}
625  end.
626