1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2008-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20-module(re).
21-export([grun/3,urun/3,ucompile/2,replace/3,replace/4,split/2,split/3]).
22
23-type mp() :: {re_pattern, _, _, _, _}.
24
25-type nl_spec() :: cr | crlf | lf | anycrlf | any.
26
27-type compile_option() :: unicode | anchored | caseless | dollar_endonly
28                        | dotall | extended | firstline | multiline
29                        | no_auto_capture | dupnames | ungreedy
30                        | {newline, nl_spec()}
31                        | bsr_anycrlf | bsr_unicode
32                        | no_start_optimize | ucp | never_utf.
33
34%%% BIFs
35
36-export([internal_run/4]).
37
38-export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]).
39
40-spec version() -> binary().
41
42%% We must inline these functions so that the stacktrace points to
43%% the correct function.
44-compile({inline, [badarg_with_cause/2, badarg_with_info/1]}).
45
46version() ->
47    erlang:nif_error(undef).
48
49-spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when
50      Regexp :: iodata(),
51      MP :: mp(),
52      ErrSpec :: {ErrString :: string(), Position :: non_neg_integer()}.
53
54compile(_) ->
55    erlang:nif_error(undef).
56
57-spec compile(Regexp, Options) -> {ok, MP} | {error, ErrSpec} when
58      Regexp :: iodata() | unicode:charlist(),
59      Options :: [Option],
60      Option :: compile_option(),
61      MP :: mp(),
62      ErrSpec :: {ErrString :: string(), Position :: non_neg_integer()}.
63
64compile(_, _) ->
65    erlang:nif_error(undef).
66
67-spec run(Subject, RE) -> {match, Captured} | nomatch when
68      Subject :: iodata() | unicode:charlist(),
69      RE :: mp() | iodata(),
70      Captured :: [CaptureData],
71      CaptureData :: {integer(), integer()}.
72
73run(_, _) ->
74    erlang:nif_error(undef).
75
76-spec run(Subject, RE, Options) -> {match, Captured} |
77                                   match |
78                                   nomatch |
79				   {error, ErrType} when
80      Subject :: iodata() | unicode:charlist(),
81      RE :: mp() | iodata() | unicode:charlist(),
82      Options :: [Option],
83      Option :: anchored | global | notbol | noteol | notempty
84	      | notempty_atstart | report_errors
85              | {offset, non_neg_integer()} |
86		{match_limit, non_neg_integer()} |
87		{match_limit_recursion, non_neg_integer()} |
88                {newline, NLSpec :: nl_spec()} |
89                bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
90                {capture, ValueSpec, Type} | CompileOpt,
91      Type :: index | list | binary,
92      ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
93      ValueList :: [ValueID],
94      ValueID :: integer() | string() | atom(),
95      CompileOpt :: compile_option(),
96      Captured :: [CaptureData] | [[CaptureData]],
97      CaptureData :: {integer(), integer()}
98                   | ListConversionData
99                   | binary(),
100      ListConversionData :: string()
101                          | {error, string(), binary()}
102                          | {incomplete, string(), binary()},
103      ErrType :: match_limit | match_limit_recursion | {compile,  CompileErr},
104      CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}.
105
106run(_, _, _) ->
107    erlang:nif_error(undef).
108
109-spec internal_run(Subject, RE, Options, FirstCall) -> {match, Captured} |
110                                                       match |
111                                                       nomatch |
112                                                       {error, ErrType} when
113      Subject :: iodata() | unicode:charlist(),
114      RE :: mp() | iodata() | unicode:charlist(),
115      Options :: [Option],
116      Option :: anchored | global | notbol | noteol | notempty
117	      | notempty_atstart | report_errors
118              | {offset, non_neg_integer()} |
119		{match_limit, non_neg_integer()} |
120		{match_limit_recursion, non_neg_integer()} |
121                {newline, NLSpec :: nl_spec()} |
122                bsr_anycrlf | bsr_unicode | {capture, ValueSpec} |
123                {capture, ValueSpec, Type} | CompileOpt,
124      Type :: index | list | binary,
125      ValueSpec :: all | all_but_first | all_names | first | none | ValueList,
126      ValueList :: [ValueID],
127      ValueID :: integer() | string() | atom(),
128      CompileOpt :: compile_option(),
129      Captured :: [CaptureData] | [[CaptureData]],
130      CaptureData :: {integer(), integer()}
131                   | ListConversionData
132                   | binary(),
133      ListConversionData :: string()
134                          | {error, string(), binary()}
135                          | {incomplete, string(), binary()},
136      ErrType :: match_limit | match_limit_recursion | {compile,  CompileErr},
137      CompileErr :: {ErrString :: string(), Position :: non_neg_integer()},
138      FirstCall :: boolean().
139
140internal_run(_, _, _, _) ->
141    erlang:nif_error(undef).
142
143-spec inspect(MP,Item) -> {namelist, [ binary() ]} when
144      MP :: mp(),
145      Item :: namelist.
146
147inspect(_,_) ->
148    erlang:nif_error(undef).
149
150
151%%% End of BIFs
152
153-spec split(Subject, RE) -> SplitList when
154      Subject :: iodata() | unicode:charlist(),
155      RE :: mp() | iodata(),
156      SplitList :: [iodata() | unicode:charlist()].
157
158split(Subject,RE) ->
159    try
160        split(Subject,RE,[])
161    catch
162        error:_ ->
163            badarg_with_info([Subject,RE])
164    end.
165
166-spec split(Subject, RE, Options) -> SplitList when
167      Subject :: iodata() | unicode:charlist(),
168      RE :: mp() | iodata() | unicode:charlist(),
169      Options :: [ Option ],
170      Option :: anchored | notbol | noteol | notempty | notempty_atstart
171              | {offset, non_neg_integer()} | {newline, nl_spec()}
172              | {match_limit, non_neg_integer()}
173              | {match_limit_recursion, non_neg_integer()}
174              | bsr_anycrlf | bsr_unicode | {return, ReturnType}
175              | {parts, NumParts} | group | trim | CompileOpt,
176      NumParts :: non_neg_integer() | infinity,
177      ReturnType :: iodata | list | binary,
178      CompileOpt :: compile_option(),
179      SplitList :: [RetData] | [GroupedRetData],
180      GroupedRetData :: [RetData],
181      RetData :: iodata() | unicode:charlist() | binary() | list().
182
183split(Subject,RE,Options) ->
184    try
185    {NewOpt,Convert,Limit,Strip,Group} =
186	process_split_params(Options,iodata,-1,false,false),
187    Unicode = check_for_unicode(RE, Options),
188    FlatSubject = to_binary(Subject, Unicode),
189    case compile_split(RE,NewOpt) of
190	{error,_Err} ->
191	    throw(badre);
192	{PreCompiled, NumSub, RunOpt} ->
193	    %% OK, lets run
194	    case re:run(FlatSubject,PreCompiled,RunOpt ++ [global]) of
195		nomatch ->
196		    case Group of
197			true ->
198			    convert_any_split_result([[FlatSubject]],
199						     Convert, Unicode, true);
200			false ->
201			    convert_any_split_result([FlatSubject],
202						     Convert, Unicode, false)
203		    end;
204		{match, Matches} ->
205		    Res = do_split(FlatSubject, 0, Matches, NumSub,
206				   Limit, Group),
207		    Stripped = case Strip of
208				   true ->
209				       backstrip_empty(Res,Group);
210				   false ->
211				       Res
212			       end,
213		    convert_any_split_result(Stripped, Convert, Unicode, Group)
214	    end
215    end
216    catch
217	throw:badopt ->
218	    badarg_with_cause([Subject,RE,Options], badopt);
219	throw:badre ->
220	    badarg_with_info([Subject,RE,Options]);
221	error:badarg ->
222	    badarg_with_info([Subject,RE,Options])
223    end.
224
225backstrip_empty(List, false) ->
226    do_backstrip_empty(List);
227backstrip_empty(List, true) ->
228    do_backstrip_empty_g(List).
229
230do_backstrip_empty_g([]) ->
231    [];
232do_backstrip_empty_g([H]) ->
233    case do_backstrip_empty(H) of
234	[] ->
235	    [];
236	_ ->
237	    [H]
238    end;
239do_backstrip_empty_g([H|T]) ->
240    case do_backstrip_empty_g(T) of
241	[] ->
242	    case do_backstrip_empty(H) of
243		[] ->
244		    [];
245		_ ->
246		    [H]
247	    end;
248	Other ->
249	    [H|Other]
250    end.
251
252do_backstrip_empty([]) ->
253    [];
254do_backstrip_empty([<<>>]) ->
255    [];
256do_backstrip_empty([<<>>|T]) ->
257    case do_backstrip_empty(T) of
258	[] ->
259	    [];
260	Other ->
261	    [<<>>|Other]
262    end;
263do_backstrip_empty([H|T]) ->
264    [H|do_backstrip_empty(T)].
265
266convert_any_split_result(List,Type,Uni,true) ->
267    [ convert_split_result(Part,Type,Uni) || Part <- List ];
268convert_any_split_result(List,Type,Uni, false) ->
269    convert_split_result(List,Type,Uni).
270
271convert_split_result(List, iodata, _Unicode) ->
272    List;
273convert_split_result(List, binary, _Unicode) ->
274    %% As it happens, the iodata is actually binaries
275    List;
276convert_split_result(List, list, true) ->
277    [unicode:characters_to_list(Element,unicode) || Element <- List];
278convert_split_result(List, list, false) ->
279    [binary_to_list(Element) || Element <- List].
280
281do_split(Subj, Off,  _, _, 0, false) ->
282    <<_:Off/binary,Rest/binary>> = Subj,
283    [Rest];
284do_split(Subj, Off, [], _, _, false) ->
285    <<_:Off/binary,Rest/binary>> = Subj,
286    [Rest];
287do_split(Subj, Off, _, _, _,false) when byte_size(Subj) =< Off ->
288    [<<>>];
289do_split(Subj, Off,  _, _, 0, true) ->
290    <<_:Off/binary,Rest/binary>> = Subj,
291    [[Rest]];
292do_split(Subj, Off, [], _, _, true) ->
293    <<_:Off/binary,Rest/binary>> = Subj,
294    [[Rest]];
295do_split(Subj, Off, _, _, _,true) when byte_size(Subj) =< Off ->
296    [[<<>>]];
297do_split(Subj, Offset, [[{MainI,MainL}|Sub]|T], NumSub, Limit, Group) ->
298    NewOffset = MainI+MainL,
299    KeptLen =  MainI - Offset,
300    case {KeptLen,empty_sub(Sub),MainL} of
301	{0,true,0} ->
302	    do_split(Subj,NewOffset,T,NumSub,Limit,Group);
303	_ ->
304	    <<_:Offset/binary,Keep:KeptLen/binary,_/binary>> = Subj,
305	    ESub = extend_subpatterns(Sub,NumSub),
306	    Tail = do_split(Subj, NewOffset, T, NumSub, Limit - 1,Group),
307	    case Group of
308		false ->
309		    [Keep | dig_subpatterns(Subj,lists:reverse(ESub),Tail)];
310		true ->
311		    [[Keep | dig_subpatterns(Subj,lists:reverse(ESub),[])]|
312		     Tail]
313	    end
314    end.
315empty_sub([]) ->
316    true;
317empty_sub([{_,0}|T]) ->
318    empty_sub(T);
319empty_sub(_) ->
320    false.
321
322dig_subpatterns(_,[],Acc) ->
323    Acc;
324dig_subpatterns(Subj,[{-1,0}|T],Acc) ->
325    dig_subpatterns(Subj,T,[<<>>|Acc]);
326dig_subpatterns(Subj,[{I,L}|T],Acc) ->
327    <<_:I/binary,Part:L/binary,_/binary>> = Subj,
328    dig_subpatterns(Subj,T,[Part|Acc]).
329
330extend_subpatterns(_,0) ->
331    [];
332extend_subpatterns([],N) ->
333    [{0,0} | extend_subpatterns([],N-1)];
334extend_subpatterns([H|T],N) ->
335    [H | extend_subpatterns(T,N-1)].
336
337compile_split({re_pattern,N,_,_,_} = Comp, Options) ->
338    {Comp,N,Options};
339compile_split(Pat,Options0) when not is_tuple(Pat) ->
340    Options = lists:filter(fun(O) ->
341				   (not runopt(O))
342			   end, Options0),
343    case re:compile(Pat,Options) of
344	{error,Err} ->
345	    {error,Err};
346	{ok, {re_pattern,N,_,_,_} = Comp} ->
347	    NewOpt = lists:filter(fun(OO) -> (not copt(OO)) end, Options0),
348	    {Comp,N,NewOpt}
349    end;
350compile_split(_,_) ->
351    throw(badre).
352
353-spec replace(Subject, RE, Replacement) -> iodata() | unicode:charlist() when
354      Subject :: iodata() | unicode:charlist(),
355      RE :: mp() | iodata(),
356      Replacement :: iodata() | unicode:charlist().
357
358replace(Subject,RE,Replacement) ->
359    try
360        replace(Subject,RE,Replacement,[])
361    catch
362        error:_ ->
363            badarg_with_info([Subject,RE,Replacement])
364    end.
365
366-spec replace(Subject, RE, Replacement, Options) -> iodata() | unicode:charlist() when
367      Subject :: iodata() | unicode:charlist(),
368      RE :: mp() | iodata() | unicode:charlist(),
369      Replacement :: iodata() | unicode:charlist(),
370      Options :: [Option],
371      Option :: anchored | global | notbol | noteol | notempty
372	      | notempty_atstart
373              | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf
374              | {match_limit, non_neg_integer()}
375              | {match_limit_recursion, non_neg_integer()}
376              | bsr_unicode | {return, ReturnType} | CompileOpt,
377      ReturnType :: iodata | list | binary,
378      CompileOpt :: compile_option(),
379      NLSpec :: cr | crlf | lf | anycrlf | any.
380
381replace(Subject,RE,Replacement,Options) ->
382    try
383    {NewOpt,Convert} = process_repl_params(Options,iodata),
384    Unicode = check_for_unicode(RE, Options),
385    FlatSubject = to_binary(Subject, Unicode),
386    FlatReplacement = to_binary(Replacement, Unicode),
387    IoList = do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt),
388	case Convert of
389	    iodata ->
390		IoList;
391	    binary ->
392		case Unicode of
393		    false ->
394			iolist_to_binary(IoList);
395		    true ->
396			unicode:characters_to_binary(IoList,unicode)
397		end;
398	    list ->
399		case Unicode of
400		    false ->
401			binary_to_list(iolist_to_binary(IoList));
402		    true ->
403			unicode:characters_to_list(IoList,unicode)
404		end
405	end
406    catch
407	throw:badopt ->
408	    badarg_with_cause([Subject,RE,Replacement,Options], badopt);
409	throw:badre ->
410	    badarg_with_info([Subject,RE,Replacement,Options]);
411	error:badarg ->
412	    badarg_with_info([Subject,RE,Replacement,Options])
413    end.
414
415
416do_replace(FlatSubject,Subject,RE,Replacement,Options) ->
417    case re:run(FlatSubject,RE,Options) of
418	nomatch ->
419	    Subject;
420	{match,[Mlist|T]} when is_list(Mlist) ->
421	    apply_mlist(FlatSubject,Replacement,[Mlist|T]);
422	{match,Slist} ->
423	    apply_mlist(FlatSubject,Replacement,[Slist])
424    end.
425
426process_repl_params([],Convert) ->
427    {[],Convert};
428process_repl_params([report_errors|_],_) ->
429    throw(badopt);
430process_repl_params([{capture,_,_}|_],_) ->
431    throw(badopt);
432process_repl_params([{capture,_}|_],_) ->
433    throw(badopt);
434process_repl_params([{return,iodata}|T],_C) ->
435    process_repl_params(T,iodata);
436process_repl_params([{return,list}|T],_C) ->
437    process_repl_params(T,list);
438process_repl_params([{return,binary}|T],_C) ->
439    process_repl_params(T,binary);
440process_repl_params([{return,_}|_],_) ->
441    throw(badopt);
442process_repl_params([H|T],C) ->
443    {NT,NC} = process_repl_params(T,C),
444    {[H|NT],NC};
445process_repl_params(_,_) ->
446    throw(badopt).
447
448process_split_params([],Convert,Limit,Strip,Group) ->
449    {[],Convert,Limit,Strip,Group};
450process_split_params([trim|T],C,_L,_S,G) ->
451    process_split_params(T,C,-1,true,G);
452process_split_params([{parts,0}|T],C,_L,_S,G) ->
453    process_split_params(T,C,-1,true,G);
454process_split_params([{parts,N}|T],C,_L,_S,G) when is_integer(N), N >= 1 ->
455    process_split_params(T,C,N-1,false,G);
456process_split_params([{parts,infinity}|T],C,_L,_S,G) ->
457    process_split_params(T,C,-1,false,G);
458process_split_params([{parts,_}|_],_,_,_,_) ->
459    throw(badopt);
460process_split_params([group|T],C,L,S,_G) ->
461    process_split_params(T,C,L,S,true);
462process_split_params([global|_],_,_,_,_) ->
463    throw(badopt);
464process_split_params([report_errors|_],_,_,_,_) ->
465    throw(badopt);
466process_split_params([{capture,_,_}|_],_,_,_,_) ->
467    throw(badopt);
468process_split_params([{capture,_}|_],_,_,_,_) ->
469    throw(badopt);
470process_split_params([{return,iodata}|T],_C,L,S,G) ->
471    process_split_params(T,iodata,L,S,G);
472process_split_params([{return,list}|T],_C,L,S,G) ->
473    process_split_params(T,list,L,S,G);
474process_split_params([{return,binary}|T],_C,L,S,G) ->
475    process_split_params(T,binary,L,S,G);
476process_split_params([{return,_}|_],_,_,_,_) ->
477    throw(badopt);
478process_split_params([H|T],C,L,S,G) ->
479    {NT,NC,NL,NS,NG} = process_split_params(T,C,L,S,G),
480    {[H|NT],NC,NL,NS,NG};
481process_split_params(_,_,_,_,_) ->
482    throw(badopt).
483
484apply_mlist(Subject,Replacement,Mlist) ->
485    do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist).
486
487
488precomp_repl(<<>>) ->
489    [];
490precomp_repl(<<$\\,$g,${,Rest/binary>>) when byte_size(Rest) > 0 ->
491    {NS, <<$},NRest/binary>>} = pick_int(Rest),
492    [list_to_integer(NS) | precomp_repl(NRest)];
493precomp_repl(<<$\\,$g,Rest/binary>>) when byte_size(Rest) > 0 ->
494    {NS,NRest} = pick_int(Rest),
495    [list_to_integer(NS) | precomp_repl(NRest)];
496precomp_repl(<<$\\,X,Rest/binary>>) when X < $1 ; X > $9 ->
497    %% Escaped character
498    case precomp_repl(Rest) of
499	[BHead | T0] when is_binary(BHead) ->
500	    [<<X,BHead/binary>> | T0];
501	Other ->
502	    [<<X>> | Other]
503    end;
504precomp_repl(<<$\\,Rest/binary>>) when byte_size(Rest) > 0->
505    {NS,NRest} = pick_int(Rest),
506    [list_to_integer(NS) | precomp_repl(NRest)];
507precomp_repl(<<$&,Rest/binary>>) ->
508    [0 | precomp_repl(Rest)];
509precomp_repl(<<X,Rest/binary>>) ->
510    case precomp_repl(Rest) of
511	[BHead | T0] when is_binary(BHead) ->
512	    [<<X,BHead/binary>> | T0];
513	Other ->
514	    [<<X>> | Other]
515    end.
516
517
518
519pick_int(<<X,R/binary>>) when X >= $0, X =< $9 ->
520    {Found,Rest} = pick_int(R),
521    {[X|Found],Rest};
522pick_int(Bin) ->
523    {[],Bin}.
524
525do_mlist(_,<<>>,_,_,[]) ->
526    []; %Avoid empty binary tail
527do_mlist(_,Subject,_,_,[]) ->
528    Subject;
529do_mlist(Whole,Subject,Pos,Repl,[[{MPos,Count} | Sub] | Tail])
530  when MPos > Pos ->
531    EatLength = MPos - Pos,
532    <<Untouched:EatLength/binary, Rest/binary>> = Subject,
533    [Untouched | do_mlist(Whole,Rest, MPos, Repl,
534			  [[{MPos,Count} | Sub] | Tail])];
535do_mlist(Whole,Subject,Pos,Repl,[[{MPos,Count} | Sub] | Tail])
536  when MPos =:= Pos ->
537    EatLength = Count,
538    <<_:EatLength/binary,Rest/binary>> = Subject,
539    NewData = do_replace(Whole,Repl,[{MPos,Count} | Sub]),
540    [NewData | do_mlist(Whole,Rest,Pos+EatLength,Repl,Tail)].
541
542
543do_replace(_,[Bin],_) when is_binary(Bin) ->
544    Bin;
545do_replace(Subject,Repl,SubExprs0) ->
546    SubExprs = list_to_tuple(SubExprs0),
547    [ case Part of
548	  N when is_integer(N) ->
549	      if
550		  tuple_size(SubExprs) =< N ->
551		      <<>>;
552		  true ->
553		      {SPos,SLen} = element(N+1,SubExprs),
554		      if
555			  SPos < 0 ->
556			      <<>>;
557			  true ->
558			      <<_:SPos/binary,Res:SLen/binary,_/binary>> =
559				  Subject,
560			      Res
561		      end
562	      end;
563	  Other ->
564	      Other
565      end || Part <- Repl ].
566
567
568check_for_unicode({re_pattern,_,1,_,_},_) ->
569    true;
570check_for_unicode({re_pattern,_,0,_,_},_) ->
571    false;
572check_for_unicode(_,L) ->
573    lists:member(unicode,L).
574
575check_for_crlf({re_pattern,_,_,1,_},_) ->
576    true;
577check_for_crlf({re_pattern,_,_,0,_},_) ->
578    false;
579check_for_crlf(_,L) ->
580    case lists:keysearch(newline,1,L) of
581	{value,{newline,any}} -> true;
582	{value,{newline,crlf}} -> true;
583	{value,{newline,anycrlf}} -> true;
584	_ -> false
585    end.
586
587% SelectReturn = false | all | stirpfirst | none
588% ConvertReturn = index | list | binary
589% {capture, all} -> all (untouchded)
590% {capture, all_names} -> if names are present: treated as a name {capture, [...]}
591%                                      else:    same as {capture, []}
592% {capture, first} -> kept in argument list and Select all
593% {capture, all_but_first} -> removed from argument list and selects stripfirst
594% {capture, none} ->  removed from argument list and selects none
595% {capture, []} -> removed from argument list and selects none
596% {capture,[...]} -> 0 added to selection list and selects stripfirst
597% SelectReturn false is same as all in the end.
598
599% Call as process_parameters([],0,false,index,NeedClean)
600
601process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_,_) ->
602    {[], InitialOffset, SelectReturn, ConvertReturn};
603process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC,RE) ->
604    process_parameters(T,N,Select0,Return0,CC,RE);
605process_parameters([global | T],Init0,Select0,Return0,CC,RE) ->
606    process_parameters(T,Init0,Select0,Return0,CC,RE);
607process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC,RE) ->
608    process_parameters([{capture,Values}|T],Init0,Select0,Type,CC,RE);
609process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC,RE) ->
610    % First process the rest to see if capture was already present
611    {NewTail, Init1, Select1, Return1} =
612	process_parameters(T,Init0,Select0,Return0,CC,RE),
613    case Select1 of
614	false ->
615	    case Values of
616		all ->
617		    {[{capture,all} | NewTail], Init1, all, Return0};
618		all_names ->
619		    case re:inspect(RE,namelist) of
620			{namelist, []} ->
621			    {[{capture,first} | NewTail], Init1, none, Return0};
622			{namelist, List} ->
623			    {[{capture,[0|List]} | NewTail], Init1, stripfirst, Return0}
624		    end;
625		first ->
626		    {[{capture,first} | NewTail], Init1, all, Return0};
627		all_but_first ->
628		    {[{capture,all} | NewTail], Init1, stripfirst, Return0};
629		none ->
630		    {[{capture,first} | NewTail], Init1, none, Return0};
631		[] ->
632		    {[{capture,first} | NewTail], Init1, none, Return0};
633		List when is_list(List) ->
634		    {[{capture,[0|List]} | NewTail],
635		     Init1, stripfirst, Return0};
636		_ ->
637		    throw(badlist)
638	    end;
639	_ ->
640	    % Found overriding further down list, ignore this one
641	    {NewTail, Init1, Select1, Return1}
642    end;
643process_parameters([H|T],Init0,Select0,Return0,true,RE) ->
644    case copt(H) of
645	true ->
646	    process_parameters(T,Init0,Select0,Return0,true,RE);
647	false ->
648	    {NewT,Init,Select,Return} =
649		process_parameters(T,Init0,Select0,Return0,true,RE),
650	    {[H|NewT],Init,Select,Return}
651    end;
652process_parameters([H|T],Init0,Select0,Return0,false,RE) ->
653    {NewT,Init,Select,Return} =
654		process_parameters(T,Init0,Select0,Return0,false,RE),
655    {[H|NewT],Init,Select,Return};
656process_parameters(_,_,_,_,_,_) ->
657    throw(badlist).
658
659postprocess({match,[]},_,_,_,_) ->
660    nomatch;
661postprocess({match,_},none,_,_,_) ->
662    match;
663postprocess({match,M},Any,binary,Flat,Uni) ->
664    binarify(postprocess({match,M},Any,index,Flat,Uni),Flat);
665postprocess({match,M},Any,list,Flat,Uni) ->
666    listify(postprocess({match,M},Any,index,Flat,Uni),Flat,Uni);
667postprocess({match,M},all,index,_,_) ->
668    {match,M};
669postprocess({match,M},false,index,_,_) ->
670    {match,M};
671postprocess({match,M},stripfirst,index,_,_) ->
672    {match, [ T || [_|T] <- M ]}.
673
674binarify({match,M},Flat) ->
675    {match, [ [ case {I,L} of
676		    {-1,0} ->
677			<<>>;
678		    {SPos,SLen} ->
679			<<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat,
680			Res
681		end || {I,L} <- One ] || One <- M ]}.
682listify({match,M},Flat,Uni) ->
683    {match, [ [ case {I,L} of
684	    {_,0} ->
685		[];
686	    {SPos,SLen} ->
687		case Uni of
688		    true ->
689			<<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat,
690			unicode:characters_to_list(Res,unicode);
691		    false ->
692			Start = SPos + 1,
693			End = SPos + SLen,
694			binary_to_list(Flat,Start,End)
695		end
696	end || {I,L} <- One ] || One <- M ]}.
697
698ubinarify({match,M},Flat) ->
699    {match, [ case {I,L} of
700		  {-1,0} ->
701		      <<>>;
702		  {SPos,SLen} ->
703		      <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat,
704		      Res
705		end || {I,L} <- M ]};
706ubinarify(Else,_) ->
707    Else.
708ulistify({match,M},Flat) ->
709    {match, [ case {I,L} of
710	    {_,0} ->
711		[];
712	    {SPos,SLen} ->
713		      <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat,
714		      unicode:characters_to_list(Res,unicode)
715	      end || {I,L} <- M ]};
716ulistify(Else,_) ->
717    Else.
718
719process_uparams([global|_T],_RetType) ->
720    throw(false);
721process_uparams([{capture,Values,Type}|T],_OldType) ->
722    process_uparams([{capture,Values}|T],Type);
723process_uparams([H|T],Type) ->
724    {NL,NType} = process_uparams(T,Type),
725    {[H|NL],NType};
726process_uparams([],Type) ->
727    {[],Type}.
728
729
730ucompile(RE,Options) ->
731    try
732	re:compile(unicode:characters_to_binary(RE,unicode),Options)
733    catch
734	error:AnyError ->
735	    {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} =
736		(catch erlang:error(new_stacktrace,
737				    [RE,Options])),
738	    erlang:raise(error,AnyError,[{Mod,compile,L,Loc}|Rest])
739    end.
740
741
742urun(Subject,RE,Options) ->
743    try
744	urun2(Subject,RE,Options)
745    catch
746	error:AnyError ->
747	    {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} =
748		(catch erlang:error(new_stacktrace,
749				    [Subject,RE,Options])),
750	    erlang:raise(error,AnyError,[{Mod,run,L,Loc}|Rest])
751    end.
752
753urun2(Subject0,RE0,Options0) ->
754    {Options,RetType} = case (catch process_uparams(Options0,index)) of
755			    {A,B} ->
756				{A,B};
757			    _ ->
758				{Options0,false}
759			end,
760    Subject = unicode:characters_to_binary(Subject0,unicode),
761    RE = case RE0 of
762	     BinRE when is_binary(BinRE) ->
763		 BinRE;
764	     {re_pattern,_,_,_,_} = ReCompiled ->
765		 ReCompiled;
766	     ListRE ->
767		 unicode:characters_to_binary(ListRE,unicode)
768	 end,
769    Ret = re:run(Subject,RE,Options),
770    case RetType of
771	binary ->
772	    ubinarify(Ret,Subject);
773	list ->
774	    ulistify(Ret,Subject);
775	_ ->
776	    Ret
777    end.
778
779
780%% Might be called either with two-tuple (if regexp was already compiled)
781%% or with 3-tuple (saving original RE for exceptions
782grun(Subject,RE,{Options,NeedClean}) ->
783    try
784	grun2(Subject,RE,{Options,NeedClean})
785    catch
786	error:AnyError ->
787	    {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} =
788		(catch erlang:error(new_stacktrace,
789				    [Subject,RE,Options])),
790	    erlang:raise(error,AnyError,[{Mod,run,L,Loc}|Rest])
791    end;
792grun(Subject,RE,{Options,NeedClean,OrigRE}) ->
793    try
794	grun2(Subject,RE,{Options,NeedClean})
795    catch
796	error:AnyError ->
797	    {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} =
798		(catch erlang:error(new_stacktrace,
799				    [Subject,OrigRE,Options])),
800	    erlang:raise(error,AnyError,[{Mod,run,L,Loc}|Rest])
801    end.
802
803grun2(Subject,RE,{Options,NeedClean}) ->
804    Unicode = check_for_unicode(RE,Options),
805    CRLF = check_for_crlf(RE,Options),
806    FlatSubject = to_binary(Subject, Unicode),
807    do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options,NeedClean}).
808
809do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) ->
810    {StrippedOptions, InitialOffset,
811     SelectReturn, ConvertReturn} =
812	case (catch
813		  process_parameters(Options0, 0, false, index, NeedClean,RE)) of
814	    badlist ->
815		erlang:error(badarg,[Subject,RE,Options0]);
816	    CorrectReturn ->
817		CorrectReturn
818	end,
819    try
820	postprocess(loopexec(FlatSubject,RE,InitialOffset,
821			     byte_size(FlatSubject),
822			     Unicode,CRLF,StrippedOptions,true),
823		    SelectReturn,ConvertReturn,FlatSubject,Unicode)
824    catch
825	throw:ErrTuple ->
826	    ErrTuple
827    end.
828
829loopexec(_,_,X,Y,_,_,_,_) when X > Y ->
830    {match,[]};
831loopexec(Subject,RE,X,Y,Unicode,CRLF,Options, First) ->
832    case re:internal_run(Subject,RE,[{offset,X}]++Options,First) of
833	{error, Err} ->
834	    throw({error,Err});
835	nomatch ->
836	    {match,[]};
837	{match,[{A,B}|More]} ->
838	    {match,Rest} =
839		case B>0 of
840		    true ->
841			loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options,false);
842		    false ->
843			{match,M} =
844			    case re:internal_run(Subject,RE,[{offset,X},notempty_atstart,
845                                                             anchored]++Options,false) of
846				nomatch ->
847				    {match,[]};
848				{match,Other} ->
849				    {match,Other}
850			    end,
851			NewA = case M of
852				   [{_,NStep}|_] when NStep > 0 ->
853				       A+NStep;
854				   _ ->
855				       forward(Subject,A,1,Unicode,CRLF)
856			       end,
857			{match,MM} = loopexec(Subject,RE,NewA,Y,
858					      Unicode,CRLF,Options,false),
859			case M of
860			    [] ->
861				{match,MM};
862			    _ ->
863				{match,[M | MM]}
864			end
865		end,
866	    {match,[[{A,B}|More] | Rest]}
867    end.
868
869forward(_Chal,A,0,_,_) ->
870    A;
871forward(Chal,A,N,U,true) ->
872    <<_:A/binary,Tl/binary>> = Chal,
873    case Tl of
874	<<$\r,$\n,_/binary>> ->
875	    forward(Chal,A+2,N-1,U,true);
876	_ ->
877	    forward2(Chal,A,N,U,true)
878    end;
879forward(Chal,A,N,U,false) ->
880    forward2(Chal,A,N,U,false).
881
882forward2(Chal,A,N,false,CRLF) ->
883    forward(Chal,A+1,N-1,false,CRLF);
884forward2(Chal,A,N,true,CRLF) ->
885    <<_:A/binary,Tl/binary>> = Chal,
886    Forw = case Tl of
887	       <<1:1,1:1,0:1,_:5,_/binary>>  ->
888		   2;
889	       <<1:1,1:1,1:1,0:1,_:4,_/binary>>  ->
890		   3;
891	       <<1:1,1:1,1:1,1:1,0:1,_:3,_/binary>>  ->
892		   4;
893	       _ ->
894		   1
895	   end,
896    forward(Chal,A+Forw,N-1,true,CRLF).
897
898copt(caseless) ->
899    true;
900copt(no_start_optimize) ->
901    true;
902copt(never_utf) ->
903    true;
904copt(ucp) ->
905    true;
906copt(dollar_endonly) ->
907    true;
908copt(dotall) ->
909    true;
910copt(extended) ->
911    true;
912copt(firstline) ->
913    true;
914copt(multiline) ->
915    true;
916copt(no_auto_capture) ->
917    true;
918copt(dupnames) ->
919    true;
920copt(ungreedy) ->
921    true;
922copt(unicode) ->
923    true;
924copt(_) ->
925    false.
926
927%bothopt({newline,_}) ->
928%    true;
929%bothopt(anchored) ->
930%    true;
931%bothopt(_) ->
932%    false.
933
934runopt(notempty) ->
935    true;
936runopt(notempty_atstart) ->
937    true;
938runopt(notbol) ->
939    true;
940runopt(noteol) ->
941    true;
942runopt({offset,_}) ->
943    true;
944runopt({capture,_,_}) ->
945    true;
946runopt({capture,_}) ->
947    true;
948runopt(global) ->
949    true;
950runopt({match_limit,_}) ->
951    true;
952runopt({match_limit_recursion,_}) ->
953    true;
954runopt(_) ->
955    false.
956
957to_binary(Bin, _IsUnicode) when is_binary(Bin) ->
958    Bin;
959to_binary(Data, true) ->
960    unicode:characters_to_binary(Data,unicode);
961to_binary(Data, false) ->
962    iolist_to_binary(Data).
963
964badarg_with_cause(Args, Cause) ->
965    erlang:error(badarg, Args, [{error_info, #{module => erl_stdlib_errors,
966                                               cause => Cause}}]).
967
968badarg_with_info(Args) ->
969    erlang:error(badarg, Args, [{error_info, #{module => erl_stdlib_errors}}]).
970