1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2008-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20-module(re). 21-export([grun/3,urun/3,ucompile/2,replace/3,replace/4,split/2,split/3]). 22 23-type mp() :: {re_pattern, _, _, _, _}. 24 25-type nl_spec() :: cr | crlf | lf | anycrlf | any. 26 27-type compile_option() :: unicode | anchored | caseless | dollar_endonly 28 | dotall | extended | firstline | multiline 29 | no_auto_capture | dupnames | ungreedy 30 | {newline, nl_spec()} 31 | bsr_anycrlf | bsr_unicode 32 | no_start_optimize | ucp | never_utf. 33 34%%% BIFs 35 36-export([internal_run/4]). 37 38-export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]). 39 40-spec version() -> binary(). 41 42%% We must inline these functions so that the stacktrace points to 43%% the correct function. 44-compile({inline, [badarg_with_cause/2, badarg_with_info/1]}). 45 46version() -> 47 erlang:nif_error(undef). 48 49-spec compile(Regexp) -> {ok, MP} | {error, ErrSpec} when 50 Regexp :: iodata(), 51 MP :: mp(), 52 ErrSpec :: {ErrString :: string(), Position :: non_neg_integer()}. 53 54compile(_) -> 55 erlang:nif_error(undef). 56 57-spec compile(Regexp, Options) -> {ok, MP} | {error, ErrSpec} when 58 Regexp :: iodata() | unicode:charlist(), 59 Options :: [Option], 60 Option :: compile_option(), 61 MP :: mp(), 62 ErrSpec :: {ErrString :: string(), Position :: non_neg_integer()}. 63 64compile(_, _) -> 65 erlang:nif_error(undef). 66 67-spec run(Subject, RE) -> {match, Captured} | nomatch when 68 Subject :: iodata() | unicode:charlist(), 69 RE :: mp() | iodata(), 70 Captured :: [CaptureData], 71 CaptureData :: {integer(), integer()}. 72 73run(_, _) -> 74 erlang:nif_error(undef). 75 76-spec run(Subject, RE, Options) -> {match, Captured} | 77 match | 78 nomatch | 79 {error, ErrType} when 80 Subject :: iodata() | unicode:charlist(), 81 RE :: mp() | iodata() | unicode:charlist(), 82 Options :: [Option], 83 Option :: anchored | global | notbol | noteol | notempty 84 | notempty_atstart | report_errors 85 | {offset, non_neg_integer()} | 86 {match_limit, non_neg_integer()} | 87 {match_limit_recursion, non_neg_integer()} | 88 {newline, NLSpec :: nl_spec()} | 89 bsr_anycrlf | bsr_unicode | {capture, ValueSpec} | 90 {capture, ValueSpec, Type} | CompileOpt, 91 Type :: index | list | binary, 92 ValueSpec :: all | all_but_first | all_names | first | none | ValueList, 93 ValueList :: [ValueID], 94 ValueID :: integer() | string() | atom(), 95 CompileOpt :: compile_option(), 96 Captured :: [CaptureData] | [[CaptureData]], 97 CaptureData :: {integer(), integer()} 98 | ListConversionData 99 | binary(), 100 ListConversionData :: string() 101 | {error, string(), binary()} 102 | {incomplete, string(), binary()}, 103 ErrType :: match_limit | match_limit_recursion | {compile, CompileErr}, 104 CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}. 105 106run(_, _, _) -> 107 erlang:nif_error(undef). 108 109-spec internal_run(Subject, RE, Options, FirstCall) -> {match, Captured} | 110 match | 111 nomatch | 112 {error, ErrType} when 113 Subject :: iodata() | unicode:charlist(), 114 RE :: mp() | iodata() | unicode:charlist(), 115 Options :: [Option], 116 Option :: anchored | global | notbol | noteol | notempty 117 | notempty_atstart | report_errors 118 | {offset, non_neg_integer()} | 119 {match_limit, non_neg_integer()} | 120 {match_limit_recursion, non_neg_integer()} | 121 {newline, NLSpec :: nl_spec()} | 122 bsr_anycrlf | bsr_unicode | {capture, ValueSpec} | 123 {capture, ValueSpec, Type} | CompileOpt, 124 Type :: index | list | binary, 125 ValueSpec :: all | all_but_first | all_names | first | none | ValueList, 126 ValueList :: [ValueID], 127 ValueID :: integer() | string() | atom(), 128 CompileOpt :: compile_option(), 129 Captured :: [CaptureData] | [[CaptureData]], 130 CaptureData :: {integer(), integer()} 131 | ListConversionData 132 | binary(), 133 ListConversionData :: string() 134 | {error, string(), binary()} 135 | {incomplete, string(), binary()}, 136 ErrType :: match_limit | match_limit_recursion | {compile, CompileErr}, 137 CompileErr :: {ErrString :: string(), Position :: non_neg_integer()}, 138 FirstCall :: boolean(). 139 140internal_run(_, _, _, _) -> 141 erlang:nif_error(undef). 142 143-spec inspect(MP,Item) -> {namelist, [ binary() ]} when 144 MP :: mp(), 145 Item :: namelist. 146 147inspect(_,_) -> 148 erlang:nif_error(undef). 149 150 151%%% End of BIFs 152 153-spec split(Subject, RE) -> SplitList when 154 Subject :: iodata() | unicode:charlist(), 155 RE :: mp() | iodata(), 156 SplitList :: [iodata() | unicode:charlist()]. 157 158split(Subject,RE) -> 159 try 160 split(Subject,RE,[]) 161 catch 162 error:_ -> 163 badarg_with_info([Subject,RE]) 164 end. 165 166-spec split(Subject, RE, Options) -> SplitList when 167 Subject :: iodata() | unicode:charlist(), 168 RE :: mp() | iodata() | unicode:charlist(), 169 Options :: [ Option ], 170 Option :: anchored | notbol | noteol | notempty | notempty_atstart 171 | {offset, non_neg_integer()} | {newline, nl_spec()} 172 | {match_limit, non_neg_integer()} 173 | {match_limit_recursion, non_neg_integer()} 174 | bsr_anycrlf | bsr_unicode | {return, ReturnType} 175 | {parts, NumParts} | group | trim | CompileOpt, 176 NumParts :: non_neg_integer() | infinity, 177 ReturnType :: iodata | list | binary, 178 CompileOpt :: compile_option(), 179 SplitList :: [RetData] | [GroupedRetData], 180 GroupedRetData :: [RetData], 181 RetData :: iodata() | unicode:charlist() | binary() | list(). 182 183split(Subject,RE,Options) -> 184 try 185 {NewOpt,Convert,Limit,Strip,Group} = 186 process_split_params(Options,iodata,-1,false,false), 187 Unicode = check_for_unicode(RE, Options), 188 FlatSubject = to_binary(Subject, Unicode), 189 case compile_split(RE,NewOpt) of 190 {error,_Err} -> 191 throw(badre); 192 {PreCompiled, NumSub, RunOpt} -> 193 %% OK, lets run 194 case re:run(FlatSubject,PreCompiled,RunOpt ++ [global]) of 195 nomatch -> 196 case Group of 197 true -> 198 convert_any_split_result([[FlatSubject]], 199 Convert, Unicode, true); 200 false -> 201 convert_any_split_result([FlatSubject], 202 Convert, Unicode, false) 203 end; 204 {match, Matches} -> 205 Res = do_split(FlatSubject, 0, Matches, NumSub, 206 Limit, Group), 207 Stripped = case Strip of 208 true -> 209 backstrip_empty(Res,Group); 210 false -> 211 Res 212 end, 213 convert_any_split_result(Stripped, Convert, Unicode, Group) 214 end 215 end 216 catch 217 throw:badopt -> 218 badarg_with_cause([Subject,RE,Options], badopt); 219 throw:badre -> 220 badarg_with_info([Subject,RE,Options]); 221 error:badarg -> 222 badarg_with_info([Subject,RE,Options]) 223 end. 224 225backstrip_empty(List, false) -> 226 do_backstrip_empty(List); 227backstrip_empty(List, true) -> 228 do_backstrip_empty_g(List). 229 230do_backstrip_empty_g([]) -> 231 []; 232do_backstrip_empty_g([H]) -> 233 case do_backstrip_empty(H) of 234 [] -> 235 []; 236 _ -> 237 [H] 238 end; 239do_backstrip_empty_g([H|T]) -> 240 case do_backstrip_empty_g(T) of 241 [] -> 242 case do_backstrip_empty(H) of 243 [] -> 244 []; 245 _ -> 246 [H] 247 end; 248 Other -> 249 [H|Other] 250 end. 251 252do_backstrip_empty([]) -> 253 []; 254do_backstrip_empty([<<>>]) -> 255 []; 256do_backstrip_empty([<<>>|T]) -> 257 case do_backstrip_empty(T) of 258 [] -> 259 []; 260 Other -> 261 [<<>>|Other] 262 end; 263do_backstrip_empty([H|T]) -> 264 [H|do_backstrip_empty(T)]. 265 266convert_any_split_result(List,Type,Uni,true) -> 267 [ convert_split_result(Part,Type,Uni) || Part <- List ]; 268convert_any_split_result(List,Type,Uni, false) -> 269 convert_split_result(List,Type,Uni). 270 271convert_split_result(List, iodata, _Unicode) -> 272 List; 273convert_split_result(List, binary, _Unicode) -> 274 %% As it happens, the iodata is actually binaries 275 List; 276convert_split_result(List, list, true) -> 277 [unicode:characters_to_list(Element,unicode) || Element <- List]; 278convert_split_result(List, list, false) -> 279 [binary_to_list(Element) || Element <- List]. 280 281do_split(Subj, Off, _, _, 0, false) -> 282 <<_:Off/binary,Rest/binary>> = Subj, 283 [Rest]; 284do_split(Subj, Off, [], _, _, false) -> 285 <<_:Off/binary,Rest/binary>> = Subj, 286 [Rest]; 287do_split(Subj, Off, _, _, _,false) when byte_size(Subj) =< Off -> 288 [<<>>]; 289do_split(Subj, Off, _, _, 0, true) -> 290 <<_:Off/binary,Rest/binary>> = Subj, 291 [[Rest]]; 292do_split(Subj, Off, [], _, _, true) -> 293 <<_:Off/binary,Rest/binary>> = Subj, 294 [[Rest]]; 295do_split(Subj, Off, _, _, _,true) when byte_size(Subj) =< Off -> 296 [[<<>>]]; 297do_split(Subj, Offset, [[{MainI,MainL}|Sub]|T], NumSub, Limit, Group) -> 298 NewOffset = MainI+MainL, 299 KeptLen = MainI - Offset, 300 case {KeptLen,empty_sub(Sub),MainL} of 301 {0,true,0} -> 302 do_split(Subj,NewOffset,T,NumSub,Limit,Group); 303 _ -> 304 <<_:Offset/binary,Keep:KeptLen/binary,_/binary>> = Subj, 305 ESub = extend_subpatterns(Sub,NumSub), 306 Tail = do_split(Subj, NewOffset, T, NumSub, Limit - 1,Group), 307 case Group of 308 false -> 309 [Keep | dig_subpatterns(Subj,lists:reverse(ESub),Tail)]; 310 true -> 311 [[Keep | dig_subpatterns(Subj,lists:reverse(ESub),[])]| 312 Tail] 313 end 314 end. 315empty_sub([]) -> 316 true; 317empty_sub([{_,0}|T]) -> 318 empty_sub(T); 319empty_sub(_) -> 320 false. 321 322dig_subpatterns(_,[],Acc) -> 323 Acc; 324dig_subpatterns(Subj,[{-1,0}|T],Acc) -> 325 dig_subpatterns(Subj,T,[<<>>|Acc]); 326dig_subpatterns(Subj,[{I,L}|T],Acc) -> 327 <<_:I/binary,Part:L/binary,_/binary>> = Subj, 328 dig_subpatterns(Subj,T,[Part|Acc]). 329 330extend_subpatterns(_,0) -> 331 []; 332extend_subpatterns([],N) -> 333 [{0,0} | extend_subpatterns([],N-1)]; 334extend_subpatterns([H|T],N) -> 335 [H | extend_subpatterns(T,N-1)]. 336 337compile_split({re_pattern,N,_,_,_} = Comp, Options) -> 338 {Comp,N,Options}; 339compile_split(Pat,Options0) when not is_tuple(Pat) -> 340 Options = lists:filter(fun(O) -> 341 (not runopt(O)) 342 end, Options0), 343 case re:compile(Pat,Options) of 344 {error,Err} -> 345 {error,Err}; 346 {ok, {re_pattern,N,_,_,_} = Comp} -> 347 NewOpt = lists:filter(fun(OO) -> (not copt(OO)) end, Options0), 348 {Comp,N,NewOpt} 349 end; 350compile_split(_,_) -> 351 throw(badre). 352 353-spec replace(Subject, RE, Replacement) -> iodata() | unicode:charlist() when 354 Subject :: iodata() | unicode:charlist(), 355 RE :: mp() | iodata(), 356 Replacement :: iodata() | unicode:charlist(). 357 358replace(Subject,RE,Replacement) -> 359 try 360 replace(Subject,RE,Replacement,[]) 361 catch 362 error:_ -> 363 badarg_with_info([Subject,RE,Replacement]) 364 end. 365 366-spec replace(Subject, RE, Replacement, Options) -> iodata() | unicode:charlist() when 367 Subject :: iodata() | unicode:charlist(), 368 RE :: mp() | iodata() | unicode:charlist(), 369 Replacement :: iodata() | unicode:charlist(), 370 Options :: [Option], 371 Option :: anchored | global | notbol | noteol | notempty 372 | notempty_atstart 373 | {offset, non_neg_integer()} | {newline, NLSpec} | bsr_anycrlf 374 | {match_limit, non_neg_integer()} 375 | {match_limit_recursion, non_neg_integer()} 376 | bsr_unicode | {return, ReturnType} | CompileOpt, 377 ReturnType :: iodata | list | binary, 378 CompileOpt :: compile_option(), 379 NLSpec :: cr | crlf | lf | anycrlf | any. 380 381replace(Subject,RE,Replacement,Options) -> 382 try 383 {NewOpt,Convert} = process_repl_params(Options,iodata), 384 Unicode = check_for_unicode(RE, Options), 385 FlatSubject = to_binary(Subject, Unicode), 386 FlatReplacement = to_binary(Replacement, Unicode), 387 IoList = do_replace(FlatSubject,Subject,RE,FlatReplacement,NewOpt), 388 case Convert of 389 iodata -> 390 IoList; 391 binary -> 392 case Unicode of 393 false -> 394 iolist_to_binary(IoList); 395 true -> 396 unicode:characters_to_binary(IoList,unicode) 397 end; 398 list -> 399 case Unicode of 400 false -> 401 binary_to_list(iolist_to_binary(IoList)); 402 true -> 403 unicode:characters_to_list(IoList,unicode) 404 end 405 end 406 catch 407 throw:badopt -> 408 badarg_with_cause([Subject,RE,Replacement,Options], badopt); 409 throw:badre -> 410 badarg_with_info([Subject,RE,Replacement,Options]); 411 error:badarg -> 412 badarg_with_info([Subject,RE,Replacement,Options]) 413 end. 414 415 416do_replace(FlatSubject,Subject,RE,Replacement,Options) -> 417 case re:run(FlatSubject,RE,Options) of 418 nomatch -> 419 Subject; 420 {match,[Mlist|T]} when is_list(Mlist) -> 421 apply_mlist(FlatSubject,Replacement,[Mlist|T]); 422 {match,Slist} -> 423 apply_mlist(FlatSubject,Replacement,[Slist]) 424 end. 425 426process_repl_params([],Convert) -> 427 {[],Convert}; 428process_repl_params([report_errors|_],_) -> 429 throw(badopt); 430process_repl_params([{capture,_,_}|_],_) -> 431 throw(badopt); 432process_repl_params([{capture,_}|_],_) -> 433 throw(badopt); 434process_repl_params([{return,iodata}|T],_C) -> 435 process_repl_params(T,iodata); 436process_repl_params([{return,list}|T],_C) -> 437 process_repl_params(T,list); 438process_repl_params([{return,binary}|T],_C) -> 439 process_repl_params(T,binary); 440process_repl_params([{return,_}|_],_) -> 441 throw(badopt); 442process_repl_params([H|T],C) -> 443 {NT,NC} = process_repl_params(T,C), 444 {[H|NT],NC}; 445process_repl_params(_,_) -> 446 throw(badopt). 447 448process_split_params([],Convert,Limit,Strip,Group) -> 449 {[],Convert,Limit,Strip,Group}; 450process_split_params([trim|T],C,_L,_S,G) -> 451 process_split_params(T,C,-1,true,G); 452process_split_params([{parts,0}|T],C,_L,_S,G) -> 453 process_split_params(T,C,-1,true,G); 454process_split_params([{parts,N}|T],C,_L,_S,G) when is_integer(N), N >= 1 -> 455 process_split_params(T,C,N-1,false,G); 456process_split_params([{parts,infinity}|T],C,_L,_S,G) -> 457 process_split_params(T,C,-1,false,G); 458process_split_params([{parts,_}|_],_,_,_,_) -> 459 throw(badopt); 460process_split_params([group|T],C,L,S,_G) -> 461 process_split_params(T,C,L,S,true); 462process_split_params([global|_],_,_,_,_) -> 463 throw(badopt); 464process_split_params([report_errors|_],_,_,_,_) -> 465 throw(badopt); 466process_split_params([{capture,_,_}|_],_,_,_,_) -> 467 throw(badopt); 468process_split_params([{capture,_}|_],_,_,_,_) -> 469 throw(badopt); 470process_split_params([{return,iodata}|T],_C,L,S,G) -> 471 process_split_params(T,iodata,L,S,G); 472process_split_params([{return,list}|T],_C,L,S,G) -> 473 process_split_params(T,list,L,S,G); 474process_split_params([{return,binary}|T],_C,L,S,G) -> 475 process_split_params(T,binary,L,S,G); 476process_split_params([{return,_}|_],_,_,_,_) -> 477 throw(badopt); 478process_split_params([H|T],C,L,S,G) -> 479 {NT,NC,NL,NS,NG} = process_split_params(T,C,L,S,G), 480 {[H|NT],NC,NL,NS,NG}; 481process_split_params(_,_,_,_,_) -> 482 throw(badopt). 483 484apply_mlist(Subject,Replacement,Mlist) -> 485 do_mlist(Subject,Subject,0,precomp_repl(Replacement), Mlist). 486 487 488precomp_repl(<<>>) -> 489 []; 490precomp_repl(<<$\\,$g,${,Rest/binary>>) when byte_size(Rest) > 0 -> 491 {NS, <<$},NRest/binary>>} = pick_int(Rest), 492 [list_to_integer(NS) | precomp_repl(NRest)]; 493precomp_repl(<<$\\,$g,Rest/binary>>) when byte_size(Rest) > 0 -> 494 {NS,NRest} = pick_int(Rest), 495 [list_to_integer(NS) | precomp_repl(NRest)]; 496precomp_repl(<<$\\,X,Rest/binary>>) when X < $1 ; X > $9 -> 497 %% Escaped character 498 case precomp_repl(Rest) of 499 [BHead | T0] when is_binary(BHead) -> 500 [<<X,BHead/binary>> | T0]; 501 Other -> 502 [<<X>> | Other] 503 end; 504precomp_repl(<<$\\,Rest/binary>>) when byte_size(Rest) > 0-> 505 {NS,NRest} = pick_int(Rest), 506 [list_to_integer(NS) | precomp_repl(NRest)]; 507precomp_repl(<<$&,Rest/binary>>) -> 508 [0 | precomp_repl(Rest)]; 509precomp_repl(<<X,Rest/binary>>) -> 510 case precomp_repl(Rest) of 511 [BHead | T0] when is_binary(BHead) -> 512 [<<X,BHead/binary>> | T0]; 513 Other -> 514 [<<X>> | Other] 515 end. 516 517 518 519pick_int(<<X,R/binary>>) when X >= $0, X =< $9 -> 520 {Found,Rest} = pick_int(R), 521 {[X|Found],Rest}; 522pick_int(Bin) -> 523 {[],Bin}. 524 525do_mlist(_,<<>>,_,_,[]) -> 526 []; %Avoid empty binary tail 527do_mlist(_,Subject,_,_,[]) -> 528 Subject; 529do_mlist(Whole,Subject,Pos,Repl,[[{MPos,Count} | Sub] | Tail]) 530 when MPos > Pos -> 531 EatLength = MPos - Pos, 532 <<Untouched:EatLength/binary, Rest/binary>> = Subject, 533 [Untouched | do_mlist(Whole,Rest, MPos, Repl, 534 [[{MPos,Count} | Sub] | Tail])]; 535do_mlist(Whole,Subject,Pos,Repl,[[{MPos,Count} | Sub] | Tail]) 536 when MPos =:= Pos -> 537 EatLength = Count, 538 <<_:EatLength/binary,Rest/binary>> = Subject, 539 NewData = do_replace(Whole,Repl,[{MPos,Count} | Sub]), 540 [NewData | do_mlist(Whole,Rest,Pos+EatLength,Repl,Tail)]. 541 542 543do_replace(_,[Bin],_) when is_binary(Bin) -> 544 Bin; 545do_replace(Subject,Repl,SubExprs0) -> 546 SubExprs = list_to_tuple(SubExprs0), 547 [ case Part of 548 N when is_integer(N) -> 549 if 550 tuple_size(SubExprs) =< N -> 551 <<>>; 552 true -> 553 {SPos,SLen} = element(N+1,SubExprs), 554 if 555 SPos < 0 -> 556 <<>>; 557 true -> 558 <<_:SPos/binary,Res:SLen/binary,_/binary>> = 559 Subject, 560 Res 561 end 562 end; 563 Other -> 564 Other 565 end || Part <- Repl ]. 566 567 568check_for_unicode({re_pattern,_,1,_,_},_) -> 569 true; 570check_for_unicode({re_pattern,_,0,_,_},_) -> 571 false; 572check_for_unicode(_,L) -> 573 lists:member(unicode,L). 574 575check_for_crlf({re_pattern,_,_,1,_},_) -> 576 true; 577check_for_crlf({re_pattern,_,_,0,_},_) -> 578 false; 579check_for_crlf(_,L) -> 580 case lists:keysearch(newline,1,L) of 581 {value,{newline,any}} -> true; 582 {value,{newline,crlf}} -> true; 583 {value,{newline,anycrlf}} -> true; 584 _ -> false 585 end. 586 587% SelectReturn = false | all | stirpfirst | none 588% ConvertReturn = index | list | binary 589% {capture, all} -> all (untouchded) 590% {capture, all_names} -> if names are present: treated as a name {capture, [...]} 591% else: same as {capture, []} 592% {capture, first} -> kept in argument list and Select all 593% {capture, all_but_first} -> removed from argument list and selects stripfirst 594% {capture, none} -> removed from argument list and selects none 595% {capture, []} -> removed from argument list and selects none 596% {capture,[...]} -> 0 added to selection list and selects stripfirst 597% SelectReturn false is same as all in the end. 598 599% Call as process_parameters([],0,false,index,NeedClean) 600 601process_parameters([],InitialOffset, SelectReturn, ConvertReturn,_,_) -> 602 {[], InitialOffset, SelectReturn, ConvertReturn}; 603process_parameters([{offset, N} | T],_Init0,Select0,Return0,CC,RE) -> 604 process_parameters(T,N,Select0,Return0,CC,RE); 605process_parameters([global | T],Init0,Select0,Return0,CC,RE) -> 606 process_parameters(T,Init0,Select0,Return0,CC,RE); 607process_parameters([{capture,Values,Type}|T],Init0,Select0,_Return0,CC,RE) -> 608 process_parameters([{capture,Values}|T],Init0,Select0,Type,CC,RE); 609process_parameters([{capture,Values}|T],Init0,Select0,Return0,CC,RE) -> 610 % First process the rest to see if capture was already present 611 {NewTail, Init1, Select1, Return1} = 612 process_parameters(T,Init0,Select0,Return0,CC,RE), 613 case Select1 of 614 false -> 615 case Values of 616 all -> 617 {[{capture,all} | NewTail], Init1, all, Return0}; 618 all_names -> 619 case re:inspect(RE,namelist) of 620 {namelist, []} -> 621 {[{capture,first} | NewTail], Init1, none, Return0}; 622 {namelist, List} -> 623 {[{capture,[0|List]} | NewTail], Init1, stripfirst, Return0} 624 end; 625 first -> 626 {[{capture,first} | NewTail], Init1, all, Return0}; 627 all_but_first -> 628 {[{capture,all} | NewTail], Init1, stripfirst, Return0}; 629 none -> 630 {[{capture,first} | NewTail], Init1, none, Return0}; 631 [] -> 632 {[{capture,first} | NewTail], Init1, none, Return0}; 633 List when is_list(List) -> 634 {[{capture,[0|List]} | NewTail], 635 Init1, stripfirst, Return0}; 636 _ -> 637 throw(badlist) 638 end; 639 _ -> 640 % Found overriding further down list, ignore this one 641 {NewTail, Init1, Select1, Return1} 642 end; 643process_parameters([H|T],Init0,Select0,Return0,true,RE) -> 644 case copt(H) of 645 true -> 646 process_parameters(T,Init0,Select0,Return0,true,RE); 647 false -> 648 {NewT,Init,Select,Return} = 649 process_parameters(T,Init0,Select0,Return0,true,RE), 650 {[H|NewT],Init,Select,Return} 651 end; 652process_parameters([H|T],Init0,Select0,Return0,false,RE) -> 653 {NewT,Init,Select,Return} = 654 process_parameters(T,Init0,Select0,Return0,false,RE), 655 {[H|NewT],Init,Select,Return}; 656process_parameters(_,_,_,_,_,_) -> 657 throw(badlist). 658 659postprocess({match,[]},_,_,_,_) -> 660 nomatch; 661postprocess({match,_},none,_,_,_) -> 662 match; 663postprocess({match,M},Any,binary,Flat,Uni) -> 664 binarify(postprocess({match,M},Any,index,Flat,Uni),Flat); 665postprocess({match,M},Any,list,Flat,Uni) -> 666 listify(postprocess({match,M},Any,index,Flat,Uni),Flat,Uni); 667postprocess({match,M},all,index,_,_) -> 668 {match,M}; 669postprocess({match,M},false,index,_,_) -> 670 {match,M}; 671postprocess({match,M},stripfirst,index,_,_) -> 672 {match, [ T || [_|T] <- M ]}. 673 674binarify({match,M},Flat) -> 675 {match, [ [ case {I,L} of 676 {-1,0} -> 677 <<>>; 678 {SPos,SLen} -> 679 <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, 680 Res 681 end || {I,L} <- One ] || One <- M ]}. 682listify({match,M},Flat,Uni) -> 683 {match, [ [ case {I,L} of 684 {_,0} -> 685 []; 686 {SPos,SLen} -> 687 case Uni of 688 true -> 689 <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, 690 unicode:characters_to_list(Res,unicode); 691 false -> 692 Start = SPos + 1, 693 End = SPos + SLen, 694 binary_to_list(Flat,Start,End) 695 end 696 end || {I,L} <- One ] || One <- M ]}. 697 698ubinarify({match,M},Flat) -> 699 {match, [ case {I,L} of 700 {-1,0} -> 701 <<>>; 702 {SPos,SLen} -> 703 <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, 704 Res 705 end || {I,L} <- M ]}; 706ubinarify(Else,_) -> 707 Else. 708ulistify({match,M},Flat) -> 709 {match, [ case {I,L} of 710 {_,0} -> 711 []; 712 {SPos,SLen} -> 713 <<_:SPos/binary,Res:SLen/binary,_/binary>> = Flat, 714 unicode:characters_to_list(Res,unicode) 715 end || {I,L} <- M ]}; 716ulistify(Else,_) -> 717 Else. 718 719process_uparams([global|_T],_RetType) -> 720 throw(false); 721process_uparams([{capture,Values,Type}|T],_OldType) -> 722 process_uparams([{capture,Values}|T],Type); 723process_uparams([H|T],Type) -> 724 {NL,NType} = process_uparams(T,Type), 725 {[H|NL],NType}; 726process_uparams([],Type) -> 727 {[],Type}. 728 729 730ucompile(RE,Options) -> 731 try 732 re:compile(unicode:characters_to_binary(RE,unicode),Options) 733 catch 734 error:AnyError -> 735 {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} = 736 (catch erlang:error(new_stacktrace, 737 [RE,Options])), 738 erlang:raise(error,AnyError,[{Mod,compile,L,Loc}|Rest]) 739 end. 740 741 742urun(Subject,RE,Options) -> 743 try 744 urun2(Subject,RE,Options) 745 catch 746 error:AnyError -> 747 {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} = 748 (catch erlang:error(new_stacktrace, 749 [Subject,RE,Options])), 750 erlang:raise(error,AnyError,[{Mod,run,L,Loc}|Rest]) 751 end. 752 753urun2(Subject0,RE0,Options0) -> 754 {Options,RetType} = case (catch process_uparams(Options0,index)) of 755 {A,B} -> 756 {A,B}; 757 _ -> 758 {Options0,false} 759 end, 760 Subject = unicode:characters_to_binary(Subject0,unicode), 761 RE = case RE0 of 762 BinRE when is_binary(BinRE) -> 763 BinRE; 764 {re_pattern,_,_,_,_} = ReCompiled -> 765 ReCompiled; 766 ListRE -> 767 unicode:characters_to_binary(ListRE,unicode) 768 end, 769 Ret = re:run(Subject,RE,Options), 770 case RetType of 771 binary -> 772 ubinarify(Ret,Subject); 773 list -> 774 ulistify(Ret,Subject); 775 _ -> 776 Ret 777 end. 778 779 780%% Might be called either with two-tuple (if regexp was already compiled) 781%% or with 3-tuple (saving original RE for exceptions 782grun(Subject,RE,{Options,NeedClean}) -> 783 try 784 grun2(Subject,RE,{Options,NeedClean}) 785 catch 786 error:AnyError -> 787 {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} = 788 (catch erlang:error(new_stacktrace, 789 [Subject,RE,Options])), 790 erlang:raise(error,AnyError,[{Mod,run,L,Loc}|Rest]) 791 end; 792grun(Subject,RE,{Options,NeedClean,OrigRE}) -> 793 try 794 grun2(Subject,RE,{Options,NeedClean}) 795 catch 796 error:AnyError -> 797 {'EXIT',{new_stacktrace,[{Mod,_,L,Loc}|Rest]}} = 798 (catch erlang:error(new_stacktrace, 799 [Subject,OrigRE,Options])), 800 erlang:raise(error,AnyError,[{Mod,run,L,Loc}|Rest]) 801 end. 802 803grun2(Subject,RE,{Options,NeedClean}) -> 804 Unicode = check_for_unicode(RE,Options), 805 CRLF = check_for_crlf(RE,Options), 806 FlatSubject = to_binary(Subject, Unicode), 807 do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options,NeedClean}). 808 809do_grun(FlatSubject,Subject,Unicode,CRLF,RE,{Options0,NeedClean}) -> 810 {StrippedOptions, InitialOffset, 811 SelectReturn, ConvertReturn} = 812 case (catch 813 process_parameters(Options0, 0, false, index, NeedClean,RE)) of 814 badlist -> 815 erlang:error(badarg,[Subject,RE,Options0]); 816 CorrectReturn -> 817 CorrectReturn 818 end, 819 try 820 postprocess(loopexec(FlatSubject,RE,InitialOffset, 821 byte_size(FlatSubject), 822 Unicode,CRLF,StrippedOptions,true), 823 SelectReturn,ConvertReturn,FlatSubject,Unicode) 824 catch 825 throw:ErrTuple -> 826 ErrTuple 827 end. 828 829loopexec(_,_,X,Y,_,_,_,_) when X > Y -> 830 {match,[]}; 831loopexec(Subject,RE,X,Y,Unicode,CRLF,Options, First) -> 832 case re:internal_run(Subject,RE,[{offset,X}]++Options,First) of 833 {error, Err} -> 834 throw({error,Err}); 835 nomatch -> 836 {match,[]}; 837 {match,[{A,B}|More]} -> 838 {match,Rest} = 839 case B>0 of 840 true -> 841 loopexec(Subject,RE,A+B,Y,Unicode,CRLF,Options,false); 842 false -> 843 {match,M} = 844 case re:internal_run(Subject,RE,[{offset,X},notempty_atstart, 845 anchored]++Options,false) of 846 nomatch -> 847 {match,[]}; 848 {match,Other} -> 849 {match,Other} 850 end, 851 NewA = case M of 852 [{_,NStep}|_] when NStep > 0 -> 853 A+NStep; 854 _ -> 855 forward(Subject,A,1,Unicode,CRLF) 856 end, 857 {match,MM} = loopexec(Subject,RE,NewA,Y, 858 Unicode,CRLF,Options,false), 859 case M of 860 [] -> 861 {match,MM}; 862 _ -> 863 {match,[M | MM]} 864 end 865 end, 866 {match,[[{A,B}|More] | Rest]} 867 end. 868 869forward(_Chal,A,0,_,_) -> 870 A; 871forward(Chal,A,N,U,true) -> 872 <<_:A/binary,Tl/binary>> = Chal, 873 case Tl of 874 <<$\r,$\n,_/binary>> -> 875 forward(Chal,A+2,N-1,U,true); 876 _ -> 877 forward2(Chal,A,N,U,true) 878 end; 879forward(Chal,A,N,U,false) -> 880 forward2(Chal,A,N,U,false). 881 882forward2(Chal,A,N,false,CRLF) -> 883 forward(Chal,A+1,N-1,false,CRLF); 884forward2(Chal,A,N,true,CRLF) -> 885 <<_:A/binary,Tl/binary>> = Chal, 886 Forw = case Tl of 887 <<1:1,1:1,0:1,_:5,_/binary>> -> 888 2; 889 <<1:1,1:1,1:1,0:1,_:4,_/binary>> -> 890 3; 891 <<1:1,1:1,1:1,1:1,0:1,_:3,_/binary>> -> 892 4; 893 _ -> 894 1 895 end, 896 forward(Chal,A+Forw,N-1,true,CRLF). 897 898copt(caseless) -> 899 true; 900copt(no_start_optimize) -> 901 true; 902copt(never_utf) -> 903 true; 904copt(ucp) -> 905 true; 906copt(dollar_endonly) -> 907 true; 908copt(dotall) -> 909 true; 910copt(extended) -> 911 true; 912copt(firstline) -> 913 true; 914copt(multiline) -> 915 true; 916copt(no_auto_capture) -> 917 true; 918copt(dupnames) -> 919 true; 920copt(ungreedy) -> 921 true; 922copt(unicode) -> 923 true; 924copt(_) -> 925 false. 926 927%bothopt({newline,_}) -> 928% true; 929%bothopt(anchored) -> 930% true; 931%bothopt(_) -> 932% false. 933 934runopt(notempty) -> 935 true; 936runopt(notempty_atstart) -> 937 true; 938runopt(notbol) -> 939 true; 940runopt(noteol) -> 941 true; 942runopt({offset,_}) -> 943 true; 944runopt({capture,_,_}) -> 945 true; 946runopt({capture,_}) -> 947 true; 948runopt(global) -> 949 true; 950runopt({match_limit,_}) -> 951 true; 952runopt({match_limit_recursion,_}) -> 953 true; 954runopt(_) -> 955 false. 956 957to_binary(Bin, _IsUnicode) when is_binary(Bin) -> 958 Bin; 959to_binary(Data, true) -> 960 unicode:characters_to_binary(Data,unicode); 961to_binary(Data, false) -> 962 iolist_to_binary(Data). 963 964badarg_with_cause(Args, Cause) -> 965 erlang:error(badarg, Args, [{error_info, #{module => erl_stdlib_errors, 966 cause => Cause}}]). 967 968badarg_with_info(Args) -> 969 erlang:error(badarg, Args, [{error_info, #{module => erl_stdlib_errors}}]). 970