1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2008-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20-module(re_SUITE). 21 22-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, 23 init_per_group/2,end_per_group/2, pcre/1,compile_options/1, 24 run_options/1,combined_options/1,replace_autogen/1, 25 global_capture/1,replace_input_types/1,replace_return/1, 26 split_autogen/1,split_options/1,split_specials/1, 27 error_handling/1,pcre_cve_2008_2371/1,re_version/1, 28 pcre_compile_workspace_overflow/1,re_infinite_loop/1, 29 re_backwards_accented/1,opt_dupnames/1,opt_all_names/1,inspect/1, 30 opt_no_start_optimize/1,opt_never_utf/1,opt_ucp/1, 31 match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1, 32 yield_on_subject_validation/1, bad_utf8_subject/1, 33 error_info/1]). 34 35-include_lib("common_test/include/ct.hrl"). 36-include_lib("kernel/include/file.hrl"). 37 38suite() -> 39 [{ct_hooks,[ts_install_cth]}, 40 {timetrap,{minutes,3}}]. 41 42all() -> 43 [pcre, compile_options, run_options, combined_options, 44 replace_autogen, global_capture, replace_input_types, 45 replace_return, split_autogen, split_options, 46 split_specials, error_handling, pcre_cve_2008_2371, 47 pcre_compile_workspace_overflow, re_infinite_loop, 48 re_backwards_accented, opt_dupnames, opt_all_names, 49 inspect, opt_no_start_optimize,opt_never_utf,opt_ucp, 50 match_limit, sub_binaries, re_version, global_unicode_validation, 51 yield_on_subject_validation, bad_utf8_subject, 52 error_info]. 53 54groups() -> 55 []. 56 57init_per_suite(Config) -> 58 Config. 59 60end_per_suite(_Config) -> 61 ok. 62 63init_per_group(_GroupName, Config) -> 64 Config. 65 66end_per_group(_GroupName, Config) -> 67 Config. 68 69 70%% Run all applicable tests from the PCRE testsuites. 71pcre(Config) when is_list(Config) -> 72 RootDir = proplists:get_value(data_dir, Config), 73 Res = run_pcre_tests:test(RootDir), 74 0 = lists:sum([ X || {X,_,_} <- Res ]), 75 {comment,Res}. 76 77%% Test all documented compile options. 78compile_options(Config) when is_list(Config) -> 79 ok = ctest("ABDabcdABCD","abcd",[],true,{match,[{3,4}]}), 80 ok = ctest("ABDabcdABCD","abcd",[anchored],true,nomatch), 81 ok = ctest("ABDabcdABCD",".*abcd",[anchored],true,{match,[{0,7}]}), 82 ok = ctest("ABCabcdABC","ABCD",[],true,nomatch), 83 ok = ctest("ABCabcdABC","ABCD",[caseless],true,{match,[{3,4}]}), 84 ok = ctest("abcdABC\n","ABC$",[],true,{match,[{4,3}]}), 85 ok = ctest("abcdABC\n","ABC$",[dollar_endonly],true,nomatch), 86 ok = ctest("abcdABC\n","ABC.",[],true,nomatch), 87 ok = ctest("abcdABC\n","ABC.",[dotall],true,{match,[{4,4}]}), 88 ok = ctest("abcdABCD","ABC .",[],true,nomatch), 89 ok = ctest("abcdABCD","ABC .",[extended],true,{match,[{4,4}]}), 90 ok = ctest("abcd\nABCD","ABC",[],true,{match,[{5,3}]}), 91 ok = ctest("abcd\nABCD","ABC",[firstline],true,nomatch), 92 ok = ctest("abcd\nABCD","^ABC",[],true,nomatch), 93 ok = ctest("abcd\nABCD","^ABC",[multiline],true,{match,[{5,3}]}), 94 ok = ctest("abcdABCD","(ABC)",[],true,{match,[{4,3},{4,3}]}), 95 ok = ctest("abcdABCD","(ABC)",[no_auto_capture],true,{match,[{4,3}]}), 96 ok = ctest(notused,"(?<FOO>ABC)|(?<FOO>DEF)",[],false,notused), 97 ok = ctest("abcdABCD","(?<FOO>ABC)|(?<FOO>DEF)",[dupnames],true,{match,[{4,3},{4,3}]}), 98 ok = ctest("abcdABCDabcABCD","abcd.*D",[],true,{match,[{0,15}]}), 99 ok = ctest("abcdABCDabcABCD","abcd.*D",[ungreedy],true,{match,[{0,8}]}), 100 ok = ctest("abcdABCabcABC\nD","abcd.*D",[],true,nomatch), 101 ok = ctest("abcdABCabcABC\nD","abcd.*D",[{newline,cr}],true,{match,[{0,15}]}), 102 ok = ctest("abcdABCabcABC\rD","abcd.*D",[],true,{match,[{0,15}]}), 103 ok = ctest("abcdABCabcABC\rD","abcd.*D",[{newline,lf}],true,{match,[{0,15}]}), 104 ok = ctest("abcdABCabcd\r\n","abcd$",[{newline,lf}],true,nomatch), 105 ok = ctest("abcdABCabcd\r\n","abcd$",[{newline,cr}],true,nomatch), 106 ok = ctest("abcdABCabcd\r\n","abcd$",[{newline,crlf}],true,{match,[{7,4}]}), 107 108 ok = ctest("abcdABCabcd\r","abcd$",[{newline,crlf}],true,nomatch), 109 ok = ctest("abcdABCabcd\n","abcd$",[{newline,crlf}],true,nomatch), 110 ok = ctest("abcdABCabcd\r\n","abcd$",[{newline,anycrlf}],true,{match,[{7,4}]}), 111 112 ok = ctest("abcdABCabcd\r","abcd$",[{newline,anycrlf}],true,{match,[{7,4}]}), 113 ok = ctest("abcdABCabcd\n","abcd$",[{newline,anycrlf}],true,{match,[{7,4}]}), 114 ok. 115 116%% Test all documented run specific options. 117run_options(Config) when is_list(Config) -> 118 rtest("ABCabcdABC","abc",[],[],true), 119 rtest("ABCabcdABC","abc",[anchored],[],false), 120 %% Anchored in run overrides unanchored in compilation 121 rtest("ABCabcdABC","abc",[],[anchored],false), 122 123 rtest("","a?b?",[],[],true), 124 rtest("","a?b?",[],[notempty],false), 125 126 rtest("abc","^a",[],[],true), 127 rtest("abc","^a",[],[notbol],false), 128 rtest("ab\nc","^a",[multiline],[],true), 129 rtest("ab\nc","^a",[multiline],[notbol],false), 130 rtest("ab\nc","^c",[multiline],[notbol],true), 131 132 rtest("abc","c$",[],[],true), 133 rtest("abc","c$",[],[noteol],false), 134 135 rtest("ab\nc","b$",[multiline],[],true), 136 rtest("ab\nc","c$",[multiline],[],true), 137 rtest("ab\nc","b$",[multiline],[noteol],true), 138 rtest("ab\nc","c$",[multiline],[noteol],false), 139 140 rtest("abc","ab",[],[{offset,0}],true), 141 rtest("abc","ab",[],[{offset,1}],false), 142 143 rtest("abcdABCabcABC\nD","abcd.*D",[],[],false), 144 rtest("abcdABCabcABC\nD","abcd.*D",[],[{newline,cr}],true), 145 rtest("abcdABCabcABC\rD","abcd.*D",[],[],true), 146 rtest("abcdABCabcABC\rD","abcd.*D",[{newline,cr}],[{newline,lf}],true), 147 rtest("abcdABCabcd\r\n","abcd$",[],[{newline,lf}],false), 148 rtest("abcdABCabcd\r\n","abcd$",[],[{newline,cr}],false), 149 rtest("abcdABCabcd\r\n","abcd$",[],[{newline,crlf}],true), 150 151 rtest("abcdABCabcd\r","abcd$",[],[{newline,crlf}],false), 152 rtest("abcdABCabcd\n","abcd$",[],[{newline,crlf}],false), 153 rtest("abcdABCabcd\r\n","abcd$",[],[{newline,anycrlf}],true), 154 155 rtest("abcdABCabcd\r","abcd$",[],[{newline,anycrlf}],true), 156 rtest("abcdABCabcd\n","abcd$",[],[{newline,anycrlf}],true), 157 158 {ok,MP} = re:compile(".*(abcd).*"), 159 {match,[{0,10},{3,4}]} = re:run("ABCabcdABC",MP,[]), 160 {match,[{0,10},{3,4}]} = re:run("ABCabcdABC",MP,[{capture,all}]), 161 {match,[{0,10},{3,4}]} = re:run("ABCabcdABC",MP,[{capture,all,index}]), 162 {match,["ABCabcdABC","abcd"]} = re:run("ABCabcdABC",MP,[{capture,all,list}]), 163 {match,[<<"ABCabcdABC">>,<<"abcd">>]} = re:run("ABCabcdABC",MP,[{capture,all,binary}]), 164 {match,[{0,10}]} = re:run("ABCabcdABC",MP,[{capture,first}]), 165 {match,[{0,10}]} = re:run("ABCabcdABC",MP,[{capture,first,index}]), ?line {match,["ABCabcdABC"]} = re:run("ABCabcdABC",MP,[{capture,first,list}]), 166 {match,[<<"ABCabcdABC">>]} = re:run("ABCabcdABC",MP,[{capture,first,binary}]), 167 168 {match,[{3,4}]} = re:run("ABCabcdABC",MP,[{capture,all_but_first}]), 169 {match,[{3,4}]} = re:run("ABCabcdABC",MP,[{capture,all_but_first,index}]), 170 {match,["abcd"]} = re:run("ABCabcdABC",MP,[{capture,all_but_first,list}]), 171 {match,[<<"abcd">>]} = re:run("ABCabcdABC",MP,[{capture,all_but_first,binary}]), 172 173 match = re:run("ABCabcdABC",MP,[{capture,none}]), 174 match = re:run("ABCabcdABC",MP,[{capture,none,index}]), 175 match = re:run("ABCabcdABC",MP,[{capture,none,list}]), 176 match = re:run("ABCabcdABC",MP,[{capture,none,binary}]), 177 178 {ok,MP2} = re:compile(".*(?<FOO>abcd).*"), 179 {match,[{3,4}]} = re:run("ABCabcdABC",MP2,[{capture,[1]}]), 180 {match,[{3,4}]} = re:run("ABCabcdABC",MP2,[{capture,['FOO']}]), 181 {match,[{3,4}]} = re:run("ABCabcdABC",MP2,[{capture,["FOO"]}]), 182 {match,["abcd"]} = re:run("ABCabcdABC",MP2,[{capture,["FOO"],list}]), 183 {match,[<<"abcd">>]} = re:run("ABCabcdABC",MP2,[{capture,["FOO"],binary}]), 184 185 {match,[{-1,0}]} = re:run("ABCabcdABC",MP2,[{capture,[200]}]), 186 {match,[{-1,0}]} = re:run("ABCabcdABC",MP2,[{capture,['BAR']}]), 187 {match,[""]} = re:run("ABCabcdABC",MP2,[{capture,[200],list}]), 188 {match,[""]} = re:run("ABCabcdABC",MP2,[{capture,['BAR'],list}]), 189 {match,[<<>>]} = re:run("ABCabcdABC",MP2,[{capture,[200],binary}]), 190 {match,[<<>>]} = re:run("ABCabcdABC",MP2,[{capture,['BAR'],binary}]), 191 192 {ok, MP3} = re:compile(".*((?<FOO>abdd)|a(..d)).*"), 193 {match,[{0,10},{3,4},{-1,0},{4,3}]} = re:run("ABCabcdABC",MP3,[]), 194 {match,[{0,10},{3,4},{-1,0},{4,3}]} = re:run("ABCabcdABC",MP3,[{capture,all,index}]), 195 {match,[<<"ABCabcdABC">>,<<"abcd">>,<<>>,<<"bcd">>]} = re:run("ABCabcdABC",MP3,[{capture,all,binary}]), 196 {match,["ABCabcdABC","abcd",[],"bcd"]} = re:run("ABCabcdABC",MP3,[{capture,all,list}]), 197 ok. 198 199 200 201%% Test the version is retorned correctly 202re_version(_Config) -> 203 Version = re:version(), 204 {match,[Version]} = re:run(Version,"^[0-9]\\.[0-9]{2} 20[0-9]{2}-[0-9]{2}-[0-9]{2}",[{capture,all,binary}]), 205 ok. 206 207global_unicode_validation(Config) when is_list(Config) -> 208 %% Test that unicode validation of the subject is not done 209 %% for every match found... 210 Bin = binary:copy(<<"abc\n">>,100000), 211 {TimeAscii, _} = take_time(fun () -> 212 re:run(Bin, <<"b">>, [global]) 213 end), 214 {TimeUnicode, _} = take_time(fun () -> 215 re:run(Bin, <<"b">>, [unicode,global]) 216 end), 217 if TimeAscii == 0; TimeUnicode == 0 -> 218 {comment, "Not good enough resolution to compare results"}; 219 true -> 220 %% The time the operations takes should be in the 221 %% same order of magnitude. If validation of the 222 %% whole subject occurs for every match, the unicode 223 %% variant will take way longer time... 224 true = TimeUnicode div TimeAscii < 10 225 end. 226 227take_time(Fun) -> 228 Start = erlang:monotonic_time(nanosecond), 229 Res = Fun(), 230 End = erlang:monotonic_time(nanosecond), 231 {End-Start, Res}. 232 233yield_on_subject_validation(Config) when is_list(Config) -> 234 Go = make_ref(), 235 Bin = binary:copy(<<"abc\n">>,100000), 236 {P, M} = spawn_opt(fun () -> 237 receive Go -> ok end, 238 {match,[{1,1}]} = re:run(Bin, <<"b">>, [unicode]) 239 end, 240 [link, monitor]), 241 1 = erlang:trace(P, true, [running]), 242 P ! Go, 243 N = count_re_run_trap_out(P, M), 244 true = N >= 5, 245 ok. 246 247count_re_run_trap_out(P, M) when is_reference(M) -> 248 receive {'DOWN',M,process,P,normal} -> ok end, 249 TD = erlang:trace_delivered(P), 250 receive {trace_delivered, P, TD} -> ok end, 251 count_re_run_trap_out(P, 0); 252count_re_run_trap_out(P, N) when is_integer(N) -> 253 receive 254 {trace,P,out,{erlang,re_run_trap,3}} -> 255 count_re_run_trap_out(P, N+1) 256 after 0 -> 257 N 258 end. 259 260%% Test compile options given directly to run. 261combined_options(Config) when is_list(Config) -> 262 ok = crtest("ABDabcdABCD","abcd",[],true,{match,[{3,4}]}), 263 ok = crtest("ABDabcdABCD","abcd",[anchored],true,nomatch), 264 ok = crtest("ABDabcdABCD",".*abcd",[anchored],true,{match,[{0,7}]}), 265 ok = crtest("ABCabcdABC","ABCD",[],true,nomatch), 266 ok = crtest("ABCabcdABC","ABCD",[caseless],true,{match,[{3,4}]}), 267 ok = crtest("abcdABC\n","ABC$",[],true,{match,[{4,3}]}), 268 ok = crtest("abcdABC\n","ABC$",[dollar_endonly],true,nomatch), 269 ok = crtest("abcdABC\n","ABC.",[],true,nomatch), 270 ok = crtest("abcdABC\n","ABC.",[dotall],true,{match,[{4,4}]}), 271 ok = crtest("abcdABCD","ABC .",[],true,nomatch), 272 ok = crtest("abcdABCD","ABC .",[extended],true,{match,[{4,4}]}), 273 ok = crtest("abcd\nABCD","ABC",[],true,{match,[{5,3}]}), 274 ok = crtest("abcd\nABCD","ABC",[firstline],true,nomatch), 275 ok = crtest("abcd\nABCD","^ABC",[],true,nomatch), 276 ok = crtest("abcd\nABCD","^ABC",[multiline],true,{match,[{5,3}]}), 277 ok = crtest("abcdABCD","(ABC)",[],true,{match,[{4,3},{4,3}]}), 278 ok = crtest("abcdABCD","(ABC)",[no_auto_capture],true,{match,[{4,3}]}), 279 ok = crtest(notused,"(?<FOO>ABC)|(?<FOO>DEF)",[],false,notused), 280 ok = crtest("abcdABCD","(?<FOO>ABC)|(?<FOO>DEF)",[dupnames],true,{match,[{4,3},{4,3}]}), 281 ok = crtest("abcdABCDabcABCD","abcd.*D",[],true,{match,[{0,15}]}), 282 ok = crtest("abcdABCDabcABCD","abcd.*D",[ungreedy],true,{match,[{0,8}]}), 283 ok = ctest("abcdABCabcABC\nD","abcd.*D",[],true,nomatch), 284 ok = crtest("abcdABCabcABC\nD","abcd.*D",[{newline,cr}],true,{match,[{0,15}]}), 285 ok = crtest("abcdABCabcABC\rD","abcd.*D",[],true,{match,[{0,15}]}), 286 ok = crtest("abcdABCabcABC\rD","abcd.*D",[{newline,lf}],true,{match,[{0,15}]}), 287 ok = crtest("abcdABCabcd\r\n","abcd$",[{newline,lf}],true,nomatch), 288 ok = crtest("abcdABCabcd\r\n","abcd$",[{newline,cr}],true,nomatch), 289 ok = crtest("abcdABCabcd\r\n","abcd$",[{newline,crlf}],true,{match,[{7,4}]}), 290 291 ok = crtest("abcdABCabcd\r","abcd$",[{newline,crlf}],true,nomatch), 292 ok = crtest("abcdABCabcd\n","abcd$",[{newline,crlf}],true,nomatch), 293 ok = crtest("abcdABCabcd\r\n","abcd$",[{newline,anycrlf}],true,{match,[{7,4}]}), 294 295 ok = crtest("abcdABCabcd\r","abcd$",[{newline,anycrlf}],true,{match,[{7,4}]}), 296 ok = crtest("abcdABCabcd\n","abcd$",[{newline,anycrlf}],true,{match,[{7,4}]}), 297 298 ok = crtest("abcdABCabcd\r\n","abcd$",[{newline,anycrlf},{capture,all,list}],true,{match,["abcd"]}), 299 300 ok = crtest("abcdABCabcd\r","abcd$",[{newline,anycrlf},{capture,all,list}],true,{match,["abcd"]}), 301 302 ok = crtest("abcdABCabcd\n","abcd$",[{newline,anycrlf},{capture,all,list}],true,{match,["abcd"]}), 303 304 ok = crtest("abcdABCabcd\r\n","abcd$",[{newline,anycrlf},{capture,all,binary}],true,{match,[<<"abcd">>]}), 305 306 ok = crtest("abcdABCabcd\r","abcd$",[{newline,anycrlf},{capture,all,binary}],true,{match,[<<"abcd">>]}), 307 ok = crtest("abcdABCabcd\n","abcd$",[{newline,anycrlf},{capture,all,binary}],true,{match,[<<"abcd">>]}), 308 309 %% Check that unique run-options fail in compile only case: 310 {'EXIT',{badarg,_}} = (catch re:compile("abcd$",[{newline,anycrlf},{capture,all,binary}])), 311 {'EXIT',{badarg,_}} = (catch re:compile("abcd$",[{newline,anycrlf},{offset,3}])), 312 {'EXIT',{badarg,_}} = (catch re:compile("abcd$",[{newline,anycrlf},notempty])), 313 {'EXIT',{badarg,_}} = (catch re:compile("abcd$",[{newline,anycrlf},notbol])), 314 {'EXIT',{badarg,_}} = (catch re:compile("abcd$",[{newline,anycrlf},noteol])), 315 316 317 {match,_} = re:run("abcdABCabcd\r\n","abcd$",[{newline,crlf}]), 318 nomatch = re:run("abcdABCabcd\r\nefgh","abcd$",[{newline,crlf}]), 319 {match,_} = re:run("abcdABCabcd\r\nefgh","abcd$",[{newline,crlf},multiline]), 320 nomatch = re:run("abcdABCabcd\r\nefgh","efgh$",[{newline,crlf},multiline,noteol]), 321 {match,_} = re:run("abcdABCabcd\r\nefgh","abcd$",[{newline,crlf},multiline,noteol]), 322 {match,_} = re:run("abcdABCabcd\r\nefgh","^abcd",[{newline,crlf},multiline,noteol]), 323 nomatch = re:run("abcdABCabcd\r\nefgh","^abcd",[{newline,crlf},multiline,notbol]), 324 {match,_} = re:run("abcdABCabcd\r\nefgh","^efgh",[{newline,crlf},multiline,notbol]), 325 {match,_} = re:run("ABC\nD","[a-z]*",[{newline,crlf}]), 326 nomatch = re:run("ABC\nD","[a-z]*",[{newline,crlf},notempty]), 327 ok. 328 329%% Test replace with autogenerated erlang module. 330replace_autogen(Config) when is_list(Config) -> 331 re_testoutput1_replacement_test:run(), 332 ok. 333 334%% Test capture options together with global searching. 335global_capture(Config) when is_list(Config) -> 336 {match,[{3,4}]} = re:run("ABCabcdABC",".*(?<FOO>abcd).*",[{capture,[1]}]), 337 {match,[{10,4}]} = re:run("ABCabcdABCabcdA",".*(?<FOO>abcd).*",[{capture,[1]}]), 338 {match,[[{10,4}]]} = re:run("ABCabcdABCabcdA",".*(?<FOO>abcd).*",[global,{capture,[1]}]), 339 {match,[{3,4}]} = re:run("ABCabcdABC",".*(?<FOO>abcd).*",[{capture,['FOO']}]), 340 {match,[{10,4}]} = re:run("ABCabcdABCabcdA",".*(?<FOO>abcd).*",[{capture,['FOO']}]), 341 {match,[[{10,4}]]} = re:run("ABCabcdABCabcdA",".*(?<FOO>abcd).*",[global,{capture,['FOO']}]), 342 {match,[[{3,4},{3,4}],[{10,4},{10,4}]]} = re:run("ABCabcdABCabcdA","(?<FOO>abcd)",[global]), 343 {match,[[{3,4},{3,4}],[{10,4},{10,4}]]} = re:run("ABCabcdABCabcdA","(?<FOO>abcd)",[global,{capture,all}]), 344 {match,[[{3,4},{3,4}],[{10,4},{10,4}]]} = re:run("ABCabcdABCabcdA","(?<FOO>abcd)",[global,{capture,all,index}]), 345 {match,[[{3,4}],[{10,4}]]} = re:run("ABCabcdABCabcdA","(?<FOO>abcd)",[global,{capture,first}]), 346 {match,[[{3,4}],[{10,4}]]} = re:run("ABCabcdABCabcdA","(?<FOO>abcd)",[global,{capture,all_but_first}]), 347 {match,[[<<"bcd">>],[<<"bcd">>]]} = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,all_but_first,binary}]), 348 {match,[["bcd"],["bcd"]]} = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,all_but_first,list}]), 349 {match,[["abcd","bcd"],["abcd","bcd"]]} = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,all,list}]), 350 {match,[[<<"abcd">>,<<"bcd">>],[<<"abcd">>,<<"bcd">>]]} = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,all,binary}]), 351 {match,[[{3,4},{4,3}],[{10,4},{11,3}]]} = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,all,index}]), 352 match = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,none,index}]), 353 match = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,none,binary}]), 354 match = re:run("ABCabcdABCabcdA","a(?<FOO>bcd)",[global,{capture,none,list}]), 355 {match,[[<<195,133,98,99,100>>,<<"bcd">>],[<<"abcd">>,<<"bcd">>]]} = re:run("ABCÅbcdABCabcdA",".(?<FOO>bcd)",[global,{capture,all,binary},unicode]), 356 {match,[["Åbcd","bcd"],["abcd","bcd"]]} = re:run(<<"ABC",8#303,8#205,"bcdABCabcdA">>,".(?<FOO>bcd)",[global,{capture,all,list},unicode]), 357 {match,[["Åbcd","bcd"],["abcd","bcd"]]} = re:run("ABCÅbcdABCabcdA",".(?<FOO>bcd)",[global,{capture,all,list},unicode]), 358 {match,[[{3,5},{5,3}],[{11,4},{12,3}]]} = re:run("ABCÅbcdABCabcdA",".(?<FOO>bcd)",[global,{capture,all,index},unicode]), 359 ok. 360 361%% Test replace with different input types. 362replace_input_types(Config) when is_list(Config) -> 363 <<"abcd">> = re:replace("abcd","Z","X",[{return,binary},unicode]), 364 <<"abcd">> = re:replace("abcd","\x{400}","X",[{return,binary},unicode]), 365 <<"a",208,128,"cd">> = re:replace(<<"abcd">>,"b","\x{400}",[{return,binary},unicode]), 366 ok. 367 368%% Test return options of replace together with global searching. 369replace_return(Config) when is_list(Config) -> 370 {'EXIT',{badarg,_}} = (catch re:replace("na","(a","")), 371 ok = replacetest(<<"nisse">>,"i","a",[{return,binary}],<<"nasse">>), 372 ok = replacetest("ABC\305abcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary}],<<"ABCÅXABCXA">>), 373 ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,iodata}],[<<"ABCÅ">>,<<"X">>,<<"ABC">>,<<"X">>|<<"A">>]), 374 ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,list},unicode],"ABCÅXABCXA"), 375 ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[global,{return,binary},unicode],<<65,66,67,195,133,88,65,66,67,88,65>>), 376 ok = replacetest("ABCÅabcdABCabcdA","a(?<FOO>bcd)","X",[{return,binary},unicode],<<65,66,67,195,133,88,65,66,67,97,98,99,100,65>>), 377 ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\9X",[{return,binary}],<<"iXk">>), 378 ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\10X",[{return,binary}],<<"jXk">>), 379 ok = replacetest("abcdefghijk","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\11X",[{return,binary}],<<"Xk">>), 380 ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g9X",[{return,binary}],<<"9X1">>), 381 ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g10X",[{return,binary}],<<"0X1">>), 382 ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g11X",[{return,binary}],<<"X1">>), 383 ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{9}7",[{return,binary}],<<"971">>), 384 ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{10}7",[{return,binary}],<<"071">>), 385 ok = replacetest("12345678901","(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)","\\g{11}7",[{return,binary}],<<"71">>), 386 ok = replacetest("a\x{400}bcd","d","X",[global,{return,list},unicode],"a\x{400}bcX"), 387 ok = replacetest("a\x{400}bcd","d","X",[global,{return,binary},unicode],<<"a",208,128,"bcX">>), 388 ok = replacetest("a\x{400}bcd","Z","X",[global,{return,list},unicode],"a\x{400}bcd"), 389 ok = replacetest("a\x{400}bcd","Z","X",[global,{return,binary},unicode],<<"a",208,128,"bcd">>), 390 ok. 391 392rtest(Subj, RE, Copt, Ropt, true) -> 393 {ok,MP} = re:compile(RE,Copt), 394 {match,_} = re:run(Subj,MP,Ropt), 395 ok; 396rtest(Subj, RE, Copt, Ropt, false) -> 397 {ok,MP} = re:compile(RE,Copt), 398 nomatch = re:run(Subj,MP,Ropt), 399 ok. 400 401ctest(_,RE,Options,false,_) -> 402 case re:compile(RE,Options) of 403 {ok,_} -> 404 error; 405 {error,_} -> 406 ok 407 end; 408ctest(Subject,RE,Options,true,Result) -> 409 try 410 {ok, Prog} = re:compile(RE,Options), 411 Result = re:run(Subject,Prog,[]), 412 ok 413 catch 414 _:_ -> 415 error 416 end. 417crtest(_,RE,Options,false,_) -> 418 case (catch re:run("",RE,Options)) of 419 {'EXIT',{badarg,_}} -> 420 ok; 421 _ -> 422 error 423 end; 424crtest(Subject,RE,Options,true,Result) -> 425 try 426 Result = re:run(Subject,RE,Options), 427 ok 428 catch 429 _:_ -> 430 error 431 end. 432 433replacetest(Subject,RE,Replacement,Options,Result) -> 434 Result = re:replace(Subject,RE,Replacement,Options), 435 {CompileOptions,ReplaceOptions} = lists:partition(fun copt/1, Options), 436 {ok,MP} = re:compile(RE,CompileOptions), 437 Result = re:replace(Subject,MP,Replacement,ReplaceOptions), 438 ok. 439 440splittest(Subject,RE,Options,Result) -> 441 Result = re:split(Subject,RE,Options), 442 {CompileOptions,SplitOptions} = lists:partition(fun copt/1, Options), 443 {ok,MP} = re:compile(RE,CompileOptions), 444 Result = re:split(Subject,MP,SplitOptions), 445 ok. 446 447copt(caseless) -> true; 448copt(no_start_optimize) -> true; 449copt(never_utf) -> true; 450copt(ucp) -> true; 451copt(dollar_endonly) -> true; 452copt(dotall) -> true; 453copt(extended) -> true; 454copt(firstline) -> true; 455copt(multiline) -> true; 456copt(no_auto_capture) -> true; 457copt(dupnames) -> true; 458copt(ungreedy) -> true; 459copt(unicode) -> true; 460copt(_) -> false. 461 462%% Test split with autogenerated erlang module. 463split_autogen(Config) when is_list(Config) -> 464 re_testoutput1_split_test:run(), 465 ok. 466 467%% Test special options to split. 468split_options(Config) when is_list(Config) -> 469 ok = splittest("a b c ","( )",[group,trim],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]]), 470 ok = splittest("a b c ","( )",[group,{parts,0}],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>]]), 471 ok = splittest("a b c ","( )",[{parts,infinity},group],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]]), 472 ok = splittest("a b c ","( )",[group],[[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<>>]]), 473 ok = splittest(" a b c d ","( +)",[group,trim],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>]]), 474 ok = splittest(" a b c d ","( +)",[{parts,0},group],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>]]), 475 ok = splittest(" a b c d ","( +)",[{parts,infinity},group],[[<<>>,<<" ">>],[<<"a">>,<<" ">>],[<<"b">>,<<" ">>],[<<"c">>,<<" ">>],[<<"d">>,<<" ">>],[<<>>]]), 476 ok = splittest("a b c d","( +)",[{parts,2},group],[[<<"a">>,<<" ">>],[<<"b c d">>]]), 477 ok = splittest([967]++" b c d","( +)",[{parts,2},group,{return,list},unicode],[[[967]," "],["b c d"]]), 478 ok = splittest([967]++" b c d","( +)",[{parts,2},group,{return,binary},unicode],[[<<207,135>>,<<" ">>],[<<"b c d">>]]), 479 {'EXIT',{badarg,_}} = (catch re:split([967]++" b c d","( +)",[{parts,2},group,{return,binary}])), 480 {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{parts,-2}])), 481 {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{parts,banan}])), 482 {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{capture,all}])), 483 {'EXIT',{badarg,_}} = (catch re:split("a b c d","( +)",[{capture,[],binary}])), 484 %% Parts 0 is equal to no parts specification (implicit strip) 485 ok = splittest("a b c d","( *)",[{parts,0},{return,list}],["a"," ","b"," ","c"," ","d"]), 486 ok. 487 488join([]) -> []; 489join([A]) -> [A]; 490join([H|T]) -> [H,<<":">>|join(T)]. 491 492%% Some special cases of split that are easy to get wrong. 493split_specials(Config) when is_list(Config) -> 494 %% More or less just to remember these icky cases 495 <<"::abd:f">> = 496 iolist_to_binary(join(re:split("abdf","^(?!(ab)de|x)(abd)(f)",[trim]))), 497 <<":abc2xyzabc3">> = 498 iolist_to_binary(join(re:split("abc1abc2xyzabc3","\\Aabc.",[trim]))), 499 ok. 500 501 502%% Test that errors are handled correctly by the erlang code. 503error_handling(_Config) -> 504 %% This test checks the exception tuples manufactured in the erlang 505 %% code to hide the trapping from the user at least when it comes to errors 506 507 %% The malformed precomiled RE is detected after 508 %% the trap to re:grun from grun, in the grun function clause 509 %% that handles precompiled expressions 510 {'EXIT',{badarg,[{re,run,["apa",{1,2,3,4},[global]],_}, 511 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 512 (catch re:run("apa",{1,2,3,4},[global])), 513 %% An invalid capture list will also cause a badarg late, 514 %% but with a non pre compiled RE, the exception should be thrown by the 515 %% grun function clause that handles RE's compiled implicitly by 516 %% the run/3 BIF before trapping. 517 {'EXIT',{badarg,[{re,run,["apa","p",[{capture,[1,{a}]},global]],_}, 518 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 519 (catch re:run("apa","p",[{capture,[1,{a}]},global])), 520 %% And so the case of a precompiled expression together with 521 %% a compile-option (binary and list subject): 522 {ok,RE} = re:compile("(p)"), 523 {match,[[{1,1},{1,1}]]} = re:run(<<"apa">>,RE,[global]), 524 {match,[[{1,1},{1,1}]]} = re:run("apa",RE,[global]), 525 {'EXIT',{badarg,[{re,run, 526 [<<"apa">>, 527 {re_pattern,1,0,_,_}, 528 [global,unicode]],_}, 529 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 530 (catch re:run(<<"apa">>,RE,[global,unicode])), 531 {'EXIT',{badarg,[{re,run, 532 ["apa", 533 {re_pattern,1,0,_,_}, 534 [global,unicode]],_}, 535 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 536 (catch re:run("apa",RE,[global,unicode])), 537 {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[])), 538 {error, {compile, {_,_}}} = re:run("apa","(p",[report_errors]), 539 {'EXIT',{badarg,_}} = (catch re:run("apa","(p",[global])), 540 {error, {compile, {_,_}}} = re:run("apa","(p",[report_errors,global]), 541 %% Badly formed options 542 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,["global"])), 543 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{offset,-1}])), 544 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{offset,ett}])), 545 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{captore,[1,2],binary}])), 546 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{capture,[1,2],binary,list}])), 547 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{capture,[1,2],bunary}])), 548 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{capture,{1,2},binary}])), 549 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{newline,3}])), 550 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{newline,apa}])), 551 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{njuline,cr}])), 552 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{<<"newline">>,cr}])), 553 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[global|dupnames])), 554 {'EXIT',{badarg,_}} = (catch re:run([<<"ap">>|$a],RE,[])), % Not an IO-list 555 {'EXIT',{badarg,_}} = (catch re:compile([<<"ap">>|$a],[])), % Not an IO-list 556 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE,[{capture,[0|1],binary}])), 557 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE, 558 [{capture,[<<"apa">>|1],binary}])), 559 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,RE, 560 [{capture,[[<<"ap">>|$a]],binary}])), 561 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,[<<"(p">>|41],[])), 562 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,{re_pattern,3,0,0,[]},[])), 563 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,{re_pattern,3,0,0,<<"apa">>},[])), 564 {'EXIT',{badarg,_}} = (catch re:run(<<"apa">>,{re_pattern,3,0,0,<<"apa",3:2>>},[])), 565 {'EXIT',{badarg,_}} = (catch re:run(<<"apa",2:2>>,<<"(p)">>,[{capture,[0,1],binary}])), 566 <<_:4,Temp:3/binary,_:4>> = <<38,23,6,18>>, 567 {match,[{1,1},{1,1}]} = re:run(Temp,<<"(p)">>,[]), % Unaligned works 568 %% The replace errors: 569 {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[]],_}, 570 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 571 (catch re:replace("apa",{1,2,3,4},"X",[])), 572 {'EXIT',{badarg,[{re,replace,["apa",{1,2,3,4},"X",[global]],_}, 573 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 574 (catch re:replace("apa",{1,2,3,4},"X",[global])), 575 {'EXIT',{badarg,[{re,replace, 576 ["apa", 577 {re_pattern,1,0,_,_}, 578 "X", 579 [unicode]],_}, 580 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 581 (catch re:replace("apa",RE,"X",[unicode])), 582 <<"aXa">> = iolist_to_binary(re:replace("apa","p","X",[])), 583 {'EXIT',{badarg,[{re,replace, 584 ["apa","p","X",[report_errors]],_}, 585 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 586 (catch iolist_to_binary(re:replace("apa","p","X", 587 [report_errors]))), 588 {'EXIT',{badarg,[{re,replace, 589 ["apa","p","X",[{capture,all,binary}]],_}, 590 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 591 (catch iolist_to_binary(re:replace("apa","p","X", 592 [{capture,all,binary}]))), 593 {'EXIT',{badarg,[{re,replace, 594 ["apa","p","X",[{capture,all}]],_}, 595 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 596 (catch iolist_to_binary(re:replace("apa","p","X", 597 [{capture,all}]))), 598 {'EXIT',{badarg,[{re,replace, 599 ["apa","p","X",[{return,banana}]],_}, 600 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 601 (catch iolist_to_binary(re:replace("apa","p","X", 602 [{return,banana}]))), 603 {'EXIT',{badarg,_}} = (catch re:replace("apa","(p","X",[])), 604 %% Badarg, not compile error. 605 {'EXIT',{badarg,[{re,replace, 606 ["apa","(p","X",[{return,banana}]],_}, 607 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 608 (catch iolist_to_binary(re:replace("apa","(p","X", 609 [{return,banana}]))), 610 %% And the split errors: 611 [<<"a">>,<<"a">>] = (catch re:split("apa","p",[])), 612 [<<"a">>,<<"p">>,<<"a">>] = (catch re:split("apa",RE,[])), 613 {'EXIT',{badarg,[{re,split,["apa","p",[report_errors]],_}, 614 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 615 (catch re:split("apa","p",[report_errors])), 616 {'EXIT',{badarg,[{re,split,["apa","p",[global]],_}, 617 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 618 (catch re:split("apa","p",[global])), 619 {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all}]],_}, 620 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 621 (catch re:split("apa","p",[{capture,all}])), 622 {'EXIT',{badarg,[{re,split,["apa","p",[{capture,all,binary}]],_}, 623 {?MODULE, ?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 624 (catch re:split("apa","p",[{capture,all,binary}])), 625 {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4}],_}, 626 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 627 (catch re:split("apa",{1,2,3,4})), 628 {'EXIT',{badarg,[{re,split,["apa",{1,2,3,4},[]],_}, 629 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 630 (catch re:split("apa",{1,2,3,4},[])), 631 {'EXIT',{badarg,[{re,split, 632 ["apa", 633 RE, 634 [unicode]],_}, 635 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 636 (catch re:split("apa",RE,[unicode])), 637 {'EXIT',{badarg,[{re,split, 638 ["apa", 639 RE, 640 [{return,banana}]],_}, 641 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 642 (catch re:split("apa",RE,[{return,banana}])), 643 {'EXIT',{badarg,[{re,split, 644 ["apa", 645 RE, 646 [banana]],_}, 647 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 648 (catch re:split("apa",RE,[banana])), 649 {'EXIT',{badarg,_}} = (catch re:split("apa","(p")), 650 %%Exception on bad argument, not compilation error 651 {'EXIT',{badarg,[{re,split, 652 ["apa", 653 "(p", 654 [banana]],_}, 655 {?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY,_} | _]}} = 656 (catch re:split("apa","(p",[banana])), 657 ok. 658 659%% Fix as in http://vcs.pcre.org/viewvc?revision=360&view=revision 660pcre_cve_2008_2371(Config) when is_list(Config) -> 661 %% Make sure it doesn't crash the emulator. 662 re:compile(<<"(?i)[\xc3\xa9\xc3\xbd]|[\xc3\xa9\xc3\xbdA]">>, [unicode]), 663 ok. 664 665%% Patch from 666%% http://vcs.pcre.org/viewvc/code/trunk/pcre_compile.c?r1=504&r2=505&view=patch 667pcre_compile_workspace_overflow(Config) when is_list(Config) -> 668 N = 819, 669 ExpStr = "Got expected error: ", 670 case re:compile([lists:duplicate(N, $(), lists:duplicate(N, $))]) of 671 {error, {"regular expression is too complicated" = Str,799}} -> 672 {comment, ExpStr ++ Str}; 673 {error, {"parentheses are too deeply nested (stack check)" = Str, _No}} -> 674 {comment, ExpStr ++ Str}; 675 Other -> 676 ct:fail({unexpected, Other}) 677 end. 678 679%% Make sure matches that really loop infinitely actually fail. 680re_infinite_loop(Config) when is_list(Config) -> 681 Str = 682 "http:/www.flickr.com/slideShow/index.gne?group_id=&user_id=69845378@N0", 683 EMail_regex = "[a-z0-9!#$%&'*+/=?^_`{|}~-]+" 684 ++ "(\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*" 685 ++ "@.*([a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+" 686 ++ "([a-zA-Z]{2}|com|org|net|gov|mil" 687 ++ "|biz|info|mobi|name|aero|jobs|museum)", 688 nomatch = re:run(Str, EMail_regex), 689 nomatch = re:run(Str, EMail_regex, [global]), 690 {error,match_limit} = re:run(Str, EMail_regex,[report_errors]), 691 {error,match_limit} = re:run(Str, EMail_regex,[report_errors,global]), 692 ok. 693 694%% Check for nasty bug where accented graphemes can make PCRE back 695%% past beginning of subject. 696re_backwards_accented(Config) when is_list(Config) -> 697 match = re:run(<<65,204,128,65,204,128,97,98,99>>, 698 <<"\\X?abc">>, 699 [unicode,{capture,none}]), 700 ok. 701 702%% Check correct handling of dupnames option to re. 703opt_dupnames(Config) when is_list(Config) -> 704 Days = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"], 705 _ = [ begin 706 Short = lists:sublist(Day,3), 707 {match,[Short]} = 708 re:run(Day, 709 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" 710 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" 711 "(?<DN>Sat)(?:urday)?", 712 [dupnames, {capture, ['DN'], list}]) 713 end || Day <- Days ], 714 _ = [ begin 715 Short = list_to_binary(lists:sublist(Day,3)), 716 {match,[Short]} = 717 re:run(Day, 718 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" 719 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" 720 "(?<DN>Sat)(?:urday)?", 721 [dupnames, {capture, ['DN'], binary}]) 722 end || Day <- Days ], 723 _ = [ begin 724 {match,[{0,3}]} = 725 re:run(Day, 726 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" 727 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" 728 "(?<DN>Sat)(?:urday)?", 729 [dupnames, {capture, ['DN'], index}]) 730 end || Day <- Days ], 731 {match,[{0,1},{1,3},{7,1}]} = re:run("SMondayX","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" 732 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" 733 "(?<DN>Sat)(?:urday)?", 734 [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]), 735 {match,[{-1,0},{0,3},{-1,0}]} = re:run("Wednesday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" 736 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" 737 "(?<DN>Sat)(?:urday)?", 738 [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]), 739 nomatch = re:run("Wednsday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" 740 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" 741 "(?<DN>Sat)(?:urday)?", 742 [dupnames, {capture, ['Skrap','DN','Skrap2'],index}]), 743 {match,[<<>>]} = re:run("Subject","b",[dupnames,{capture,['B'],binary}]), 744 {match,[<<"S">>,<<"u">>,<<"b">>,<<"j">>,<<"e">>,<<"c">>, 745 <<"t">>,<<"I">>,<<"s">>,<<"M">>,<<"o">>,<<"r">>,<<"e">>, 746 <<"T">>,<<"h">>,<<"a">>,<<"n">>,<<"T">>,<<"e">>,<<"n">>]} = 747 re:run("SubjectIsMoreThanTen", 748 "(?<S>S)(?<u>u)(?<b>b)(?<j>j)(?<e>e)(?<c>c)(?<t>t)" 749 "(?<I>I)(?<s>s)(?<M>M)(?<o>o)(?<r>r)(?<e>e)(?<T>T)" 750 "(?<h>h)(?<a>a)(?<n>n)(?<T>T)(?<e>e)(?<n>n)", 751 [dupnames,{capture,['S','u','b','j','e','c','t', 752 'I','s','M','o','r','e','T', 753 'h','a','n','T','e','n'],binary}]), 754 {match,[<<"S">>,<<"u">>,<<"b">>,<<"j">>,<<"e">>,<<"c">>, 755 <<"t">>,<<"I">>,<<"s">>,<<"M">>,<<"o">>,<<"r">>,<<"e">>, 756 <<"T">>,<<"h">>,<<"a">>,<<"n">>,<<"T">>,<<"e">>,<<"n">>]} = 757 re:run("SubjectIsMoreThanTen", 758 "(?<S>S)(?<u>u)(?<b>b)(?<j>j)(?<e>e)(?<c>c)(?<t>t)" 759 "(?<I>I)(?<s>s)(?<M>M)(?<o>o)(?<r>r)(?<e>e)(?<T>T)" 760 "(?<h>h)(?<a>a)(?<n>n)(?<T>T)(?<e>e)(?<n>n)", 761 [dupnames, 762 {capture,all_but_first,list}, 763 {capture,['S','u','b','j','e','c','t', 764 'I','s','M','o','r','e','T', 765 'h','a','n','T','e','n'],binary}]), 766 {match,[<<"S">>,<<"u">>,<<"b">>,<<"j">>,<<"e">>,<<"c">>, 767 <<"t">>,<<"I">>,<<"s">>,<<"M">>,<<"o">>,<<"r">>,<<"e">>, 768 <<"T">>,<<"h">>,<<"a">>,<<"n">>,<<"T">>,<<"e">>,<<"n">>]} = 769 re:run("SubjectIsMoreThanTen", 770 "(?<S>S)(?<u>u)(?<b>b)(?<j>j)(?<e>e)(?<c>c)(?<t>t)" 771 "(?<I>I)(?<s>s)(?<M>M)(?<o>o)(?<r>r)(?<e>e)(?<T>T)" 772 "(?<h>h)(?<a>a)(?<n>n)(?<T>T)(?<e>e)(?<n>n)", 773 [dupnames, 774 {capture,["S","u","b","j","e","c","t", 775 "I","s","M","o","r","e","T", 776 "h","a","n","T","e","n"],binary}]), 777 {match,[<<"S">>,<<"u">>,<<"b">>,<<"j">>,<<"e">>,<<"c">>, 778 <<"t">>,<<"I">>,<<"s">>,<<"M">>,<<"o">>,<<"r">>,<<"e">>, 779 <<"T">>,<<"h">>,<<"a">>,<<"n">>,<<"T">>,<<"e">>,<<"n">>]} = 780 re:run("SubjectIsMoreThanTen", 781 "(?<S>S)(?<u>u)(?<b>b)(?<j>j)(?<e>e)(?<c>c)(?<t>t)" 782 "(?<I>I)(?<s>s)(?<M>M)(?<o>o)(?<r>r)(?<e>e)(?<T>T)" 783 "(?<h>h)(?<a>a)(?<n>n)(?<T>T)(?<e>e)(?<then>n)", 784 [dupnames, 785 {capture,["S","u","b","j","e","c","t", 786 "I","s","M","o","r","e","T", 787 "h","a","n","T","e","then"],binary}]), 788 ok. 789 790%% Test capturing of all_names. 791opt_all_names(Config) when is_list(Config) -> 792 Days = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"], 793 {match,[{1,3},{0,1},{7,1}]} = re:run("SMondayX","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" 794 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" 795 "(?<DN>Sat)(?:urday)?", 796 [dupnames, {capture, all_names,index}]), 797 {match,[{0,3},{-1,0},{-1,0}]} = re:run("Wednesday","(?<Skrap>.)(?<DN>Mon|Fri|Sun)(?:day)?(?<Skrap2>.)|" 798 "(?<DN>Tue)(?:sday)?|(?<DN>Wed)nesday|(?<DN>Thu)(?:rsday)?|" 799 "(?<DN>Sat)(?:urday)?", 800 [dupnames, {capture, all_names,index}]), 801 802 _ = [ begin 803 {match,[{0,3}]} = 804 re:run(Day, 805 "(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|" 806 "(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|" 807 "(?<DN>Sat)(?:urday)?", 808 [dupnames, {capture, all_names, index}]) 809 end || Day <- Days ], 810 _ = [ begin 811 match = 812 re:run(Day, 813 "(Mon|Fri|Sun)(?:day)?|(Tue)(?:sday)?|" 814 "(Wed)(?:nesday)?|(Thu)(?:rsday)?|" 815 "(Sat)(?:urday)?", 816 [dupnames, {capture, all_names, index}]) 817 end || Day <- Days ], 818 {match,[{0,1},{-1,0},{-1,0}]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]), 819 {match,[{-1,0},{0,1},{-1,0}]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]), 820 {match,[{-1,0},{-1,0},{0,1}]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, index}]), 821 {match,[<<"A">>,<<>>,<<>>]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]), 822 {match,[<<>>,<<"B">>,<<>>]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]), 823 {match,[<<>>,<<>>,<<"C">>]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, binary}]), 824 {match,["A",[],[]]} = re:run("A","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]), 825 {match,[[],"B",[]]} = re:run("B","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]), 826 {match,[[],[],"C"]} = re:run("C","(?<A>A)|(?<B>B)|(?<C>C)",[{capture, all_names, list}]), 827 {match,[{-1,0},{-1,0},{0,1}]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]), 828 {match,[{-1,0},{0,1},{-1,0}]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]), 829 {match,[{0,1},{-1,0},{-1,0}]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, index}]), 830 {match,[<<>>,<<>>,<<"A">>]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]), 831 {match,[<<>>,<<>>,<<"A">>]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_but_first, binary},{capture, all_names, binary}]), 832 {match,[<<>>,<<"B">>,<<>>]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]), 833 {match,[<<"C">>,<<>>,<<>>]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, binary}]), 834 {match,[[],[],"A"]} = re:run("A","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]), 835 {match,[[],"B",[]]} = re:run("B","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]), 836 {match,["C",[],[]]} = re:run("C","(?<C>A)|(?<B>B)|(?<A>C)",[{capture, all_names, list}]), 837 {match,[[<<>>,<<>>,<<"C">>], 838 [<<>>,<<>>,<<"C">>], 839 [<<>>,<<>>,<<"C">>]]} = re:run("CCC","(?<A>A)|(?<B>B)|(?<C>C)", 840 [global,{capture, all_names, binary}]), 841 {match,[[<<"C">>,<<>>], 842 [<<>>,<<"B">>], 843 [<<"C">>,<<>>]]} = re:run("CBC","(?<A>A)|(?<B>B)|(?<A>C)", 844 [global,dupnames,{capture, all_names, binary}]), 845 {match,[[]]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]), 846 {match,["D"]} = re:run("ABCDE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]), 847 {match,["F"]} = re:run("ABCFE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A'],list}]), 848 {match,["F",[]]} = re:run("ABCFE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A','B'],list}]), 849 {match,[[],"E"]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,['A','B'],list}]), 850 {match,[[],"E"]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,all_names,list}]), 851 {match,[{-1,0},{3,1}]} = re:run("ABCE","(?<A>D)|(?<B>E)|(?<A>F)",[dupnames,{capture,all_names,index}]), 852 match = re:run("Subject","b",[dupnames,{capture,all_names,binary}]), 853 {match,[<<"I">>,<<"M">>,<<"S">>,<<"T">>,<<"a">>,<<"b">>, 854 <<"c">>,<<"e">>,<<"h">>,<<"j">>,<<"n">>,<<"o">>,<<"r">>, 855 <<"s">>,<<"t">>,<<"u">>]} = 856 re:run("SubjectIsMoreThanTen","(?<S>S)(?<u>u)(?<b>b)(?<j>j)" 857 "(?<e>e)(?<c>c)(?<t>t)(?<I>I)(?<s>s)(?<M>M)(?<o>o)(?<r>r)" 858 "(?<e>e)(?<T>T)(?<h>h)(?<a>a)(?<n>n)(?<T>T)(?<e>e)(?<n>n)", 859 [dupnames,{capture,all_names,binary}]), 860 ok. 861 862%% Test the minimal inspect function. 863inspect(Config) when is_list(Config)-> 864 {ok,MP} = re:compile("(?<A>A)|(?<B>B)|(?<C>C)."), 865 {namelist,[<<"A">>,<<"B">>,<<"C">>]} = re:inspect(MP,namelist), 866 {ok,MPD} = re:compile("(?<A>A)|(?<B>B)|(?<A>C).",[dupnames]), 867 {namelist,[<<"A">>,<<"B">>]} = re:inspect(MPD,namelist), 868 {ok,MPN} = re:compile("(A)|(B)|(C)."), 869 {namelist,[]} = re:inspect(MPN,namelist), 870 {'EXIT',{badarg,_}} = (catch re:inspect(MPD,namelistk)), 871 {'EXIT',{badarg,_}} = (catch re:inspect({re_pattern,3,0,0,<<"kalle">>},namelist)), 872 {'EXIT',{badarg,_}} = (catch re:inspect({re_pattern,3,0,0,<<"kalle",2:2>>},namelist)), 873 ok. 874 875%% Test that the no_start_optimize compilation flag works. 876opt_no_start_optimize(Config) when is_list(Config) -> 877 {match, [{3,3}]} = re:run("DEFABC","(*COMMIT)ABC",[]), % Start optimization makes this result wrong! 878 nomatch = re:run("DEFABC","(*COMMIT)ABC",[no_start_optimize]), % This is the correct result... 879 ok. 880 881%% Check that the never_utf option works. 882opt_never_utf(Config) when is_list(Config) -> 883 {match,[{0,3}]} = re:run("ABC","ABC",[never_utf]), 884 {match,[{0,3}]} = re:run("ABC","(*UTF)ABC",[]), 885 {ok,_} = re:compile("(*UTF)ABC"), 886 {ok,_} = re:compile("(*UTF)ABC",[unicode]), 887 {ok,_} = re:compile("(*UTF8)ABC"), 888 {'EXIT',{badarg,_}} = (catch re:run("ABC","ABC",[unicode,never_utf])), 889 {'EXIT',{badarg,_}} = (catch re:run("ABC","(*UTF)ABC",[never_utf])), 890 {'EXIT',{badarg,_}} = (catch re:run("ABC","(*UTF8)ABC",[never_utf])), 891 {error,_} = (catch re:compile("ABC",[unicode,never_utf])), 892 {error,_} = (catch re:compile("(*UTF)ABC",[never_utf])), 893 {error,_} = (catch re:compile("(*UTF8)ABC",[never_utf])), 894 ok. 895 896%% Check that the ucp option is passed to PCRE. 897opt_ucp(Config) when is_list(Config) -> 898 {match,[{0,1}]} = re:run([$a],"\\w",[unicode]), 899 {match,[{0,2}]} = re:run([229],"\\w",[unicode]), % Latin1 works without UCP, as we have a default 900 %% Latin1 table 901 nomatch = re:run([1024],"\\w",[unicode]), % Latin1 word characters only, 1024 is not latin1 902 {match,[{0,2}]} = re:run([1024],"\\w",[unicode,ucp]), % Any Unicode word character works with 'ucp' 903 ok. 904 905%% Check that the match_limit and match_limit_recursion options work. 906match_limit(Config) when is_list(Config) -> 907 nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[]), 908 nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}]), 909 nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}]), 910 nomatch = re:run("aaaaaaaaaaaaaz","(a+)*zz",[report_errors]), 911 {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, 912 report_errors]), 913 {error,match_limit_recursion} = 914 re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}, 915 report_errors]), 916 {error,match_limit} = re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit,3000}, 917 report_errors,global]), 918 {error,match_limit_recursion} = 919 re:run("aaaaaaaaaaaaaz","(a+)*zz",[{match_limit_recursion,10}, 920 report_errors,global]), 921 ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz", 922 [{match_limit_recursion,10},{return,list}]), 923 ["aaaaaaaaaaaaaz"] = re:split("aaaaaaaaaaaaaz","(a+)*zz", 924 [{match_limit,3000},{return,list}]), 925 "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", 926 [{match_limit_recursion,10},{return,list}]), 927 "aaaaaaaaaaaaaz" = re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", 928 [{match_limit,3000},{return,list}]), 929 {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", 930 [{match_limit_recursion,-1},{return,list}])), 931 {'EXIT', {badarg,_}} = (catch re:replace("aaaaaaaaaaaaaz","(a+)*zz","!", 932 [{match_limit,-1},{return,list}])), 933 {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz", 934 [{match_limit_recursion,-1}, 935 report_errors,global])), 936 {'EXIT', {badarg,_}} = (catch re:run("aaaaaaaaaaaaaz","(a+)*zz", 937 [{match_limit,-1}, 938 report_errors,global])), 939 ok. 940%% Test that we get sub-binaries if subject is a binary and we capture 941%% binaries. 942sub_binaries(Config) when is_list(Config) -> 943 %% The GC can auto-convert tiny sub-binaries to heap binaries, so we 944 %% extract large sequences to make the test more stable. 945 Bin = << <<I>> || I <- lists:seq(1, 4096) >>, 946 {match,[B,C]}=re:run(Bin,"a(.+)$",[{capture,all,binary}]), 947 true = byte_size(B) =/= byte_size(C), 948 4096 = binary:referenced_byte_size(B), 949 4096 = binary:referenced_byte_size(C), 950 {match,[D]}=re:run(Bin,"a(.+)$",[{capture,[1],binary}]), 951 4096 = binary:referenced_byte_size(D), 952 ok. 953 954bad_utf8_subject(Config) when is_list(Config) -> 955 %% OTP-16553: re:run() did not badarg 956 %% if both pattern and subject was binaries 957 %% even though subject contained illegal 958 %% utf8... 959 960 nomatch = re:run(<<255,255,255>>, <<"a">>, []), 961 nomatch = re:run(<<255,255,255>>, "a", []), 962 nomatch = re:run(<<"aaa">>, <<255>>, []), 963 nomatch = re:run(<<"aaa">>, [255], []), 964 {match,[{0,1}]} = re:run(<<255,255,255>>, <<255>>, []), 965 {match,[{0,1}]} = re:run(<<255,255,255>>, [255], []), 966 %% Badarg on illegal utf8 in subject as of OTP 23... 967 try 968 re:run(<<255,255,255>>, <<"a">>, [unicode]), 969 error(unexpected) 970 catch 971 error:badarg -> 972 ok 973 end, 974 try 975 re:run(<<255,255,255>>, "a", [unicode]), 976 error(unexpected) 977 catch 978 error:badarg -> 979 ok 980 end, 981 try 982 re:run(<<"aaa">>, <<255>>, [unicode]), 983 error(unexpected) 984 catch 985 error:badarg -> 986 ok 987 end, 988 nomatch = re:run(<<"aaa">>, [255], [unicode]), 989 try 990 re:run(<<255,255,255>>, <<255>>, [unicode]), 991 error(unexpected) 992 catch 993 error:badarg -> 994 ok 995 end, 996 try 997 re:run(<<255,255,255>>, [255], [unicode]), 998 error(unexpected) 999 catch 1000 error:badarg -> 1001 ok 1002 end. 1003 1004error_info(_Config) -> 1005 BadRegexp = {re_pattern,0,0,0,<<"xyz">>}, 1006 {ok,GoodRegexp} = re:compile(".*"), 1007 1008 L = [{compile, [not_iodata]}, 1009 {compile, [not_iodata, not_list],[{1,".*"},{2,".*"}]}, 1010 {compile, [<<".*">>, [a|b]]}, 1011 {compile, [<<".*">>, [bad_option]]}, 1012 {compile, [{a,b}, [bad_option]],[{1,".*"},{2,".*"}]}, 1013 1014 {grun, 3}, %Internal. 1015 1016 {inspect,[BadRegexp, namelist]}, 1017 {inspect,["", namelist]}, 1018 {inspect,[GoodRegexp, 999]}, 1019 {inspect,[GoodRegexp, bad_inspect_item]}, 1020 1021 {internal_run, 4}, %Internal. 1022 1023 {replace, [{a,b}, {x,y}, {z,z}],[{1,".*"},{2,".*"},{3,".*"}]}, 1024 {replace, [{a,b}, BadRegexp, {z,z}],[{1,".*"},{2,".*"},{3,".*"}]}, 1025 1026 {replace, [{a,b}, {x,y}, {z,z}, [a|b]],[{1,".*"},{2,".*"},{3,".*"},{4,".*"}]}, 1027 {replace, [{a,b}, BadRegexp, [bad_option]],[{1,".*"},{2,".*"},{3,".*"}]}, 1028 {replace, ["", "", {z,z}, not_a_list],[{3,".*"},{4,".*"}]}, 1029 1030 {run, [{a,b}, {x,y}],[{1,".*"},{2,".*"}]}, 1031 {run, [{a,b}, ".*"]}, 1032 {run, ["abc", {x,y}]}, 1033 {run, ["abc", BadRegexp]}, 1034 1035 {run, [{a,b}, {x,y}, []],[{1,".*"},{2,".*"}]}, 1036 {run, ["abc", BadRegexp, []]}, 1037 {run, [{a,b}, {x,y}, [a|b]],[{1,".*"},{2,".*"},{3,".*"}]}, 1038 {run, [{a,b}, ".*", bad_options],[{1,".*"},{3,".*"}]}, 1039 {run, ["abc", {x,y}, [bad_option]],[{2,".*"},{3,".*"}]}, 1040 {run, ["abc", BadRegexp, 9999],[{2,".*"},{3,".*"}]}, 1041 1042 {split, ["abc", BadRegexp]}, 1043 {split, [{a,b}, ".*"]}, 1044 1045 {split, ["abc", BadRegexp, [a|b]],[{2,".*"},{3,".*"}]}, 1046 {split, [{a,b}, ".*", [bad_option]]}, 1047 1048 {ucompile, 2}, %Internal. 1049 {urun, 3} %Internal. 1050 ], 1051 error_info_lib:test_error_info(re, L). 1052