1(**************************************************************************) 2(* *) 3(* OCaml *) 4(* *) 5(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) 6(* *) 7(* Copyright 1996 Institut National de Recherche en Informatique et *) 8(* en Automatique. *) 9(* *) 10(* All rights reserved. This file is distributed under the terms of *) 11(* the GNU Lesser General Public License version 2.1, with the *) 12(* special exception on linking described in the file LICENSE. *) 13(* *) 14(**************************************************************************) 15 16(* In this module, [@ocaml.warning "-3"] is used in several places 17 that use deprecated functions to preserve legacy behavior. 18 It overrides -w @3 given on the command line. *) 19 20(** String utilities *) 21 22let string_before s n = String.sub s 0 n 23 24let string_after s n = String.sub s n (String.length s - n) 25 26let first_chars s n = String.sub s 0 n 27 28let last_chars s n = String.sub s (String.length s - n) n 29 30(** Representation of character sets **) 31 32module Charset = 33 struct 34 type t = bytes (* of length 32 *) 35 36 (*let empty = Bytes.make 32 '\000'*) 37 let full = Bytes.make 32 '\255' 38 39 let make_empty () = Bytes.make 32 '\000' 40 41 let add s c = 42 let i = Char.code c in 43 Bytes.set s (i lsr 3) 44 (Char.chr (Char.code (Bytes.get s (i lsr 3)) 45 lor (1 lsl (i land 7)))) 46 47 let add_range s c1 c2 = 48 for i = Char.code c1 to Char.code c2 do add s (Char.chr i) done 49 50 let singleton c = 51 let s = make_empty () in add s c; s 52 53 (*let range c1 c2 = 54 let s = make_empty () in add_range s c1 c2; s 55 *) 56 let complement s = 57 let r = Bytes.create 32 in 58 for i = 0 to 31 do 59 Bytes.set r i (Char.chr(Char.code (Bytes.get s i) lxor 0xFF)) 60 done; 61 r 62 63 let union s1 s2 = 64 let r = Bytes.create 32 in 65 for i = 0 to 31 do 66 Bytes.set r i (Char.chr(Char.code (Bytes.get s1 i) 67 lor Char.code (Bytes.get s2 i))) 68 done; 69 r 70 71 let disjoint s1 s2 = 72 try 73 for i = 0 to 31 do 74 if Char.code (Bytes.get s1 i) land Char.code (Bytes.get s2 i) 75 <> 0 76 then raise Exit 77 done; 78 true 79 with Exit -> 80 false 81 82 let iter fn s = 83 for i = 0 to 31 do 84 let c = Char.code (Bytes.get s i) in 85 if c <> 0 then 86 for j = 0 to 7 do 87 if c land (1 lsl j) <> 0 then fn (Char.chr ((i lsl 3) + j)) 88 done 89 done 90 91 let expand s = 92 let r = Bytes.make 256 '\000' in 93 iter (fun c -> Bytes.set r (Char.code c) '\001') s; 94 r 95 96 let fold_case s = 97 (let r = make_empty() in 98 iter (fun c -> add r (Char.lowercase c); add r (Char.uppercase c)) s; 99 r)[@ocaml.warning "-3"] 100 101 end 102 103(** Abstract syntax tree for regular expressions *) 104 105type re_syntax = 106 Char of char 107 | String of string 108 | CharClass of Charset.t * bool (* true = complemented, false = normal *) 109 | Seq of re_syntax list 110 | Alt of re_syntax * re_syntax 111 | Star of re_syntax 112 | Plus of re_syntax 113 | Option of re_syntax 114 | Group of int * re_syntax 115 | Refgroup of int 116 | Bol 117 | Eol 118 | Wordboundary 119 120(** Representation of compiled regular expressions *) 121 122type regexp = { 123 prog: int array; (* bytecode instructions *) 124 cpool: string array; (* constant pool (string literals) *) 125 normtable: string; (* case folding table (if any) *) 126 numgroups: int; (* number of \(...\) groups *) 127 numregisters: int; (* number of nullable Star or Plus *) 128 startchars: int (* index of set of starting chars, or -1 if none *) 129} 130 131(** Opcodes for bytecode instructions; see strstubs.c for description *) 132 133let op_CHAR = 0 134let op_CHARNORM = 1 135let op_STRING = 2 136let op_STRINGNORM = 3 137let op_CHARCLASS = 4 138let op_BOL = 5 139let op_EOL = 6 140let op_WORDBOUNDARY = 7 141let op_BEGGROUP = 8 142let op_ENDGROUP = 9 143let op_REFGROUP = 10 144let op_ACCEPT = 11 145let op_SIMPLEOPT = 12 146let op_SIMPLESTAR = 13 147let op_SIMPLEPLUS = 14 148let op_GOTO = 15 149let op_PUSHBACK = 16 150let op_SETMARK = 17 151let op_CHECKPROGRESS = 18 152 153(* Encoding of bytecode instructions *) 154 155let instr opc arg = opc lor (arg lsl 8) 156 157(* Computing relative displacements for GOTO and PUSHBACK instructions *) 158 159let displ dest from = dest - from - 1 160 161(** Compilation of a regular expression *) 162 163(* Determine if a regexp can match the empty string *) 164 165let rec is_nullable = function 166 Char _ -> false 167 | String s -> s = "" 168 | CharClass _ -> false 169 | Seq rl -> List.for_all is_nullable rl 170 | Alt (r1, r2) -> is_nullable r1 || is_nullable r2 171 | Star _ -> true 172 | Plus r -> is_nullable r 173 | Option _ -> true 174 | Group(_, r) -> is_nullable r 175 | Refgroup _ -> true 176 | Bol -> true 177 | Eol -> true 178 | Wordboundary -> true 179 180(* first r returns a set of characters C such that: 181 for all string s, s matches r => the first character of s is in C. 182 For convenience, return Charset.full if r is nullable. *) 183 184let rec first = function 185 Char c -> Charset.singleton c 186 | String s -> if s = "" then Charset.full else Charset.singleton s.[0] 187 | CharClass(cl, cmpl) -> if cmpl then Charset.complement cl else cl 188 | Seq rl -> first_seq rl 189 | Alt (r1, r2) -> Charset.union (first r1) (first r2) 190 | Star _ -> Charset.full 191 | Plus r -> first r 192 | Option _ -> Charset.full 193 | Group(_, r) -> first r 194 | Refgroup _ -> Charset.full 195 | Bol -> Charset.full 196 | Eol -> Charset.full 197 | Wordboundary -> Charset.full 198 199and first_seq = function 200 [] -> Charset.full 201 | (Bol | Eol | Wordboundary) :: rl -> first_seq rl 202 | Star r :: rl -> Charset.union (first r) (first_seq rl) 203 | Option r :: rl -> Charset.union (first r) (first_seq rl) 204 | r :: _ -> first r 205 206(* Transform a Char or CharClass regexp into a character class *) 207 208let charclass_of_regexp fold_case re = 209 let (cl1, compl) = 210 match re with 211 | Char c -> (Charset.singleton c, false) 212 | CharClass(cl, compl) -> (cl, compl) 213 | _ -> assert false in 214 let cl2 = if fold_case then Charset.fold_case cl1 else cl1 in 215 Bytes.to_string (if compl then Charset.complement cl2 else cl2) 216 217(* The case fold table: maps characters to their lowercase equivalent *) 218 219let fold_case_table = 220 (let t = Bytes.create 256 in 221 for i = 0 to 255 do Bytes.set t i (Char.lowercase(Char.chr i)) done; 222 Bytes.to_string t)[@ocaml.warning "-3"] 223 224module StringMap = 225 Map.Make(struct type t = string let compare (x:t) y = compare x y end) 226 227(* Compilation of a regular expression *) 228 229let compile fold_case re = 230 231 (* Instruction buffering *) 232 let prog = ref (Array.make 32 0) 233 and progpos = ref 0 234 and cpool = ref StringMap.empty 235 and cpoolpos = ref 0 236 and numgroups = ref 1 237 and numregs = ref 0 in 238 (* Add a new instruction *) 239 let emit_instr opc arg = 240 if !progpos >= Array.length !prog then begin 241 let newlen = ref (Array.length !prog) in 242 while !progpos >= !newlen do newlen := !newlen * 2 done; 243 let nprog = Array.make !newlen 0 in 244 Array.blit !prog 0 nprog 0 (Array.length !prog); 245 prog := nprog 246 end; 247 (!prog).(!progpos) <- (instr opc arg); 248 incr progpos in 249 (* Reserve an instruction slot and return its position *) 250 let emit_hole () = 251 let p = !progpos in emit_instr op_CHAR 0; p in 252 (* Fill a reserved instruction slot with a GOTO or PUSHBACK instruction *) 253 let patch_instr pos opc dest = 254 (!prog).(pos) <- (instr opc (displ dest pos)) in 255 (* Return the cpool index for the given string, adding it if not 256 already there *) 257 let cpool_index s = 258 try 259 StringMap.find s !cpool 260 with Not_found -> 261 let p = !cpoolpos in 262 cpool := StringMap.add s p !cpool; 263 incr cpoolpos; 264 p in 265 (* Allocate fresh register if regexp is nullable *) 266 let allocate_register_if_nullable r = 267 if is_nullable r then begin 268 let n = !numregs in 269 if n >= 64 then failwith "too many r* or r+ where r is nullable"; 270 incr numregs; 271 n 272 end else 273 -1 in 274 (* Main recursive compilation function *) 275 let rec emit_code = function 276 Char c -> 277 if fold_case then 278 emit_instr op_CHARNORM (Char.code (Char.lowercase c)) 279 [@ocaml.warning "-3"] 280 else 281 emit_instr op_CHAR (Char.code c) 282 | String s -> 283 begin match String.length s with 284 0 -> () 285 | 1 -> 286 if fold_case then 287 emit_instr op_CHARNORM (Char.code (Char.lowercase s.[0])) 288 [@ocaml.warning "-3"] 289 else 290 emit_instr op_CHAR (Char.code s.[0]) 291 | _ -> 292 try 293 (* null characters are not accepted by the STRING* instructions; 294 if one is found, split string at null character *) 295 let i = String.index s '\000' in 296 emit_code (String (string_before s i)); 297 emit_instr op_CHAR 0; 298 emit_code (String (string_after s (i+1))) 299 with Not_found -> 300 if fold_case then 301 emit_instr op_STRINGNORM (cpool_index (String.lowercase s)) 302 [@ocaml.warning "-3"] 303 else 304 emit_instr op_STRING (cpool_index s) 305 end 306 | CharClass(cl, compl) -> 307 let cl1 = if fold_case then Charset.fold_case cl else cl in 308 let cl2 = if compl then Charset.complement cl1 else cl1 in 309 emit_instr op_CHARCLASS (cpool_index (Bytes.to_string cl2)) 310 | Seq rl -> 311 emit_seq_code rl 312 | Alt(r1, r2) -> 313 (* PUSHBACK lbl1 314 <match r1> 315 GOTO lbl2 316 lbl1: <match r2> 317 lbl2: ... *) 318 let pos_pushback = emit_hole() in 319 emit_code r1; 320 let pos_goto_end = emit_hole() in 321 let lbl1 = !progpos in 322 emit_code r2; 323 let lbl2 = !progpos in 324 patch_instr pos_pushback op_PUSHBACK lbl1; 325 patch_instr pos_goto_end op_GOTO lbl2 326 | Star r -> 327 (* Implement longest match semantics for compatibility with old Str *) 328 (* General translation: 329 lbl1: PUSHBACK lbl2 330 SETMARK regno 331 <match r> 332 CHECKPROGRESS regno 333 GOTO lbl1 334 lbl2: 335 If r cannot match the empty string, code can be simplified: 336 lbl1: PUSHBACK lbl2 337 <match r> 338 GOTO lbl1 339 lbl2: 340 *) 341 let regno = allocate_register_if_nullable r in 342 let lbl1 = emit_hole() in 343 if regno >= 0 then emit_instr op_SETMARK regno; 344 emit_code r; 345 if regno >= 0 then emit_instr op_CHECKPROGRESS regno; 346 emit_instr op_GOTO (displ lbl1 !progpos); 347 let lbl2 = !progpos in 348 patch_instr lbl1 op_PUSHBACK lbl2 349 | Plus r -> 350 (* Implement longest match semantics for compatibility with old Str *) 351 (* General translation: 352 lbl1: <match r> 353 CHECKPROGRESS regno 354 PUSHBACK lbl2 355 SETMARK regno 356 GOTO lbl1 357 lbl2: 358 If r cannot match the empty string, code can be simplified: 359 lbl1: <match r> 360 PUSHBACK lbl2 361 GOTO_PLUS lbl1 362 lbl2: 363 *) 364 let regno = allocate_register_if_nullable r in 365 let lbl1 = !progpos in 366 emit_code r; 367 if regno >= 0 then emit_instr op_CHECKPROGRESS regno; 368 let pos_pushback = emit_hole() in 369 if regno >= 0 then emit_instr op_SETMARK regno; 370 emit_instr op_GOTO (displ lbl1 !progpos); 371 let lbl2 = !progpos in 372 patch_instr pos_pushback op_PUSHBACK lbl2 373 | Option r -> 374 (* Implement longest match semantics for compatibility with old Str *) 375 (* PUSHBACK lbl 376 <match r> 377 lbl: 378 *) 379 let pos_pushback = emit_hole() in 380 emit_code r; 381 let lbl = !progpos in 382 patch_instr pos_pushback op_PUSHBACK lbl 383 | Group(n, r) -> 384 emit_instr op_BEGGROUP n; 385 emit_code r; 386 emit_instr op_ENDGROUP n; 387 numgroups := max !numgroups (n+1) 388 | Refgroup n -> 389 emit_instr op_REFGROUP n; 390 numgroups := max !numgroups (n+1) 391 | Bol -> 392 emit_instr op_BOL 0 393 | Eol -> 394 emit_instr op_EOL 0 395 | Wordboundary -> 396 emit_instr op_WORDBOUNDARY 0 397 398 and emit_seq_code = function 399 [] -> () 400 | Star(Char _ | CharClass _ as r) :: rl 401 when disjoint_modulo_case (first r) (first_seq rl) -> 402 emit_instr op_SIMPLESTAR (cpool_index (charclass_of_regexp fold_case r)); 403 emit_seq_code rl 404 | Plus(Char _ | CharClass _ as r) :: rl 405 when disjoint_modulo_case (first r) (first_seq rl) -> 406 emit_instr op_SIMPLEPLUS (cpool_index (charclass_of_regexp fold_case r)); 407 emit_seq_code rl 408 | Option(Char _ | CharClass _ as r) :: rl 409 when disjoint_modulo_case (first r) (first_seq rl) -> 410 emit_instr op_SIMPLEOPT (cpool_index (charclass_of_regexp fold_case r)); 411 emit_seq_code rl 412 | r :: rl -> 413 emit_code r; 414 emit_seq_code rl 415 416 and disjoint_modulo_case c1 c2 = 417 if fold_case 418 then Charset.disjoint (Charset.fold_case c1) (Charset.fold_case c2) 419 else Charset.disjoint c1 c2 420 in 421 422 emit_code re; 423 emit_instr op_ACCEPT 0; 424 let start = first re in 425 let start' = if fold_case then Charset.fold_case start else start in 426 let start_pos = 427 if start = Charset.full 428 then -1 429 else cpool_index (Bytes.to_string (Charset.expand start')) in 430 let constantpool = Array.make !cpoolpos "" in 431 StringMap.iter (fun str idx -> constantpool.(idx) <- str) !cpool; 432 { prog = Array.sub !prog 0 !progpos; 433 cpool = constantpool; 434 normtable = if fold_case then fold_case_table else ""; 435 numgroups = !numgroups; 436 numregisters = !numregs; 437 startchars = start_pos } 438 439(** Parsing of a regular expression *) 440 441(* Efficient buffering of sequences *) 442 443module SeqBuffer = struct 444 445 type t = { sb_chars: Buffer.t; mutable sb_next: re_syntax list } 446 447 let create() = { sb_chars = Buffer.create 16; sb_next = [] } 448 449 let flush buf = 450 let s = Buffer.contents buf.sb_chars in 451 Buffer.clear buf.sb_chars; 452 match String.length s with 453 0 -> () 454 | 1 -> buf.sb_next <- Char s.[0] :: buf.sb_next 455 | _ -> buf.sb_next <- String s :: buf.sb_next 456 457 let add buf re = 458 match re with 459 Char c -> Buffer.add_char buf.sb_chars c 460 | _ -> flush buf; buf.sb_next <- re :: buf.sb_next 461 462 let extract buf = 463 flush buf; Seq(List.rev buf.sb_next) 464 465end 466 467(* The character class corresponding to `.' *) 468 469let dotclass = Charset.complement (Charset.singleton '\n') 470 471(* Parse a regular expression *) 472 473let parse s = 474 let len = String.length s in 475 let group_counter = ref 1 in 476 477 let rec regexp0 i = 478 let (r, j) = regexp1 i in 479 regexp0cont r j 480 and regexp0cont r1 i = 481 if i + 2 <= len && s.[i] = '\\' && s.[i+1] = '|' then 482 let (r2, j) = regexp1 (i+2) in 483 regexp0cont (Alt(r1, r2)) j 484 else 485 (r1, i) 486 and regexp1 i = 487 regexp1cont (SeqBuffer.create()) i 488 and regexp1cont sb i = 489 if i >= len 490 || i + 2 <= len && s.[i] = '\\' && (let c = s.[i+1] in c = '|' || c = ')') 491 then 492 (SeqBuffer.extract sb, i) 493 else 494 let (r, j) = regexp2 i in 495 SeqBuffer.add sb r; 496 regexp1cont sb j 497 and regexp2 i = 498 let (r, j) = regexp3 i in 499 regexp2cont r j 500 and regexp2cont r i = 501 if i >= len then (r, i) else 502 match s.[i] with 503 '?' -> regexp2cont (Option r) (i+1) 504 | '*' -> regexp2cont (Star r) (i+1) 505 | '+' -> regexp2cont (Plus r) (i+1) 506 | _ -> (r, i) 507 and regexp3 i = 508 match s.[i] with 509 '\\' -> regexpbackslash (i+1) 510 | '[' -> let (c, compl, j) = regexpclass0 (i+1) in 511 (CharClass(c, compl), j) 512 | '^' -> (Bol, i+1) 513 | '$' -> (Eol, i+1) 514 | '.' -> (CharClass(dotclass, false), i+1) 515 | c -> (Char c, i+1) 516 and regexpbackslash i = 517 if i >= len then (Char '\\', i) else 518 match s.[i] with 519 '|' | ')' -> 520 assert false 521 | '(' -> 522 let group_no = !group_counter in 523 incr group_counter; 524 let (r, j) = regexp0 (i+1) in 525 if j + 1 < len && s.[j] = '\\' && s.[j+1] = ')' then 526 (Group(group_no, r), j + 2) 527 else 528 failwith "\\( group not closed by \\)" 529 | '1' .. '9' as c -> 530 (Refgroup(Char.code c - 48), i + 1) 531 | 'b' -> 532 (Wordboundary, i + 1) 533 | c -> 534 (Char c, i + 1) 535 and regexpclass0 i = 536 if i < len && s.[i] = '^' 537 then let (c, j) = regexpclass1 (i+1) in (c, true, j) 538 else let (c, j) = regexpclass1 i in (c, false, j) 539 and regexpclass1 i = 540 let c = Charset.make_empty() in 541 let j = regexpclass2 c i i in 542 (c, j) 543 and regexpclass2 c start i = 544 if i >= len then failwith "[ class not closed by ]"; 545 if s.[i] = ']' && i > start then i+1 else begin 546 let c1 = s.[i] in 547 if i+2 < len && s.[i+1] = '-' && s.[i+2] <> ']' then begin 548 let c2 = s.[i+2] in 549 Charset.add_range c c1 c2; 550 regexpclass2 c start (i+3) 551 end else begin 552 Charset.add c c1; 553 regexpclass2 c start (i+1) 554 end 555 end in 556 557 let (r, j) = regexp0 0 in 558 if j = len then r else failwith "spurious \\) in regular expression" 559 560(** Parsing and compilation *) 561 562let regexp e = compile false (parse e) 563 564let regexp_case_fold e = compile true (parse e) 565 566let quote s = 567 let len = String.length s in 568 let buf = Bytes.create (2 * len) in 569 let pos = ref 0 in 570 for i = 0 to len - 1 do 571 match s.[i] with 572 '[' | ']' | '*' | '.' | '\\' | '?' | '+' | '^' | '$' as c -> 573 Bytes.set buf !pos '\\'; 574 Bytes.set buf (!pos + 1) c; 575 pos := !pos + 2 576 | c -> 577 Bytes.set buf !pos c; 578 pos := !pos + 1 579 done; 580 Bytes.sub_string buf 0 !pos 581 582let regexp_string s = compile false (String s) 583 584let regexp_string_case_fold s = compile true (String s) 585 586(** Matching functions **) 587 588external re_string_match: regexp -> string -> int -> int array 589 = "re_string_match" 590external re_partial_match: regexp -> string -> int -> int array 591 = "re_partial_match" 592external re_search_forward: regexp -> string -> int -> int array 593 = "re_search_forward" 594external re_search_backward: regexp -> string -> int -> int array 595 = "re_search_backward" 596 597let last_search_result = ref [||] 598 599let string_match re s pos = 600 let res = re_string_match re s pos in 601 last_search_result := res; 602 Array.length res > 0 603 604let string_partial_match re s pos = 605 let res = re_partial_match re s pos in 606 last_search_result := res; 607 Array.length res > 0 608 609let search_forward re s pos = 610 let res = re_search_forward re s pos in 611 last_search_result := res; 612 if Array.length res = 0 then raise Not_found else res.(0) 613 614let search_backward re s pos = 615 let res = re_search_backward re s pos in 616 last_search_result := res; 617 if Array.length res = 0 then raise Not_found else res.(0) 618 619let group_beginning n = 620 let n2 = n + n in 621 if n < 0 || n2 >= Array.length !last_search_result then 622 invalid_arg "Str.group_beginning" 623 else 624 let pos = !last_search_result.(n2) in 625 if pos = -1 then raise Not_found else pos 626 627let group_end n = 628 let n2 = n + n in 629 if n < 0 || n2 >= Array.length !last_search_result then 630 invalid_arg "Str.group_end" 631 else 632 let pos = !last_search_result.(n2 + 1) in 633 if pos = -1 then raise Not_found else pos 634 635let matched_group n txt = 636 let n2 = n + n in 637 if n < 0 || n2 >= Array.length !last_search_result then 638 invalid_arg "Str.matched_group" 639 else 640 let b = !last_search_result.(n2) 641 and e = !last_search_result.(n2 + 1) in 642 if b = -1 then raise Not_found else String.sub txt b (e - b) 643 644let match_beginning () = group_beginning 0 645and match_end () = group_end 0 646and matched_string txt = matched_group 0 txt 647 648(** Replacement **) 649 650external re_replacement_text: string -> int array -> string -> string 651 = "re_replacement_text" 652 653let replace_matched repl matched = 654 re_replacement_text repl !last_search_result matched 655 656let substitute_first expr repl_fun text = 657 try 658 let pos = search_forward expr text 0 in 659 String.concat "" [string_before text pos; 660 repl_fun text; 661 string_after text (match_end())] 662 with Not_found -> 663 text 664 665let opt_search_forward re s pos = 666 try Some(search_forward re s pos) with Not_found -> None 667 668let global_substitute expr repl_fun text = 669 let rec replace accu start last_was_empty = 670 let startpos = if last_was_empty then start + 1 else start in 671 if startpos > String.length text then 672 string_after text start :: accu 673 else 674 match opt_search_forward expr text startpos with 675 | None -> 676 string_after text start :: accu 677 | Some pos -> 678 let end_pos = match_end() in 679 let repl_text = repl_fun text in 680 replace (repl_text :: String.sub text start (pos-start) :: accu) 681 end_pos (end_pos = pos) 682 in 683 String.concat "" (List.rev (replace [] 0 false)) 684 685let global_replace expr repl text = 686 global_substitute expr (replace_matched repl) text 687and replace_first expr repl text = 688 substitute_first expr (replace_matched repl) text 689 690(** Splitting *) 691 692let opt_search_forward_progress expr text start = 693 match opt_search_forward expr text start with 694 | None -> None 695 | Some pos -> 696 if match_end() > start then 697 Some pos 698 else if start < String.length text then 699 opt_search_forward expr text (start + 1) 700 else None 701 702let bounded_split expr text num = 703 let start = 704 if string_match expr text 0 then match_end() else 0 in 705 let rec split accu start n = 706 if start >= String.length text then accu else 707 if n = 1 then string_after text start :: accu else 708 match opt_search_forward_progress expr text start with 709 | None -> 710 string_after text start :: accu 711 | Some pos -> 712 split (String.sub text start (pos-start) :: accu) 713 (match_end()) (n-1) 714 in 715 List.rev (split [] start num) 716 717let split expr text = bounded_split expr text 0 718 719let bounded_split_delim expr text num = 720 let rec split accu start n = 721 if start > String.length text then accu else 722 if n = 1 then string_after text start :: accu else 723 match opt_search_forward_progress expr text start with 724 | None -> 725 string_after text start :: accu 726 | Some pos -> 727 split (String.sub text start (pos-start) :: accu) 728 (match_end()) (n-1) 729 in 730 if text = "" then [] else List.rev (split [] 0 num) 731 732let split_delim expr text = bounded_split_delim expr text 0 733 734type split_result = Text of string | Delim of string 735 736let bounded_full_split expr text num = 737 let rec split accu start n = 738 if start >= String.length text then accu else 739 if n = 1 then Text(string_after text start) :: accu else 740 match opt_search_forward_progress expr text start with 741 | None -> 742 Text(string_after text start) :: accu 743 | Some pos -> 744 let s = matched_string text in 745 if pos > start then 746 split (Delim(s) :: Text(String.sub text start (pos-start)) :: accu) 747 (match_end()) (n-1) 748 else 749 split (Delim(s) :: accu) 750 (match_end()) (n-1) 751 in 752 List.rev (split [] 0 num) 753 754let full_split expr text = bounded_full_split expr text 0 755