1(**************************************************************************)
2(*                                                                        *)
3(*                                 OCaml                                  *)
4(*                                                                        *)
5(*             Xavier Leroy, projet Cristal, INRIA Rocquencourt           *)
6(*                                                                        *)
7(*   Copyright 1996 Institut National de Recherche en Informatique et     *)
8(*     en Automatique.                                                    *)
9(*                                                                        *)
10(*   All rights reserved.  This file is distributed under the terms of    *)
11(*   the GNU Lesser General Public License version 2.1, with the          *)
12(*   special exception on linking described in the file LICENSE.          *)
13(*                                                                        *)
14(**************************************************************************)
15
16(* In this module, [@ocaml.warning "-3"] is used in several places
17   that use deprecated functions to preserve legacy behavior.
18   It overrides -w @3 given on the command line. *)
19
20(** String utilities *)
21
22let string_before s n = String.sub s 0 n
23
24let string_after s n = String.sub s n (String.length s - n)
25
26let first_chars s n = String.sub s 0 n
27
28let last_chars s n = String.sub s (String.length s - n) n
29
30(** Representation of character sets **)
31
32module Charset =
33  struct
34    type t = bytes (* of length 32 *)
35
36    (*let empty = Bytes.make 32 '\000'*)
37    let full = Bytes.make 32 '\255'
38
39    let make_empty () = Bytes.make 32 '\000'
40
41    let add s c =
42      let i = Char.code c in
43      Bytes.set s (i lsr 3)
44                (Char.chr (Char.code (Bytes.get s (i lsr 3))
45                           lor (1 lsl (i land 7))))
46
47    let add_range s c1 c2 =
48      for i = Char.code c1 to Char.code c2 do add s (Char.chr i) done
49
50    let singleton c =
51      let s = make_empty () in add s c; s
52
53    (*let range c1 c2 =
54      let s = make_empty () in add_range s c1 c2; s
55    *)
56    let complement s =
57      let r = Bytes.create 32 in
58      for i = 0 to 31 do
59        Bytes.set r i (Char.chr(Char.code (Bytes.get s i) lxor 0xFF))
60      done;
61      r
62
63    let union s1 s2 =
64      let r = Bytes.create 32 in
65      for i = 0 to 31 do
66        Bytes.set r i (Char.chr(Char.code (Bytes.get s1 i)
67                       lor Char.code (Bytes.get s2 i)))
68      done;
69      r
70
71    let disjoint s1 s2 =
72      try
73        for i = 0 to 31 do
74          if Char.code (Bytes.get s1 i) land Char.code (Bytes.get s2 i)
75             <> 0
76          then raise Exit
77        done;
78        true
79      with Exit ->
80        false
81
82    let iter fn s =
83      for i = 0 to 31 do
84        let c = Char.code (Bytes.get s i) in
85        if c <> 0 then
86          for j = 0 to 7 do
87            if c land (1 lsl j) <> 0 then fn (Char.chr ((i lsl 3) + j))
88          done
89      done
90
91    let expand s =
92      let r = Bytes.make 256 '\000' in
93      iter (fun c -> Bytes.set r (Char.code c) '\001') s;
94      r
95
96    let fold_case s =
97      (let r = make_empty() in
98       iter (fun c -> add r (Char.lowercase c); add r (Char.uppercase c)) s;
99       r)[@ocaml.warning "-3"]
100
101  end
102
103(** Abstract syntax tree for regular expressions *)
104
105type re_syntax =
106    Char of char
107  | String of string
108  | CharClass of Charset.t * bool  (* true = complemented, false = normal *)
109  | Seq of re_syntax list
110  | Alt of re_syntax * re_syntax
111  | Star of re_syntax
112  | Plus of re_syntax
113  | Option of re_syntax
114  | Group of int * re_syntax
115  | Refgroup of int
116  | Bol
117  | Eol
118  | Wordboundary
119
120(** Representation of compiled regular expressions *)
121
122type regexp = {
123  prog: int array;         (* bytecode instructions *)
124  cpool: string array;     (* constant pool (string literals) *)
125  normtable: string;       (* case folding table (if any) *)
126  numgroups: int;          (* number of \(...\) groups *)
127  numregisters: int;       (* number of nullable Star or Plus *)
128  startchars: int          (* index of set of starting chars, or -1 if none *)
129}
130
131(** Opcodes for bytecode instructions; see strstubs.c for description *)
132
133let op_CHAR = 0
134let op_CHARNORM = 1
135let op_STRING = 2
136let op_STRINGNORM = 3
137let op_CHARCLASS = 4
138let op_BOL = 5
139let op_EOL = 6
140let op_WORDBOUNDARY = 7
141let op_BEGGROUP = 8
142let op_ENDGROUP = 9
143let op_REFGROUP = 10
144let op_ACCEPT = 11
145let op_SIMPLEOPT = 12
146let op_SIMPLESTAR = 13
147let op_SIMPLEPLUS = 14
148let op_GOTO = 15
149let op_PUSHBACK = 16
150let op_SETMARK = 17
151let op_CHECKPROGRESS = 18
152
153(* Encoding of bytecode instructions *)
154
155let instr opc arg = opc lor (arg lsl 8)
156
157(* Computing relative displacements for GOTO and PUSHBACK instructions *)
158
159let displ dest from = dest - from - 1
160
161(** Compilation of a regular expression *)
162
163(* Determine if a regexp can match the empty string *)
164
165let rec is_nullable = function
166    Char _ -> false
167  | String s -> s = ""
168  | CharClass _ -> false
169  | Seq rl -> List.for_all is_nullable rl
170  | Alt (r1, r2) -> is_nullable r1 || is_nullable r2
171  | Star _ -> true
172  | Plus r -> is_nullable r
173  | Option _ -> true
174  | Group(_, r) -> is_nullable r
175  | Refgroup _ -> true
176  | Bol -> true
177  | Eol -> true
178  | Wordboundary -> true
179
180(* first r returns a set of characters C such that:
181     for all string s, s matches r => the first character of s is in C.
182   For convenience, return Charset.full if r is nullable. *)
183
184let rec first = function
185    Char c -> Charset.singleton c
186  | String s -> if s = "" then Charset.full else Charset.singleton s.[0]
187  | CharClass(cl, cmpl) -> if cmpl then Charset.complement cl else cl
188  | Seq rl -> first_seq rl
189  | Alt (r1, r2) -> Charset.union (first r1) (first r2)
190  | Star _ -> Charset.full
191  | Plus r -> first r
192  | Option _ -> Charset.full
193  | Group(_, r) -> first r
194  | Refgroup _ -> Charset.full
195  | Bol -> Charset.full
196  | Eol -> Charset.full
197  | Wordboundary -> Charset.full
198
199and first_seq = function
200    [] -> Charset.full
201  | (Bol | Eol | Wordboundary) :: rl -> first_seq rl
202  | Star r :: rl -> Charset.union (first r) (first_seq rl)
203  | Option r :: rl -> Charset.union (first r) (first_seq rl)
204  | r :: _ -> first r
205
206(* Transform a Char or CharClass regexp into a character class *)
207
208let charclass_of_regexp fold_case re =
209  let (cl1, compl) =
210    match re with
211    | Char c -> (Charset.singleton c, false)
212    | CharClass(cl, compl) -> (cl, compl)
213    | _ -> assert false in
214  let cl2 = if fold_case then Charset.fold_case cl1 else cl1 in
215  Bytes.to_string (if compl then Charset.complement cl2 else cl2)
216
217(* The case fold table: maps characters to their lowercase equivalent *)
218
219let fold_case_table =
220  (let t = Bytes.create 256 in
221   for i = 0 to 255 do Bytes.set t i (Char.lowercase(Char.chr i)) done;
222   Bytes.to_string t)[@ocaml.warning "-3"]
223
224module StringMap =
225  Map.Make(struct type t = string let compare (x:t) y = compare x y end)
226
227(* Compilation of a regular expression *)
228
229let compile fold_case re =
230
231  (* Instruction buffering *)
232  let prog = ref (Array.make 32 0)
233  and progpos = ref 0
234  and cpool = ref StringMap.empty
235  and cpoolpos = ref 0
236  and numgroups = ref 1
237  and numregs = ref 0 in
238  (* Add a new instruction *)
239  let emit_instr opc arg =
240    if !progpos >= Array.length !prog then begin
241      let newlen = ref (Array.length !prog) in
242      while !progpos >= !newlen do newlen := !newlen * 2 done;
243      let nprog = Array.make !newlen 0 in
244      Array.blit !prog 0 nprog 0 (Array.length !prog);
245      prog := nprog
246    end;
247    (!prog).(!progpos) <- (instr opc arg);
248    incr progpos in
249  (* Reserve an instruction slot and return its position *)
250  let emit_hole () =
251    let p = !progpos in emit_instr op_CHAR 0; p in
252  (* Fill a reserved instruction slot with a GOTO or PUSHBACK instruction *)
253  let patch_instr pos opc dest =
254    (!prog).(pos) <- (instr opc (displ dest pos)) in
255  (* Return the cpool index for the given string, adding it if not
256     already there *)
257  let cpool_index s =
258    try
259      StringMap.find s !cpool
260    with Not_found ->
261      let p = !cpoolpos in
262      cpool := StringMap.add s p !cpool;
263      incr cpoolpos;
264      p in
265  (* Allocate fresh register if regexp is nullable *)
266  let allocate_register_if_nullable r =
267    if is_nullable r then begin
268      let n = !numregs in
269      if n >= 64 then failwith "too many r* or r+ where r is nullable";
270      incr numregs;
271      n
272    end else
273      -1 in
274  (* Main recursive compilation function *)
275  let rec emit_code = function
276    Char c ->
277      if fold_case then
278        emit_instr op_CHARNORM (Char.code (Char.lowercase c))
279          [@ocaml.warning "-3"]
280      else
281        emit_instr op_CHAR (Char.code c)
282  | String s ->
283      begin match String.length s with
284        0 -> ()
285      | 1 ->
286        if fold_case then
287          emit_instr op_CHARNORM (Char.code (Char.lowercase s.[0]))
288            [@ocaml.warning "-3"]
289        else
290          emit_instr op_CHAR (Char.code s.[0])
291      | _ ->
292        try
293          (* null characters are not accepted by the STRING* instructions;
294             if one is found, split string at null character *)
295          let i = String.index s '\000' in
296          emit_code (String (string_before s i));
297          emit_instr op_CHAR 0;
298          emit_code (String (string_after s (i+1)))
299        with Not_found ->
300          if fold_case then
301            emit_instr op_STRINGNORM (cpool_index (String.lowercase s))
302              [@ocaml.warning "-3"]
303          else
304            emit_instr op_STRING (cpool_index s)
305      end
306  | CharClass(cl, compl) ->
307      let cl1 = if fold_case then Charset.fold_case cl else cl in
308      let cl2 = if compl then Charset.complement cl1 else cl1 in
309      emit_instr op_CHARCLASS (cpool_index (Bytes.to_string cl2))
310  | Seq rl ->
311      emit_seq_code rl
312  | Alt(r1, r2) ->
313      (*      PUSHBACK lbl1
314              <match r1>
315              GOTO lbl2
316        lbl1: <match r2>
317        lbl2: ... *)
318      let pos_pushback = emit_hole() in
319      emit_code r1;
320      let pos_goto_end = emit_hole() in
321      let lbl1 = !progpos in
322      emit_code r2;
323      let lbl2 = !progpos in
324      patch_instr pos_pushback op_PUSHBACK lbl1;
325      patch_instr pos_goto_end op_GOTO lbl2
326  | Star r ->
327      (* Implement longest match semantics for compatibility with old Str *)
328      (* General translation:
329           lbl1: PUSHBACK lbl2
330                 SETMARK regno
331                 <match r>
332                 CHECKPROGRESS regno
333                 GOTO lbl1
334           lbl2:
335         If r cannot match the empty string, code can be simplified:
336           lbl1: PUSHBACK lbl2
337                 <match r>
338                 GOTO lbl1
339           lbl2:
340        *)
341      let regno = allocate_register_if_nullable r in
342      let lbl1 = emit_hole() in
343      if regno >= 0 then emit_instr op_SETMARK regno;
344      emit_code r;
345      if regno >= 0 then emit_instr op_CHECKPROGRESS regno;
346      emit_instr op_GOTO (displ lbl1 !progpos);
347      let lbl2 = !progpos in
348      patch_instr lbl1 op_PUSHBACK lbl2
349  | Plus r ->
350      (* Implement longest match semantics for compatibility with old Str *)
351      (* General translation:
352           lbl1: <match r>
353                 CHECKPROGRESS regno
354                 PUSHBACK lbl2
355                 SETMARK regno
356                 GOTO lbl1
357           lbl2:
358         If r cannot match the empty string, code can be simplified:
359           lbl1: <match r>
360                 PUSHBACK lbl2
361                 GOTO_PLUS lbl1
362           lbl2:
363      *)
364      let regno = allocate_register_if_nullable r in
365      let lbl1 = !progpos in
366      emit_code r;
367      if regno >= 0 then emit_instr op_CHECKPROGRESS regno;
368      let pos_pushback = emit_hole() in
369      if regno >= 0 then emit_instr op_SETMARK regno;
370      emit_instr op_GOTO (displ lbl1 !progpos);
371      let lbl2 = !progpos in
372      patch_instr pos_pushback op_PUSHBACK lbl2
373  | Option r ->
374      (* Implement longest match semantics for compatibility with old Str *)
375      (*      PUSHBACK lbl
376              <match r>
377         lbl:
378      *)
379      let pos_pushback = emit_hole() in
380      emit_code r;
381      let lbl = !progpos in
382      patch_instr pos_pushback op_PUSHBACK lbl
383  | Group(n, r) ->
384      emit_instr op_BEGGROUP n;
385      emit_code r;
386      emit_instr op_ENDGROUP n;
387      numgroups := max !numgroups (n+1)
388  | Refgroup n ->
389      emit_instr op_REFGROUP n;
390      numgroups := max !numgroups (n+1)
391  | Bol ->
392      emit_instr op_BOL 0
393  | Eol ->
394      emit_instr op_EOL 0
395  | Wordboundary ->
396      emit_instr op_WORDBOUNDARY 0
397
398  and emit_seq_code = function
399    [] -> ()
400  | Star(Char _ | CharClass _ as r) :: rl
401    when disjoint_modulo_case (first r) (first_seq rl) ->
402      emit_instr op_SIMPLESTAR (cpool_index (charclass_of_regexp fold_case r));
403      emit_seq_code rl
404  | Plus(Char _ | CharClass _ as r) :: rl
405    when disjoint_modulo_case (first r) (first_seq rl) ->
406      emit_instr op_SIMPLEPLUS (cpool_index (charclass_of_regexp fold_case r));
407      emit_seq_code rl
408  | Option(Char _ | CharClass _ as r) :: rl
409    when disjoint_modulo_case (first r) (first_seq rl) ->
410      emit_instr op_SIMPLEOPT (cpool_index (charclass_of_regexp fold_case r));
411      emit_seq_code rl
412  | r :: rl ->
413      emit_code r;
414      emit_seq_code rl
415
416  and disjoint_modulo_case c1 c2 =
417    if fold_case
418    then Charset.disjoint (Charset.fold_case c1) (Charset.fold_case c2)
419    else Charset.disjoint c1 c2
420  in
421
422  emit_code re;
423  emit_instr op_ACCEPT 0;
424  let start = first re in
425  let start' = if fold_case then Charset.fold_case start else start in
426  let start_pos =
427    if start = Charset.full
428    then -1
429    else cpool_index (Bytes.to_string (Charset.expand start')) in
430  let constantpool = Array.make !cpoolpos "" in
431  StringMap.iter (fun str idx -> constantpool.(idx) <- str) !cpool;
432  { prog = Array.sub !prog 0 !progpos;
433    cpool = constantpool;
434    normtable = if fold_case then fold_case_table else "";
435    numgroups = !numgroups;
436    numregisters = !numregs;
437    startchars = start_pos }
438
439(** Parsing of a regular expression *)
440
441(* Efficient buffering of sequences *)
442
443module SeqBuffer = struct
444
445  type t = { sb_chars: Buffer.t; mutable sb_next: re_syntax list }
446
447  let create() = { sb_chars = Buffer.create 16; sb_next = [] }
448
449  let flush buf =
450    let s = Buffer.contents buf.sb_chars in
451    Buffer.clear buf.sb_chars;
452    match String.length s with
453      0 -> ()
454    | 1 -> buf.sb_next <- Char s.[0] :: buf.sb_next
455    | _ -> buf.sb_next <- String s :: buf.sb_next
456
457  let add buf re =
458    match re with
459      Char c -> Buffer.add_char buf.sb_chars c
460    | _ -> flush buf; buf.sb_next <- re :: buf.sb_next
461
462  let extract buf =
463    flush buf; Seq(List.rev buf.sb_next)
464
465end
466
467(* The character class corresponding to `.' *)
468
469let dotclass = Charset.complement (Charset.singleton '\n')
470
471(* Parse a regular expression *)
472
473let parse s =
474  let len = String.length s in
475  let group_counter = ref 1 in
476
477  let rec regexp0 i =
478    let (r, j) = regexp1 i in
479    regexp0cont r j
480  and regexp0cont r1 i =
481    if i + 2 <= len && s.[i] = '\\' && s.[i+1] = '|' then
482      let (r2, j) = regexp1 (i+2) in
483      regexp0cont (Alt(r1, r2)) j
484    else
485      (r1, i)
486  and regexp1 i =
487    regexp1cont (SeqBuffer.create()) i
488  and regexp1cont sb i =
489    if i >= len
490    || i + 2 <= len && s.[i] = '\\' && (let c = s.[i+1] in c = '|' || c = ')')
491    then
492      (SeqBuffer.extract sb, i)
493    else
494      let (r, j) = regexp2 i in
495      SeqBuffer.add sb r;
496      regexp1cont sb j
497  and regexp2 i =
498    let (r, j) = regexp3 i in
499    regexp2cont r j
500  and regexp2cont r i =
501    if i >= len then (r, i) else
502      match s.[i] with
503        '?' -> regexp2cont (Option r) (i+1)
504      | '*' -> regexp2cont (Star r) (i+1)
505      | '+' -> regexp2cont (Plus r) (i+1)
506      |  _  -> (r, i)
507  and regexp3 i =
508    match s.[i] with
509      '\\' -> regexpbackslash (i+1)
510    | '['  -> let (c, compl, j) = regexpclass0 (i+1) in
511              (CharClass(c, compl), j)
512    | '^'  -> (Bol, i+1)
513    | '$'  -> (Eol, i+1)
514    | '.'  -> (CharClass(dotclass, false), i+1)
515    | c    -> (Char c, i+1)
516  and regexpbackslash i =
517    if i >= len then (Char '\\', i) else
518      match s.[i] with
519        '|' | ')' ->
520          assert false
521      | '(' ->
522          let group_no = !group_counter in
523          incr group_counter;
524          let (r, j) = regexp0 (i+1) in
525          if j + 1 < len && s.[j] = '\\' && s.[j+1] = ')' then
526            (Group(group_no, r), j + 2)
527          else
528            failwith "\\( group not closed by \\)"
529      | '1' .. '9' as c ->
530          (Refgroup(Char.code c - 48), i + 1)
531      | 'b' ->
532          (Wordboundary, i + 1)
533      | c ->
534          (Char c, i + 1)
535  and regexpclass0 i =
536    if i < len && s.[i] = '^'
537    then let (c, j) = regexpclass1 (i+1) in (c, true, j)
538    else let (c, j) = regexpclass1 i in (c, false, j)
539  and regexpclass1 i =
540    let c = Charset.make_empty() in
541    let j = regexpclass2 c i i in
542    (c, j)
543  and regexpclass2 c start i =
544    if i >= len then failwith "[ class not closed by ]";
545    if s.[i] = ']' && i > start then i+1 else begin
546      let c1 = s.[i] in
547      if i+2 < len && s.[i+1] = '-' && s.[i+2] <> ']' then begin
548        let c2 = s.[i+2] in
549        Charset.add_range c c1 c2;
550        regexpclass2 c start (i+3)
551      end else begin
552        Charset.add c c1;
553        regexpclass2 c start (i+1)
554      end
555    end in
556
557  let (r, j) = regexp0 0 in
558  if j = len then r else failwith "spurious \\) in regular expression"
559
560(** Parsing and compilation *)
561
562let regexp e = compile false (parse e)
563
564let regexp_case_fold e = compile true (parse e)
565
566let quote s =
567  let len = String.length s in
568  let buf = Bytes.create (2 * len) in
569  let pos = ref 0 in
570  for i = 0 to len - 1 do
571    match s.[i] with
572      '[' | ']' | '*' | '.' | '\\' | '?' | '+' | '^' | '$' as c ->
573        Bytes.set buf !pos '\\';
574        Bytes.set buf (!pos + 1) c;
575        pos := !pos + 2
576    | c ->
577        Bytes.set buf !pos c;
578        pos := !pos + 1
579  done;
580  Bytes.sub_string buf 0 !pos
581
582let regexp_string s = compile false (String s)
583
584let regexp_string_case_fold s = compile true (String s)
585
586(** Matching functions **)
587
588external re_string_match: regexp -> string -> int -> int array
589     = "re_string_match"
590external re_partial_match: regexp -> string -> int -> int array
591     = "re_partial_match"
592external re_search_forward: regexp -> string -> int -> int array
593     = "re_search_forward"
594external re_search_backward: regexp -> string -> int -> int array
595     = "re_search_backward"
596
597let last_search_result = ref [||]
598
599let string_match re s pos =
600  let res = re_string_match re s pos in
601  last_search_result := res;
602  Array.length res > 0
603
604let string_partial_match re s pos =
605  let res = re_partial_match re s pos in
606  last_search_result := res;
607  Array.length res > 0
608
609let search_forward re s pos =
610  let res = re_search_forward re s pos in
611  last_search_result := res;
612  if Array.length res = 0 then raise Not_found else res.(0)
613
614let search_backward re s pos =
615  let res = re_search_backward re s pos in
616  last_search_result := res;
617  if Array.length res = 0 then raise Not_found else res.(0)
618
619let group_beginning n =
620  let n2 = n + n in
621  if n < 0 || n2 >= Array.length !last_search_result then
622    invalid_arg "Str.group_beginning"
623  else
624    let pos = !last_search_result.(n2) in
625    if pos = -1 then raise Not_found else pos
626
627let group_end n =
628  let n2 = n + n in
629  if n < 0 || n2 >= Array.length !last_search_result then
630    invalid_arg "Str.group_end"
631  else
632    let pos = !last_search_result.(n2 + 1) in
633    if pos = -1 then raise Not_found else pos
634
635let matched_group n txt =
636  let n2 = n + n in
637  if n < 0 || n2 >= Array.length !last_search_result then
638    invalid_arg "Str.matched_group"
639  else
640    let b = !last_search_result.(n2)
641    and e = !last_search_result.(n2 + 1) in
642    if b = -1 then raise Not_found else String.sub txt b (e - b)
643
644let match_beginning () = group_beginning 0
645and match_end () = group_end 0
646and matched_string txt = matched_group 0 txt
647
648(** Replacement **)
649
650external re_replacement_text: string -> int array -> string -> string
651    = "re_replacement_text"
652
653let replace_matched repl matched =
654  re_replacement_text repl !last_search_result matched
655
656let substitute_first expr repl_fun text =
657  try
658    let pos = search_forward expr text 0 in
659    String.concat "" [string_before text pos;
660                      repl_fun text;
661                      string_after text (match_end())]
662  with Not_found ->
663    text
664
665let opt_search_forward re s pos =
666  try Some(search_forward re s pos) with Not_found -> None
667
668let global_substitute expr repl_fun text =
669  let rec replace accu start last_was_empty =
670    let startpos = if last_was_empty then start + 1 else start in
671    if startpos > String.length text then
672      string_after text start :: accu
673    else
674      match opt_search_forward expr text startpos with
675      | None ->
676          string_after text start :: accu
677      | Some pos ->
678          let end_pos = match_end() in
679          let repl_text = repl_fun text in
680          replace (repl_text :: String.sub text start (pos-start) :: accu)
681                  end_pos (end_pos = pos)
682  in
683    String.concat "" (List.rev (replace [] 0 false))
684
685let global_replace expr repl text =
686  global_substitute expr (replace_matched repl) text
687and replace_first expr repl text =
688  substitute_first expr (replace_matched repl) text
689
690(** Splitting *)
691
692let opt_search_forward_progress expr text start =
693  match opt_search_forward expr text start with
694  | None -> None
695  | Some pos ->
696      if match_end() > start then
697        Some pos
698      else if start < String.length text then
699        opt_search_forward expr text (start + 1)
700      else None
701
702let bounded_split expr text num =
703  let start =
704    if string_match expr text 0 then match_end() else 0 in
705  let rec split accu start n =
706    if start >= String.length text then accu else
707    if n = 1 then string_after text start :: accu else
708      match opt_search_forward_progress expr text start with
709      | None ->
710          string_after text start :: accu
711      | Some pos ->
712          split (String.sub text start (pos-start) :: accu)
713                (match_end()) (n-1)
714  in
715    List.rev (split [] start num)
716
717let split expr text = bounded_split expr text 0
718
719let bounded_split_delim expr text num =
720  let rec split accu start n =
721    if start > String.length text then accu else
722    if n = 1 then string_after text start :: accu else
723      match opt_search_forward_progress expr text start with
724      | None ->
725          string_after text start :: accu
726      | Some pos ->
727          split (String.sub text start (pos-start) :: accu)
728                (match_end()) (n-1)
729  in
730    if text = "" then [] else List.rev (split [] 0 num)
731
732let split_delim expr text = bounded_split_delim expr text 0
733
734type split_result = Text of string | Delim of string
735
736let bounded_full_split expr text num =
737  let rec split accu start n =
738    if start >= String.length text then accu else
739    if n = 1 then Text(string_after text start) :: accu else
740      match opt_search_forward_progress expr text start with
741      | None ->
742          Text(string_after text start) :: accu
743      | Some pos ->
744          let s = matched_string text in
745          if pos > start then
746            split (Delim(s) :: Text(String.sub text start (pos-start)) :: accu)
747                  (match_end()) (n-1)
748          else
749            split (Delim(s) :: accu)
750                  (match_end()) (n-1)
751  in
752    List.rev (split [] 0 num)
753
754let full_split expr text = bounded_full_split expr text 0
755