1(* mosml/src/dynlibs/mregex/Regex.sml
2   sestoft@dina.kvl.dk -- 1998-12-25, 1999-01-02
3 *)
4
5open List Dynlib;
6
7prim_type regex;	     (* An abstract finalized object, see mregex.c *)
8
9exception Regex of string;
10
11fun error fcn msg = raise Regex (fcn ^ ": " ^ msg)
12
13(* Obtain a handle pointing to the library defining the C functions: *)
14
15val dlh = dlopen { lib = "libmregex.so",
16		   flag = RTLD_NOW,
17		   global = false }
18
19val (REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NOTBOL, REG_NOTEOL)
20    : word * word * word * word * word
21    = app1 (dlsym dlh "mregex_getflags") ()
22
23datatype cflag = Extended | Icase | Newline
24
25fun cflagval Extended = REG_EXTENDED
26  | cflagval Icase    = REG_ICASE
27  | cflagval Newline  = REG_NEWLINE
28
29val cflagsval = foldl (fn (f, res) => Word.orb(cflagval f, res)) 0w0
30
31datatype eflag = Notbol | Noteol
32
33fun eflagval Notbol = REG_NOTBOL
34  | eflagval Noteol = REG_NOTEOL
35
36val eflagsval = foldl (fn (f, res) => Word.orb(eflagval f, res)) 0w0
37
38val regcomp_ : string -> word -> regex
39    = app2 (dlsym dlh "mregex_regcomp")
40
41fun regcomp pat cflags =
42    (regcomp_ pat (cflagsval cflags))
43    handle Fail msg => error "regcomp" msg
44
45val regexec_sus_ : regex -> word -> substring -> substring vector option
46    = app3 (dlsym dlh "mregex_regexec_sus")
47
48fun regexec regex eflags tgt =
49    (regexec_sus_ regex (eflagsval eflags) (Substring.all tgt))
50    handle Fail msg => error "regexec" msg
51
52fun regnexec regex eflags sus =
53    (regexec_sus_ regex (eflagsval eflags) sus)
54    handle Fail msg => error "regnexec" msg
55
56val regexec_bool_ : regex -> word -> substring -> bool
57    = app3 (dlsym dlh "mregex_regexec_bool")
58
59fun regexecBool regex eflags tgt =
60    (regexec_bool_ regex (eflagsval eflags) (Substring.all tgt))
61    handle Fail msg => error "regexecBool" msg
62
63fun regnexecBool regex eflags sus =
64    (regexec_bool_ regex (eflagsval eflags) sus)
65    handle Fail msg => error "regnexecBool" msg
66
67val regmatch_sus_ : string -> word -> word -> substring
68                    -> substring vector option
69    = app4 (dlsym dlh "mregex_regmatch_sus")
70
71fun regmatch { pat : string, tgt : string } cflags eflags =
72    (regmatch_sus_ pat (cflagsval cflags) (eflagsval eflags)
73                   (Substring.all tgt))
74    handle Fail msg => error "regmatch" msg
75
76val regmatch_bool_ : string -> word -> word -> substring -> bool
77    = app4 (dlsym dlh "mregex_regmatch_bool")
78
79fun regmatchBool { pat : string, tgt : string } cflags eflags =
80    (regmatch_bool_ pat (cflagsval cflags) (eflagsval eflags)
81                    (Substring.all tgt))
82    handle Fail msg => error "regmatchBool" msg
83
84
85(* Return the substring to the left of the given substring *)
86
87(* Precondition: s2 is a substring of s1, that is,
88   s1 = s2 and i1<=i2 and i2+n2<=i1+n1
89*)
90
91fun leftsus (sus1 : substring) (sus2 : Substring.substring) =
92    let val (s1, i1, n1) = Substring.base sus1
93	val (s2, i2, n2) = Substring.base sus2
94    in
95	if s1 = s2 andalso i1<=i2 andalso i2+n2<=i1+n1 then
96	    Substring.substring(s1, i1, i2-i1)
97	else
98	    raise Subscript
99    end
100
101(* Faster, non-checking version *)
102
103fun leftsus_ (sus1 : substring) (sus2 : Substring.substring) =
104    let val (s1, i1, _) = Substring.base sus1
105	val (_,  i2, _) = Substring.base sus2
106    in
107	Substring.substring(s1, i1, i2-i1)
108    end
109
110(* Return the substring to the right of the given substring *)
111
112(* Precondition: sus is a substring of s, that is, s = s' and sus is valid *)
113
114fun right (s : string) (sus : Substring.substring) =
115    let val (s', i, n) = Substring.base sus
116    in
117	if s = s' then Substring.extract(s', i+n, NONE)
118	else raise Subscript
119    end
120
121datatype replacer =
122    Str of string
123  | Sus of int
124  | Tr  of (string -> string) * int
125  | Trs of substring vector -> string
126
127fun applyreplacer suss replacer res =
128    let open Substring
129	fun h []                  res = res
130	  | h (Str s :: rest)     res = h rest (all s :: res)
131	  | h (Sus i :: rest)     res = h rest (Vector.sub(suss, i) :: res)
132	  | h (Tr (f,i)  :: rest) res =
133	    h rest (all (f (string (Vector.sub(suss, i)))) :: res)
134	  | h (Trs f :: rest) res     = h rest (all (f suss) :: res)
135    in h replacer res end
136
137fun replace1 regex replacer s =
138    let open Substring
139	val sus = all s
140    in
141	case regexec_sus_ regex 0w0 sus of
142	    NONE      => s
143	  | SOME suss =>
144		let val match  = Vector.sub(suss, 0)
145		    val left'  = leftsus sus match
146		    val right' = right s match
147		    val repl'  = applyreplacer suss replacer []
148		in Substring.concat(left' :: rev (right' :: repl')) end
149    end
150
151fun replace_aux regex fcn replacer s =
152    let open Substring
153	fun h sus revres =
154	    case regexec_sus_ regex 0w0 sus of
155		NONE      => Substring.concat (List.rev (sus :: revres))
156	      | SOME suss =>
157		    let val match   = Vector.sub(suss, 0)
158			val field1  = leftsus sus match
159			val revres1 = applyreplacer suss replacer
160			              (field1 :: revres)
161		    in
162			(* Check that we make progress *)
163			if isEmpty field1 andalso isEmpty match then
164			    error fcn "no progress"
165			else
166			    h (right s match) revres1
167		    end
168    in h (all s) [] end
169
170fun replace regex replacer s =
171    replace_aux regex "replace" replacer s
172
173fun substitute1 regex tr s = replace1 regex [Tr (tr, 0)] s
174
175fun substitute regex tr s =
176    replace_aux regex "substitute" [Tr (tr, 0)] s
177
178fun split regex fcn add s =
179    let open Substring
180	val eflags = Word.orb(REG_NOTBOL, REG_NOTEOL)
181	fun h sus revres =
182	    case regexec_sus_ regex eflags sus of
183		NONE      => List.rev (add sus revres)
184	      | SOME suss =>
185		    let val match   = Vector.sub(suss, 0)
186			val field1  = leftsus sus match
187			val revres1 = add field1 revres
188		    in
189			(* Check that we make progress *)
190			if isEmpty field1 andalso isEmpty match then
191			    error fcn "no progress"
192			else
193			    h (right s match) revres1
194		    end
195    in h (all s) [] end
196
197fun addfield sus res =
198    sus :: res
199
200fun fields regex s = split regex "fields" addfield s
201
202fun addtoken sus res =
203    if Substring.isEmpty sus then res else sus :: res
204
205fun tokens regex s = split regex "tokens" addtoken s
206
207fun fold regex (fa, fb) e s =
208    let open Substring
209	fun h sus res =
210	    case regexec_sus_ regex 0w0 sus of
211		NONE      => fa(sus, res)
212	      | SOME suss =>
213		    let val match   = Vector.sub(suss, 0)
214			val field1  = leftsus sus match
215			val res1    = fb (suss, fa(field1, res))
216		    in
217			(* Check that we make progress *)
218			if isEmpty field1 andalso isEmpty match then
219			    error "fold" "no progress"
220			else
221			    h (right s match) res1
222		    end
223    in h (all s) e end
224
225fun map regex f s =
226    let open Substring
227	fun h sus revres =
228	    case regexec_sus_ regex 0w0 sus of
229		NONE      => List.rev revres
230	      | SOME suss =>
231		    let val match   = Vector.sub(suss, 0)
232			val field1  = leftsus sus match
233			val revres1 = f suss :: revres
234		    in
235			(* Check that we make progress *)
236			if isEmpty field1 andalso isEmpty match then
237			    error "map" "no progress"
238			else
239			    h (right s match) revres1
240		    end
241    in h (all s) [] end
242
243fun app regex f s =
244    let open Substring
245	fun h sus =
246	    case regexec_sus_ regex 0w0 sus of
247		NONE      => ()
248	      | SOME suss =>
249		    let val match   = Vector.sub(suss, 0)
250			val field1  = leftsus sus match
251			val revres1 = f suss
252		    in
253			(* Check that we make progress *)
254			if isEmpty field1 andalso isEmpty match then
255			    error "app" "no progress"
256			else
257			    h (right s match)
258		    end
259    in h (all s) end
260
261
262