1(* mosml/src/dynlibs/mregex/Regex.sml 2 sestoft@dina.kvl.dk -- 1998-12-25, 1999-01-02 3 *) 4 5open List Dynlib; 6 7prim_type regex; (* An abstract finalized object, see mregex.c *) 8 9exception Regex of string; 10 11fun error fcn msg = raise Regex (fcn ^ ": " ^ msg) 12 13(* Obtain a handle pointing to the library defining the C functions: *) 14 15val dlh = dlopen { lib = "libmregex.so", 16 flag = RTLD_NOW, 17 global = false } 18 19val (REG_EXTENDED, REG_ICASE, REG_NEWLINE, REG_NOTBOL, REG_NOTEOL) 20 : word * word * word * word * word 21 = app1 (dlsym dlh "mregex_getflags") () 22 23datatype cflag = Extended | Icase | Newline 24 25fun cflagval Extended = REG_EXTENDED 26 | cflagval Icase = REG_ICASE 27 | cflagval Newline = REG_NEWLINE 28 29val cflagsval = foldl (fn (f, res) => Word.orb(cflagval f, res)) 0w0 30 31datatype eflag = Notbol | Noteol 32 33fun eflagval Notbol = REG_NOTBOL 34 | eflagval Noteol = REG_NOTEOL 35 36val eflagsval = foldl (fn (f, res) => Word.orb(eflagval f, res)) 0w0 37 38val regcomp_ : string -> word -> regex 39 = app2 (dlsym dlh "mregex_regcomp") 40 41fun regcomp pat cflags = 42 (regcomp_ pat (cflagsval cflags)) 43 handle Fail msg => error "regcomp" msg 44 45val regexec_sus_ : regex -> word -> substring -> substring vector option 46 = app3 (dlsym dlh "mregex_regexec_sus") 47 48fun regexec regex eflags tgt = 49 (regexec_sus_ regex (eflagsval eflags) (Substring.all tgt)) 50 handle Fail msg => error "regexec" msg 51 52fun regnexec regex eflags sus = 53 (regexec_sus_ regex (eflagsval eflags) sus) 54 handle Fail msg => error "regnexec" msg 55 56val regexec_bool_ : regex -> word -> substring -> bool 57 = app3 (dlsym dlh "mregex_regexec_bool") 58 59fun regexecBool regex eflags tgt = 60 (regexec_bool_ regex (eflagsval eflags) (Substring.all tgt)) 61 handle Fail msg => error "regexecBool" msg 62 63fun regnexecBool regex eflags sus = 64 (regexec_bool_ regex (eflagsval eflags) sus) 65 handle Fail msg => error "regnexecBool" msg 66 67val regmatch_sus_ : string -> word -> word -> substring 68 -> substring vector option 69 = app4 (dlsym dlh "mregex_regmatch_sus") 70 71fun regmatch { pat : string, tgt : string } cflags eflags = 72 (regmatch_sus_ pat (cflagsval cflags) (eflagsval eflags) 73 (Substring.all tgt)) 74 handle Fail msg => error "regmatch" msg 75 76val regmatch_bool_ : string -> word -> word -> substring -> bool 77 = app4 (dlsym dlh "mregex_regmatch_bool") 78 79fun regmatchBool { pat : string, tgt : string } cflags eflags = 80 (regmatch_bool_ pat (cflagsval cflags) (eflagsval eflags) 81 (Substring.all tgt)) 82 handle Fail msg => error "regmatchBool" msg 83 84 85(* Return the substring to the left of the given substring *) 86 87(* Precondition: s2 is a substring of s1, that is, 88 s1 = s2 and i1<=i2 and i2+n2<=i1+n1 89*) 90 91fun leftsus (sus1 : substring) (sus2 : Substring.substring) = 92 let val (s1, i1, n1) = Substring.base sus1 93 val (s2, i2, n2) = Substring.base sus2 94 in 95 if s1 = s2 andalso i1<=i2 andalso i2+n2<=i1+n1 then 96 Substring.substring(s1, i1, i2-i1) 97 else 98 raise Subscript 99 end 100 101(* Faster, non-checking version *) 102 103fun leftsus_ (sus1 : substring) (sus2 : Substring.substring) = 104 let val (s1, i1, _) = Substring.base sus1 105 val (_, i2, _) = Substring.base sus2 106 in 107 Substring.substring(s1, i1, i2-i1) 108 end 109 110(* Return the substring to the right of the given substring *) 111 112(* Precondition: sus is a substring of s, that is, s = s' and sus is valid *) 113 114fun right (s : string) (sus : Substring.substring) = 115 let val (s', i, n) = Substring.base sus 116 in 117 if s = s' then Substring.extract(s', i+n, NONE) 118 else raise Subscript 119 end 120 121datatype replacer = 122 Str of string 123 | Sus of int 124 | Tr of (string -> string) * int 125 | Trs of substring vector -> string 126 127fun applyreplacer suss replacer res = 128 let open Substring 129 fun h [] res = res 130 | h (Str s :: rest) res = h rest (all s :: res) 131 | h (Sus i :: rest) res = h rest (Vector.sub(suss, i) :: res) 132 | h (Tr (f,i) :: rest) res = 133 h rest (all (f (string (Vector.sub(suss, i)))) :: res) 134 | h (Trs f :: rest) res = h rest (all (f suss) :: res) 135 in h replacer res end 136 137fun replace1 regex replacer s = 138 let open Substring 139 val sus = all s 140 in 141 case regexec_sus_ regex 0w0 sus of 142 NONE => s 143 | SOME suss => 144 let val match = Vector.sub(suss, 0) 145 val left' = leftsus sus match 146 val right' = right s match 147 val repl' = applyreplacer suss replacer [] 148 in Substring.concat(left' :: rev (right' :: repl')) end 149 end 150 151fun replace_aux regex fcn replacer s = 152 let open Substring 153 fun h sus revres = 154 case regexec_sus_ regex 0w0 sus of 155 NONE => Substring.concat (List.rev (sus :: revres)) 156 | SOME suss => 157 let val match = Vector.sub(suss, 0) 158 val field1 = leftsus sus match 159 val revres1 = applyreplacer suss replacer 160 (field1 :: revres) 161 in 162 (* Check that we make progress *) 163 if isEmpty field1 andalso isEmpty match then 164 error fcn "no progress" 165 else 166 h (right s match) revres1 167 end 168 in h (all s) [] end 169 170fun replace regex replacer s = 171 replace_aux regex "replace" replacer s 172 173fun substitute1 regex tr s = replace1 regex [Tr (tr, 0)] s 174 175fun substitute regex tr s = 176 replace_aux regex "substitute" [Tr (tr, 0)] s 177 178fun split regex fcn add s = 179 let open Substring 180 val eflags = Word.orb(REG_NOTBOL, REG_NOTEOL) 181 fun h sus revres = 182 case regexec_sus_ regex eflags sus of 183 NONE => List.rev (add sus revres) 184 | SOME suss => 185 let val match = Vector.sub(suss, 0) 186 val field1 = leftsus sus match 187 val revres1 = add field1 revres 188 in 189 (* Check that we make progress *) 190 if isEmpty field1 andalso isEmpty match then 191 error fcn "no progress" 192 else 193 h (right s match) revres1 194 end 195 in h (all s) [] end 196 197fun addfield sus res = 198 sus :: res 199 200fun fields regex s = split regex "fields" addfield s 201 202fun addtoken sus res = 203 if Substring.isEmpty sus then res else sus :: res 204 205fun tokens regex s = split regex "tokens" addtoken s 206 207fun fold regex (fa, fb) e s = 208 let open Substring 209 fun h sus res = 210 case regexec_sus_ regex 0w0 sus of 211 NONE => fa(sus, res) 212 | SOME suss => 213 let val match = Vector.sub(suss, 0) 214 val field1 = leftsus sus match 215 val res1 = fb (suss, fa(field1, res)) 216 in 217 (* Check that we make progress *) 218 if isEmpty field1 andalso isEmpty match then 219 error "fold" "no progress" 220 else 221 h (right s match) res1 222 end 223 in h (all s) e end 224 225fun map regex f s = 226 let open Substring 227 fun h sus revres = 228 case regexec_sus_ regex 0w0 sus of 229 NONE => List.rev revres 230 | SOME suss => 231 let val match = Vector.sub(suss, 0) 232 val field1 = leftsus sus match 233 val revres1 = f suss :: revres 234 in 235 (* Check that we make progress *) 236 if isEmpty field1 andalso isEmpty match then 237 error "map" "no progress" 238 else 239 h (right s match) revres1 240 end 241 in h (all s) [] end 242 243fun app regex f s = 244 let open Substring 245 fun h sus = 246 case regexec_sus_ regex 0w0 sus of 247 NONE => () 248 | SOME suss => 249 let val match = Vector.sub(suss, 0) 250 val field1 = leftsus sus match 251 val revres1 = f suss 252 in 253 (* Check that we make progress *) 254 if isEmpty field1 andalso isEmpty match then 255 error "app" "no progress" 256 else 257 h (right s match) 258 end 259 in h (all s) end 260 261 262