1 // 2 // This software is now distributed according to 3 // the Lesser Gnu Public License. Please see 4 // http://www.gnu.org/copyleft/lesser.txt for 5 // the details. 6 // -- Happy Computing! 7 // 8 package com.stevesoft.pat; 9 10 import java.util.Enumeration; 11 import java.util.Hashtable; 12 import java.util.Vector; 13 14 /** This class is just like oneChar, but doesn't worry about case. */ 15 class FastChar extends oneChar 16 { FastChar(char c)17 FastChar(char c) 18 { 19 super(c); 20 } 21 matchInternal(int p, Pthings pt)22 public int matchInternal(int p, Pthings pt) 23 { 24 return (p < pt.src.length() && pt.src.charAt(p) == c) ? nextMatch( 25 p + 1, pt) : -1; 26 } 27 clone1(Hashtable h)28 Pattern clone1(Hashtable h) 29 { 30 return new FastChar(c); 31 } 32 } 33 34 /** 35 * This class is a hashtable keyed by Character Objects. It is used to match 36 * things of the form (?:a..|b..|c..|..) match with greater efficiency -- by 37 * using a Hashtable that indexes into the group of patterns. 38 */ 39 class Branch extends Pattern 40 { 41 Hashtable h = new Hashtable(); 42 43 // We need to keep track of the order 44 // of the keys -- if we don't then 45 // recompiling the output of toString 46 // may produce errors by re-ordering 47 // ()'s and changing the id number of 48 // the backreference associated with 49 // a subpattern. 50 Vector keys = new Vector(); 51 Branch()52 Branch() 53 { 54 } 55 clone1(Hashtable x)56 Pattern clone1(Hashtable x) 57 { 58 Branch b = new Branch(); 59 b.keys = (Vector) keys.clone(); 60 x.put(this, b); 61 x.put(b, b); 62 63 for (int i = 0; i < keys.size(); i++) 64 { 65 Pattern p = (Pattern) h.get(keys.elementAt(i)); 66 b.h.put(keys.elementAt(i), p.clone(x)); 67 } 68 return b; 69 } 70 71 // this function eliminates Branches with 0 or 1 elements. reduce(boolean ignoreCase, boolean dontMinQ)72 final Pattern reduce(boolean ignoreCase, boolean dontMinQ) 73 { 74 if (h.size() == 1) 75 { 76 Enumeration e = h.keys(); 77 Character c = (Character) e.nextElement(); 78 Pattern oc; 79 if (ignoreCase || dontMinQ) 80 { 81 oc = new oneChar(c.charValue()); 82 } 83 else 84 { 85 oc = new FastChar(c.charValue()); 86 } 87 oc.next = (Pattern) h.get(c); 88 oc.add(next); 89 return oc; 90 } 91 else if (h.size() == 0) 92 { 93 return null; 94 } 95 return this; 96 } 97 maxChars()98 public patInt maxChars() 99 { 100 Enumeration e = h.keys(); 101 patInt count = new patInt(0); 102 while (e.hasMoreElements()) 103 { 104 Object key = e.nextElement(); 105 Pattern pa = (Pattern) h.get(key); 106 patInt pi = pa.maxChars(); 107 pi.inc(); 108 count.maxeq(pi); 109 } 110 return count; 111 } 112 minChars()113 public patInt minChars() 114 { 115 Enumeration e = h.keys(); 116 patInt count = new patInt(0); 117 while (e.hasMoreElements()) 118 { 119 Object key = e.nextElement(); 120 Pattern pa = (Pattern) h.get(key); 121 patInt pi = pa.minChars(); 122 pi.inc(); 123 count.mineq(pi); 124 } 125 return count; 126 } 127 128 // adds a oneChar object to this Branch addc(oneChar o, boolean ignoreCase, boolean dontMinQ)129 void addc(oneChar o, boolean ignoreCase, boolean dontMinQ) 130 { 131 Pattern n = o.next; 132 if (n == null) 133 { 134 n = new NullPattern(); 135 } 136 else 137 { 138 n = RegOpt.opt(n, ignoreCase, dontMinQ); 139 } 140 n.setParent(this); 141 set(Character.valueOf(o.c), n, ignoreCase, dontMinQ); 142 if (ignoreCase) 143 { 144 if (o.c != o.altc) 145 { 146 set(Character.valueOf(o.altc), n, ignoreCase, dontMinQ); 147 } 148 if (o.c != o.altc2 && o.altc != o.altc2) 149 { 150 set(Character.valueOf(o.altc2), n, ignoreCase, dontMinQ); 151 } 152 } 153 } 154 set(Character c, Pattern n, boolean igc, boolean dontMinQ)155 void set(Character c, Pattern n, boolean igc, boolean dontMinQ) 156 { 157 Pattern p = (Pattern) h.get(c); 158 next = null; 159 // This letter is not yet used in the Branch object. 160 // We need to add it. 161 if (p == null) 162 { 163 if (n instanceof Or) 164 { 165 // A NullPattern is prepended to an Or 166 // to prevent confusing this object. 167 // For example: (boo|bug) => (b(?:oo|ug)) 168 // during this process. However, we 169 // want (b(?:oo|ell)|bug) 170 NullPattern np = new NullPattern(); 171 np.add(n); 172 h.put(c, np); 173 } 174 else 175 { 176 h.put(c, n); 177 } 178 // Make sure we remember the order things were 179 // added into the Branch object so that we can 180 // properly convert it to a String. 181 keys.addElement(c); 182 } 183 else if (p instanceof Or) 184 { 185 ((Or) p).addOr(n); 186 } 187 else if (p instanceof oneChar && n instanceof oneChar 188 && ((oneChar) p).c != ((oneChar) n).c) 189 { 190 Branch b = new Branch(); 191 b.addc((oneChar) p, igc, dontMinQ); 192 b.addc((oneChar) n, igc, dontMinQ); 193 h.put(c, b); 194 b.setParent(this); 195 } 196 else if (p instanceof Branch && n instanceof oneChar) 197 { 198 ((Branch) p).addc((oneChar) n, igc, dontMinQ); 199 n.setParent(p); 200 } 201 else 202 { 203 // Create an Or object to receive the variety 204 // of branches in the pattern if the current letter 205 // is matched. We do not attempt to make these 206 // sub-branches into a Branch object yet. 207 Or o = new Or(); 208 o.setParent(this); 209 210 // Remove NullPattern from p -- it's no longer needed. 211 if (p instanceof NullPattern && p.parent == null && p.next != null) 212 { 213 o.addOr(p.next); 214 } 215 else 216 { 217 o.addOr(p); 218 } 219 o.addOr(n); 220 221 Pattern optpat = RegOpt.opt(o, igc, dontMinQ); 222 h.put(c, optpat); 223 optpat.setParent(this); 224 } 225 } 226 toString()227 public String toString() 228 { 229 StringBuffer sb = new StringBuffer(); 230 // should protect this... 231 sb.append("(?:(?#branch)"); // Hashtable)"); 232 for (int i = 0; i < keys.size(); i++) 233 { 234 Character c = (Character) keys.elementAt(i); 235 sb.append(c); 236 sb.append(h.get(c)); 237 if (i + 1 < keys.size()) 238 { 239 sb.append("|"); 240 } 241 } 242 sb.append(")"); 243 sb.append(nextString()); 244 return sb.toString(); 245 } 246 matchInternal(int pos, Pthings pt)247 public int matchInternal(int pos, Pthings pt) 248 { 249 if (pos >= pt.src.length()) 250 { 251 return -1; 252 } 253 Pattern n = (Pattern) h.get(Character.valueOf(pt.src.charAt(pos))); 254 if (n == null) 255 { 256 return -1; 257 } 258 if (pt.cbits != null && pt.cbits.get(pos)) 259 { 260 return -1; 261 } 262 return n.matchInternal(pos + 1, pt); 263 } 264 } 265 266 /** 267 * This is just a place to put the optimizing function. It is never instantiated 268 * as an Object. It just sorts through the RegOpt looking for things it can 269 * change and make faster. 270 */ 271 public class RegOpt 272 { opt(Pattern p, boolean ignoreCase, boolean dontMinQ)273 static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ) 274 { 275 if (p == null) 276 { 277 return p; 278 } 279 if (p instanceof Bracket) 280 { 281 Bracket b = (Bracket) p; 282 // FastBracket is the only special 283 // optimized class to have its own 284 // source file. 285 p = FastBracket.process(b, ignoreCase); 286 // if(!(p instanceof FastBracket) 287 // p = Switch.process(b,ignoreCase); 288 p.next = b.next; 289 p.parent = b.parent; 290 } 291 else if (p instanceof oneChar && !ignoreCase && !dontMinQ) 292 { 293 oneChar o = (oneChar) p; 294 p = new FastChar(o.c); 295 p.next = o.next; 296 p.parent = o.parent; 297 } 298 else if (p instanceof Or && ((Or) p).leftForm().equals("(?:") 299 && ((Or) p).v.size() == 1) 300 { // Eliminate this Or Object. 301 Or o = (Or) p; 302 p = (Pattern) o.v.elementAt(0); 303 p.setParent(null); 304 p = RegOpt.opt(p, ignoreCase, dontMinQ); 305 p.add(o.next); 306 } 307 else if (p instanceof Or) 308 { 309 Or o = (Or) p; 310 o.pv = null; 311 Vector v = o.v; 312 o.v = new Vector(); 313 Branch b = new Branch(); 314 b.parent = o.parent; 315 for (int i = 0; i < v.size(); i++) 316 { 317 Pattern pp = (Pattern) v.elementAt(i); 318 // We want to have at least two oneChar's in 319 // the Or Object to consider making a Branch. 320 if (pp instanceof oneChar 321 && (b.h.size() >= 1 || (i + 1 < v.size() && v 322 .elementAt(i + 1) instanceof oneChar))) 323 { 324 b.addc((oneChar) pp, ignoreCase, dontMinQ); 325 } 326 else 327 { 328 if (b.keys.size() > 0) 329 { 330 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); 331 if (p2 != null) 332 { 333 o.addOr(p2); 334 b = new Branch(); 335 b.parent = o.parent; 336 } 337 } 338 o.addOr(opt(pp, ignoreCase, dontMinQ)); 339 } 340 } 341 if (b.keys.size() > 0) 342 { 343 Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); 344 if (p2 != null) 345 { 346 o.addOr(p2); 347 } 348 } 349 if (o.v.size() == 1 && o.leftForm().equals("(?:")) 350 { // Eliminate Or Object 351 p = (Pattern) o.v.elementAt(0); 352 p.setParent(null); 353 p = RegOpt.opt(p, ignoreCase, dontMinQ); 354 p.add(o.next); 355 } 356 } 357 else if (p instanceof FastMulti) 358 { 359 PatternSub ps = (PatternSub) p; 360 ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ); 361 } 362 else if (p instanceof Multi && safe4fm(((PatternSub) p).sub)) 363 { 364 Multi m = (Multi) p; 365 FastMulti fm = null; 366 try 367 { 368 fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ)); 369 } catch (RegSyntax rs) 370 { 371 } 372 fm.parent = m.parent; 373 fm.matchFewest = m.matchFewest; 374 fm.next = m.next; 375 p = fm; 376 } 377 if (p.next != null) 378 { 379 p.next = opt(p.next, ignoreCase, dontMinQ); 380 } 381 return p; 382 } 383 safe4fm(Pattern x)384 final static boolean safe4fm(Pattern x) 385 { 386 while (x != null) 387 { 388 if (x instanceof Bracket) 389 { 390 ; 391 } 392 else if (x instanceof Range) 393 { 394 ; 395 } 396 else if (x instanceof oneChar) 397 { 398 ; 399 } 400 else if (x instanceof Any) 401 { 402 ; 403 } 404 else if (x instanceof Custom 405 && ((Custom) x).v instanceof UniValidator) 406 { 407 ; 408 } 409 else if (x instanceof Or) 410 { 411 Or o = (Or) x; 412 if (!o.leftForm().equals("(?:")) 413 { 414 return false; 415 } 416 patInt lo = o.countMinChars(); 417 patInt hi = o.countMaxChars(); 418 if (!lo.equals(hi)) 419 { 420 return false; 421 } 422 for (int i = 0; i < o.v.size(); i++) 423 { 424 if (!safe4fm((Pattern) o.v.elementAt(i))) 425 { 426 return false; 427 } 428 } 429 } 430 else 431 { 432 return false; 433 } 434 x = x.next; 435 } 436 return true; 437 } 438 /* 439 * public static void setParents(Regex r) { setParents(r.thePattern,null); } 440 * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub && 441 * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents( 442 * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof 443 * Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++) 444 * RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof 445 * Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys(); 446 * while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents( 447 * (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else { 448 * p.parent = null; RegOpt.setParents(p.next,x); } } 449 */ 450 } 451