1 // 2 // This software is now distributed according to 3 // the Lesser Gnu Public License. Please see 4 // http://www.gnu.org/copyleft/lesser.txt for 5 // the details. 6 // -- Happy Computing! 7 // 8 package com.stevesoft.pat; 9 10 /** 11 * This class provides a method for parsing the "s/.../.../" constructs of 12 * Regex.perlCode. 13 * 14 * @see Regex#perlCode 15 */ 16 class parsePerl 17 { close(char c)18 final static char close(char c) 19 { 20 // This switch statement does not behave 21 // properly when compiled with jdk1.1.5 22 // and the -O flag. 23 /* 24 * switch(c) { case '[': return ']'; case '(': return ')'; case '{': return 25 * '}'; } return c; 26 */ 27 if (c == '<') 28 { 29 return '>'; 30 } 31 if (c == '[') 32 { 33 return ']'; 34 } 35 if (c == '(') 36 { 37 return ')'; 38 } 39 if (c == '{') 40 { 41 return '}'; 42 } 43 return c; 44 } 45 codify(String s, boolean keepbs)46 final public static String codify(String s, boolean keepbs) 47 { 48 return codify(s, 0, s.length(), keepbs); 49 } 50 codify(String s, int i0, int iN, boolean keepbs)51 final public static String codify(String s, int i0, int iN, boolean keepbs) 52 { 53 StringBuffer sb = new StringBuffer(); 54 boolean ucmode = false, lcmode = false, litmode = false; 55 boolean uc1 = false, lc1 = false; 56 boolean modified = false; 57 for (int i = i0; i < iN; i++) 58 { 59 char c = s.charAt(i); 60 boolean mf = true, app = true; 61 if (c == '\\') 62 { 63 app = false; 64 i++; 65 if (i < s.length()) 66 { 67 char c2 = s.charAt(i); 68 switch (c2) 69 { 70 case 'Q': 71 litmode = true; 72 break; 73 case 'U': 74 ucmode = true; 75 break; 76 case 'L': 77 lcmode = true; 78 break; 79 case 'u': 80 uc1 = true; 81 break; 82 case 'l': 83 lc1 = true; 84 break; 85 case 'E': 86 uc1 = lc1 = ucmode = lcmode = litmode = false; 87 break; 88 default: 89 if (keepbs) 90 { 91 sb.append('\\'); 92 } 93 c = c2; 94 if (keepbs) 95 { 96 mf = false; 97 } 98 app = true; 99 break; 100 } 101 modified |= mf; 102 } 103 } 104 if (app) 105 { 106 if (lc1) 107 { 108 c = lc(c); 109 lc1 = false; 110 } 111 else if (uc1) 112 { 113 c = uc(c); 114 uc1 = false; 115 } 116 else if (ucmode) 117 { 118 c = uc(c); 119 } 120 else if (lcmode) 121 { 122 c = lc(c); 123 } 124 if (litmode && needbs(c)) 125 { 126 sb.append('\\'); 127 } 128 sb.append(c); 129 } 130 } 131 return modified ? sb.toString() : s; 132 } 133 uc(char c)134 final static char uc(char c) 135 { 136 return CaseMgr.toUpperCase(c); 137 } 138 lc(char c)139 final static char lc(char c) 140 { 141 return CaseMgr.toLowerCase(c); 142 } 143 needbs(char c)144 final static boolean needbs(char c) 145 { 146 if (c >= 'a' && c <= 'z') 147 { 148 return false; 149 } 150 if (c >= 'A' && c <= 'Z') 151 { 152 return false; 153 } 154 if (c >= '0' && c <= '9') 155 { 156 return false; 157 } 158 if (c == '_') 159 { 160 return false; 161 } 162 return true; 163 } 164 parse(String s)165 final static Regex parse(String s) 166 { 167 boolean igncase = false, optim = false, gFlag = false; 168 boolean sFlag = false, mFlag = false, xFlag = false; 169 170 StringBuffer s1 = new StringBuffer(); 171 StringBuffer s2 = new StringBuffer(); 172 int i = 0, count = 0; 173 char mode, delim = '/', cdelim = '/'; 174 if (s.length() >= 3 && s.charAt(0) == 's') 175 { 176 mode = 's'; 177 delim = s.charAt(1); 178 cdelim = close(delim); 179 i = 2; 180 } 181 else if (s.length() >= 2 && s.charAt(0) == 'm') 182 { 183 mode = 'm'; 184 delim = s.charAt(1); 185 cdelim = close(delim); 186 i = 2; 187 } 188 else if (s.length() >= 1 && s.charAt(0) == '/') 189 { 190 mode = 'm'; 191 i = 1; 192 } 193 else 194 { 195 try 196 { 197 RegSyntaxError.endItAll("Regex.perlCode should be of the " 198 + "form s/// or m// or //"); 199 } catch (RegSyntax rs) 200 { 201 } 202 return null; 203 } 204 for (; i < s.length(); i++) 205 { 206 if (s.charAt(i) == '\\') 207 { 208 s1.append('\\'); 209 i++; 210 } 211 else if (s.charAt(i) == cdelim && count == 0) 212 { 213 i++; 214 break; 215 } 216 else if (s.charAt(i) == delim && cdelim != delim) 217 { 218 count++; 219 } 220 else if (s.charAt(i) == cdelim && cdelim != delim) 221 { 222 count--; 223 } 224 s1.append(s.charAt(i)); 225 } 226 if (mode == 's' && cdelim != delim) 227 { 228 while (i < s.length() && Prop.isWhite(s.charAt(i))) 229 { 230 i++; 231 } 232 if (i >= s.length()) 233 { 234 try 235 { 236 RegSyntaxError.endItAll("" + mode + delim + " needs " + cdelim); 237 } catch (RegSyntax rs) 238 { 239 } 240 return null; 241 } 242 cdelim = close(delim = s.charAt(i)); 243 i++; 244 } 245 count = 0; 246 if (mode == 's') 247 { 248 for (; i < s.length(); i++) 249 { 250 if (s.charAt(i) == '\\') 251 { 252 s2.append('\\'); 253 i++; 254 } 255 else if (s.charAt(i) == cdelim && count == 0) 256 { 257 i++; 258 break; 259 } 260 else if (s.charAt(i) == delim && cdelim != delim) 261 { 262 count++; 263 } 264 else if (s.charAt(i) == cdelim && cdelim != delim) 265 { 266 count--; 267 } 268 s2.append(s.charAt(i)); 269 } 270 } 271 for (; i < s.length(); i++) 272 { 273 char c = s.charAt(i); 274 switch (c) 275 { 276 case 'x': 277 xFlag = true; 278 break; 279 case 'i': 280 igncase = true; 281 break; 282 case 'o': 283 optim = true; 284 break; 285 case 's': 286 sFlag = true; 287 break; 288 case 'm': 289 mFlag = true; 290 break; 291 case 'g': 292 gFlag = true; 293 break; 294 default: 295 296 // syntax error! 297 try 298 { 299 RegSyntaxError.endItAll("Illegal flag to pattern: " + c); 300 } catch (RegSyntax rs) 301 { 302 } 303 return null; 304 } 305 } 306 Regex r = new Regex(); 307 try 308 { 309 String pat = s1.toString(), reprul = s2.toString(); 310 if (xFlag) 311 { 312 pat = strip(pat); 313 reprul = strip(reprul); 314 } 315 r.compile(pat); 316 r.ignoreCase |= igncase; 317 r.gFlag |= gFlag; 318 r.sFlag |= sFlag; 319 r.mFlag |= mFlag; 320 if (optim) 321 { 322 r.optimize(); 323 } 324 if (delim == '\'') 325 { 326 r.setReplaceRule(new StringRule(reprul)); 327 } 328 else 329 { 330 r.setReplaceRule(ReplaceRule.perlCode(reprul)); 331 } 332 } catch (RegSyntax rs) 333 { 334 r = null; 335 } 336 return r; 337 } 338 strip(String s)339 static String strip(String s) 340 { 341 StringBuffer sb = new StringBuffer(); 342 for (int i = 0; i < s.length(); i++) 343 { 344 char c = s.charAt(i); 345 if (Prop.isWhite(c)) 346 { 347 ; 348 } 349 else if (c == '#') 350 { 351 i++; 352 while (i < s.length()) 353 { 354 if (s.charAt(i) == '\n') 355 { 356 break; 357 } 358 i++; 359 } 360 } 361 else if (c == '\\') 362 { 363 sb.append(c); 364 sb.append(s.charAt(++i)); 365 } 366 else 367 { 368 sb.append(c); 369 } 370 } 371 return sb.toString(); 372 } 373 } 374