1 import java.util.*; 2 import java.net.*; 3 4 public final class mask { 5 6 /* load flags */ 7 // act= (what to do with this object?) 8 // reject,stop- don't download it (same as q=0.0) 9 // noparse - don't parse it (same as depth=-1) 10 // fastclose - close connection after sending request (will not be parsed even if parseable) 11 // close - if object is unparseable (from content-type), close connection without downloading more data 12 13 // nosave - don't save it to disk 14 // noproxy - bypass proxy 15 public static final byte ACT_LOAD=0; 16 public static final byte ACT_REJECT=1; 17 public static final byte ACT_NOPARSE=2; 18 public static final byte ACT_FASTCLOSE=3; 19 public static final byte ACT_CLOSE=4; 20 public static final byte ACT_NOSAVE=5; 21 public static final byte ACT_NOPROXY=6; 22 23 /* log flags */ 24 // log= 25 // none - no loging done 26 // queue - when element has been taken from queue 27 // load - when trying to load 28 // parse - when parsing 29 // saving - when saving to disk 30 // err - when loading error occurs 31 // fatalerr - when fatal loading error occurs 32 // ioerr - log i/o erros 33 // stored - when saving to disc 34 // reject - log rejects 35 // urlonly - log URL only 36 37 public static final short LOG_SERVERDEFAULT =-1; 38 public static final short LOG_NONE =0; 39 public static final short LOG_QUEUE =1; 40 public static final short LOG_LOAD =2; 41 public static final short LOG_PARSE =4; 42 public static final short LOG_SAVE =8; 43 public static final short LOG_ERR =16; 44 public static final short LOG_FATALERR=32; 45 public static final short LOG_IOERR =64; 46 public static final short LOG_STORED =128; 47 public static final short LOG_REJECT =256; 48 public static final short LOG_DEPTH =512; 49 public static final short LOG_URLONLY =1024;// MUST be last 50 // aliases 51 public static final short LOG_ALL=LOG_URLONLY-1; 52 public static final short LOG_DEFAULT=LOG_LOAD|LOG_IOERR|LOG_ERR|LOG_FATALERR; 53 54 /* update status flags */ 55 // norefresh - if object allready exists, don't try to load it 56 // forcereload - force cache to reloading object 57 // load,continue - load object in classic way (don't care about if old copy exists) 58 // update - if object in cache is older than XXXX hours, start loading. 59 // forceupdate - forced load if older 60 61 public static final byte UPD_NOLIMIT=-1; 62 public static final byte UPD_LOAD=0; 63 public static final byte UPD_RELOAD=1; 64 public static final byte UPD_UPDATE=2; 65 public static final byte UPD_NOREFRESH=3; 66 public static final byte UPD_FORCEUPDATE=4; 67 public static final byte UPD_NOREPARSE=5; 68 69 // strip=none,null - no URL striping before mask testing 70 // =auto 71 // =location,loc odstrani http://xxx/ 72 // =server odstrani http://xxx 73 74 public static final byte STRIP_AUTO=0; 75 public static final byte STRIP_LOCATION=1; 76 public static final byte STRIP_SERVER=2; 77 public static final byte STRIP_NONE=3; 78 public static final byte STRIP_DIRECTORY=4; 79 80 // size=xxxx bytes - only if object is bigger than xxxx bytes 81 // size=known - only if we know size of object 82 // size=unknown - only if we don't know size of object beeing downloaded 83 // size=any - don't care about it 84 85 public static final byte SIZE_NOLIMIT=-1; 86 public static final byte SIZE_ANY=0; 87 public static final byte SIZE_KNOWN=1; 88 public static final byte SIZE_UNKNOWN=2; 89 public static final byte SIZE_LIMITED=3; 90 91 // target=any,anyserver - any wwwserver in the world (including my own) 92 // alias for world,known,site,me 93 // world - any undefined location 94 // known - any known location (but not me or myserver) 95 // server,site - alias for me,sameserver 96 // same,sameserver - file located on the same server (not including me) 97 // location, samelocation, me, this - located in Location URL 98 // subdir - located in subdirectory 99 // 100 101 public static final byte TARGET_ANY=127; 102 /* official subtypes */ 103 public static final byte TARGET_NONE=0; 104 public static final byte TARGET_WORLD=1; 105 public static final byte TARGET_KNOWN=2; 106 public static final byte TARGET_SERVER=4; 107 public static final byte TARGET_LOCATION=8; 108 public static final byte TARGET_SUBDIR=16; // subdir in location 109 public static final byte TARGET_DIRECTORY=32; 110 111 /* aliases */ 112 113 public static final byte TARGET_SITE=TARGET_SERVER| 114 TARGET_LOCATION| 115 TARGET_SUBDIR| 116 TARGET_DIRECTORY; 117 118 public static final byte TARGET_ME = 119 TARGET_LOCATION| 120 TARGET_SUBDIR| 121 TARGET_DIRECTORY; 122 123 /* http - ANY highest */ 124 /* / - SITE */ 125 /* xxxx = location */ 126 /* * = cokoliv */ 127 128 /* target guess priority */ 129 public static final byte GUESS_TARGET_ANY=10; 130 public static final byte GUESS_TARGET_SITE=8; 131 public static final byte GUESS_TARGET_LOCATION=5; 132 public static final byte GUESS_TARGET_ANYWHERE=0; 133 134 135 public byte action,update,strip,size,target; 136 public short log; 137 public long sizelimit,updatelimit; // -1 is no limit 138 139 public float q; 140 141 public static final byte DEPTH_NOCHANGE=-2; 142 143 public short depth; 144 145 /* content section */ 146 147 private regexp contentmasks[]; /* regexp masks */ 148 private boolean contentok[]; /* true=normal, false=must NOT be matched */ 149 private boolean anycontent; /* true=OR, false=AND */ 150 151 /* extensions */ 152 153 private regexp extmasks[]; /* regexp masks */ 154 private boolean extok[]; /* true=normal, false=must NOT be matched */ 155 private boolean anyext; /* true=OR, false=AND */ 156 157 /* urlmasks */ 158 private regexp urlmasks[]; /* regexp masks */ 159 private boolean urlsok[]; /* true=normal, false=must NOT be matched */ 160 private boolean anyurl; /* true=OR, false=AND */ 161 162 163 /* src masks */ 164 private regexp srcmasks[]; /* regexp masks */ 165 private boolean srcok[]; /* true=normal, false=must NOT be matched */ 166 private boolean anysrc; /* true=OR, false=AND */ 167 systemdefaults()168 private final void systemdefaults() 169 { 170 action=ACT_LOAD; 171 update=UPD_LOAD; 172 strip=STRIP_AUTO; 173 target=TARGET_NONE; 174 size=SIZE_ANY; 175 log=LOG_NONE; // SERVERDEFAULT; 176 sizelimit=SIZE_NOLIMIT; 177 updatelimit=UPD_NOLIMIT; 178 anycontent=anyext=anyurl=anysrc=true; 179 q=1.0f; 180 depth=DEPTH_NOCHANGE; 181 } 182 mask(options o)183 public mask(options o) 184 { 185 systemdefaults(); 186 if(o==null) throw new NullPointerException("mask"); 187 for(int i=o.parsed.size()-1;i>=0;i--) 188 { 189 try 190 { 191 String opt; 192 StringTokenizer st; 193 st=new StringTokenizer((String)o.parsed.elementAt(i)); 194 opt=st.nextToken(); 195 // System.out.println("line="+o.parsed.elementAt(i)); 196 while(true) 197 { 198 String s; 199 boolean ok; 200 s=st.nextToken(); 201 ok=true; 202 if(s.length()==1 && s.charAt(0)=='!') 203 { 204 ok=false; 205 s=st.nextToken(); 206 } 207 208 if(opt.equals("q")) { q=Float.valueOf(s).floatValue();continue;} 209 else if (opt.equals("url")) 210 { 211 if(s.equals("*") || s.equals("any")) {urlmasks=null;urlsok=null;} 212 else 213 { 214 urlmasks=util.addRegexpToArray(s, urlmasks); 215 urlsok=util.addBooleanToArray(ok,urlsok); 216 } 217 s=st.nextToken(); 218 if(s.equals(",")) anyurl=true; else anyurl=false; 219 continue; 220 } 221 else if (opt.equals("content") || opt.equals("ct")) 222 { 223 if(s.equals("*") || s.equals("any")) {contentmasks=null;contentok=null;} 224 else 225 { 226 contentmasks=util.addRegexpToArray(s, contentmasks); 227 contentok=util.addBooleanToArray(ok,contentok); 228 } 229 s=st.nextToken(); 230 if(s.equals(",")) anycontent=true; else anycontent=false; 231 continue; 232 } 233 else if (opt.equals("ext")) 234 { 235 if(s.equals("*") || s.equals("any")) {extmasks=null;extok=null;} 236 else 237 { 238 extmasks=util.addRegexpToArray(s, extmasks); 239 extok=util.addBooleanToArray(ok,extok); 240 } 241 s=st.nextToken(); 242 if(s.equals(",")) anyext=true; else anyext=false; 243 continue; 244 } 245 246 else if (opt.equals("src")) 247 { 248 if(s.equals("*") || s.equals("any")) {srcmasks=null;srcok=null;} 249 else 250 { 251 srcmasks=util.addRegexpToArray(s, srcmasks); 252 srcok=util.addBooleanToArray(ok,srcok); 253 } 254 s=st.nextToken(); 255 if(s.equals(",")) anysrc=true; else anysrc=false; 256 continue; 257 } 258 259 else if (opt.equals("depth")) 260 { 261 try 262 { 263 depth=(short)Integer.valueOf(s).intValue(); 264 } 265 catch (NumberFormatException n) 266 { 267 System.err.println("[CONFIG_ERROR] Bad depth : "+s); 268 } 269 270 s=st.nextToken(); 271 continue; 272 } 273 274 else if (opt.equals("size")) 275 { 276 if(s.equals("any")) { size=SIZE_ANY;sizelimit=SIZE_NOLIMIT;} 277 else if (s.equals("known")) 278 if(ok==false) size=SIZE_UNKNOWN; 279 else size=SIZE_KNOWN; 280 else if (s.equals("unknown")) 281 if(ok==true) size=SIZE_UNKNOWN; 282 else size=SIZE_KNOWN; 283 else 284 try 285 { 286 sizelimit=Long.valueOf(s).intValue(); 287 if(size==SIZE_ANY) size=SIZE_LIMITED; 288 } 289 catch (NumberFormatException n) 290 { 291 System.err.println("[CONFIG_ERROR] Bad size : "+s); 292 } 293 294 s=st.nextToken(); 295 continue; 296 } 297 298 else if (opt.equals("strip")) 299 { 300 if(s.equals("none")) strip=STRIP_NONE; 301 else if (s.equals("server")) strip=STRIP_SERVER; 302 else if (s.equals("location")) strip=STRIP_LOCATION; 303 else if (s.equals("auto")) strip=STRIP_AUTO; 304 else if (s.equals("dir")) strip=STRIP_DIRECTORY; 305 else 306 System.err.println("[CONFIG_ERROR] Invalid parameter to strip option : "+s); 307 308 s=st.nextToken(); 309 continue; 310 } 311 312 else if (opt.equals("target")) 313 { 314 if(s.equals("any")) target=TARGET_ANY; 315 else if (s.equals("anyserver")) target=TARGET_ANY; 316 else if (s.equals("world")) target|=TARGET_WORLD; 317 else if (s.equals("known")) target|=TARGET_KNOWN; 318 else if (s.equals("server")) target|=TARGET_SERVER; 319 else if (s.equals("location")) target|=TARGET_LOCATION; 320 else if (s.equals("directory")) target|=TARGET_DIRECTORY; 321 else if (s.equals("dir")) target|=TARGET_DIRECTORY; 322 else if (s.equals("subdir")) target|=TARGET_SUBDIR; 323 324 325 // aliases 326 else if (s.equals("loc")) target|=TARGET_LOCATION; 327 else if (s.equals("me")) target|=TARGET_ME; 328 else if (s.equals("site")) target|=TARGET_SITE; 329 330 331 else if (s.equals("auto")) target=0; 332 else 333 System.err.println("[CONFIG_ERROR] Invalid parameter to target option : "+s); 334 335 s=st.nextToken(); 336 continue; 337 } 338 339 else if (opt.equals("act")) 340 { 341 if(s.equals("reject") || 342 s.equals("stop") || 343 (s.equals("load") && ok==false) ) 344 { action=ACT_REJECT;} 345 else if (s.equals("noparse")) { depth=-1;action=ACT_NOPARSE;} 346 else if (s.equals("fastclose")) action=ACT_FASTCLOSE; 347 else if (s.equals("close")) action=ACT_CLOSE; 348 else if (s.equals("load")) action=ACT_LOAD; 349 else if (s.equals("nosave")) action=ACT_NOSAVE; 350 else if (s.equals("direct") || 351 s.equals("noproxy") ) 352 action=ACT_NOPROXY; 353 354 else 355 System.err.println("[CONFIG_ERROR] Invalid parameter to act option : "+s); 356 357 s=st.nextToken(); 358 continue; 359 } 360 361 else if (opt.equals("log")) 362 { 363 if((s.equals("none") || 364 s.equals("off") 365 ) && ok==true) 366 log=LOG_NONE; 367 else if (s.equals("queue")) log|=LOG_QUEUE; 368 else if (s.equals("server") 369 ||s.equals("serverdefault") 370 ) log=LOG_SERVERDEFAULT; 371 else if (s.equals("load")) log|=LOG_LOAD; 372 else if (s.equals("stored") 373 ||s.equals("saved") 374 ||s.equals("store") 375 ||s.equals("loaded") 376 ) log|=LOG_STORED; 377 else if (s.equals("parse")) log|=LOG_PARSE; 378 else if (s.equals("save")) log|=LOG_SAVE; 379 else if (s.equals("depth")) log|=LOG_DEPTH; 380 else if (s.equals("err") 381 ||s.equals("error") 382 ) log|=LOG_ERR; 383 else if (s.equals("all")) log|=LOG_ALL; 384 else if (s.equals("ioerr") 385 || s.equals("io")) 386 log|=LOG_IOERR; 387 else if (s.equals("fatalerr") 388 || s.equals("fatal")) 389 log|=LOG_FATALERR; 390 else if (s.equals("default")) log=LOG_DEFAULT; 391 else if (s.equals("reject")) log|=LOG_REJECT; 392 else if (s.equals("url") || 393 s.equals("urlonly") || 394 s.equals("short") 395 ) 396 if (ok==true) log=(short)((log & LOG_ALL)|LOG_URLONLY); 397 else log&=LOG_ALL; 398 399 else 400 System.err.println("[CONFIG_ERROR] Invalid parameter to log option : "+s); 401 402 s=st.nextToken(); 403 continue; 404 } 405 else if (opt.equals("upd")) 406 { 407 if(s.equals("load")) update=UPD_LOAD; 408 else if (s.equals("norefresh") || 409 s.equals("none") 410 ) update=UPD_NOREFRESH; 411 else if (s.equals("reload") || 412 s.equals("force") || 413 s.equals("forceload") || 414 s.equals("forcereload") 415 ) update=UPD_RELOAD; 416 else if (s.equals("update")) update=UPD_UPDATE; 417 else if (s.equals("forceupdate")) update=UPD_FORCEUPDATE; 418 else if (s.equals("noreparse")) update=UPD_NOREPARSE; 419 else 420 try 421 { 422 updatelimit=Long.valueOf(s).longValue()*3600000L; 423 if(update!=UPD_UPDATE || update!=UPD_FORCEUPDATE ) 424 update=UPD_UPDATE; 425 } 426 catch (NumberFormatException n) 427 { 428 System.err.println("[CONFIG_ERROR] Bad update interval : "+s); 429 } 430 431 s=st.nextToken(); 432 continue; 433 } 434 else 435 { 436 System.err.println("[CONFIG_ERROR] Unknown mask option "+opt); 437 break; 438 } 439 } 440 441 } 442 catch (NoSuchElementException ignore) 443 {} 444 445 446 } 447 guessTarget(); 448 guessStrip(); 449 } 450 guessTarget()451 private final void guessTarget() 452 { 453 if(target!=0) return; 454 if(urlmasks==null) { target=TARGET_ME;return;} 455 /* guess: */ 456 byte ttarget=GUESS_TARGET_ANYWHERE; 457 458 for(int i=urlmasks.length-1;i>=0;i--) 459 { 460 String s; 461 s=urlmasks[i].toString(); 462 463 if(s.indexOf("://")>0) 464 { 465 if(ttarget<GUESS_TARGET_ANY) 466 ttarget=GUESS_TARGET_ANY; 467 break; 468 } 469 470 if(s.startsWith("/")) 471 { 472 if(ttarget<GUESS_TARGET_SITE) 473 ttarget=GUESS_TARGET_SITE; 474 continue; 475 } 476 477 if(s.startsWith("*")) 478 { 479 if(ttarget<GUESS_TARGET_ANYWHERE) 480 ttarget=GUESS_TARGET_ANYWHERE; 481 continue; 482 } 483 484 if(ttarget<GUESS_TARGET_LOCATION) 485 ttarget=GUESS_TARGET_LOCATION; 486 487 } 488 /* and setup..... */ 489 switch(ttarget) 490 { 491 case GUESS_TARGET_ANYWHERE: 492 case GUESS_TARGET_ANY: 493 target=TARGET_ANY;break; 494 case GUESS_TARGET_SITE: 495 target=TARGET_SITE;break; 496 case GUESS_TARGET_LOCATION: 497 target=TARGET_ME;break; 498 default: throw new IllegalArgumentException ("Screwed in guessTarget();"); 499 } 500 501 } 502 guessStrip()503 private final void guessStrip() 504 { 505 if( (strip==STRIP_LOCATION || strip==STRIP_DIRECTORY) 506 && 507 (target & TARGET_ME)==0 ) 508 { strip=STRIP_SERVER;return;} 509 if(strip!=STRIP_AUTO) return; 510 if(urlmasks==null) { strip=STRIP_NONE;return;} 511 if( (target & TARGET_KNOWN)!=0 || 512 (target & TARGET_WORLD)!=0 ) { strip=STRIP_NONE;return;} 513 if( (target & TARGET_SERVER)!=0 ) { strip=STRIP_SERVER;return;} 514 if( (target & TARGET_LOCATION)!=0 ) { strip=STRIP_LOCATION;return;} 515 strip=STRIP_DIRECTORY; 516 } 517 hasContent()518 public final boolean hasContent() 519 { 520 if(contentmasks==null) return false; else return true; 521 } 522 getTarget(String frombase,String fromdir,String locbase,String url)523 public final static byte getTarget(String frombase,String fromdir,String locbase,String url) 524 { 525 // int target=mask.getTarget(frombase,fromdir,url); 526 URL u; 527 try 528 { 529 u=new URL(url); 530 531 } 532 catch (MalformedURLException badurl) 533 { 534 return TARGET_WORLD; 535 } 536 String urldir=util.getDirname(u.getFile()); 537 538 if(locbase.regionMatches(0,url,0,locbase.length())) 539 { 540 /* stejna location */ 541 if(fromdir.equals(urldir)) return TARGET_DIRECTORY; 542 if(fromdir.regionMatches(0,urldir,0,fromdir.length())) 543 return TARGET_SUBDIR; 544 return TARGET_LOCATION; 545 } 546 547 String urlbase=u.getProtocol()+"://"+u.getHost(); 548 if(frombase.equals(urlbase)) 549 { 550 551 /* stejny server */ 552 return TARGET_SERVER; 553 } 554 /* prohledama databazi known locations */ 555 for(int i=loader.loc.length-1;i>=0;i--) 556 if(loader.loc[i].locbase.regionMatches(0,url,0,loader.loc[i].locbase.length())) 557 return TARGET_KNOWN; 558 return TARGET_WORLD; 559 } 560 match(String url,String ext,String src,byte target,String locbase)561 public final boolean match(String url,String ext,String src,byte target,String locbase) 562 { 563 // System.out.println("url="+url+"\n\tsrc="+src+" mask.target="+this.target+" target="+target); 564 if( (target & this.target)==0) return false; 565 // System.out.println("target ok"); 566 srcscan:while(true) 567 { 568 if(srcmasks!=null) 569 { 570 /* check SRC */ 571 for(int i=0;i<srcmasks.length;i++) 572 // System.out.println(srcok[i]+" "+srcmasks[i]); 573 if(srcmasks[i].matches(src) == srcok[i]) 574 if(anysrc) break srcscan; 575 else ; 576 else 577 if(!anysrc) return false; 578 579 if(anysrc) return false; 580 } 581 break; 582 } 583 // System.out.println("src ok"); 584 extscan:while(true) 585 { 586 if(extmasks!=null) 587 { 588 /* check EXT */ 589 for(int i=0;i<extmasks.length;i++) 590 if(extmasks[i].matches(ext) == extok[i]) 591 if(anyext) break extscan; 592 else ; 593 else 594 if(!anyext) return false; 595 if(anyext) return false; 596 } 597 break; 598 } 599 // System.out.println("ext ok"); 600 urlscan:while(true) 601 { 602 if(urlmasks!=null) 603 { 604 String stripped=url; 605 switch(strip) 606 { 607 case STRIP_NONE:break; 608 case STRIP_SERVER: 609 int i=url.indexOf("://",0); 610 i=url.indexOf("/",i+3); 611 if (i == -1) 612 stripped= ""; 613 else 614 stripped=url.substring(i); 615 break; 616 case STRIP_LOCATION: 617 stripped=url.substring(locbase.length()); 618 break; 619 } 620 621 622 for(int i=0;i<urlmasks.length;i++) 623 if(urlmasks[i].matches(stripped) == urlsok[i]) 624 if(anyurl) break urlscan; 625 else ; 626 else 627 if(!anyurl) return false; 628 if(anyurl) return false; 629 } 630 break; 631 } 632 // System.out.println("ALL OK"); 633 return true; 634 } 635 636 } 637