1 import java.util.*;
2 import java.net.*;
3 
4 public final class mask {
5 
    /* Load actions: values of the "act" option, stored in the action field. */
    //    act=    (what to do with this object?)
    //            load      - ACT_LOAD, the default set by systemdefaults()
    //            reject,stop - don't download it (same as q=0.0); a negated "load" is parsed as reject too
    //            noparse   - don't parse it (same as depth=-1; the parser also sets depth to -1)
    //            fastclose - close connection after sending request (will not be parsed even if parseable)
    //            close     - if object is unparseable (from content-type), close connection without downloading more data

    //            nosave    - don't save it to disk
    //            noproxy,direct - bypass proxy
    public static final byte ACT_LOAD=0;
    public static final byte ACT_REJECT=1;
    public static final byte ACT_NOPARSE=2;
    public static final byte ACT_FASTCLOSE=3;
    public static final byte ACT_CLOSE=4;
    public static final byte ACT_NOSAVE=5;
    public static final byte ACT_NOPROXY=6;
22 
    /* Log flags: bit values of the "log" option, OR-ed together in the log field. */
    // log=
    //      none,off  - no logging done
    //      server,serverdefault - LOG_SERVERDEFAULT
    //      queue     - when element has been taken from queue
    //      load      - when trying to load
    //      parse     - when parsing
    //      save      - when saving to disk
    //      err,error - when loading error occurs
    //      fatalerr,fatal - when fatal loading error occurs
    //      ioerr,io  - log i/o errors
    //      stored,saved,store,loaded - when saving to disk
    //                  NOTE(review): the original notes describe "save" and "stored" identically - confirm the difference
    //      reject    - log rejects
    //      depth     - LOG_DEPTH (not described in the original notes)
    //      url,urlonly,short - log URL only; when negated the flag is cleared again
    //      all       - every flag below LOG_URLONLY
    //      default   - LOG_DEFAULT

    public static final short LOG_SERVERDEFAULT =-1;
    public static final short LOG_NONE   =0;
    public static final short LOG_QUEUE  =1;
    public static final short LOG_LOAD   =2;
    public static final short LOG_PARSE  =4;
    public static final short LOG_SAVE   =8;
    public static final short LOG_ERR    =16;
    public static final short LOG_FATALERR=32;
    public static final short LOG_IOERR   =64;
    public static final short LOG_STORED  =128;
    public static final short LOG_REJECT  =256;
    public static final short LOG_DEPTH   =512;
    public static final short LOG_URLONLY =1024;// MUST be the highest bit: LOG_ALL is derived from it
    // aliases
    // LOG_ALL is the union of every flag bit below LOG_URLONLY
    public static final short LOG_ALL=LOG_URLONLY-1;
    public static final short LOG_DEFAULT=LOG_LOAD|LOG_IOERR|LOG_ERR|LOG_FATALERR;
53 
    /* Update modes: values of the "upd" option, stored in the update field. */
    //       norefresh,none - if object already exists, don't try to load it
    //       reload,force,forceload,forcereload - force cache to reload the object
    //       load      - load object in classic way (don't care whether an old copy exists)
    //                   (the older notes also list "continue"; the parser does not accept it)
    //       update    - if object in cache is older than XXXX hours, start loading.
    //       forceupdate - forced load if older
    //       noreparse - UPD_NOREPARSE (not described in the original notes)
    //       <number>  - age limit in hours; the parser stores it in updatelimit as milliseconds

    public static final byte UPD_NOLIMIT=-1; // updatelimit value meaning "no limit"
    public static final byte UPD_LOAD=0;
    public static final byte UPD_RELOAD=1;
    public static final byte UPD_UPDATE=2;
    public static final byte UPD_NOREFRESH=3;
    public static final byte UPD_FORCEUPDATE=4;
    public static final byte UPD_NOREPARSE=5;
68 
    /* URL stripping: values of the "strip" option, applied to the URL before mask testing. */
    // strip=none     - no URL stripping before mask testing
    //      =auto     - the default; guessStrip() picks a concrete mode from target and URL masks
    //      =location - removes http://xxx/ (the location base)
    //      =server   - removes http://xxx
    //      =dir      - STRIP_DIRECTORY
    //                  NOTE(review): match() has no case for STRIP_DIRECTORY, so the URL is tested unstripped

    public static final byte STRIP_AUTO=0;
    public static final byte STRIP_LOCATION=1;
    public static final byte STRIP_SERVER=2;
    public static final byte STRIP_NONE=3;
    public static final byte STRIP_DIRECTORY=4;
79 
    /* Size conditions: values of the "size" option, stored in the size field. */
    // size=xxxx bytes - only if object is bigger than xxxx bytes (limit kept in sizelimit)
    // size=known - only if we know size of object
    // size=unknown - only if we don't know size of object being downloaded
    // size=any     - don't care about it
    // A negated "known" is parsed as unknown and vice versa.

    public static final byte SIZE_NOLIMIT=-1; // sizelimit value meaning "no limit"
    public static final byte SIZE_ANY=0;
    public static final byte SIZE_KNOWN=1;
    public static final byte SIZE_UNKNOWN=2;
    public static final byte SIZE_LIMITED=3;
90 
    /* Targets: bit values of the "target" option, OR-ed together in the target field. */
    // target=any,anyserver - any www server in the world (including my own);
    //                         sets every target bit
    //        world  - any undefined location
    //        known  - any known location (but not me or my server)
    //        server - file located on the same server
    //        site   - alias for server,location,subdir,directory
    //        location,loc - located in Location URL
    //        me     - alias for location,subdir,directory
    //        subdir - located in subdirectory
    //        directory,dir - TARGET_DIRECTORY (getTarget() returns it when the directories are equal)
    //        auto   - clear the target so that guessTarget() decides
    //

    public static final byte      TARGET_ANY=127; // all seven bits set
    /* official subtypes */
    public static final byte      TARGET_NONE=0;
    public static final byte      TARGET_WORLD=1;
    public static final byte      TARGET_KNOWN=2;
    public static final byte      TARGET_SERVER=4;
    public static final byte      TARGET_LOCATION=8;
    public static final byte      TARGET_SUBDIR=16; // subdir in location
    public static final byte      TARGET_DIRECTORY=32;

    /* aliases */

    // everything on the same server, including the location itself
    public static final byte      TARGET_SITE=TARGET_SERVER|
    TARGET_LOCATION|
    TARGET_SUBDIR|
    TARGET_DIRECTORY;

    // everything inside the location
    public static final byte      TARGET_ME  =
        TARGET_LOCATION|
        TARGET_SUBDIR|
        TARGET_DIRECTORY;
122 
    /* How guessTarget() ranks a URL mask by its text:  */
    /* contains "://" (http...) - ANY, the highest rank */
    /* starts with "/"          - SITE                  */
    /* anything else (xxxx)     - LOCATION              */
    /* starts with "*"          - anything              */

    /* target guess priority: the highest rank among all URL masks wins */
    public static final byte GUESS_TARGET_ANY=10;
    public static final byte GUESS_TARGET_SITE=8;
    public static final byte GUESS_TARGET_LOCATION=5;
    public static final byte GUESS_TARGET_ANYWHERE=0;
133 
134 
135     public byte action,update,strip,size,target;
136     public short log;
137     public long sizelimit,updatelimit; // -1 is no limit
138 
139     public float q;
140 
141     public static final byte DEPTH_NOCHANGE=-2;
142 
143     public short depth;
144 
145     /* content section */
146 
147     private regexp contentmasks[]; /* regexp masks */
148     private boolean contentok[]; /* true=normal, false=must NOT be matched */
149     private boolean anycontent; /* true=OR, false=AND */
150 
151     /* extensions */
152 
153     private regexp extmasks[]; /* regexp masks */
154     private boolean extok[]; /* true=normal, false=must NOT be matched */
155     private boolean anyext; /* true=OR, false=AND */
156 
157     /* urlmasks */
158     private regexp urlmasks[]; /* regexp masks */
159     private boolean urlsok[]; /* true=normal, false=must NOT be matched */
160     private boolean anyurl; /* true=OR, false=AND */
161 
162 
163     /* src masks */
164     private regexp srcmasks[]; /* regexp masks */
165     private boolean srcok[]; /* true=normal, false=must NOT be matched */
166     private boolean anysrc;  /* true=OR, false=AND */
167 
systemdefaults()168     private final void systemdefaults()
169     {
170         action=ACT_LOAD;
171         update=UPD_LOAD;
172         strip=STRIP_AUTO;
173         target=TARGET_NONE;
174         size=SIZE_ANY;
175         log=LOG_NONE; // SERVERDEFAULT;
176         sizelimit=SIZE_NOLIMIT;
177         updatelimit=UPD_NOLIMIT;
178         anycontent=anyext=anyurl=anysrc=true;
179         q=1.0f;
180         depth=DEPTH_NOCHANGE;
181     }
182 
mask(options o)183     public mask(options o)
184     {
185         systemdefaults();
186         if(o==null) throw new NullPointerException("mask");
187         for(int i=o.parsed.size()-1;i>=0;i--)
188         {
189             try
190             {
191                 String opt;
192                 StringTokenizer st;
193                 st=new StringTokenizer((String)o.parsed.elementAt(i));
194                 opt=st.nextToken();
195                 // System.out.println("line="+o.parsed.elementAt(i));
196                 while(true)
197                 {
198                     String s;
199                     boolean ok;
200                     s=st.nextToken();
201                     ok=true;
202                     if(s.length()==1 && s.charAt(0)=='!')
203                     {
204                         ok=false;
205                         s=st.nextToken();
206                     }
207 
208                     if(opt.equals("q")) { q=Float.valueOf(s).floatValue();continue;}
209                     else if (opt.equals("url"))
210                     {
211                         if(s.equals("*") || s.equals("any")) {urlmasks=null;urlsok=null;}
212                         else
213                         {
214                             urlmasks=util.addRegexpToArray(s, urlmasks);
215                             urlsok=util.addBooleanToArray(ok,urlsok);
216                         }
217                         s=st.nextToken();
218                         if(s.equals(",")) anyurl=true; else anyurl=false;
219                         continue;
220                     }
221                     else if (opt.equals("content") || opt.equals("ct"))
222                     {
223                         if(s.equals("*") || s.equals("any")) {contentmasks=null;contentok=null;}
224                         else
225                         {
226                             contentmasks=util.addRegexpToArray(s, contentmasks);
227                             contentok=util.addBooleanToArray(ok,contentok);
228                         }
229                         s=st.nextToken();
230                         if(s.equals(",")) anycontent=true; else anycontent=false;
231                         continue;
232                     }
233                     else if (opt.equals("ext"))
234                     {
235                         if(s.equals("*") || s.equals("any")) {extmasks=null;extok=null;}
236                         else
237                         {
238                             extmasks=util.addRegexpToArray(s, extmasks);
239                             extok=util.addBooleanToArray(ok,extok);
240                         }
241                         s=st.nextToken();
242                         if(s.equals(",")) anyext=true; else anyext=false;
243                         continue;
244                     }
245 
246                     else if (opt.equals("src"))
247                     {
248                         if(s.equals("*") || s.equals("any")) {srcmasks=null;srcok=null;}
249                         else
250                         {
251                             srcmasks=util.addRegexpToArray(s, srcmasks);
252                             srcok=util.addBooleanToArray(ok,srcok);
253                         }
254                         s=st.nextToken();
255                         if(s.equals(",")) anysrc=true; else anysrc=false;
256                         continue;
257                     }
258 
259                     else if (opt.equals("depth"))
260                     {
261                         try
262                         {
263                             depth=(short)Integer.valueOf(s).intValue();
264                         }
265                         catch (NumberFormatException n)
266                         {
267                             System.err.println("[CONFIG_ERROR] Bad depth : "+s);
268                         }
269 
270                         s=st.nextToken();
271                         continue;
272                     }
273 
274                     else if (opt.equals("size"))
275                     {
276                         if(s.equals("any")) { size=SIZE_ANY;sizelimit=SIZE_NOLIMIT;}
277                         else if (s.equals("known"))
278                             if(ok==false) size=SIZE_UNKNOWN;
279                             else size=SIZE_KNOWN;
280                         else if (s.equals("unknown"))
281                             if(ok==true) size=SIZE_UNKNOWN;
282                             else size=SIZE_KNOWN;
283                         else
284                             try
285                         {
286                                 sizelimit=Long.valueOf(s).intValue();
287                                 if(size==SIZE_ANY) size=SIZE_LIMITED;
288                         }
289                         catch (NumberFormatException n)
290                         {
291                             System.err.println("[CONFIG_ERROR] Bad size : "+s);
292                         }
293 
294                         s=st.nextToken();
295                         continue;
296                     }
297 
298                     else if (opt.equals("strip"))
299                     {
300                         if(s.equals("none")) strip=STRIP_NONE;
301                         else if (s.equals("server")) strip=STRIP_SERVER;
302                         else if (s.equals("location")) strip=STRIP_LOCATION;
303                         else if (s.equals("auto")) strip=STRIP_AUTO;
304                         else if (s.equals("dir")) strip=STRIP_DIRECTORY;
305                         else
306                             System.err.println("[CONFIG_ERROR] Invalid parameter to strip option : "+s);
307 
308                         s=st.nextToken();
309                         continue;
310                     }
311 
312                     else if (opt.equals("target"))
313                     {
314                         if(s.equals("any")) target=TARGET_ANY;
315                         else if (s.equals("anyserver")) target=TARGET_ANY;
316                         else if (s.equals("world")) target|=TARGET_WORLD;
317                         else if (s.equals("known")) target|=TARGET_KNOWN;
318                         else if (s.equals("server")) target|=TARGET_SERVER;
319                         else if (s.equals("location")) target|=TARGET_LOCATION;
320                         else if (s.equals("directory")) target|=TARGET_DIRECTORY;
321                         else if (s.equals("dir")) target|=TARGET_DIRECTORY;
322                         else if (s.equals("subdir")) target|=TARGET_SUBDIR;
323 
324 
325                         // aliases
326                         else if (s.equals("loc")) target|=TARGET_LOCATION;
327                         else if (s.equals("me")) target|=TARGET_ME;
328                         else if (s.equals("site")) target|=TARGET_SITE;
329 
330 
331                         else if (s.equals("auto")) target=0;
332                         else
333                             System.err.println("[CONFIG_ERROR] Invalid parameter to target option : "+s);
334 
335                         s=st.nextToken();
336                         continue;
337                     }
338 
339                     else if (opt.equals("act"))
340                     {
341                         if(s.equals("reject") ||
342                                 s.equals("stop") ||
343                                 (s.equals("load") && ok==false) )
344                         { action=ACT_REJECT;}
345                         else if (s.equals("noparse")) { depth=-1;action=ACT_NOPARSE;}
346                         else if (s.equals("fastclose")) action=ACT_FASTCLOSE;
347                         else if (s.equals("close")) action=ACT_CLOSE;
348                         else if (s.equals("load")) action=ACT_LOAD;
349                         else if (s.equals("nosave")) action=ACT_NOSAVE;
350                         else if (s.equals("direct") ||
351                                 s.equals("noproxy")  )
352                             action=ACT_NOPROXY;
353 
354                         else
355                             System.err.println("[CONFIG_ERROR] Invalid parameter to act option : "+s);
356 
357                         s=st.nextToken();
358                         continue;
359                     }
360 
361                     else if (opt.equals("log"))
362                     {
363                         if((s.equals("none") ||
364                                 s.equals("off")
365                         ) && ok==true)
366                             log=LOG_NONE;
367                         else if (s.equals("queue"))  log|=LOG_QUEUE;
368                         else if (s.equals("server")
369                                 ||s.equals("serverdefault")
370                         )                    log=LOG_SERVERDEFAULT;
371                         else if (s.equals("load"))   log|=LOG_LOAD;
372                         else if (s.equals("stored")
373                                 ||s.equals("saved")
374                                 ||s.equals("store")
375                                 ||s.equals("loaded")
376                         )                     log|=LOG_STORED;
377                         else if (s.equals("parse"))  log|=LOG_PARSE;
378                         else if (s.equals("save"))   log|=LOG_SAVE;
379                         else if (s.equals("depth"))  log|=LOG_DEPTH;
380                         else if (s.equals("err")
381                                 ||s.equals("error")
382                         )                     log|=LOG_ERR;
383                         else if (s.equals("all"))    log|=LOG_ALL;
384                         else if (s.equals("ioerr")
385                                 || s.equals("io"))
386                             log|=LOG_IOERR;
387                         else if (s.equals("fatalerr")
388                                 || s.equals("fatal"))
389                             log|=LOG_FATALERR;
390                         else if (s.equals("default"))  log=LOG_DEFAULT;
391                         else if (s.equals("reject"))   log|=LOG_REJECT;
392                         else if (s.equals("url") ||
393                                 s.equals("urlonly")  ||
394                                 s.equals("short")
395                         )
396                             if (ok==true) log=(short)((log & LOG_ALL)|LOG_URLONLY);
397                             else   log&=LOG_ALL;
398 
399                         else
400                             System.err.println("[CONFIG_ERROR] Invalid parameter to log option : "+s);
401 
402                         s=st.nextToken();
403                         continue;
404                     }
405                     else if (opt.equals("upd"))
406                     {
407                         if(s.equals("load")) update=UPD_LOAD;
408                         else if (s.equals("norefresh") ||
409                                 s.equals("none")
410                         )                      update=UPD_NOREFRESH;
411                         else if (s.equals("reload") ||
412                                 s.equals("force")  ||
413                                 s.equals("forceload") ||
414                                 s.equals("forcereload")
415                         )                      update=UPD_RELOAD;
416                         else if (s.equals("update")) update=UPD_UPDATE;
417                         else if (s.equals("forceupdate")) update=UPD_FORCEUPDATE;
418                         else if (s.equals("noreparse")) update=UPD_NOREPARSE;
419                         else
420                             try
421                         {
422                                 updatelimit=Long.valueOf(s).longValue()*3600000L;
423                                 if(update!=UPD_UPDATE || update!=UPD_FORCEUPDATE )
424                                     update=UPD_UPDATE;
425                         }
426                         catch (NumberFormatException n)
427                         {
428                             System.err.println("[CONFIG_ERROR] Bad update interval : "+s);
429                         }
430 
431                         s=st.nextToken();
432                         continue;
433                     }
434                     else
435                     {
436                         System.err.println("[CONFIG_ERROR] Unknown mask option "+opt);
437                         break;
438                     }
439                 }
440 
441             }
442             catch (NoSuchElementException ignore)
443             {}
444 
445 
446         }
447         guessTarget();
448         guessStrip();
449     }
450 
guessTarget()451     private final void guessTarget()
452     {
453         if(target!=0) return;
454         if(urlmasks==null) { target=TARGET_ME;return;}
455         /* guess: */
456         byte ttarget=GUESS_TARGET_ANYWHERE;
457 
458         for(int i=urlmasks.length-1;i>=0;i--)
459         {
460             String s;
461             s=urlmasks[i].toString();
462 
463             if(s.indexOf("://")>0)
464             {
465                 if(ttarget<GUESS_TARGET_ANY)
466                     ttarget=GUESS_TARGET_ANY;
467                 break;
468             }
469 
470             if(s.startsWith("/"))
471             {
472                 if(ttarget<GUESS_TARGET_SITE)
473                     ttarget=GUESS_TARGET_SITE;
474                 continue;
475             }
476 
477             if(s.startsWith("*"))
478             {
479                 if(ttarget<GUESS_TARGET_ANYWHERE)
480                     ttarget=GUESS_TARGET_ANYWHERE;
481                 continue;
482             }
483 
484             if(ttarget<GUESS_TARGET_LOCATION)
485                 ttarget=GUESS_TARGET_LOCATION;
486 
487         }
488         /* and setup..... */
489         switch(ttarget)
490         {
491         case GUESS_TARGET_ANYWHERE:
492         case GUESS_TARGET_ANY:
493             target=TARGET_ANY;break;
494         case GUESS_TARGET_SITE:
495             target=TARGET_SITE;break;
496         case GUESS_TARGET_LOCATION:
497             target=TARGET_ME;break;
498         default: throw new IllegalArgumentException ("Screwed in guessTarget();");
499         }
500 
501     }
502 
guessStrip()503     private final void guessStrip()
504     {
505         if( (strip==STRIP_LOCATION || strip==STRIP_DIRECTORY)
506                 &&
507                 (target & TARGET_ME)==0 )
508         { strip=STRIP_SERVER;return;}
509         if(strip!=STRIP_AUTO) return;
510         if(urlmasks==null) { strip=STRIP_NONE;return;}
511         if( (target & TARGET_KNOWN)!=0 ||
512                 (target & TARGET_WORLD)!=0 ) { strip=STRIP_NONE;return;}
513         if( (target & TARGET_SERVER)!=0 ) { strip=STRIP_SERVER;return;}
514         if( (target & TARGET_LOCATION)!=0 ) { strip=STRIP_LOCATION;return;}
515         strip=STRIP_DIRECTORY;
516     }
517 
hasContent()518     public final boolean hasContent()
519     {
520         if(contentmasks==null) return false; else return true;
521     }
522 
getTarget(String frombase,String fromdir,String locbase,String url)523     public final static byte getTarget(String frombase,String fromdir,String locbase,String url)
524     {
525         // int target=mask.getTarget(frombase,fromdir,url);
526         URL u;
527         try
528         {
529             u=new URL(url);
530 
531         }
532         catch (MalformedURLException badurl)
533         {
534             return TARGET_WORLD;
535         }
536         String urldir=util.getDirname(u.getFile());
537 
538         if(locbase.regionMatches(0,url,0,locbase.length()))
539         {
540             /* stejna location */
541             if(fromdir.equals(urldir)) return TARGET_DIRECTORY;
542             if(fromdir.regionMatches(0,urldir,0,fromdir.length()))
543                 return TARGET_SUBDIR;
544             return TARGET_LOCATION;
545         }
546 
547         String urlbase=u.getProtocol()+"://"+u.getHost();
548         if(frombase.equals(urlbase))
549         {
550 
551             /* stejny server */
552             return TARGET_SERVER;
553         }
554         /* prohledama databazi known locations */
555         for(int i=loader.loc.length-1;i>=0;i--)
556             if(loader.loc[i].locbase.regionMatches(0,url,0,loader.loc[i].locbase.length()))
557                 return TARGET_KNOWN;
558         return TARGET_WORLD;
559     }
560 
match(String url,String ext,String src,byte target,String locbase)561     public final boolean match(String url,String ext,String src,byte target,String locbase)
562     {
563         // System.out.println("url="+url+"\n\tsrc="+src+" mask.target="+this.target+" target="+target);
564         if( (target & this.target)==0) return false;
565         // System.out.println("target ok");
566         srcscan:while(true)
567         {
568             if(srcmasks!=null)
569             {
570                 /* check SRC */
571                 for(int i=0;i<srcmasks.length;i++)
572                     // System.out.println(srcok[i]+" "+srcmasks[i]);
573                     if(srcmasks[i].matches(src) == srcok[i])
574                         if(anysrc) break srcscan;
575                         else ;
576                     else
577                         if(!anysrc) return false;
578 
579                 if(anysrc) return false;
580             }
581             break;
582         }
583         // System.out.println("src ok");
584         extscan:while(true)
585         {
586             if(extmasks!=null)
587             {
588                 /* check EXT */
589                 for(int i=0;i<extmasks.length;i++)
590                     if(extmasks[i].matches(ext) == extok[i])
591                         if(anyext) break extscan;
592                         else ;
593                     else
594                         if(!anyext) return false;
595                 if(anyext) return false;
596             }
597             break;
598         }
599         // System.out.println("ext ok");
600         urlscan:while(true)
601         {
602             if(urlmasks!=null)
603             {
604                 String stripped=url;
605                 switch(strip)
606                 {
607                 case STRIP_NONE:break;
608                 case STRIP_SERVER:
609                     int i=url.indexOf("://",0);
610                     i=url.indexOf("/",i+3);
611                     if (i == -1)
612                         stripped= "";
613                     else
614                         stripped=url.substring(i);
615                     break;
616                 case STRIP_LOCATION:
617                     stripped=url.substring(locbase.length());
618                     break;
619                 }
620 
621 
622                 for(int i=0;i<urlmasks.length;i++)
623                     if(urlmasks[i].matches(stripped) == urlsok[i])
624                         if(anyurl) break urlscan;
625                         else ;
626                     else
627                         if(!anyurl) return false;
628                 if(anyurl) return false;
629             }
630             break;
631         }
632         // System.out.println("ALL OK");
633         return true;
634     }
635 
636 }
637