1 // Larbin
2 // Sebastien Ailleret
3 // 08-02-00 -> 04-01-02
4 
5 // This is the new structure of a site
6 // It includes a fifo of waiting urls
7 
8 #ifndef SITE_H
9 #define SITE_H
10 
11 #include <time.h>
12 #include <adns.h>
13 
14 #include "types.h"
15 #include "utils/Fifo.h"
16 #include "utils/url.h"
17 
/** Initialisation entry point for the site module.
 * Only declared in this header; the definition lives elsewhere, so what
 * exactly gets set up cannot be told from here.
 * NOTE(review): presumably has to run before any NamedSite / IPSite is
 * used -- confirm against the definition.
 */
void initSite();
19 
/** The possible states of a connection.
 * The numeric values are spelled out explicitly; they are the same ones
 * the compiler assigns implicitly (0, 1, 2, 3 in declaration order).
 */
enum ConnState {
  emptyC      = 0,
  connectingC = 1,
  writeC      = 2,
  openC       = 3
};
27 
/** The possible states of a dns query.
 * The numeric values are spelled out explicitly; they are the same ones
 * the compiler assigns implicitly (0, 1, 2, 3 in declaration order).
 */
enum DnsState {
  waitDns   = 0,
  doneDns   = 1,
  errorDns  = 2,
  noConnDns = 3
};
35 
/** This class is intended to make sure the sum of the
 * sizes of the fifos included in the different sites
 * is not too big.
 *
 * It is a simple counter: `pos` is raised to `size` by putAll() and
 * lowered one step at a time by getOne().
 */
class Interval {
 private:
  /** Position in the interval */
  uint pos;
  /** Size of the interval */
  uint size;

 public:
  /** Constructor : the interval starts at position 0.
   * Marked explicit so that a plain uint is never silently converted
   * into an Interval.
   * @param size size of the interval
   */
  explicit Interval (uint size) : pos(0), size(size) {}
  /** Destructor */
  ~Interval () {}
  /** How many urls can we put
   * answer 0 if no urls can be put
   *
   * Moves the position to the end of the interval.
   * @return the distance between the old position and the size
   */
  inline uint putAll () {
    // Kept in unsigned arithmetic: the result is returned as uint, so
    // going through a signed int in between only adds a lossy round trip.
    const uint res = size - pos;
    pos = size;
    return res;
  }
  /** Warn an url has been retrieved : moves the position back by one.
   * No check is made against position 0; the caller has to keep calls
   * balanced with what putAll() handed out.
   */
  inline void getOne () { pos--; }
  /** only for debugging, handle with care
   * @return the current position
   */
  inline uint getPos () const { return pos; }
};
60 
61 class NamedSite {
62  private:
63   /* string used for following CNAME chains (just one jump) */
64   char *cname;
65   /** we've got a good dns answer
66    * get the robots.txt */
67   void dnsOK ();
68   /** Cannot get the inet addr
69    * dnsState must have been set properly before the call */
70   void dnsErr ();
71   /** Delete the old identity of the site */
72   void newId ();
73   /** put this url in its IPSite */
74   void transfer (url *u);
75   /** forget this url for this reason */
76   void forgetUrl (url *u, FetchError reason);
77  public:
78   /** Constructor */
79   NamedSite ();
80   /** Destructor : never used */
81   ~NamedSite ();
82   /* name of the site */
83   char name[maxSiteSize];
84   /* port of the site */
85   uint16_t port;
86   /* numbers of urls in ram for this site */
87   uint16_t nburls;
88   /* fifo of urls waiting to be fetched */
89   url *fifo[maxUrlsBySite];
90   uint8_t inFifo;
91   uint8_t outFifo;
92   void putInFifo(url *u);
93   url *getInFifo();
94   short fifoLength();
95   /** Is this Site in a dnsSites */
96   bool isInFifo;
97   /** internet addr of this server */
98   char dnsState;
99   struct in_addr addr;
100   uint ipHash;
101   /* Date of expiration of dns call and robots.txt fetch */
102   time_t dnsTimeout;
103   /** test if a file can be fetched thanks to the robots.txt */
104   bool testRobots(char *file);
105   /* forbidden paths : given by robots.txt */
106   Vector<char> forbidden;
107   /** Put an url in the fifo
108    * If there are too much, put it back in UrlsInternal
109    * Never fill totally the fifo => call at least with 1 */
110   void putGenericUrl(url *u, int limit, bool prio);
putUrl(url * u)111   inline void putUrl (url *u) { putGenericUrl(u, 15, false); }
putUrlWait(url * u)112   inline void putUrlWait (url *u) { putGenericUrl(u, 10, false); }
putPriorityUrl(url * u)113   inline void putPriorityUrl (url *u) { putGenericUrl(u, 5, true); }
putPriorityUrlWait(url * u)114   inline void putPriorityUrlWait (url *u) { putGenericUrl(u, 1, true); }
115   /** Init a new dns query */
116   void newQuery ();
117   /** The dns query ended with success */
118   void dnsAns (adns_answer *ans);
119   /** we got the robots.txt, transfer what must be in IPSites */
120   void robotsResult (FetchError res);
121 };
122 
123 class IPSite {
124  private:
125   /* date of last access : avoid rapid fire */
126   time_t lastAccess;
127   /** Is this Site in a okSites (eg have something to fetch) */
128   bool isInFifo;
129   /** Get an url from the fifo
130    * resize tab if too big
131    */
132   url *getUrl ();
133  public:
134   /** Constructor */
135   IPSite ();
136   /** Destructor : never used */
137   ~IPSite ();
138   /** Urls waiting for being fetched */
139   Fifo<url> tab;
140   /** Put an url in the fifo */
141   void putUrl (url *u);
142   /** fetch the fist page in the fifo okSites
143    * expects at least one element in freeConns
144    * return expected time for next call (0 means now)
145    */
146   int fetch ();
147 };
148 
149 #endif // SITE_H
150