1 // Larbin
2 // Sebastien Ailleret
3 // 07-12-01 -> 07-12-01
4 
#include <iostream.h>
#include <fstream>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
9 
10 
11 #include "options.h"
12 
13 #include "types.h"
14 #include "global.h"
15 #include "fetch/file.h"
16 #include "utils/text.h"
17 #include "utils/debug.h"
18 #include "interf/output.h"
19 #include "utils/MD5.h"
20 
21 
22 /** A page has been loaded successfully
23  * @param page the page that has been fetched
24  */
25 
26 MD5 md5;
27 
loaded(html * page)28 void loaded (html *page) {
29   // Here should be the code for managing everything
30   // page->getHeaders() gives a char* containing the http headers
31   // page->getPage() gives a char* containing the page itself
32   // those char* are statically allocated, so you should copy
33   // them if you want to keep them
34   // in order to accept \000 in the page, you can use page->getLength()
35 #ifdef BIGSTATS
36   cout << "fetched : ";
37   page->getUrl()->print();
38   // cout << page->getHeaders() << "\n" << page->getPage() << "\n";
39 #endif // BIGSTATS
40 
41 
42   char url[maxUrlSize];
43   char digest[36] = {0};
44   char output_filename[64];
45   char headers[1024];
46   unsigned int document_type;
47 
48   strcpy(headers, page->getHeaders());
49 
50   /*
51      Cache HTML and XML only
52    */
53   if( strstr(headers, "Content-Type: text/html")){
54     document_type = 0;
55   }
56   else if( strstr(headers, "Content-Type: text/xml") ){
57     document_type = 1;
58   }
59   else {
60     return;
61   }
62 
63   page->getUrl()->writeUrl(url);
64 
65 
66   md5.reset();
67   md5.append((const md5_byte_t *)url, strlen(url));
68   md5.finish();
69 
70   for (int di = 0; di < 16; ++di){
71     sprintf((digest+di*2), "%02x", (int)(md5.getDigest()[di]));
72   }
73 
74   printf("%s\n%s\n\n", url, digest);
75 
76   std::ofstream output_file;
77   sprintf(output_filename,
78 	  "/tmp/fear-api/pf/%c/%c/%s",
79 	  digest[0], digest[1], digest);
80   output_file.open(output_filename, ios::binary | ios::out);
81   if(output_file.is_open()){
82     output_file << document_type << "\n";
83     output_file << page->getPage();
84     output_file.close();
85   }
86 }
87 
88 /** The fetch failed
89  * @param u the URL of the doc
90  * @param reason reason of the fail
91  */
failure(url * u,FetchError reason)92 void failure (url *u, FetchError reason) {
93   // Here should be the code for managing everything
94 #ifdef BIGSTATS
95   cout << "fetched failed (" << (int) reason << ") : ";
96   u->print();
97 #endif // BIGSTATS
98 }
99 
/** Initialisation hook of the user output module
 *
 * Currently there is nothing to set up, so the body is empty.
 */
void initUserOutput () {
  // nothing to initialise
}
105 
106 /** stats, called in particular by the webserver
107  * the webserver is in another thread, so be careful
108  * However, if it only reads things, it is probably not useful
109  * to use mutex, because incoherence in the webserver is not as critical
110  * as efficiency
111  */
outputStats(int fds)112 void outputStats(int fds) {
113   ecrire(fds, "Nothing to declare");
114 }
115