1 // Larbin
2 // Sebastien Ailleret
3 // 07-12-01 -> 07-12-01
4
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <iostream.h>
#include <fstream>
9
10
11 #include "options.h"
12
13 #include "types.h"
14 #include "global.h"
15 #include "fetch/file.h"
16 #include "utils/text.h"
17 #include "utils/debug.h"
18 #include "interf/output.h"
19 #include "utils/MD5.h"
20
21
22 /** A page has been loaded successfully
23 * @param page the page that has been fetched
24 */
25
26 MD5 md5;
27
loaded(html * page)28 void loaded (html *page) {
29 // Here should be the code for managing everything
30 // page->getHeaders() gives a char* containing the http headers
31 // page->getPage() gives a char* containing the page itself
32 // those char* are statically allocated, so you should copy
33 // them if you want to keep them
34 // in order to accept \000 in the page, you can use page->getLength()
35 #ifdef BIGSTATS
36 cout << "fetched : ";
37 page->getUrl()->print();
38 // cout << page->getHeaders() << "\n" << page->getPage() << "\n";
39 #endif // BIGSTATS
40
41
42 char url[maxUrlSize];
43 char digest[36] = {0};
44 char output_filename[64];
45 char headers[1024];
46 unsigned int document_type;
47
48 strcpy(headers, page->getHeaders());
49
50 /*
51 Cache HTML and XML only
52 */
53 if( strstr(headers, "Content-Type: text/html")){
54 document_type = 0;
55 }
56 else if( strstr(headers, "Content-Type: text/xml") ){
57 document_type = 1;
58 }
59 else {
60 return;
61 }
62
63 page->getUrl()->writeUrl(url);
64
65
66 md5.reset();
67 md5.append((const md5_byte_t *)url, strlen(url));
68 md5.finish();
69
70 for (int di = 0; di < 16; ++di){
71 sprintf((digest+di*2), "%02x", (int)(md5.getDigest()[di]));
72 }
73
74 printf("%s\n%s\n\n", url, digest);
75
76 std::ofstream output_file;
77 sprintf(output_filename,
78 "/tmp/fear-api/pf/%c/%c/%s",
79 digest[0], digest[1], digest);
80 output_file.open(output_filename, ios::binary | ios::out);
81 if(output_file.is_open()){
82 output_file << document_type << "\n";
83 output_file << page->getPage();
84 output_file.close();
85 }
86 }
87
88 /** The fetch failed
89 * @param u the URL of the doc
90 * @param reason reason of the fail
91 */
failure(url * u,FetchError reason)92 void failure (url *u, FetchError reason) {
93 // Here should be the code for managing everything
94 #ifdef BIGSTATS
95 cout << "fetched failed (" << (int) reason << ") : ";
96 u->print();
97 #endif // BIGSTATS
98 }
99
100 /** initialisation function
101 */
void initUserOutput () {
  // No initialisation is performed by this output module.
}
105
106 /** stats, called in particular by the webserver
107 * the webserver is in another thread, so be careful
108 * However, if it only reads things, it is probably not useful
109 * to use mutex, because incoherence in the webserver is not as critical
110 * as efficiency
111 */
outputStats(int fds)112 void outputStats(int fds) {
113 ecrire(fds, "Nothing to declare");
114 }
115