1 /*
2 * httpserver.c - URLfilterDB
3 *
4 * ufdbGuard is copyrighted (C) 2005-2020 by URLfilterDB with all rights reserved.
5 *
6 * Parts of the ufdbGuard daemon are based on squidGuard.
7 * This module is NOT based on squidGuard.
8 *
9 * serve HTTP GET requests for /cgi-bin/URLblocked.cgi
10 *
11 * RCS $Id: httpserver.c,v 1.104 2020/07/24 11:40:40 root Exp root $
12 */
13
14
15 #if defined(__OPTIMIZE__) && defined(__GNUC__) && defined(GCC_INLINE_STRING_FUNCTIONS_ARE_FASTER)
16 #undef _FORTIFY_SOURCE
17 #define __USE_STRING_INLINES 1
18 #endif
19
/*
 * strmatchN(a,b): true when string a equals the N-character string b.
 * Only the first N characters of b are looked at; its terminator is never
 * compared, so b has to be exactly N characters long (typically a literal).
 * The "#if 0" branch holds the plain strcmp() variants for reference.
 */
#if 0

#define strmatch2(a,b)   (strcmp(a,b) == 0)
#define strmatch3(a,b)   (strcmp(a,b) == 0)
#define strmatch4(a,b)   (strcmp(a,b) == 0)
#define strmatch5(a,b)   (strcmp(a,b) == 0)

#else

#define strmatch2(a,b)                 \
   ((a)[0] == (b)[0]  &&               \
    (a)[1] == (b)[1]  &&               \
    (a)[2] == '\0')

#define strmatch3(a,b)                 \
   ((a)[0] == (b)[0]  &&               \
    (a)[1] == (b)[1]  &&               \
    (a)[2] == (b)[2]  &&               \
    (a)[3] == '\0')

#define strmatch4(a,b)                 \
   ((a)[0] == (b)[0]  &&               \
    (a)[1] == (b)[1]  &&               \
    (a)[2] == (b)[2]  &&               \
    (a)[3] == (b)[3]  &&               \
    (a)[4] == '\0')

#define strmatch5(a,b)                 \
   ((a)[0] == (b)[0]  &&               \
    (a)[1] == (b)[1]  &&               \
    (a)[2] == (b)[2]  &&               \
    (a)[3] == (b)[3]  &&               \
    (a)[4] == (b)[4]  &&               \
    (a)[5] == '\0')

#endif
31
32 #include "ufdb.h"
33 #include "ufdblib.h"
34 #include "httpserver.h"
35 #include "version.h"
36
37 #include <unistd.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <strings.h>
41 #include <time.h>
42 #include <sys/time.h>
43 #include <sys/select.h>
44 #include <ctype.h>
45 #include <errno.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <fcntl.h>
49 #include <sys/socket.h>
50 #include <netinet/in.h>
51 #include <netinet/tcp.h>
52 #include <arpa/inet.h>
53
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57
58
/*
 * Contents and sizes (in bytes) of the default.flv, default.mp3,
 * default.mpeg and default.wmv files; filled in by initData().
 * A size of 0 means that the file has not been loaded.
 */
static char * defaultFLVcontent = NULL;
static int    defaultFLVsize    = 0;

static char * defaultMP3content = NULL;
static int    defaultMP3size    = 0;

static char * defaultMPGcontent = NULL;
static int    defaultMPGsize    = 0;

static char * defaultWMVcontent = NULL;
static int    defaultWMVsize    = 0;
67
68
/* One filler line; by its own wording it exists to keep MSIE and Chrome
 * from replacing the page by their "friendly" error page.
 */
#define HTML_COMMENT_FILLER \
   "<!-- long comment to disable MSIE and Chrome so-called friendly error page -->\r\n"

/* HTML comment block embedded in generated pages: a version line followed by
 * five filler lines.
 */
#define HTML_COMMENT \
   "<!-- ufdbguardd http-server " UFDB_VERSION " -->\r\n" \
   HTML_COMMENT_FILLER \
   HTML_COMMENT_FILLER \
   HTML_COMMENT_FILLER \
   HTML_COMMENT_FILLER \
   HTML_COMMENT_FILLER
76
/*
 * Language indices.  The values match the order of the entries in the
 * per-language string tables (_title, _forbidden, _explain_1, ...).
 */
#define LANG_IND_EN        0     /* English    */
#define LANG_IND_NL        1     /* Dutch      */
#define LANG_IND_DE        2     /* German     */
#define LANG_IND_PL        3     /* Polish     */
#define LANG_IND_IT        4     /* Italian    */
#define LANG_IND_ES        5     /* Spanish    */
#define LANG_IND_PT        6     /* Portuguese */
#define LANG_IND_FR        7     /* French     */
#define LANG_IND_TR        8     /* Turkish    */
#define LANG_IND_SV        9     /* Swedish    */

/* language index that LANG_IND_DEFAULT stands for */
#define LANG_IND_DEFAULT   LANG_IND_EN
90
/* HTML fragment telling the user that the URL filter has a fatal error. */
static char _fatal_error_text [] =
   "<center>\n"
   "<font color=red><b>"
   "Access to the internet is blocked because<br>\n"
   "the URL filter has a fatal error. <br>\n"
   "Ask your helpdesk or web proxy administrator for help.\n"   /* was: "Ask you helpdesk" */
   "</b></font>\n"
   "</center>\n";

/* HTML fragment telling the user that a new URL database is being loaded. */
static char _loading_database_text [] =
   "<center>\n"
   "<font color=red><b>"
   "Access to the internet is temporarily blocked because<br>\n"
   "a new URL database is being loaded by the URL filter. <br>\n"
   "Wait one minute and try again.\n"
   "</b></font>\n"
   "</center>\n";
107
/* Page titles, one entry per language in LANG_IND_* order. */
static const char * _title [] =
{
   "403 Forbidden",                      /* EN */
   "403 Geen Toegang",                   /* NL */
   "403 Verboten",                       /* DE */
   "403 Cenzura, zakaz pobrania",        /* PL */
   "403 Accesso negato",                 /* IT */
   "403 Ningún acces",                   /* ES */
   "403 Proibido",                       /* PT */
   "403 Interdit",                       /* FR */
   "403 Erişim engellendi",              /* TR */
   "403 Förbjuden"                       /* SV */
};
/* The word(s) for "forbidden", one entry per language in LANG_IND_* order. */
static const char * _forbidden [] =
{
   "Forbidden",                                        /* EN */
   "Geen Toegang",                                     /* NL */
   "Verboten",                                         /* DE */
   "Cenzura, zakaz pobrania",                          /* PL */
   "Accesso negato",                                   /* IT */
   "Ningún acces",                                     /* ES */
   "Proibido",                                         /* PT */
   "Interdit",                                         /* FR */
   "Erişim engellendi",                                /* TR */
   "Sidan stoppad enligt landstingets riktlinjer"      /* SV */
};
/* First part of the explanation why access is blocked; one entry per
 * language in LANG_IND_* order.  _explain_2 holds the matching second part.
 */
static const char * _explain_1 [] =
{
   /* EN */ "Access is blocked since the URL is in the filter category",
   /* NL */ "De toegang is geblokkeerd omdat de URL in de categorie",
   /* DE */ "Zugriff verweigert weil die URL die Klassifizierung",
   /* PL */ "Nie otworzysz tej strony bo jest ona sklasyfikowana jako",
   /* IT */ "L'accesso è negato poiché l'URL appartiene a none",
   /* ES */ "Se bloquea el acceso puesto que el URL se considera ser",
   /* PT */ "O acesso a este site foi bloqueado porque o conteúdo está",
   /* FR */ "L'accès est interdit parce que le site fait partie de la catégorie",   /* was: "inderdit" */
   /* TR */ "Ulaşmak istediğiniz sayfaya erişim kapalıdır. Sınıfı:",
   /* SV */ "Access till denna sida är stoppad:"
};
/* Second part of the explanation (sentence ending); one entry per language
 * in LANG_IND_* order.
 */
static const char * _explain_2 [] =
{
   ".",                                        /* EN */
   " valt.",                                   /* NL */
   " hat.",                                    /* DE */
   " przez program kontroli ufdbGuard.",       /* PL */
   ".",                                        /* IT */
   ".",                                        /* ES */
   ".",                                        /* PT */
   ".",                                        /* FR */
   ".",                                        /* TR */
   "."                                         /* SV */
};
160 static const char * _moreInfo1 [] =
161 {
162 "More information about ufdbGuard is <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
163 "Meer informatie over ufdbGuard is <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
164 "Mehr Informationen über ufdbGuard ist <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
165 "Informacja o tym programie kontroli jest na <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
166 "Maggiori informazioni su ufdbGuard sono disponibili <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
167 "Más información sobre ufdbGuard está <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
168 "Mais informação sobre ufdbGuard está <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
169 "Plus d'informations à propos de ufdbGuard <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
170 "ufdbGuard hakkında bilgi için <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL,
171 "Mer information om ufdbGuard är <a href=\"" UFDB_EXPLAIN_DENY_REASON_URL
172 };
/* Text that closes the href attribute opened by _moreInfo1, followed by the
 * link text and the closing </a>; one entry per language in LANG_IND_* order.
 */
static const char * _moreInfo2 [] =
{
   /* EN */ "\" target=\"_blank\">here</a>.",
   /* NL */ "\" target=\"_blank\">hier</a>.",
   /* DE */ "\" target=\"_blank\">hier</a>.",
   /* PL */ "\" target=\"_blank\">stronie</a>.",
   /* IT */ "\" target=\"_blank\">qui</a>.",
   /* ES */ "\" target=\"_blank\">aquí</a>.",
   /* PT */ "\" target=\"_blank\">aqui</a>.",
   /* FR */ "\" target=\"_blank\">ici</a>.",
   /* TR */ "\" target=\"_blank\">tıklayınız</a>.",
   /* SV */ "\" target=\"_blank\">här</a>."        /* the '>' closing the <a> tag was missing */
};
/* Text for going back to the previous page; one entry per language in
 * LANG_IND_* order.
 */
static const char * _goBack [] =
{
   "Click here to go back",                   /* EN */
   "Klik hier om terug te gaan",              /* NL */
   "Klicken Sie hier um zurück zu gehen",     /* DE */
   "Wycofaj do poprzedniej strony",           /* PL */
   "tornare indietro",                        /* IT */
   "ir detrás",                               /* ES */
   "volte",                                   /* PT */
   "Revenir en arrière",                      /* FR */
   "Önceki sayfa",                            /* TR */
   "Klicka här för att komma tillbaks"        /* SV */
};
199
200
initData(const char * imagesDirectory)201 static void initData(
202 const char * imagesDirectory )
203 {
204 int in;
205 struct stat statbuf;
206 char filename[1024];
207
208 sprintf( filename, "%s/default.flv", imagesDirectory );
209 in = open( filename, O_RDONLY );
210 if (in >= 0)
211 {
212 if (fstat( in, &statbuf ) >= 0)
213 {
214 defaultFLVcontent = (char *) ufdbMalloc( statbuf.st_size );
215 if (read( in, defaultFLVcontent, statbuf.st_size ) == statbuf.st_size)
216 defaultFLVsize = statbuf.st_size;
217 }
218 close( in );
219 }
220
221 sprintf( filename, "%s/default.mp3", imagesDirectory );
222 in = open( filename, O_RDONLY );
223 if (in >= 0)
224 {
225 if (fstat( in, &statbuf ) >= 0)
226 {
227 defaultMP3content = (char *) ufdbMalloc( statbuf.st_size );
228 if (read( in, defaultMP3content, statbuf.st_size ) == statbuf.st_size)
229 defaultMP3size = statbuf.st_size;
230 }
231 close( in );
232 }
233
234 sprintf( filename, "%s/default.mpeg", imagesDirectory );
235 in = open( filename, O_RDONLY );
236 if (in >= 0)
237 {
238 if (fstat( in, &statbuf ) >= 0)
239 {
240 defaultMPGcontent = (char *) ufdbMalloc( statbuf.st_size );
241 if (read( in, defaultMPGcontent, statbuf.st_size ) == statbuf.st_size)
242 defaultMPGsize = statbuf.st_size;
243 }
244 close( in );
245 }
246
247 sprintf( filename, "%s/default.wmv", imagesDirectory );
248 in = open( filename, O_RDONLY );
249 if (in >= 0)
250 {
251 if (fstat( in, &statbuf ) >= 0)
252 {
253 defaultWMVcontent = (char *) ufdbMalloc( statbuf.st_size );
254 if (read( in, defaultWMVcontent, statbuf.st_size ) == statbuf.st_size)
255 defaultWMVsize = statbuf.st_size;
256 }
257 close( in );
258 }
259
260 /* TODO: read all forbidden images */
261 }
262
263
writeHttpdPidFile(void)264 void writeHttpdPidFile( void )
265 {
266 FILE * fp;
267
268 (void) unlink( globalHttpdPidFile );
269 fp = fopen( globalHttpdPidFile, "w" );
270 if (fp == NULL)
271 ufdbLogError( "cannot write to PID file %s - check file and directory permission and ownership", globalHttpdPidFile );
272 else
273 {
274 fprintf( fp, "%d\n", ufdbGV.pid );
275 fclose( fp );
276 if (ufdbGV.debugHttpd || ufdbGV.debug)
277 ufdbLogMessage( "wrote my pid (%d) to %s", ufdbGV.pid, globalHttpdPidFile );
278 }
279 }
280
281
removeHttpdPidFile(void)282 void removeHttpdPidFile( void )
283 {
284 if (ufdbGV.debugHttpd || ufdbGV.debug)
285 ufdbLogMessage( "removing pid file %s", globalHttpdPidFile );
286 (void) unlink( globalHttpdPidFile );
287 }
288
289
/*
 * Write size bytes of buffer to fd.
 *
 * Partial writes are continued and writes interrupted by a signal (EINTR)
 * are retried.  Any other error is logged and ends the function; the caller
 * is not informed.
 * A NULL buffer or a size <= 0 is a no-op.  This also protects against
 * callers that pass on the negative result of a failed formatting call,
 * which write() would otherwise interpret as a huge unsigned length.
 */
static void writeBuffer( int fd, char * buffer, int size )
{
   ssize_t nwritten;

   if (buffer == NULL)
      return;

   while (size > 0)
   {
      nwritten = write( fd, buffer, (size_t) size );
      if (nwritten < 0)
      {
         if (errno == EINTR)
            continue;
         ufdbLogError( "httpServer: write failed: %s", strerror(errno) );
         return;
      }

      if (nwritten == 0)
      {
         /* no progress: give up instead of spinning forever */
         ufdbLogError( "httpServer: write failed: no data written" );
         return;
      }

      buffer += nwritten;
      size -= (int) nwritten;
   }
}
311
312
AnswerHttpTimeout(int fd)313 static void AnswerHttpTimeout( int fd )
314 {
315 int hdrlen;
316 int bodylen;
317 time_t now_t;
318 struct tm t;
319 char header[2048];
320 char content[8192+1200];
321
322 if (ufdbGV.debug || ufdbGV.debugHttpd)
323 ufdbLogMessage( "AnswerHttpTimeout" );
324
325 now_t = time( NULL );
326 gmtime_r( &now_t, &t );
327
328 hdrlen = sprintf( header,
329 "HTTP/1.0 200 OK\r\n" /* or 50X ?? */
330 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
331 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
332 "Cache-Control: max-age=180\r\n"
333 "Connection: close\r\n"
334 "Content-Type: text/html\r\n"
335 "\r\n",
336 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
337 t.tm_mday,
338 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
339 t.tm_year + 1900,
340 t.tm_hour, t.tm_min, t.tm_sec
341 );
342
343 bodylen = snprintf( content, sizeof(content),
344 "<html>\r\n"
345 "<head>\r\n"
346 "<title>Timeout</title>\r\n"
347 "</head>\r\n"
348 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1 bgcolor=\"orange\" text=\"black\">\r\n"
349 "<font size=\"+2\">A timeout error occurred. <br> </font>\r\n"
350 "<font size=\"+1\">\r\n"
351 "The HTTP message for ufdbGuard was not received within the timeout period<br>\r\n"
352 "The URL that you are trying to access is blocked.<br>\r\n"
353 "</font>\r\n"
354 HTML_COMMENT
355 "</body>\r\n"
356 "</html>\r\n"
357 );
358
359 writeBuffer( fd, header, hdrlen );
360 writeBuffer( fd, content, bodylen );
361 }
362
363
AnswerHttpCrossdomain(int fd)364 static void AnswerHttpCrossdomain( int fd )
365 {
366 int hdrlen;
367 int bodylen;
368 time_t now_t;
369 struct tm t;
370 char header[2048];
371 char content[8192+1200];
372
373 if (ufdbGV.debug || ufdbGV.debugHttpd)
374 ufdbLogMessage( "AnswerHttpCrossdomain:" );
375
376 now_t = time( NULL );
377 gmtime_r( &now_t, &t );
378 hdrlen = sprintf( header,
379 "HTTP/1.0 200 OK\r\n"
380 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
381 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
382 "Cache-Control: max-age=60\r\n"
383 "Connection: close\r\n"
384 "Content-Type: text/xml\r\n"
385 "\r\n",
386 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
387 t.tm_mday,
388 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
389 t.tm_year + 1900,
390 t.tm_hour, t.tm_min, t.tm_sec
391 );
392 bodylen = snprintf( content, sizeof(content),
393 "<?xml version=\"1.0\"?>\n"
394 "<cross-domain-policy>\n"
395 " <allow-access-from domain=\"*\" />\n"
396 "</cross-domain-policy>\n"
397 );
398
399 writeBuffer( fd, header, hdrlen );
400 writeBuffer( fd, content, bodylen );
401 }
402
403
AnswerHttpEmpty(int fd,const char * url)404 static void AnswerHttpEmpty( int fd, const char * url )
405 {
406 int hdrlen;
407 int bodylen;
408 time_t now_t;
409 struct tm t;
410 char header[2048];
411 char content[8192+1200];
412
413 if (ufdbGV.debug || ufdbGV.debugHttpd)
414 ufdbLogMessage( "AnswerHttpEmpty: %s", url );
415
416 now_t = time( NULL );
417 gmtime_r( &now_t, &t );
418
419 hdrlen = sprintf( header,
420 "HTTP/1.0 200 OK\r\n"
421 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
422 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
423 "Cache-Control: max-age=180\r\n"
424 "Connection: close\r\n"
425 "Content-Type: text/html\r\n"
426 "\r\n",
427 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
428 t.tm_mday,
429 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
430 t.tm_year + 1900,
431 t.tm_hour, t.tm_min, t.tm_sec
432 );
433
434 bodylen = snprintf( content, sizeof(content),
435 "<html>\r\n"
436 "<head>\r\n"
437 "<title>Error</title>\r\n"
438 "</head>\r\n"
439 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1 bgcolor=\"orange\" text=\"black\">\r\n"
440 "<font size=\"+2\">An error occurred. <br> </font>\r\n"
441 "<font size=\"+1\">\r\n"
442 "This http server can only serve URL requests for ufdbGuard <br>\r\n"
443 "redirection messages and does not understand the URL. <br>\r\n"
444 "URL: <tt>%s</tt> <br>\r\n"
445 "Most likely the configuration of \"redirect\" statements is incorrect. It should include \"/cgi-bin/URLblocked.cgi\". <br>\r\n"
446 "</font>\r\n"
447 HTML_COMMENT
448 "</body>\r\n"
449 "</html>\r\n"
450 ,
451 url
452 );
453
454 writeBuffer( fd, header, hdrlen );
455 writeBuffer( fd, content, bodylen );
456 }
457
458
AnswerHttpNotFound(int fd,const char * url)459 static void AnswerHttpNotFound( int fd, const char * url )
460 {
461 int hdrlen;
462 time_t now_t;
463 struct tm t;
464 char header[2048];
465
466 if (ufdbGV.debug || ufdbGV.debugHttpd)
467 ufdbLogMessage( "AnswerHttpNotFound: %s", url );
468
469 now_t = time( NULL );
470 gmtime_r( &now_t, &t );
471
472 hdrlen = sprintf( header,
473 "HTTP/1.0 404 Not Found\r\n"
474 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
475 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
476 "Cache-Control: max-age=5\r\n"
477 "Connection: close\r\n"
478 "Content-Type: text/html\r\n"
479 "Content-Length: 0\r\n"
480 "\r\n",
481 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
482 t.tm_mday,
483 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
484 t.tm_year + 1900,
485 t.tm_hour, t.tm_min, t.tm_sec
486 );
487
488 writeBuffer( fd, header, hdrlen );
489 }
490
491
AnswerHttpHead(int fd,char * url)492 static void AnswerHttpHead( int fd, char * url )
493 {
494 int hdrlen;
495 time_t now_t;
496 struct tm t;
497 char header[2048];
498
499 if (ufdbGV.debug || ufdbGV.debugHttpd)
500 ufdbLogMessage( "AnswerHttpHead: %s", url );
501
502 now_t = time( NULL );
503 gmtime_r( &now_t, &t );
504
505 hdrlen = sprintf( header,
506 "HTTP/1.0 204 No Content\r\n" /* TODO: or maybe 304 unmodified ?? */
507 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
508 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
509 "Cache-Control: max-age=5\r\n"
510 "Connection: close\r\n"
511 "Content-Type: text/plain\r\n"
512 "Content-Length: 0\r\n"
513 "\r\n",
514 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
515 t.tm_mday,
516 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
517 t.tm_year + 1900,
518 t.tm_hour, t.tm_min, t.tm_sec
519 );
520
521 writeBuffer( fd, header, hdrlen );
522 }
523
524
AnswerHttpPost(int fd,char * url)525 static void AnswerHttpPost( int fd, char * url )
526 {
527 int hdrlen;
528 time_t now_t;
529 struct tm t;
530 char header[2048];
531
532 if (ufdbGV.debug || ufdbGV.debugHttpd)
533 ufdbLogMessage( "AnswerHttpPost: %s", url );
534
535 now_t = time( NULL );
536 gmtime_r( &now_t, &t );
537
538 hdrlen = sprintf( header,
539 "HTTP/1.0 204 No Content\r\n"
540 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
541 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
542 "Cache-Control: max-age=5\r\n"
543 "Connection: close\r\n"
544 "Content-Type: text/plain\r\n"
545 "Content-Length: 0\r\n"
546 "\r\n",
547 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
548 t.tm_mday,
549 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
550 t.tm_year + 1900,
551 t.tm_hour, t.tm_min, t.tm_sec
552 );
553
554 writeBuffer( fd, header, hdrlen );
555 }
556
557
/*
 * Decode the percent-encoded string text into new_text and convert the
 * letters A-Z to lowercase.
 *
 * - "%XX" is replaced by the character with hexadecimal code XX; codes
 *   below 9 or above 127 are replaced by a space.
 * - "%%" is replaced by a single '%'.
 * - A '%' that is followed by fewer than two characters ends the decoding:
 *   a '%' is stored and the rest of the input is dropped.
 * - Characters after '%' that are not hexadecimal digits are not rejected;
 *   they go through the same arithmetic and usually end up as a space.
 * - With UFDB_REPARSE_PERCENT_IN_URI defined, a decoded '%' is used as the
 *   start of a new escape sequence.
 *
 * The output is never longer than the input, so new_text must have room
 * for strlen(text)+1 characters.
 */
static void decodeURIvalue(
   char *       new_text,
   const char * text )
{
   char * b;

   b = new_text;
   while (*text != '\0')
   {
      if (*text == '%')
      {
         int u1, u2, ascii;
         text++;
#ifdef UFDB_REPARSE_PERCENT_IN_URI
reparse:
#endif
         /* Two more characters are required.  The former test "*text+1 == '\0'"
          * added 1 to the character instead of looking at the next one, so
          * "%X" at the end of the input walked past the terminating NUL.
          */
         if (text[0] == '\0' || text[1] == '\0')
         {
            *b++ = '%';
            *b = '\0';
            return;
         }
         u1 = *text++;
         if (u1 == '%')
         {
            *b++ = '%';
            continue;
         }
         if (u1 >= 'A' && u1 <= 'F')
            u1 = u1 - 'A' + 10;
         else if (u1 >= 'a' && u1 <= 'f')
            u1 = u1 - 'a' + 10;
         else
            u1 = u1 - '0';
         u2 = *text++;
         if (u2 >= 'A' && u2 <= 'F')
            u2 = u2 - 'A' + 10;
         else if (u2 >= 'a' && u2 <= 'f')
            u2 = u2 - 'a' + 10;
         else
            u2 = u2 - '0';
         ascii = u1 * 16 + u2;
         if (ascii < 9 || ascii > 127)         /* also catches the results of invalid hex digits */
            ascii = 32;
#ifdef UFDB_REPARSE_PERCENT_IN_URI
         if (ascii == '%')
            goto reparse;
#endif
         if (ascii <= 'Z' && ascii >= 'A')
            *b++ = ascii + ('a' - 'A');
         else
            *b++ = ascii;
      }
      else
      {
         if (*text <= 'Z' && *text >= 'A')
            *b++ = *text + ('a' - 'A');
         else
            *b++ = *text;
         text++;
      }
   }
   *b = '\0';
}
622
623
/* Content types; the values are also the indices of the name table in
 * contentType2String().
 */
#define CONTENTTYPE_IMAGE    0
#define CONTENTTYPE_JAVA     1
#define CONTENTTYPE_HTML     2
#define CONTENTTYPE_XML      3
#define CONTENTTYPE_CSS      4
#define CONTENTTYPE_TEXT     5
#define CONTENTTYPE_JSON     6
#define CONTENTTYPE_STREAM   7
#define CONTENTTYPE_204      8

/*
 * Return the name of a CONTENTTYPE_* value, or "error" for any other value.
 * The returned string is a constant and must not be modified or freed.
 */
static const char * contentType2String(
   int contentType )
{
   /* in CONTENTTYPE_* order, CONTENTTYPE_IMAGE (0) up to CONTENTTYPE_204 (8) */
   static const char * const typeNames[] =
   {
      "image",
      "java",
      "html",
      "xml",
      "css",
      "text",
      "json",
      "stream",
      "E204"
   };

   if (contentType < CONTENTTYPE_IMAGE  ||  contentType > CONTENTTYPE_204)
      return "error";

   return typeNames[contentType];
}
651
652
653 /* If the URL has no file suffix, we cannot know the content type.
654 * Historically it was guessed that the file type was HTML.
655 * Now we try to guess the file type looking at the URL because sometimes there are clues.
656 * If there are no clues, we return the default_type.
657 */
guessContentType(char * url,int default_type)658 static int guessContentType(
659 char * url,
660 int default_type )
661 {
662 char * path;
663 char * r;
664
665 /* strip http:// https:// ftp:// */
666 r = strstr( url, "://" );
667 if (r != NULL && r-url < 7)
668 url = r + 3;
669
670 path = strchr( url, '/' );
671 if (path == NULL || *(path+1) == '\0') /* not always HTML :-( but our best guess */
672 return CONTENTTYPE_HTML;
673
674 if (strncmp( url, "www.googleadservices.com/pagead/conversion/", 43 ) == 0)
675 return CONTENTTYPE_IMAGE;
676 if (strncmp( url, "googleadservices.com/pagead/conversion/", 39 ) == 0)
677 return CONTENTTYPE_IMAGE;
678 if (strcmp( url, "www.google-analytics.com/collect" ) == 0)
679 return CONTENTTYPE_IMAGE;
680
681 /* object is xml, html, java or video ... :-) so we choose text because it
682 * sends an empty object.
683 */
684 if (strncmp( url, "pubads.g.doubleclick.net/gampad/ads", 35 ) == 0)
685 return CONTENTTYPE_JAVA;
686
687 if (strncmp( url, "googleads.g.doubleclick.net/pagead/ads", 38 ) == 0)
688 return CONTENTTYPE_TEXT;
689
690 if (strcmp( url, "a.analytics.yahoo.com/fpc.pl" ) == 0 ||
691 strcmp( url, "o.analytics.yahoo.com/fpc.pl" ) == 0 ||
692 strcmp( url, "s.analytics.yahoo.com/fpc.pl" ) == 0)
693 return CONTENTTYPE_JAVA;
694 if (strcmp( url, "a.analytics.yahoo.com/p.pl" ) == 0 ||
695 strcmp( url, "o.analytics.yahoo.com/p.pl" ) == 0 ||
696 strcmp( url, "s.analytics.yahoo.com/p.pl" ) == 0 ||
697 strcmp( url, "a.analytics.yahoo.com/itr.pl" ) == 0 ||
698 strcmp( url, "o.analytics.yahoo.com/itr.pl" ) == 0 ||
699 strcmp( url, "s.analytics.yahoo.com/itr.pl" ) == 0)
700 return CONTENTTYPE_IMAGE;
701 if (strcmp( url, "row.bc.yahoo.com/b" ) == 0)
702 return CONTENTTYPE_IMAGE;
703 if (strcmp( url, "geo.yahoo.com/t" ) == 0 ||
704 strcmp( url, "geo.yahoo.com/p" ) == 0)
705 return CONTENTTYPE_IMAGE;
706 if (strncmp( url, "pclick.internal.yahoo.com/p/s=", 30 ) == 0)
707 return CONTENTTYPE_IMAGE;
708 if (strcmp( url, "br.yahoo.com/jserror" ) == 0)
709 return CONTENTTYPE_IMAGE;
710 if (strcmp( url, "de.yahoo.com/jserror" ) == 0)
711 return CONTENTTYPE_IMAGE;
712 if (strcmp( url, "nl.yahoo.com/jserror" ) == 0)
713 return CONTENTTYPE_IMAGE;
714 if (strcmp( url, "fr.yahoo.com/jserror" ) == 0)
715 return CONTENTTYPE_IMAGE;
716 if (strcmp( url, "it.yahoo.com/jserror" ) == 0)
717 return CONTENTTYPE_IMAGE;
718 if (strcmp( url, "ch.yahoo.com/jserror" ) == 0)
719 return CONTENTTYPE_IMAGE;
720 if (strcmp( url, "at.yahoo.com/jserror" ) == 0)
721 return CONTENTTYPE_IMAGE;
722
723 if ((r = strstr( url, ".bc.yahoo.com/b" )) != NULL &&
724 strcmp( r, ".bc.yahoo.com/b" ) == 0)
725 return CONTENTTYPE_IMAGE;
726
727 if ((r = strstr( url, ".chartbeat.net/ping" )) != NULL &&
728 r < path)
729 return CONTENTTYPE_IMAGE;
730
731 if (strncmp( url, "www.nytimes.com/adx/bin/clientside", 34 ) == 0)
732 return CONTENTTYPE_IMAGE;
733
734 if (strcmp( url, "goshka.mail.ru/gstat" ) == 0)
735 return CONTENTTYPE_IMAGE;
736 if (strcmp( url, "swa.mail.ru/cgi-bin/counters" ) == 0)
737 return CONTENTTYPE_JAVA;
738 if (strcmp( url, "rs.mail.ru/un" ) == 0)
739 return CONTENTTYPE_IMAGE;
740 if ((r = strstr( url, ".top.mail.ru/counter" )) != NULL &&
741 r < path)
742 return CONTENTTYPE_IMAGE;
743 if ((r = strstr( url, ".top.list.ru/counter" )) != NULL &&
744 r < path)
745 return CONTENTTYPE_IMAGE;
746
747 if (strncmp( url, "s.youtube.com/api/stats/", 24 ) == 0)
748 return CONTENTTYPE_TEXT; /* 204 */
749
750 if (strncmp( url, "www.youtube.com/cp/", 15 ) == 0 ||
751 strncmp( url, "www.youtube.com/p/", 14 ) == 0 ||
752 strncmp( url, "www.youtube.com/v/", 14 ) == 0 ||
753 strncmp( url, "www.youtube.com/ptracking", 25 ) == 0 ||
754 strncmp( url, "www.youtube.com/videoplayback", 29 ) == 0)
755 return CONTENTTYPE_STREAM;
756
757 if ((r = strstr( url, ".scorecard.com/" )) != NULL &&
758 r < path)
759 {
760 if (strncmp( url, "b.scorecardresearch.com/", 24 ) == 0)
761 {
762 if (strcmp( path, "/b" ) == 0 ||
763 strcmp( path, "/p" ) == 0 ||
764 strcmp( path, "/p2" ) == 0)
765 return CONTENTTYPE_IMAGE;
766 }
767 if (strncmp( url, "a.scorecardresearch.com/rpc.flow", 32 ) == 0)
768 return CONTENTTYPE_JAVA;
769
770 return CONTENTTYPE_TEXT;
771 }
772
773 if (strncmp( url, "link.theplatform.com/s/", 23 ) == 0 &&
774 strstr( path, "/tracker.log" ) != NULL)
775 return CONTENTTYPE_TEXT;
776
777 if (strcmp( url, "t-l3.hulu.com/beacon/v3/playback" ) == 0 ||
778 strcmp( url, "t.hulu.com/beacon/v3/error" ) == 0)
779 return CONTENTTYPE_TEXT; /* void */
780 if (strncmp( url, "t2.hulu.com/v3/playertracking/", 30 ) == 0 ||
781 strncmp( url, "t2.hulu.com/v3/recommendationtracking/tracking", 46 ) == 0 ||
782 strcmp( url, "t2.hulu.com/v3/contentinteraction/cploaded" ) == 0 ||
783 strcmp( url, "t.hulu.com/beacon/v3/error" ) == 0 ||
784 strncmp( url, "t2.hulu.com/v3/revenue/", 23 ) == 0)
785 return CONTENTTYPE_IMAGE;
786
787 if (strcmp( url, "r.casalemedia.com/rum" ) == 0)
788 return CONTENTTYPE_IMAGE;
789 if (strncmp( url, "as.casalemedia.com/", 19 ) == 0)
790 return CONTENTTYPE_TEXT;
791
792 if (strcmp( url, "t.mookie1.com/t/v1/imp" ) == 0)
793 return CONTENTTYPE_IMAGE;
794
795 if (strcmp( url, "ulog.tealiumiq.com/ulog" ) == 0)
796 return CONTENTTYPE_IMAGE;
797
798 if (strcmp( url, "rtd.tubemogul.com/upi/" ) == 0)
799 return CONTENTTYPE_IMAGE;
800
801 if (strcmp( url, "ad.yieldmanager.com/unpixel" ) == 0)
802 return CONTENTTYPE_IMAGE;
803 if (strcmp( url, "ad.yieldmanager.com/imp" ) == 0)
804 return CONTENTTYPE_JAVA;
805
806 if (strncmp( url, "www.tns-counter.ru/v1", 21 ) == 0 ||
807 strncmp( url, "rw.tns-counter.ru/v1", 20 ) == 0)
808 return CONTENTTYPE_IMAGE;
809
810 if (strncmp( url, "adserver.adtech.de/addyn", 24 ) == 0)
811 return CONTENTTYPE_JAVA;
812
813 if (strcmp( url, "pagead2.googlesyndication.com/pagead/imgad" ) == 0) /* gif or swf */
814 return CONTENTTYPE_STREAM;
815
816 if (strstr( url, ".overture.com/js" ) != NULL)
817 return CONTENTTYPE_JAVA;
818
819 if (strncmp( url, "adfarm.mediaplex.com/ad/tr/", 27 ) == 0)
820 return CONTENTTYPE_IMAGE;
821
822 if (strcmp( url, "b.aol.com/click" ) == 0)
823 return CONTENTTYPE_IMAGE;
824 if (strcmp( url, "b.aol.com/ping" ) == 0)
825 return CONTENTTYPE_TEXT;
826
827 if ((r = strstr( url, ".doubleclick.net/" )) != NULL && r < path)
828 {
829 if (strcmp( path, "/push" ) == 0)
830 return CONTENTTYPE_TEXT; /* HTML or 204 no content */
831 if (strncmp( path, "/xbbe/view", 10 ) == 0)
832 return CONTENTTYPE_JAVA;
833 if (strncmp( path, "/xbbe/pixel", 11 ) == 0)
834 return CONTENTTYPE_HTML;
835 if (strncmp( path, "/xbbe/match", 11 ) == 0)
836 return CONTENTTYPE_TEXT;
837
838 if (strncmp( path, "/mads/", 6 ) == 0)
839 return CONTENTTYPE_HTML;
840
841 if (strncmp( path, "/adi/", 5 ) == 0)
842 return CONTENTTYPE_HTML;
843
844 if (strncmp( path, "/adj/", 5 ) == 0 ||
845 strncmp( path, "/pfadj/", 7 ) == 0 ||
846 strncmp( path, "/noidadj/", 9 ) == 0)
847 return CONTENTTYPE_JAVA;
848
849 if (strncmp( path, "/adx/", 5 ) == 0 ||
850 strncmp( path, "/pfadx/", 7 ) == 0 ||
851 strncmp( path, "/noidadx/", 9 ) == 0)
852 return CONTENTTYPE_XML;
853
854 if (strcmp( path, "/simgad" ) == 0)
855 return CONTENTTYPE_IMAGE;
856 if (strncmp( path, "/pixel", 6 ) == 0)
857 return CONTENTTYPE_IMAGE;
858 if (strncmp( path, "/imp", 4 ) == 0)
859 return CONTENTTYPE_IMAGE;
860 if (strcmp( path, "/activity" ) == 0)
861 return CONTENTTYPE_IMAGE;
862 if (strcmp( path, "/activityi" ) == 0)
863 return CONTENTTYPE_HTML;
864 if (strncmp( path, "/ad/", 4 ) == 0) /* redirects to IMAGE (most), HTML (hulu.com) and XML (where?) content ... */
865 return CONTENTTYPE_TEXT;
866 if (strcmp( path, "/json" ) == 0)
867 return CONTENTTYPE_JSON;
868 if (strncmp( path, "/pagead/conversion/", 19 ) == 0)
869 return CONTENTTYPE_IMAGE;
870 if (strncmp( path, "/pagead/viewthroughconversion/", 30 ) == 0)
871 return CONTENTTYPE_IMAGE;
872
873 r = strchr( path+1, '/' );
874 if (r != NULL) /* ad-emea.doubleclick.net/N6514/adj/newsticker/homepage */
875 {
876 if (strncmp( r, "/adi/", 5 ) == 0)
877 return CONTENTTYPE_HTML;
878 if (strncmp( r, "/adj/", 5 ) == 0 ||
879 strncmp( r, "/pfadj/", 7 ) == 0 ||
880 strncmp( r, "/noidadj/", 9 ) == 0)
881 return CONTENTTYPE_JAVA;
882 if (strncmp( r, "/adx/", 5 ) == 0 ||
883 strncmp( r, "/pfadx/", 7 ) == 0 ||
884 strncmp( r, "/noidadx/", 9 ) == 0)
885 return CONTENTTYPE_XML;
886 if (strncmp( r, "/imp", 4 ) == 0)
887 return CONTENTTYPE_IMAGE;
888 if (strncmp( r, "/pixel", 6 ) == 0)
889 return CONTENTTYPE_IMAGE;
890 if (strncmp( r, "/ad/", 5 ) == 0)
891 return CONTENTTYPE_TEXT;
892 if (strcmp( r, "/json" ) == 0)
893 return CONTENTTYPE_JSON;
894 }
895 }
896
897 if (strncmp( url, "zdbb.net/l/", 11 ) == 0)
898 return CONTENTTYPE_IMAGE;
899
900 if (strcmp( url, "sync.mathtag.com/sync/img" ) == 0)
901 return CONTENTTYPE_IMAGE;
902
903 if ((r = strstr( url, ".ivwbox.de/" )) != NULL && r < path)
904 {
905 if (strstr( path, "ivw/cp/" ) != NULL)
906 return CONTENTTYPE_IMAGE;
907 }
908
909 if ((r = strstr( url, ".tlm100.net/" )) != NULL && r < path)
910 {
911 if (strstr( path, "/pse" ) != NULL)
912 return CONTENTTYPE_IMAGE;
913 }
914
915 if ((r = strstr( url, ".intellitxt.com/" )) != NULL && r < path)
916 {
917 if (strncmp( path, "/intellitxt/front.asp", 21 ) == 0)
918 return CONTENTTYPE_JAVA;
919 if (strncmp( path, "/v4/init", 8 ) == 0)
920 return CONTENTTYPE_JAVA;
921 if (strncmp( path, "/v5/init", 8 ) == 0)
922 return CONTENTTYPE_JAVA;
923 if (strncmp( path, "/v6/init", 8 ) == 0)
924 return CONTENTTYPE_JAVA;
925 if (strncmp( path, "/v7/init", 8 ) == 0)
926 return CONTENTTYPE_JAVA;
927 }
928
929 if (strcmp( url, "ad.zanox.com/ppv/" ) == 0)
930 return CONTENTTYPE_IMAGE;
931 if (strcmp( url, "ad.zanox.com/ppc/" ) == 0)
932 return CONTENTTYPE_HTML;
933 if (strcmp( url, "zbox.zanox.com/ppb/" ) == 0)
934 return CONTENTTYPE_HTML;
935
936 if (strcmp( url, "ps.googleusercontent.com/beacon" ) == 0)
937 return CONTENTTYPE_IMAGE;
938
939 if (strstr( url, ".sitestat.com/" ) != NULL)
940 {
941 if (ufdbStrStrEnd( path, "/s" ))
942 return CONTENTTYPE_IMAGE;
943 }
944
945 if (strcmp( url, "hits.blogsoft.org/" ) == 0)
946 return CONTENTTYPE_IMAGE;
947
948 if ((r = strstr( url, ".adjuggler.net/servlet" )) != NULL &&
949 r < path)
950 {
951 if (strstr( path, "/vj" ) != NULL)
952 return CONTENTTYPE_JAVA;
953 if (strstr( path, "/vh" ) != NULL)
954 return CONTENTTYPE_HTML;
955 if (strstr( path, "/vx" ) != NULL) /* used in video player for event tracking */
956 return CONTENTTYPE_TEXT;
957 if (strstr( path, "/cc" ) != NULL) /* used in video player for event tracking */
958 return CONTENTTYPE_TEXT;
959 }
960
961 if (strncmp( url, "view.atdmt.com/", 15 ) == 0 && path != NULL)
962 {
963 /* /jaction is either html of java ! */
964 if (strncmp( path, "/action/", 8 ) == 0)
965 return CONTENTTYPE_IMAGE;
966 if (strstr( path, "/jaction/" ) != NULL)
967 return CONTENTTYPE_JAVA;
968 if (strstr( path, "/jview/" ) != NULL)
969 return CONTENTTYPE_JAVA;
970 if (strstr( path, "/view/" ) != NULL) /* redirected image */
971 return CONTENTTYPE_IMAGE;
972 if (strstr( path, "/iview/" ) != NULL) /* iframe or bodyimage */
973 return CONTENTTYPE_HTML;
974 if (strstr( path, "/iaction/" ) != NULL) /* iframe or bodyimage */
975 return CONTENTTYPE_HTML;
976 }
977
978 if (strcmp( url, "www.facebook.com/dialog/oauth" ) == 0 ||
979 strcmp( url, "static.ak.fbcdn.net/connect/xd_proxy.php" ) == 0)
980 return CONTENTTYPE_HTML;
981 if (strncmp( url, "static.ak.fbcdn.net/connect.php/js/fb.share", 43 ) == 0)
982 return CONTENTTYPE_JAVA;
983 if (strncmp( url, "static.ak.fbcdn.net/connect.php/css/", 36 ) == 0)
984 return CONTENTTYPE_CSS;
985 if (strncmp( url, "static.ak.connect.facebook.com/connect.php/en_us/css/", 53 ) == 0)
986 return CONTENTTYPE_CSS;
987 if (strcmp( url, "static.ak.connect.facebook.com/connect.php/en_us/js/api/canvasutil/connect/xfbml" ) == 0)
988 return CONTENTTYPE_JSON;
989 if (strncmp( url, "static.ak.connect.facebook.com/connect.php", 42 ) == 0)
990 return CONTENTTYPE_JAVA;
991 if (strncmp( url, "www.facebook.com/ajax/presence/reconnect.php", 44 ) == 0)
992 return CONTENTTYPE_JAVA;
993 if (strcmp( url, "www.facebook.com/fr/u.php" ) == 0)
994 return CONTENTTYPE_IMAGE;
995
996 if (strncmp( url, "log.adap.tv/log", 15 ) == 0)
997 return CONTENTTYPE_TEXT;
998 #if 0
999 if (strncmp( url, "www.facebook.com/plugins/like.php", 33 ) == 0)
1000 return CONTENTTYPE_HTML;
1001 #endif
1002
1003 if (strcmp( url, "trk.kissmetrics.com/e" ) == 0 ||
1004 strcmp( url, "trk.kissmetrics.com/s" ) == 0)
1005 return CONTENTTYPE_IMAGE;
1006
1007 if (strncmp( url, "whos.amung.us/pjswidget", 23 ) == 0)
1008 return CONTENTTYPE_IMAGE;
1009
1010 if (strcmp( url, "secure-us.imrworldwide.com/cgi-bin/m" ) == 0)
1011 return CONTENTTYPE_IMAGE;
1012
1013 if (strncmp( url, "a.collective-media.net/cmadj/", 29 ) == 0)
1014 return CONTENTTYPE_JAVA;
1015
1016 if (strcmp( url, "traffic.shareaholic.com/e" ) == 0)
1017 return CONTENTTYPE_IMAGE;
1018
1019 if (strcmp( url, "d.agkn.com/iframe" ) == 0)
1020 return CONTENTTYPE_HTML;
1021
1022 if (strncmp( path, "/adsadclient", 12 ) == 0 &&
1023 (r = strstr( url, ".msn.com/" )) != NULL &&
1024 r < path)
1025 return CONTENTTYPE_TEXT; /* produces java, xml and html */
1026
1027 if (strcmp( url, "catalog.video.msn.com/frauddetect.aspx" ) == 0)
1028 return CONTENTTYPE_TEXT;
1029
1030 if (strncmp( url, "www.linkedin.com/analytics/", 27 ) == 0)
1031 return CONTENTTYPE_IMAGE;
1032 if (strncmp( url, "dp.33across.com/ps", 18 ) == 0)
1033 return CONTENTTYPE_IMAGE;
1034 if (strncmp( url, "d.agkn.com/pixel!", 17 ) == 0)
1035 return CONTENTTYPE_IMAGE;
1036 if (strncmp( url, "data.aggregateknowledge.com/pixel!", 34 ) == 0)
1037 return CONTENTTYPE_IMAGE;
1038
1039 if ((r = strstr( url, ".sitemeter.com" )) != NULL &&
1040 r < path)
1041 {
1042 if (strcmp( path, "/meter.asp" ) == 0)
1043 return CONTENTTYPE_IMAGE;
1044 }
1045
1046 if ((r = strstr( url, ".terra." )) != NULL &&
1047 r < path)
1048 {
1049 if (strcmp( path, "/td.asp" ) == 0)
1050 return CONTENTTYPE_IMAGE;
1051 if (strcmp( path, "/uv" ) == 0)
1052 return CONTENTTYPE_IMAGE;
1053 }
1054
1055 if (strncmp( url, "adlog.com.com/adlog/i/", 22 ) == 0)
1056 return CONTENTTYPE_IMAGE;
1057
1058 if (strcmp( url, "pbid.pro-market.net/engine" ) == 0)
1059 return CONTENTTYPE_IMAGE;
1060
1061 if (strcmp( url, "a.tribalfusion.com/j.ad" ) == 0)
1062 return CONTENTTYPE_JAVA;
1063
1064 if (strcmp( url, "p.ic.tynt.com/b/p" ) == 0)
1065 return CONTENTTYPE_IMAGE;
1066
1067 if (strncmp( path, "valog.loginside.co.kr/", 22 ) == 0)
1068 return CONTENTTYPE_IMAGE;
1069
1070 if (strncmp( url, "ftjcfx.com/image-", 17 ) == 0)
1071 return CONTENTTYPE_IMAGE;
1072 if (strncmp( url, "lduhtrp.net/image-", 18 ) == 0)
1073 return CONTENTTYPE_IMAGE;
1074 if (strstr( url, "img.pheedo.com/img.phdo" ) != NULL)
1075 return CONTENTTYPE_IMAGE;
1076 if (strncmp( url, "an.yandex.ru/count/", 19 ) == 0)
1077 return CONTENTTYPE_TEXT;
1078 if (strncmp( url, "bs.yandex.ru/informer/", 21 ) == 0)
1079 return CONTENTTYPE_IMAGE;
1080 if (strncmp( url, "mc.yandex.ru/watch/", 19 ) == 0) /* can be json and image */
1081 return CONTENTTYPE_TEXT;
1082 if (strncmp( url, "mc.yandex.ru/clmap/", 19 ) == 0) /* tracker */
1083 return CONTENTTYPE_TEXT;
1084
1085 if (strncmp( url, "counter.yadro.ru/hit", 20 ) == 0) /* it was HTML but it now is GIF/IMAGE */
1086 return CONTENTTYPE_IMAGE;
1087
1088 if (strncmp( url, "b.kavanga.ru/exp", 16 ) == 0)
1089 return CONTENTTYPE_JAVA;
1090 if (strstr( url, ".api.playtomic.com/tracker/q.aspx" ) != NULL) /* 204 no content */
1091 return CONTENTTYPE_HTML;
1092 if (strncmp( url, "an.yandex.ru/code/", 16 ) == 0)
1093 return CONTENTTYPE_JAVA;
1094 if (strncmp( url, "zero.kz/c.php", 13 ) == 0)
1095 return CONTENTTYPE_IMAGE;
1096
1097 if (strstr( path, "/realmedia/ads/" ) != NULL)
1098 {
1099 if (strstr( path, "/adstream_jx" ) != NULL ||
1100 strstr( path, "/adstream_mjx" ) != NULL)
1101 return CONTENTTYPE_JAVA;
1102
1103 if (strstr( path, "/adstream_lx" ) != NULL ||
1104 strstr( path, "/adstream_nx" ) != NULL)
1105 return CONTENTTYPE_IMAGE;
1106
1107 if (strstr( path, "/ads/cap.cgi" ) != NULL) /* /adstream.cap */
1108 return CONTENTTYPE_IMAGE;
1109 }
1110
1111 if (strstr( url, "v.fwmrm.net" ) != NULL)
1112 {
1113 if (strcmp( path, "/ad/p/1" ) == 0)
1114 return CONTENTTYPE_XML;
1115 if (strcmp( path, "/ad/l/1" ) == 0)
1116 return CONTENTTYPE_HTML;
1117 if (strcmp( path, "/ad/g/1" ) == 0)
1118 return CONTENTTYPE_JAVA;
1119 }
1120
1121 if (strstr( url, "overture.com/ls_js_" ) != NULL)
1122 return CONTENTTYPE_JAVA;
1123
1124 if (strstr( path, "/scripts/beacon.dll" ) != NULL ||
1125 strstr( path, "/scripts/beacon2.dll" ) != NULL)
1126 return CONTENTTYPE_IMAGE;
1127
1128 if (strncmp( url, "trk.vindicosuite.com/tracking/v3/instream/impression", 52 ) == 0)
1129 return CONTENTTYPE_IMAGE;
1130
1131 if ((r = strstr( url, ".smartadserver.com/a/diff" )) != NULL &&
1132 r < path)
1133 return CONTENTTYPE_JAVA;
1134 if ((r = strstr( url, ".smartadserver.com/def/" )) != NULL &&
1135 r < path)
1136 return CONTENTTYPE_JAVA;
1137
1138 if (strcmp( path, "/hit.xiti" ) == 0)
1139 return CONTENTTYPE_IMAGE;
1140
1141 if (strstr( path, "/javascript/" ) != NULL ||
1142 strstr( path, "/ajaxpro/" ) != NULL)
1143 return CONTENTTYPE_JAVA;
1144
1145 r = strstr( path, ".js.php" );
1146 if (r != NULL && strlen(r) == 7)
1147 return CONTENTTYPE_JAVA;
1148 r = strstr( path, "/js.php" );
1149 if (r != NULL && strlen(r) == 7)
1150 return CONTENTTYPE_JAVA;
1151 r = strstr( path, "/javascript.php" );
1152 if (r != NULL && strlen(r) == 15)
1153 return CONTENTTYPE_JAVA;
1154
1155 r = strstr( path, "/css.php" );
1156 if (r != NULL && strlen(r) == 8)
1157 return CONTENTTYPE_CSS;
1158
1159 r = strstr( path, "/image.php" );
1160 if (r != NULL && strlen(r) == 10)
1161 return CONTENTTYPE_IMAGE;
1162 r = strstr( path, "/pic.php" );
1163 if (r != NULL && strlen(r) == 8)
1164 return CONTENTTYPE_IMAGE;
1165 if (strstr( path, "/image.php/" ) != NULL)
1166 return CONTENTTYPE_IMAGE;
1167 if (strstr( path, "/image.svc/" ) != NULL)
1168 return CONTENTTYPE_IMAGE;
1169
1170 r = strstr( path, "/java.php" );
1171 if (r != NULL && strlen(r) == 9)
1172 return CONTENTTYPE_JAVA;
1173
1174 if (strstr( path, "/js.ng/" ) != NULL ||
1175 strstr( path, "/js/" ) != NULL)
1176 return CONTENTTYPE_JAVA;
1177
1178 if (strstr( path, "/scripts/" ) != NULL ||
1179 strstr( path, "/script/" ) != NULL)
1180 return CONTENTTYPE_JAVA;
1181
1182 if (strncmp( url, "xml.", 4 ) == 0)
1183 return CONTENTTYPE_XML;
1184
1185 /* SiteCatalyst beacon */
1186 if (strncmp( path, "/b/ss/", 6 ) == 0 &&
1187 (strstr( path, "/fas" ) != NULL ||
1188 strstr( path, "/h." ) != NULL ||
1189 strstr( path, "/g." ) != NULL ))
1190 return CONTENTTYPE_IMAGE;
1191
1192 if (strcmp( path, "/csi" ) == 0 &&
1193 (strstr( url, "google." ) != NULL || strstr( url, "gstatic.com" ) != NULL))
1194 return CONTENTTYPE_IMAGE;
1195
1196 if (strcmp( path, "/www/delivery/ajs.php" ) == 0 ||
1197 strcmp( path, "/www/delivery/spcjs.php" ) == 0 ||
1198 strcmp( path, "/openx/www/delivery/ajs.php" ) == 0)
1199 return CONTENTTYPE_JAVA;
1200 if (strcmp( path, "/www/delivery/lg.php" ) == 0 ||
1201 strcmp( path, "/openx/www/delivery/lg.php" ) == 0 ||
1202 strcmp( path, "/jump/www/delivery/lg.php" ) == 0)
1203 return CONTENTTYPE_IMAGE;
1204 if ((r = strstr( url, "openx." )) != NULL && r < path)
1205 {
1206 if (strcmp( path, "/jstag" ) == 0)
1207 return CONTENTTYPE_JAVA;
1208 if (strcmp( path, "/ajs.php" ) == 0)
1209 return CONTENTTYPE_JAVA;
1210 if (strcmp( path, "/ag.php" ) == 0)
1211 return CONTENTTYPE_JAVA;
1212 if (strcmp( path, "/lg.php" ) == 0)
1213 return CONTENTTYPE_IMAGE;
1214 if (strcmp( path, "/img" ) == 0)
1215 return CONTENTTYPE_IMAGE;
1216 if (strcmp( path, "/json" ) == 0)
1217 return CONTENTTYPE_JSON;
1218 if (strcmp( path, "/set" ) == 0)
1219 return CONTENTTYPE_IMAGE;
1220 /* afr.php is HTML */
1221 /* http://us-ads.openx.net/w/1.0/afr is HTML */
1222 }
1223
1224 if (strcmp( url, "x.bidswitch.net/ul_cb/sync" ) == 0 ||
1225 strcmp( url, "x.bidswitch.net/sync" ) == 0)
1226 return CONTENTTYPE_IMAGE;
1227
1228 if (strcmp( url, "metrics.brightcove.com/tracker" ) == 0)
1229 return CONTENTTYPE_IMAGE;
1230
1231 if (strcmp( url, "tl.r7ls.net/fl/" ) == 0)
1232 return CONTENTTYPE_IMAGE;
1233
1234 if (strncmp( url, "eniro.tns-cs.net/j0", 19 ) == 0)
1235 return CONTENTTYPE_IMAGE;
1236
1237 if (strcmp( url, "www.stumbleupon.com/hostedbadge.php" ) == 0)
1238 return CONTENTTYPE_JAVA;
1239 if (strcmp( url, "www.stumbleupon.com/services/1.1/badge.getinfo" ) == 0)
1240 return CONTENTTYPE_JSON;
1241
1242 if (strcmp( url, "www.facebook.com/brandlift.php" ) == 0)
1243 return CONTENTTYPE_IMAGE;
1244
1245 if (strstr( url, ".channel.facebook.com/x/" ) != NULL)
1246 return CONTENTTYPE_JSON;
1247 if (strstr( url, ".channel.facebook.com/p" ) != NULL)
1248 return CONTENTTYPE_TEXT;
1249
1250 if (strcmp( url, "maps.google.com/maps/ms" ) == 0)
1251 return CONTENTTYPE_JAVA;
1252
1253 if (strncmp( url, "stats.jtvnw.net/", 16 ) == 0 && path[1] >= '0' && path[1] <= '9')
1254 return CONTENTTYPE_IMAGE;
1255
1256 if (strncmp( url, "dev.visualwebsiteoptimizer.com/j.php", 36 ) == 0)
1257 return CONTENTTYPE_JAVA;
1258
1259 if (strcmp( url, "sendgrid.me/wf/open" ) == 0)
1260 return CONTENTTYPE_IMAGE;
1261
1262 if (strcmp( url, "hitserver.ibope.com.br/b" ) == 0)
1263 return CONTENTTYPE_IMAGE;
1264
1265 if (strcmp( url, "data.coremetrics.com/eluminate" ) == 0)
1266 return CONTENTTYPE_IMAGE;
1267
1268 if ((r = strstr( url, "tt.omtrdc.net" )) != NULL &&
1269 r < path &&
1270 strstr( path, "/mbox" ) != NULL)
1271 return CONTENTTYPE_JAVA;
1272
1273 if (strstr( url, "emediate.eu/eas" ) != NULL)
1274 return CONTENTTYPE_JAVA;
1275
1276 if (strstr( path, "/xml-rpc" ) != NULL)
1277 return CONTENTTYPE_XML;
1278
1279 if (strstr( path, "/xml/" ) != NULL)
1280 return CONTENTTYPE_XML;
1281
1282 if (strcmp( path, "/open/1" ) == 0)
1283 return CONTENTTYPE_STREAM;
1284
1285 #if 0
1286 if (strstr( path, "/video/" ) != NULL)
1287 return CONTENTTYPE_STREAM;
1288 #endif
1289
1290 /* NOTE: http://pixel.mathtag.com/event/js?mt_id=... is JAVA !!! */
1291
1292 if (strstr( path, "/jserver/" ) != NULL ||
1293 strstr( path, "/js/" ) != NULL ||
1294 ((r = strstr( path, "/js")) != NULL && strlen(r) == 3) )
1295 return CONTENTTYPE_JAVA;
1296
1297 if (strncmp( url, "pixel.", 6 ) == 0 ||
1298 strncmp( url, "pix.", 4 ) == 0)
1299 return CONTENTTYPE_IMAGE;
1300
1301 r = strstr( path, "/pixel" );
1302 if (r != NULL && strlen(r) == 6)
1303 return CONTENTTYPE_IMAGE;
1304
1305 if (strcmp( path, "/stats/beacon" ) == 0)
1306 return CONTENTTYPE_IMAGE;
1307
1308 if (strcmp( url, "vk.com/videostats.php" ) == 0)
1309 return CONTENTTYPE_TEXT;
1310
1311 if (strcmp( url, "www.adhood.com/adserver/ad.php" ) == 0)
1312 return CONTENTTYPE_JAVA;
1313
1314 if (strcmp( url, "t.beanstalkdata.com/webvisit" ) == 0 ||
1315 strcmp( url, "proc.beanstalkdata.com/mongo/track.php" ) == 0)
1316 return CONTENTTYPE_IMAGE;
1317
1318 if (strcmp( url, "hit.deckstats.com/es/s/ctrl" ) == 0)
1319 return CONTENTTYPE_IMAGE;
1320
1321 if (strncmp( path, "/j0=", 4 ) == 0)
1322 return CONTENTTYPE_IMAGE;
1323
1324 if (strncmp( url, "images.", 7 ) == 0 ||
1325 strncmp( url, "image.", 6 ) == 0)
1326 return CONTENTTYPE_IMAGE;
1327
1328 if (strstr( url, "webtrekk" ) != NULL &&
1329 (strstr( path, "/wt" ) != NULL ||
1330 strstr( path, "/hm" ) != NULL ||
1331 strstr( path, "/ce" ) != NULL))
1332 return CONTENTTYPE_IMAGE;
1333
1334 if (strcmp( path, "/json-proxy" ) == 0 || strcmp( path, "/json/" ) == 0)
1335 return CONTENTTYPE_JSON;
1336
1337 if (strstr( path, "/jsonp" ) != NULL)
1338 return CONTENTTYPE_JSON;
1339
1340 if (strstr( path, "/piwik.php" ) != NULL)
1341 return CONTENTTYPE_IMAGE;
1342
1343 if ((r = strstr( url, "cedexis" )) != NULL && r < path && strlen(path) > 6)
1344 return CONTENTTYPE_JAVA;
1345
1346 if (strncmp( path, "/cgi-bin/ivw/cp/", 16 ) == 0)
1347 return CONTENTTYPE_IMAGE;
1348
1349 if (strstr( path, "/js_" ) != NULL)
1350 return CONTENTTYPE_JAVA;
1351
1352 if (strncmp( url, "img.", 4 ) == 0)
1353 return CONTENTTYPE_IMAGE;
1354
1355 if (strncmp( path, "/imp", 4 ) == 0)
1356 return CONTENTTYPE_IMAGE;
1357
1358 if (strstr( path, "/track" ) != NULL) /* trackers can be image/java etc. type=text is safe because it is empty */
1359 return CONTENTTYPE_TEXT;
1360
1361 if (strstr( url, ".cnzz.com/stat.htm" ) != NULL) /* usually a gif but an empty doc is safer */
1362 return CONTENTTYPE_TEXT;
1363
1364 if (strncmp( url, "log.", 4 ) == 0 ||
1365 strncmp( url, "track", 5 ) == 0) /* trackers can be image/java etc. type=text is safe because it is empty */
1366 return CONTENTTYPE_TEXT;
1367
1368 if (strcmp( path, "/adj" ) == 0)
1369 return CONTENTTYPE_JAVA;
1370
1371 if (strlen( path ) > 200 &&
1372 (strstr( url, "log" ) != NULL || /* might be a tracker */
1373 strncmp( url, "stat", 4 ) == 0))
1374 return CONTENTTYPE_TEXT;
1375
1376 if (strcmp( url, "tm.scribit.com/vat/visitt") == 0)
1377 return CONTENTTYPE_IMAGE;
1378
1379 if (strcmp( path, "/log" ) == 0 ||
1380 strcmp( path, "/stat.php" ) == 0 ||
1381 strcmp( path, "/hit.php" ) == 0 ||
1382 strstr( path, "/tracker" ) != NULL ||
1383 strstr( url, "counter" ) != NULL ||
1384 strstr( path, "/ping" ) != NULL) /* may be an image or other type, but TEXT is safe for all */
1385 return CONTENTTYPE_TEXT;
1386
1387 if (strstr( url, "/owa/log" ) != NULL)
1388 return CONTENTTYPE_TEXT; /* may be an image or other type, but TEXT is safe for all */
1389
1390 if (strstr( path, "/g.pixel" ) != NULL)
1391 return CONTENTTYPE_IMAGE;
1392
1393 if (strstr( path, "/adlog" ) != NULL) /* may be pixel or something else, TEXT is safe for all */
1394 return CONTENTTYPE_TEXT;
1395
1396 if (strstr( path, "/event" ) != NULL) /* may be pixel or something else, TEXT is safe for all */
1397 return CONTENTTYPE_TEXT;
1398
1399 if (strcmp( path, "/gen_204" ) == 0) /* most likely has 204 page */
1400 return CONTENTTYPE_204;
1401
1402 if (strlen( path ) <= 3) /* might be a beacon image, java or other type, but TEXT is safe for all */
1403 return CONTENTTYPE_TEXT;
1404
1405 if (strncmp( path, "/b/", 3 ) == 0) /* most likely a beacon image; but use text to be safer */
1406 return CONTENTTYPE_TEXT;
1407
1408 if (strncmp( url, "analytic", 8 ) == 0) /* image or other type, TEXT is safe */
1409 return CONTENTTYPE_TEXT;
1410
1411 if (strstr( path, "api/event" ) != NULL) /* image or other type, TEXT is safe */
1412 return CONTENTTYPE_TEXT;
1413
1414 if (strstr( url, "heatmap" ) != NULL) /* json, image or other type, TEXT is safe */
1415 return CONTENTTYPE_TEXT;
1416
1417 return default_type;
1418 }
1419
1420
1421 static void AnswerHttpUrlBlocked(
1422 int fd,
1423 int lang,
1424 char * parameters,
1425 const char * imagesDirectory )
1426 {
1427 char * p;
1428 char * suffix;
1429 char * pptr;
1430 time_t now_t;
1431 struct tm t;
1432 const char * mode = "default";
1433 const char * color = "orange";
1434 const char * size = "normal";
1435 const char * httpcode = NULL;
1436 const char * textcolor = "white";
1437 const char * bgcolor = "black";
1438 const char * titlesize = "+0";
1439 const char * textsize = "+0";
1440 int contentLength;
1441 int contentType;
1442 int headerLength;
1443 char clientaddr[512] = "";
1444 char clientname[256] = "";
1445 char clientuser[256] = "";
1446 char source[256] = "";
1447 char category[256] = "unknown";
1448 char header[2048];
1449 char admin[2048];
1450 char url[8192];
1451 char whyblocked[8192+100];
1452 char text[8192+100+100+256];
1453 char content[8192+8192+1200];
1454 char moreInfoParams[8192+1200];
1455
1456 #if 0
1457 if (ufdbGV.debug || ufdbGV.debugHttpd)
1458 ufdbLogMessage( "AnswerHttpUrlBlocked: %d %%s %s", lang, parameters, imagesDirectory );
1459 #endif
1460
1461 if (*parameters == '&') /* Apache mod_rewrite adds an '&' */
1462 parameters++;
1463 suffix = NULL;
1464 content[0] = '\0';
1465 contentLength = 0;
1466 strcpy( admin, "The network administrator" );
1467 strcpy( url, "unknown" );
1468
1469 /*
1470 * valid parameters are:
1471 * admin
1472 * mode
1473 * color
1474 * size
1475 * httpcode
1476 * clientaddr
1477 * clientname
1478 * clientuser/clientident
1479 * clientgroup/source
1480 * targetgroup/category
1481 * url
1482 */
1483 pptr = NULL;
1484 p = strtok_r( parameters, "&", &pptr );
1485 while (p != NULL)
1486 {
1487 char * value;
1488
1489 value = (char*) strchr( p, '=' );
1490 if (value == NULL)
1491 {
1492 ufdbLogError( "AnswerHttpUrlBlocked: parameter %s has no value", p );
1493 p = strtok_r( NULL, "&", &pptr );
1494 continue;
1495 }
1496 *value = '\0';
1497 value++;
1498 if (strcmp( p, "admin" ) == 0)
1499 decodeURIvalue( admin, value );
1500 else if (strcmp( p, "mode" ) == 0)
1501 mode = value;
1502 else if (strcmp( p, "color" ) == 0 || strcmp( p, "colour" ) == 0)
1503 color = value;
1504 else if (strcmp( p, "size" ) == 0)
1505 size = value;
1506 else if (strcmp( p, "httpcode" ) == 0)
1507 httpcode = value;
1508 else if (strcmp( p, "clientaddr" ) == 0)
1509 decodeURIvalue( clientaddr, value );
1510 else if (strcmp( p, "clientname" ) == 0)
1511 decodeURIvalue( clientname, value );
1512 else if (strcmp( p, "clientuser" ) == 0 || strcmp( p, "clientident" ) == 0)
1513 decodeURIvalue( clientuser, value );
1514 else if (strcmp( p, "clientgroup" ) == 0 || strcmp( p, "source" ) == 0 || strcmp( p, "srcclass" ) == 0)
1515 decodeURIvalue( source, value );
1516 else if (strcmp( p, "category" ) == 0 || strcmp( p, "targetgroup" ) == 0 || strcmp( p, "targetclass" ) == 0)
1517 decodeURIvalue( category, value );
1518 else if (strcmp( p, "url" ) == 0)
1519 {
1520 decodeURIvalue( url, value );
1521
1522 /* strip the URL */
1523 p = strchr( url, '?' );
1524 if (p != NULL)
1525 *p = '\0';
1526 else
1527 {
1528 p = strchr( url, ';' );
1529 if (p != NULL)
1530 *p = '\0';
1531 else
1532 {
1533 p = strchr( url, '&' );
1534 if (p != NULL)
1535 *p = '\0';
1536 }
1537 }
1538
1539 #if 0
1540 if (ufdbGV.debug || ufdbGV.debugHttpd)
1541 ufdbLogMessage( " blocked url: %s", url );
1542 #endif
1543
1544 /* url is the last parameter so stop parsing */
1545 break;
1546 }
1547 else
1548 ufdbLogError( "AnswerHttpUrlBlocked: unknown parameter '%s'", p );
1549
1550 p = strtok_r( NULL, "&", &pptr );
1551 }
1552
1553 content[0] = '\0';
1554
1555 p = strstr( url, "://" );
1556 if (p == NULL)
1557 pptr = strchr( url, '/' );
1558 else
1559 pptr = strchr( p+3, '/' );
1560
1561 if (pptr == NULL) /* no URL path... */
1562 {
1563 contentType = guessContentType( url, CONTENTTYPE_HTML );
1564 p = NULL;
1565 }
1566 else
1567 {
1568 p = strrchr( pptr, '.' );
1569 if (p == NULL || strlen(p) > 6) /* URL path has no suffix */
1570 {
1571 p = NULL;
1572 contentType = guessContentType( url, CONTENTTYPE_HTML );
1573 }
1574 else
1575 {
1576 char * ch;
1577
1578 p++;
1579 ch = p;
1580 while (*ch != '\0')
1581 {
1582 if (isupper(*ch))
1583 *ch = tolower(*ch);
1584 ch++;
1585 }
1586 suffix = p;
1587
1588 if (strmatch3( p, "bmp" ) ||
1589 strmatch3( p, "gif" ) ||
1590 strmatch3( p, "ico" ) ||
1591 strmatch3( p, "img" ) ||
1592 strmatch3( p, "jpg" ) ||
1593 strmatch3( p, "jpe" ) ||
1594 strmatch4( p, "jpeg" ) ||
1595 strmatch3( p, "png" ) ||
1596 strmatch4( p, "webp" ) ||
1597 strmatch4( p, "tiff" ))
1598 {
1599 contentType = CONTENTTYPE_IMAGE;
1600 }
1601 else if (strmatch3( p, "css" ))
1602 {
1603 contentType = CONTENTTYPE_CSS;
1604 }
1605 else if (strmatch2( p, "js" ) ||
1606 strmatch3( p, "jar" ))
1607 {
1608 contentType = CONTENTTYPE_JAVA;
1609 }
1610 else if (strmatch3( p, "xml" ) ||
1611 strmatch4( p, "sxml" ) ||
1612 strmatch3( p, "rss" ))
1613 {
1614 contentType = CONTENTTYPE_XML;
1615 }
1616 else if (strmatch3( p, "asx" ) ||
1617 strmatch3( p, "cab" ) ||
1618 strmatch5( p, "class" ) ||
1619 strmatch4( p, "divx" ) ||
1620 strmatch4( p, "h264" ) ||
1621 strmatch3( p, "mpg" ) ||
1622 strmatch4( p, "mpeg" ) ||
1623 strmatch3( p, "ogv" ) ||
1624 strmatch2( p, "qt" ) ||
1625 strmatch2( p, "ra" ) ||
1626 strmatch3( p, "ram" ) ||
1627 strmatch2( p, "rv" ) ||
1628 strmatch3( p, "wmv" ) ||
1629 strmatch3( p, "avi" ) ||
1630 strmatch3( p, "mov" ) ||
1631 strmatch3( p, "swf" ) ||
1632 strmatch3( p, "mp4" ) ||
1633 strmatch3( p, "m4v" ) ||
1634 strmatch3( p, "flv" ) ||
1635 strmatch3( p, "f4v" ) ||
1636 strmatch3( p, "bz2" ) ||
1637 strmatch3( p, "dat" ) ||
1638 strmatch3( p, "doc" ) ||
1639 strmatch2( p, "xz" ) ||
1640 strmatch2( p, "gz" ) ||
1641 strmatch3( p, "mp3" ) ||
1642 strmatch3( p, "msi" ) ||
1643 strmatch3( p, "mst" ) ||
1644 strmatch3( p, "ppt" ) ||
1645 strmatch3( p, "pdf" ) ||
1646 strmatch3( p, "rar" ) ||
1647 strmatch3( p, "tar" ) ||
1648 strmatch3( p, "ttf" ) ||
1649 strmatch3( p, "xls" ) ||
1650 strmatch3( p, "zip" ) ||
1651 strmatch4( p, "woff" ) ||
1652 strmatch5( p, "woff2" ) ||
1653 strmatch3( p, "bin" ))
1654 {
1655 contentType = CONTENTTYPE_STREAM;
1656 }
1657 else if (strmatch3( p, "txt" ) ||
1658 strmatch3( p, "csv" ))
1659 {
1660 contentType = CONTENTTYPE_TEXT;
1661 }
1662 else if (strmatch4( p, "json" ))
1663 {
1664 contentType = CONTENTTYPE_JSON;
1665 }
1666 else if (strmatch3( p, "htm" ) ||
1667 strmatch5( p, "shtml" ) ||
1668 strmatch5( p, "dhtml" ) ||
1669 strmatch4( p, "html" ))
1670 {
1671 contentType = CONTENTTYPE_HTML;
1672 }
1673 else /* there is an unknown suffix */
1674 {
1675 contentType = guessContentType( url, CONTENTTYPE_HTML );
1676 }
1677 }
1678 }
1679
1680 /* sample values (presumably mode, color, category, suffix, admin, url -- TODO confirm): transparent grey ads xml The network administrator http://adnet.bluebillywig.com/crossdomain.xml */
1681 if (ufdbGV.debug || ufdbGV.debugHttpd)
1682 ufdbLogMessage( " AnswerHttpUrlBlocked: mode %s color %s category %s content-type %s p \"%s\" admin \"%s\" httpcode \"%s\" url %s",
1683 mode, color, category, contentType2String(contentType),
1684 p == NULL ? "" : p, admin,
1685 httpcode == NULL ? "-" : httpcode, url );
1686
1687 if (strcmp( color, "orange" ) == 0)
1688 {
1689 textcolor = "white";
1690 bgcolor = "#ee8811";
1691 }
1692 else if (strcmp( color, "white" ) == 0)
1693 {
1694 textcolor = "#3f003f";
1695 bgcolor = "white";
1696 }
1697 else if (strcmp( color, "black" ) == 0)
1698 {
1699 textcolor = "#f0f0f0";
1700 bgcolor = "black";
1701 }
1702 else if (strcmp( color, "red" ) == 0)
1703 {
1704 textcolor = "#f0f0f0";
1705 bgcolor = "red";
1706 }
1707 else if (strcmp( color, "grey" ) == 0 || strcmp( color, "gray" ) == 0)
1708 {
1709 textcolor = "#111111";
1710 bgcolor = "#c2c2c2";
1711 }
1712 else
1713 {
1714 textcolor = "white";
1715 bgcolor = "#ee8811";
1716 }
1717
1718 if (strcmp( size, "normal" ) == 0)
1719 {
1720 titlesize = "+2";
1721 textsize = "+0";
1722 }
1723 else if (strcmp( size, "small" ) == 0)
1724 {
1725 titlesize = "+1";
1726 textsize = "-1";
1727 }
1728 else if (strcmp( size, "large" ) == 0)
1729 {
1730 titlesize = "+3";
1731 textsize = "+1";
1732 }
1733 else
1734 {
1735 titlesize = "+2";
1736 textsize = "+0";
1737 }
1738
1739 now_t = time( NULL );
1740 gmtime_r( &now_t, &t );
1741
1742 snprintf( moreInfoParams, sizeof(moreInfoParams), "ufdbcat=%s&ufdbsrc=%s&ufdburl=%s", category, source, url );
1743
1744 if (httpcode != NULL && strcmp( httpcode, "204" ) == 0)
1745 contentType = CONTENTTYPE_204;
1746
1747 /* Check for graphics and send a bitmap. */
1748 if (contentType == CONTENTTYPE_IMAGE)
1749 {
1750 int ifd;
1751 char file[1024];
1752
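1753 /* Send a PNG image for URLs classified as images, e.g. with one of the suffixes
1754 * bmp, gif, ico, img, jpg, jpe, jpeg, png, webp, tiff.
1755 * For the categories ads, always-block and alwaysblock the image is chosen by mode only (default: transparent.png).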
1756 */
1757 headerLength = snprintf( header, sizeof(header),
1758 "HTTP/1.0 200 OK\r\n"
1759 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
1760 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
1761 "Cache-Control: max-age=180\r\n"
1762 "Connection: close\r\n"
1763 "Content-Type: image/png\r\n"
1764 "\r\n",
1765 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
1766 t.tm_mday,
1767 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
1768 t.tm_year + 1900,
1769 t.tm_hour, t.tm_min, t.tm_sec
1770 );
1771 if (strcmp( category, "ads" ) == 0 ||
1772 strcmp( category, "always-block" ) == 0 ||
1773 strcmp( category, "alwaysblock" ) == 0 )
1774 {
1775 if (strcmp( mode, "noads" ) == 0)
1776 p = (char*) "no-ads.png";
1777 else if (strcmp( mode, "cross" ) == 0)
1778 p = (char*) "smallcross.png";
1779 else if (strcmp( mode, "square" ) == 0)
1780 p = (char*) "square.png";
1781 else if (strcmp( mode, "simple-red" ) == 0)
1782 p = (char*) "transparent.png";
1783 else /* transparent */
1784 p = (char*) "transparent.png";
1785 snprintf( file, sizeof(file), "%s/%s", imagesDirectory, p );
1786 ifd = open( file, O_RDONLY );
1787 if (ifd < 0)
1788 {
1789 ufdbLogError( "cannot open image file %s: %s *****", file, strerror(errno) );
1790 contentLength = 0;
1791 }
1792 else
1793 {
1794 contentLength = read( ifd, content, sizeof(content)-1 );
1795 if (contentLength < 0)
1796 ufdbLogError( "cannot read image file %s: %s *****", file, strerror(errno) );
1797 close( ifd );
1798 }
1799 }
1800 else /* send bitmap; category is NOT "ads" */
1801 {
1802 const char * langStr;
1803
1804 switch (lang)
1805 {
1806 case LANG_IND_NL: langStr = "nl"; break;
1807 case LANG_IND_DE: langStr = "de"; break;
1808 case LANG_IND_PL: langStr = "pl"; break;
1809 case LANG_IND_IT: langStr = "it"; break;
1810 case LANG_IND_ES: langStr = "es"; break;
1811 case LANG_IND_PT: langStr = "pt"; break;
1812 case LANG_IND_FR: langStr = "fr"; break;
1813 case LANG_IND_TR: langStr = "tr"; break;
1814 case LANG_IND_SV: langStr = "sv"; break;
1815 case LANG_IND_EN:
1816 default: langStr = "en";
1817 }
1818 if (strcmp( mode, "cross" ) == 0)
1819 snprintf( file, sizeof(file), "%s/smallcross.png", imagesDirectory );
1820 else if (strcmp( mode, "square" ) == 0)
1821 snprintf( file, sizeof(file), "%s/square.png", imagesDirectory );
1822 else if (strcmp( mode, "simple-red" ) == 0)
1823 snprintf( file, sizeof(file), "%s/transparent.png", imagesDirectory );
1824 else if (strcmp( mode, "transparent" ) == 0 || strcmp( mode, "transparant" ) == 0)
1825 snprintf( file, sizeof(file), "%s/transparent.png", imagesDirectory );
1826 else
1827 snprintf( file, sizeof(file), "%s/forbidden-normal-%s.png", imagesDirectory, langStr );
1828 ifd = open( file, O_RDONLY );
1829 if (ifd < 0)
1830 {
1831 ufdbLogError( "cannot open image file %s: %s *****", file, strerror(errno) );
1832 contentLength = 0;
1833 }
1834 else
1835 {
1836 contentLength = read( ifd, content, sizeof(content)-1 );
1837 if (contentLength < 0)
1838 ufdbLogError( "cannot read image file %s: %s *****", file, strerror(errno) );
1839 close( ifd );
1840 }
1841 }
1842 }
1843 else if (contentType == CONTENTTYPE_JAVA)
1844 {
1845 /*
1846 * send empty reply for URLs that have one of the following suffixes:
1847 * js, jar
1848 */
1849 headerLength = snprintf( header, sizeof(header),
1850 "HTTP/1.0 200 OK\r\n"
1851 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
1852 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
1853 "Cache-Control: max-age=180\r\n"
1854 "Connection: close\r\n"
1855 "Content-Type: text/javascript\r\n"
1856 "\r\n",
1857 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
1858 t.tm_mday,
1859 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
1860 t.tm_year + 1900,
1861 t.tm_hour, t.tm_min, t.tm_sec
1862 );
1863 content[0] = '\0';
1864 contentLength = 0;
1865 }
1866 else if (contentType == CONTENTTYPE_STREAM)
1867 {
1868 const char * mimeType;
1869
1870 mimeType = "application/octet-stream";
1871 content[0] = '\0';
1872 contentLength = 0;
1873
1874 if (ufdbGV.debug)
1875 ufdbLogMessage( "blocking stream for %s: suffix: \"%s\" ", url, suffix==NULL?"null":suffix );
1876
1877 if (suffix != NULL)
1878 {
1879 if (defaultFLVsize > 0 &&
1880 (strmatch3( suffix, "flv" ) ||
1881 strmatch3( suffix, "f4v" ) ||
1882 strmatch3( suffix, "f4a" ) ||
1883 strmatch3( suffix, "f4b" ) ||
1884 strmatch3( suffix, "f4p" ) ||
1885 strmatch3( suffix, "swf" )))
1886 {
1887 mimeType = "video/x-flv";
1888 memcpy( content, defaultFLVcontent, defaultFLVsize );
1889 contentLength = defaultFLVsize;
1890 }
1891 else
1892 if (defaultMP3size > 0 &&
1893 (strmatch3( suffix, "mp3" ) ||
1894 strmatch3( suffix, "mp2" )))
1895 {
1896 mimeType = "audio/mp3";
1897 memcpy( content, defaultMP3content, defaultMP3size );
1898 contentLength = defaultMP3size;
1899 }
1900 else
1901 if (defaultMP3size > 0 &&
1902 (strmatch3( suffix, "mp4" ) ||
1903 strmatch4( suffix, "mpeg" ) ||
1904 strmatch3( suffix, "mpe" ) ||
1905 strmatch3( suffix, "mpg" ) ||
1906 strmatch3( suffix, "mpa" ) ||
1907 strmatch4( suffix, "mpg4" )))
1908 {
1909 mimeType = "video/mpeg";
1910 memcpy( content, defaultMPGcontent, defaultMPGsize );
1911 contentLength = defaultMPGsize;
1912 }
1913 else
1914 if (defaultWMVsize > 0 &&
1915 (strmatch3( suffix, "wmv" ) ||
1916 strmatch3( suffix, "asf" ) ||
1917 strmatch3( suffix, "wvx" )))
1918 {
1919 mimeType = "video/x-ms-wmv";
1920 memcpy( content, defaultWMVcontent, defaultWMVsize );
1921 contentLength = defaultWMVsize;
1922 }
1923 }
1924
1925 /*
1926 * send the reply for URLs that have a stream suffix (rar, tar, zip, ...);
1927 * the body is empty unless default media content was selected above.
1928 */
1929 headerLength = snprintf( header, sizeof(header),
1930 "HTTP/1.0 200 OK\r\n"
1931 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
1932 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
1933 "Cache-Control: max-age=180\r\n"
1934 "Connection: close\r\n"
1935 "Content-Type: %s\r\n"
1936 "Content-Length: %d\r\n"
1937 "\r\n",
1938 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
1939 t.tm_mday,
1940 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
1941 t.tm_year + 1900,
1942 t.tm_hour, t.tm_min, t.tm_sec,
1943 mimeType,
1944 contentLength
1945 );
1946 }
1947 else if (contentType == CONTENTTYPE_JSON)
1948 {
1949 /*
1950 * send empty reply for URLs that have one of the following suffixes:
1951 * json
1952 */
1953 headerLength = snprintf( header, sizeof(header),
1954 "HTTP/1.0 200 OK\r\n"
1955 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
1956 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
1957 "Cache-Control: max-age=180\r\n"
1958 "Connection: close\r\n"
1959 "Content-Type: application/json\r\n"
1960 "\r\n",
1961 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
1962 t.tm_mday,
1963 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
1964 t.tm_year + 1900,
1965 t.tm_hour, t.tm_min, t.tm_sec
1966 );
1967 content[0] = '\0';
1968 contentLength = 0;
1969 }
1970 else if (contentType == CONTENTTYPE_CSS)
1971 {
1972 /*
1973 * send empty reply for URLs that have one of the following suffixes:
1974 * css
1975 */
1976 headerLength = snprintf( header, sizeof(header),
1977 "HTTP/1.0 200 OK\r\n"
1978 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
1979 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
1980 "Cache-Control: max-age=180\r\n"
1981 "Connection: close\r\n"
1982 "Content-Type: text/css\r\n"
1983 "\r\n",
1984 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
1985 t.tm_mday,
1986 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
1987 t.tm_year + 1900,
1988 t.tm_hour, t.tm_min, t.tm_sec
1989 );
1990 strcpy( content, "\r\n" );
1991 contentLength = 2;
1992 }
1993 else if (contentType == CONTENTTYPE_TEXT)
1994 {
1995 /*
1996 * send empty reply for URLs that have one of the following suffixes:
1997 * txt
1998 */
1999 headerLength = snprintf( header, sizeof(header),
2000 "HTTP/1.0 200 OK\r\n"
2001 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
2002 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
2003 "Cache-Control: max-age=180\r\n"
2004 "Connection: close\r\n"
2005 "Content-Type: text/plain\r\n"
2006 "\r\n",
2007 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
2008 t.tm_mday,
2009 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
2010 t.tm_year + 1900,
2011 t.tm_hour, t.tm_min, t.tm_sec
2012 );
2013 content[0] = '\0';
2014 contentLength = 0;
2015 }
2016 else if (contentType == CONTENTTYPE_XML)
2017 {
2018 headerLength = snprintf( header, sizeof(header),
2019 "HTTP/1.0 200 OK\r\n"
2020 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
2021 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
2022 "Cache-Control: max-age=180\r\n"
2023 "Content-Type: text/xml\r\n"
2024 "Connection: close\r\n"
2025 "\r\n",
2026 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
2027 t.tm_mday,
2028 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
2029 t.tm_year + 1900,
2030 t.tm_hour, t.tm_min, t.tm_sec
2031 );
2032
2033 if (strstr( url, "/crossdomain.xml" ) != NULL ||
2034 strstr( url, "/clientaccesspolicy.xml" ) != NULL)
2035 {
2036 strcpy( content, "<?xml version=\"1.0\"?>\r\n"
2037 "<cross-domain-policy>\r\n"
2038 " <allow-access-from domain=\"*\" secure=\"false\" />\r\n"
2039 " <allow-http-request-headers-from domain=\"*\" headers=\"*\" secure=\"false\"/>\r\n"
2040 " <site-control permitted-cross-domain-policies=\"all\"/>\r\n"
2041 "</cross-domain-policy>\r\n" );
2042 }
2043 else
2044 {
2045 strcpy( content, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n"
2046 "<ufdbguardd>\r\n"
2047 " <dummy value=\"0\" />\r\n"
2048 "</ufdbguardd>\r\n" );
2049 }
2050 contentLength = strlen( content );
2051 }
2052 else if (contentType == CONTENTTYPE_204)
2053 {
2054 /*
2055 * send an HTTP 204 (No Content) response
2056 */
2057 headerLength = snprintf( header, sizeof(header),
2058 "HTTP/1.0 204 No Content\r\n"
2059 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
2060 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
2061 "Cache-Control: max-age=180\r\n"
2062 "Connection: close\r\n"
2063 "Content-Type: text/plain\r\n"
2064 "X-blocked-category: %s\r\n"
2065 "X-blocked-URL: %s\r\n"
2066 "\r\n",
2067 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
2068 t.tm_mday,
2069 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
2070 t.tm_year + 1900,
2071 t.tm_hour, t.tm_min, t.tm_sec,
2072 category,
2073 url
2074 );
2075 content[0] = '\0';
2076 contentLength = 0;
2077 }
2078 else /********************************************** contentType == CONTENTTYPE_HTML */
2079 {
2080 headerLength = snprintf( header, sizeof(header),
2081 "HTTP/1.0 200 OK\r\n"
2082 "Date: %3.3s, %02d %3.3s %4d %02d:%02d:%02d GMT\r\n"
2083 "Server: ufdbhttpd/" UFDB_VERSION "\r\n"
2084 "Cache-Control: max-age=180\r\n"
2085 "Connection: close\r\n"
2086 "Content-Type: text/html\r\n"
2087 "\r\n",
2088 &"SunMonTueWedThuFriSat"[t.tm_wday*3],
2089 t.tm_mday,
2090 &"JanFebMarAprMayJunJulAugSepOctNovDec"[t.tm_mon*3],
2091 t.tm_year + 1900,
2092 t.tm_hour, t.tm_min, t.tm_sec
2093 );
2094
2095 if (strcmp( category, "fatal-error" ) == 0) /* HTML, fatal-error */
2096 {
2097 strcpy( text, _fatal_error_text );
2098 contentLength = snprintf( content, sizeof(content),
2099 "<html>\r\n"
2100 "<head>\r\n"
2101 "<title>%s</title>\r\n"
2102 "</head>\r\n"
2103 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1 bgcolor=\"%s\" text=\"%s\">\r\n"
2104 "%s\r\n"
2105 "</body>\r\n"
2106 HTML_COMMENT
2107 "</html>\r\n"
2108 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2109 ,
2110 _title[lang],
2111 bgcolor, textcolor,
2112 text,
2113 lang, color, mode, category, contentType2String(contentType), url );
2114 }
2115 else if (strcmp( category, "loading-database" ) == 0) /* HTML, loading-database */
2116 {
2117 strcpy( text, _loading_database_text );
2118 contentLength = snprintf( content, sizeof(content),
2119 "<html>\r\n"
2120 "<head>\r\n"
2121 "<title>%s</title>\r\n"
2122 "</head>\r\n"
2123 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1 bgcolor=\"%s\" text=\"%s\">\r\n"
2124 "%s\r\n"
2125 "</body>\r\n"
2126 HTML_COMMENT
2127 "</html>\r\n"
2128 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2129 ,
2130 _title[lang],
2131 bgcolor, textcolor,
2132 text,
2133 lang, color, mode, category, contentType2String(contentType), url );
2134 }
2135 else /* HTML */
2136 {
2137 sprintf( whyblocked, "blocked by ufdbGuard: url=%s", url );
2138 if (category[0] != '\0')
2139 {
2140 strcat( whyblocked, " category=" );
2141 strcat( whyblocked, category );
2142 }
2143 if (source[0] != '\0')
2144 {
2145 strcat( whyblocked, " source=" );
2146 strcat( whyblocked, source );
2147 }
2148
2149 /*
2150 * send ads-specific reply for the ads category.
2151 */
2152 if (strcmp( category, "ads" ) == 0 || /* HTML, ADS */
2153 strcmp( category, "always-block" ) == 0 ||
2154 strcmp( category, "alwaysblock" ) == 0 )
2155 {
2156 if (strcmp( mode, "noads" ) == 0)
2157 snprintf( text, sizeof(text), " <a title=\"%s\" target=\"_blank\">no ads</a> ", whyblocked );
2158 else if (strcmp( mode, "square" ) == 0)
2159 snprintf( text, sizeof(text), " <a title=\"%s\" target=\"_blank\">[]</a> ", whyblocked );
2160 else if (strcmp( mode, "cross" ) == 0)
2161 snprintf( text, sizeof(text), " <a title=\"%s\" target=\"_blank\">x</a> ", whyblocked );
2162 else if (strcmp( mode, "simple-red" ) == 0)
2163 snprintf( text, sizeof(text), " <font color=red><i><a title=\"%s\" target=\"_blank\">%s</a></i></font> ",
2164 whyblocked, category );
2165 else if (strcmp( mode, "transparent" ) == 0 || strcmp( mode, "transparant" ) == 0)
2166 strcpy( text, " " );
2167 else /* default */
2168 {
2169 snprintf( text, sizeof(text), " <font color=\"%s\"><i><a title=\"%s\" target=\"_blank\">%s %s%s</a></i></font> ",
2170 textcolor, whyblocked, _explain_1[lang], category, _explain_2[lang] );
2171 }
2172
2173 if (strcmp( mode, "transparent" ) == 0 || strcmp( mode, "transparant" ) == 0)
2174 {
2175 /* no bgcolor and font color */
2176 contentLength = snprintf( content, sizeof(content),
2177 "<html>\r\n"
2178 "<head>\r\n"
2179 "<title>%s</title>\r\n"
2180 "</head>\r\n"
2181 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1>\r\n"
2182 "<center>%s</center>\r\n"
2183 HTML_COMMENT
2184 "</body>\r\n"
2185 "</html>\r\n"
2186 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2187 ,
2188 _title[lang],
2189 text,
2190 lang, color, mode, category, contentType2String(contentType), url );
2191 }
2192 else /* HTML, ADS, NOT transparent */
2193 {
2194 char infotext[1024];
2195
2196 if (strcmp( mode, "simple-red" ) == 0)
2197 {
2198 infotext[0] = '\0';
2199 bgcolor = "ffcccc";
2200 textcolor = "red";
2201 }
2202 else
2203 {
2204 int n;
2205 n = 0;
2206 infotext[n] = '\0';
2207 if (source[0] != '\0' && strcmp( source, "unknown" ) != 0)
2208 n += sprintf( &infotext[n], "source=%s ", source );
2209 if (clientuser[0] != '\0' && strcmp( clientuser, "unknown" ) != 0)
2210 n += sprintf( &infotext[n], "user=%s ", clientuser );
2211 if (clientaddr[0] != '\0' && strcmp( clientaddr, "unknown" ) != 0)
2212 n += sprintf( &infotext[n], "client=%s ", clientaddr );
2213 if (clientname[0] != '\0')
2214 n += sprintf( &infotext[n], "clientname=%s ", clientname );
2215 }
2216
2217 contentLength = snprintf( content, sizeof(content),
2218 "<html>\r\n"
2219 "<head>\r\n"
2220 "<title>%s</title>\r\n"
2221 "</head>\r\n"
2222 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1 bgcolor=\"%s\" text=\"%s\">\r\n"
2223 "<center>\r\n"
2224 "<font size=\"%s\">%s</font>\r\n"
2225 "<br>\r\n <p />\r\n"
2226 "<font size=\"-3\">%s</font>\r\n"
2227 "</center>\r\n"
2228 "</body>\r\n"
2229 HTML_COMMENT
2230 "</html>\r\n"
2231 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2232 ,
2233 _title[lang],
2234 bgcolor, textcolor,
2235 textsize, text,
2236 infotext,
2237 lang, color, mode, category, contentType2String(contentType), url );
2238 }
2239 }
2240 else if (strcmp( category, "social-badges" ) == 0 || /* HTML, SOCIAL BADGE */
2241 strcmp( category, "social_badges" ) == 0)
2242 {
2243 contentLength = snprintf( content, sizeof(content),
2244 "<html>\r\n"
2245 "<head>\r\n"
2246 "<title>block social networking badge</title>\r\n"
2247 "</head>\r\n"
2248 "<body bgcolor=\"#fafafa\">\r\n"
2249 "<center>\r\n"
2250 "<font size=\"-1\" color=\"#1f1f1f\">"
2251 "<a title=\"The social networking badge is blocked.\"> B </a>"
2252 "</font>\r\n"
2253 "</center>\r\n"
2254 HTML_COMMENT
2255 "</html>\r\n"
2256 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2257 "</body>\r\n"
2258 ,
2259 lang, color, mode, category, contentType2String(contentType), url );
2260
2261 }
2262 else /* HTML, all other categories */
2263 {
2264 if (strcmp( mode, "transparent" ) == 0 || strcmp( mode, "transparant" ) == 0)
2265 {
2266 contentLength = snprintf( content, sizeof(content),
2267 "<html>\r\n"
2268 "<head>\r\n"
2269 "<title>%s</title>\r\n"
2270 "</head>\r\n"
2271 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1>\r\n"
2272 "<center>\r\n"
2273 "<i>%s</i><p />\r\n"
2274 "<font size=\"%s\">\r\n"
2275 "%s <i>%s</i>%s <br>\r\n"
2276 "URL: <tt>%s</tt> <br>\r\n"
2277 "<br>\r\n"
2278 "<a href=\"javascript:history.go(-1);\">%s</a>. <br>\r\n"
2279 "<br>\r\n"
2280 "%s?%s%s\r\n"
2281 "</center>\r\n"
2282 "</font>\r\n"
2283 "</body>\r\n"
2284 HTML_COMMENT
2285 "</html>\r\n"
2286 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2287 ,
2288 _title[lang],
2289 _forbidden[lang],
2290 textsize,
2291 _explain_1[lang], category, _explain_2[lang],
2292 url,
2293 _goBack[lang],
2294 _moreInfo1[lang], moreInfoParams, _moreInfo2[lang],
2295 lang, color, mode, category, contentType2String(contentType), url );
2296 }
2297 else if (strcmp( mode, "simple-red" ) == 0) /* HTML, NO ADS, simple-red */
2298 {
2299 contentLength = snprintf( content, sizeof(content),
2300 "<html>\r\n"
2301 "<head>\r\n"
2302 "<title>%s</title>\r\n"
2303 "</head>\r\n"
2304 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1 bgcolor=\"ffcccc\" link=\"red\" alink=\"red\" vlink=\"red\" text=\"red\">\r\n"
2305 "<center>\r\n"
2306 "<a href=\"%s?%s\" title=\"%s\" target=\"_blank\"> "
2307 "%s<br>\r\n"
2308 "<i>%s</i></a>\r\n"
2309 "</center>\r\n"
2310 "</body>\r\n"
2311 HTML_COMMENT
2312 "</html>\r\n"
2313 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2314 ,
2315 _title[lang],
2316 UFDB_EXPLAIN_DENY_REASON_URL, moreInfoParams, whyblocked,
2317 _forbidden[lang],
2318 category,
2319 lang, color, mode, category, contentType2String(contentType), url );
2320 }
2321 else /* HTML, NO ADS, MODE NOT transparent/simple-red */
2322 {
2323 contentLength = snprintf( content, sizeof(content),
2324 "<html>\r\n"
2325 "<head>\r\n"
2326 "<title>%s</title>\r\n"
2327 "</head>\r\n"
2328 "<body topmargin=1 leftmargin=1 marginheight=1 marginwidth=1 bgcolor=\"%s\" link=\"%s\" alink=\"%s\" vlink=\"%s\" text=\"%s\">\r\n"
2329 "<center>\r\n"
2330 "<a href=\"%s?%s\" title=\"%s\" target=\"_blank\"> "
2331 "<font size=\"%s\">%s</font></a><p />\r\n"
2332 "<font size=\"%s\">\r\n"
2333 "%s <i>%s</i>%s</a><br>\r\n"
2334 "URL: <tt>%s</tt> <br>\r\n"
2335 "<p>\r\n"
2336 "<a title=\"%s\" href=\"javascript:history.go(-1);\">%s</a>. <br>\r\n"
2337 "%s\r\n"
2338 "<p>\r\n"
2339 "%s?%s%s\r\n"
2340 "</font>\r\n"
2341 "</center>\r\n"
2342 "</body>\r\n"
2343 HTML_COMMENT
2344 "</html>\r\n"
2345 "<!-- lang:%d %s %s %s %s %s -->\r\n"
2346 ,
2347 _title[lang],
2348 bgcolor, textcolor, textcolor, textcolor, textcolor, /* body */
2349 UFDB_EXPLAIN_DENY_REASON_URL, moreInfoParams, whyblocked,
2350 titlesize, _forbidden[lang], /* font */
2351 textsize, /* font */
2352 _explain_1[lang], category, _explain_2[lang],
2353 url,
2354 whyblocked, _goBack[lang],
2355 admin,
2356 _moreInfo1[lang], moreInfoParams, _moreInfo2[lang],
2357 lang, color, mode, category, contentType2String(contentType), url );
2358 }
2359 }
2360 }
2361 }
2362
2363 #if 0
2364 if (ufdbGV.debug)
2365 ufdbLogMessage( "AnswerHttpUrlBlocked: strlen(header): %d contentLength: %d", headerLength, contentLength );
2366 #endif
2367
2368 writeBuffer( fd, header, headerLength );
2369 if (contentLength > 0)
2370 writeBuffer( fd, content, contentLength );
2371 }
2372
2373
FindLanguageIndex(char * language)2374 static int FindLanguageIndex( char * language )
2375 {
2376 if (strmatch2( language, "en" ))
2377 return LANG_IND_EN;
2378 else if (strmatch2( language, "nl" ))
2379 return LANG_IND_NL;
2380 else if (strmatch2( language, "de" ))
2381 return LANG_IND_DE;
2382 else if (strmatch2( language, "pl" ))
2383 return LANG_IND_PL;
2384 else if (strmatch2( language, "it" ))
2385 return LANG_IND_IT;
2386 else if (strmatch2( language, "pt" ))
2387 return LANG_IND_PT;
2388 else if (strmatch2( language, "fr" ))
2389 return LANG_IND_FR;
2390 else if (strmatch2( language, "tr" ))
2391 return LANG_IND_TR;
2392 else if (strmatch2( language, "sv" ))
2393 return LANG_IND_SV;
2394 else
2395 return -1;
2396 }
2397
2398
FindLanguage(char * headers)2399 static int FindLanguage( char * headers )
2400 {
2401 char * l;
2402 char * end;
2403 int ind;
2404 int length;
2405 char language[64];
2406
2407 l = strstr( headers, "Accept-Language:" );
2408 if (l == NULL)
2409 return LANG_IND_EN;
2410 l += sizeof("Accept-Language:") - 1;
2411
2412 while (*l != '\0')
2413 {
2414 while (isspace(*l))
2415 l++;
2416 end = l;
2417 while (isalpha(*end))
2418 end++;
2419 length = end - l;
2420 if (length > 63)
2421 length = 63;
2422 strncpy( language, l, length );
2423 language[length] = '\0';
2424 ind = FindLanguageIndex( language );
2425 if (ind >= 0)
2426 return ind;
2427 /* try the next language */
2428 l = end + 1;
2429 while (!isalpha(*l) && *l != '\0')
2430 {
2431 if (*l == '\r' || *l == '\n')
2432 return LANG_IND_EN;
2433 l++;
2434 }
2435 }
2436 return LANG_IND_EN;
2437 }
2438
2439
2440 /* A typical request looks like this:
2441 *
2442 * GET /cgi-bin/URLblocked?mode=normal&... HTTP/1.1
2443 * Host: www.myserver.com
2444 * Accept: text/html
2445 * Accept-Encoding: compress
2446 * Connection: Keep-Alive
2447 *
2448 * OR
2449 *
2450 * GET /cgi-bin/URLblocked?mode=normal&... HTTP/1.0
2451 * User-Agent: Wget/1.8.2
2452 * Accept: text/html
2453 * Accept-Encoding: compress
2454 * Connection: Keep-Alive
2455 *
2456 */
ServeHttpClient(int fd,const char * imagesDirectory)2457 static void ServeHttpClient(
2458 int fd,
2459 const char * imagesDirectory )
2460 {
2461 int start, maxbytes;
2462 int nbytes;
2463 int ntrials;
2464 time_t start_time, now;
2465 char * p;
2466 char * command;
2467 char * reqptr;
2468 char request[16384];
2469
2470 start_time = time( NULL );
2471 if (ufdbGV.debug)
2472 ufdbLogMessage( "ServeHttpClient t %3ld fd %d", (long) (start_time%1000), fd );
2473
2474 ntrials = 20;
2475 start = 0;
2476
2477 try_again:
2478 errno = 0;
2479 maxbytes = 16380 - start;
2480 nbytes = read( fd, &request[start], maxbytes );
2481 now = time( NULL );
2482 if (ufdbGV.debug)
2483 ufdbLogMessage( "ServeHttpClient t %3ld fd %d read %d bytes", (long) (now%1000), fd, nbytes );
2484 if (nbytes < 0)
2485 {
2486 if (now - start_time >= 4)
2487 {
2488 ufdbLogError( "ServeHttpClient: timeout with %d bytes received", start );
2489 AnswerHttpTimeout( fd );
2490 return;
2491 }
2492 if (errno == EINTR || errno == EAGAIN)
2493 goto try_again;
2494 else
2495 {
2496 ufdbLogError( "ServeHttpClient: unrecoverable error: %s", strerror(errno) );
2497 AnswerHttpEmpty( fd, "unknown" );
2498 return;
2499 }
2500 }
2501 start += nbytes;
2502 request[start] = '\0';
2503 if (strstr( request, "\r\n\r\n" ) == NULL)
2504 {
2505 if (--ntrials > 0)
2506 {
2507 if (now - start_time >= 4)
2508 {
2509 ufdbLogError( "ServeHttpClient: timeout with %d bytes received", start );
2510 AnswerHttpTimeout( fd );
2511 return;
2512 }
2513 usleep( 5001 );
2514 goto try_again;
2515 }
2516 else
2517 {
2518 ufdbLogError( "ServeHttpClient: did not get a whole HTTP request within the time limit\n"
2519 "I got: <<%s>>",
2520 request );
2521 AnswerHttpEmpty( fd, "unknown" );
2522 return;
2523 }
2524 }
2525
2526 if (ufdbGV.debugHttpd || ufdbGV.debug)
2527 ufdbLogMessage( "ServeHttpClient: new request:\n%s", request );
2528
2529 /* We got the full header. Now we can parse it and send an answer. */
2530 p = strtok_r( request, " \t", &reqptr );
2531 if (p == NULL ||
2532 (strcmp( p, "GET" ) != 0 && strcmp( p, "POST" ) != 0 && strcmp( p, "HEAD" ) != 0))
2533 {
2534 ufdbLogError( "ServeHttpClient: we got an unsupported message that is not a http GET/HEAD/POST but '%s'",
2535 p==NULL ? "NULL" : p );
2536 AnswerHttpNotFound( fd, p==NULL ? "NULL" : p );
2537 return;
2538 }
2539 command = p;
2540
2541 p = strtok_r( NULL, "? \t", &reqptr );
2542 if (p == NULL)
2543 {
2544 ufdbLogError( "ServeHttpClient: received GET/HEAD/POST command without URL" );
2545 AnswerHttpNotFound( fd, "NULL" );
2546 return;
2547 }
2548
2549 /* p points to a URL,
2550 * HTTP/1.0 has http://hostname/path
2551 * HTTP/1.1 has /path
2552 * and we only want "/path".
2553 */
2554 if (strncasecmp( p, "http://", 7 ) == 0)
2555 {
2556 char * p7;
2557
2558 p7 = strchr( p+7, '/' );
2559 if (p7 == NULL)
2560 {
2561 ufdbLogError( "ServeHttpClient: unsupported URL for GET/POST: %s", p );
2562 AnswerHttpEmpty( fd, p );
2563 return;
2564 }
2565 p = p7;
2566 }
2567 else if (strncasecmp( p, "https://", 8 ) == 0)
2568 {
2569 char * p8;
2570
2571 p8 = strchr( p+8, '/' );
2572 if (p8 == NULL)
2573 {
2574 ufdbLogError( "ServeHttpClient: unsupported URL for GET/POST: %s", p );
2575 AnswerHttpEmpty( fd, p );
2576 return;
2577 }
2578 p = p8;
2579 }
2580 else if (strncasecmp( p, "ftp://", 6 ) == 0)
2581 {
2582 char * p6;
2583
2584 p6 = strchr( p+6, '/' );
2585 if (p6 == NULL)
2586 {
2587 ufdbLogError( "ServeHttpClient: unsupported URL for GET/POST: %s", p );
2588 AnswerHttpEmpty( fd, p );
2589 return;
2590 }
2591 p = p6;
2592 }
2593
2594 if (ufdbGV.debug)
2595 ufdbLogMessage( "ServeHttpClient: command %s p %s", command, p );
2596
2597 if (strcmp( command, "HEAD" ) == 0)
2598 {
2599 AnswerHttpHead( fd, p );
2600 return;
2601 }
2602 if (strcmp( command, "POST" ) == 0)
2603 {
2604 AnswerHttpPost( fd, p );
2605 return;
2606 }
2607
2608 if (strcmp( p, "/cgi-bin/URLblocked.cgi" ) == 0)
2609 {
2610 int lang;
2611
2612 p = strtok_r( NULL, " \t", &reqptr );
2613 lang = FindLanguage( reqptr );
2614 AnswerHttpUrlBlocked( fd, lang, p, imagesDirectory );
2615 }
2616 else
2617 if (strcmp( p, "/crossdomain.xml" ) == 0 ||
2618 strcmp( p, "/clientaccesspolicy.xml" ) == 0)
2619 {
2620 AnswerHttpCrossdomain( fd );
2621 }
2622 else
2623 if (strcmp( p, "/favicon.ico" ) == 0 ||
2624 strcmp( p, "/robots.txt" ) == 0)
2625 {
2626 AnswerHttpNotFound( fd, p );
2627 }
2628 else
2629 {
2630 ufdbLogError( "ServeHttpClient: unsupported URL for GET/POST: \"%s\"", p );
2631 AnswerHttpEmpty( fd, p );
2632 }
2633 }
2634
2635
/*
 * Accept loop of the (non-threaded) HTTP server.
 *
 * s                listening socket.
 * imagesDirectory  passed through unchanged to ServeHttpClient().
 *
 * Connections are served one at a time.  Each accepted socket gets
 * 3-second receive and send timeouts and is closed after it has been
 * served.  The loop does not end; the process exits with status 0 when
 * select() is interrupted by a signal.
 */
static void ServeHttpConnections(
   int          s,
   const char * imagesDirectory )
{
   for (;;)
   {
      fd_set          readable;
      struct timeval  timeout;
      int             ready;
      int             client;

      FD_ZERO( &readable );
      FD_SET( s, &readable );
      timeout.tv_sec = 0;
      timeout.tv_usec = 750000;
      errno = 0;
      /* select() is used to enable signals to be received by this (non-threaded) process */
      ready = select( s+1, &readable, (fd_set *) NULL, (fd_set *) NULL, &timeout );

      if (ready == 0)                   /* timeout: nothing to accept */
         continue;

      if (ready < 0  &&  errno == EINTR)
      {
         ufdbLogError( "signal received.  exiting..." );
         removeHttpdPidFile();
         exit( 0 );
      }

      client = accept( s, NULL, NULL );
      if (client < 0)
      {
         /* interrupted or would-block accepts are retried without logging */
         if (errno != EINTR  &&  errno != EAGAIN  &&  errno != EWOULDBLOCK)
            ufdbLogError( "SimulateHttpServer: \"accept\" returns error: %s", strerror(errno) );
         continue;
      }

      /* the same 3-second limit applies to receiving and to sending */
      timeout.tv_sec = 3;
      timeout.tv_usec = 0;
      setsockopt( client, SOL_SOCKET, SO_RCVTIMEO, (void *) &timeout, sizeof(timeout) );
      setsockopt( client, SOL_SOCKET, SO_SNDTIMEO, (void *) &timeout, sizeof(timeout) );

      ServeHttpClient( client, imagesDirectory );
      close( client );
   }
}
2694
2695
2696 #ifndef HAVE_INET_ATON
2697
/*
 * Fallback implementation of inet_aton() for decimal dotted addresses.
 *
 * cp   NUL-terminated text with 1 to 4 decimal components (each 0..255)
 *      separated by '.'.
 * inp  receives the address in network byte order.
 *
 * Short forms are expanded by placing the last component in the lowest byte:
 *    127      ->  0.0.0.127
 *    127.1    ->  127.0.0.1
 *    127.2.1  ->  127.2.0.1
 *
 * Returns 1 on success.  Returns 0, leaving *inp untouched, when cp or inp
 * is NULL, when the text is empty, contains a character other than a digit
 * or '.', has an empty component (leading, trailing or doubled '.'), has a
 * component above 255, or has more than four components.
 */
int inet_aton(
   const char *     cp,
   struct in_addr * inp )
{
   unsigned long  result = 0;           /* components before the last one */
   unsigned int   byte_result = 0;      /* value of the current component */
   int            dot_count = 0;
   int            digits = 0;           /* digits seen in the current component */

   if (cp == NULL  ||  inp == NULL)
      return 0;

   for ( ;  *cp != '\0';  cp++)
   {
      char c = *cp;

      if (c >= '0'  &&  c <= '9')
      {
         byte_result = byte_result * 10 + (unsigned int) (c - '0');
         if (byte_result > 255)
            return 0;
         digits++;
      }
      else if (c == '.')
      {
         /* an empty component or a 5th component makes the address invalid */
         if (digits == 0  ||  ++dot_count > 3)
            return 0;
         result = (result << 8) + (unsigned long) byte_result;
         byte_result = 0;
         digits = 0;
      }
      else
         return 0;
   }

   if (digits == 0)                     /* empty string, or the address ends on '.' */
      return 0;

   /* short forms: shift the leading components up to the top bytes */
   switch (dot_count)
   {
   case 1:
      result <<= 8;
      /* fall through */
   case 2:
      result <<= 8;
      /* fall through */
   default:
      break;
   }

   /* s_addr is kept in network byte order */
   inp->s_addr = htonl( (result << 8) + (unsigned long) byte_result );
   return 1;
}
2750
2751 #endif
2752
2753
ufdbSimulateHttpServer(const char * interface,int port,const char * username,const char * imagesDirectory)2754 void ufdbSimulateHttpServer(
2755 const char * interface,
2756 int port,
2757 const char * username,
2758 const char * imagesDirectory )
2759 {
2760 int s;
2761 int sock_parm;
2762 struct sockaddr_in addr;
2763 struct timeval tv;
2764
2765 errno = 0;
2766 s = socket( AF_INET, SOCK_STREAM, 0 );
2767 if (s < 0)
2768 {
2769 ufdbLogError( "SimulateHttpServer: cannot create socket: %s", strerror(errno) );
2770 return;
2771 }
2772
2773 addr.sin_family = AF_INET;
2774 addr.sin_port = htons( port );
2775 if (interface == NULL || strcmp(interface,"all")==0)
2776 addr.sin_addr.s_addr = htonl( INADDR_ANY );
2777 else
2778 {
2779 struct in_addr iaddr;
2780 if (inet_pton( AF_INET, interface, &iaddr ) == 0)
2781 {
2782 addr.sin_addr.s_addr = htonl( INADDR_ANY );
2783 ufdbLogError( "interface parameter '%s' is invalid. I will listen on port %d on ALL interfaces.",
2784 interface, port );
2785 }
2786 else
2787 addr.sin_addr.s_addr = iaddr.s_addr;
2788 }
2789
2790 /*
2791 * Allow server-side addresses to be reused (don't have to wait for timeout).
2792 */
2793 sock_parm = 1;
2794 setsockopt( s, SOL_SOCKET, SO_REUSEADDR, (void *) &sock_parm, sizeof(sock_parm) );
2795
2796 /*
2797 * This http server has very little data to receive and send...
2798 */
2799 sock_parm = 12 * 1024;
2800 setsockopt( s, SOL_SOCKET, SO_SNDBUF, (void *) &sock_parm, sizeof(sock_parm) );
2801 sock_parm = 12 * 1024;
2802 setsockopt( s, SOL_SOCKET, SO_RCVBUF, (void *) &sock_parm, sizeof(sock_parm) );
2803
2804 tv.tv_sec = 6;
2805 tv.tv_usec = 0;
2806 setsockopt( s, SOL_SOCKET, SO_RCVTIMEO, (void *) &tv, sizeof(tv) );
2807 tv.tv_sec = 6;
2808 tv.tv_usec = 0;
2809 setsockopt( s, SOL_SOCKET, SO_SNDTIMEO, (void *) &tv, sizeof(tv) );
2810
2811 /* with anti-aliasing warnings ON, connect/bind cause compiler warning which we may ignore */
2812 if (bind( s, (struct sockaddr *) &addr, sizeof(addr) ) < 0)
2813 {
2814 ufdbLogError( "SimulateHttpServer: cannot bind socket: %s\n"
2815 "Check for other processes using port %d uid=%d euid=%d",
2816 strerror(errno), port, getuid(), geteuid() );
2817 close( s );
2818 return;
2819 }
2820
2821 /* Now that the socket is bound, we can drop root privileges */
2822 if (username != NULL && username[0] != '\0')
2823 {
2824 UFDBdropPrivileges( username );
2825 }
2826
2827 writeHttpdPidFile();
2828 atexit( removeHttpdPidFile );
2829
2830 /*
2831 * According to comment in the Apache httpd source code, these socket
2832 * options should only be set after a successful bind....
2833 */
2834 sock_parm = 1;
2835 setsockopt( s, SOL_SOCKET, SO_KEEPALIVE, (void *) &sock_parm, sizeof(sock_parm) );
2836
2837 #ifdef TCP_FASTOPEN
2838 /* change the socket options to TCO_FASTOPEN */
2839 sock_parm = 256;
2840 setsockopt( s, SOL_TCP, TCP_FASTOPEN, (void *) &sock_parm, sizeof(sock_parm) );
2841 #endif
2842
2843 #if 0
2844 sock_parm = 1;
2845 setsockopt( s, IPPROTO_TCP, TCP_NODELAY, (void *) &sock_parm, sizeof(sock_parm) );
2846 #endif
2847
2848 if (listen( s, 256 ) < 0)
2849 {
2850 ufdbLogError( "SimulateHttpServer: cannot listen on socket: %s", strerror(errno) );
2851 close( s );
2852 return;
2853 }
2854
2855 ufdbLogMessage( "SimulateHttpServer: listening on port %d", port );
2856
2857 initData( imagesDirectory );
2858
2859 ServeHttpConnections( s, imagesDirectory );
2860 }
2861
2862
2863 #ifdef __cplusplus
2864 }
2865 #endif
2866