1 /* ------------------------------------------------------------ */
2 /*
3 HTTrack Website Copier, Offline Browser for Windows and Unix
4 Copyright (C) 1998-2017 Xavier Roche and other contributors
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19 Important notes:
20
21 - We hereby ask people using this source NOT to use it in purpose of grabbing
22 emails addresses, or collecting any other private information on persons.
23 This would disgrace our work, and spoil the many hours we spent on it.
24
25 Please visit our Website: http://www.httrack.com
26 */
27
28 /* ------------------------------------------------------------ */
29 /* File: Subroutines */
30 /* Author: Xavier Roche */
31 /* ------------------------------------------------------------ */
32
33 /* Internal engine bytecode */
34 #define HTS_INTERNAL_BYTECODE
35
36 // Fichier librairie .c
37
38 #include "htscore.h"
39
40 /* specific definitions */
41 #include "htsbase.h"
42 #include "htsnet.h"
43 #include "htsbauth.h"
44 #include "htsthread.h"
45 #include "htsback.h"
46 #include "htswrap.h"
47 #include "htsmd5.h"
48 #include "htsmodules.h"
49 #include "htscharset.h"
50 #include "htsencoding.h"
51
52 #ifdef _WIN32
53 #include <direct.h>
54 #else
55 #ifdef HAVE_SYS_TYPES_H
56 #include <sys/types.h>
57 #endif
58 #ifdef HAVE_SYS_STAT_H
59 #include <sys/stat.h>
60 #endif
61 #ifdef HAVE_UNISTD_H
62 #include <unistd.h>
63 #endif
64 #endif /* _WIN32 */
65 #include <stdarg.h>
66
67 #include <string.h>
68 #include <time.h>
69 #include <stdarg.h>
70
71 #ifndef _WIN32
72 #include <sys/time.h>
73 #else
74 #include <sys/timeb.h>
75 #endif
76 #include <fcntl.h>
77
78 // pour utimbuf
79 #ifdef _WIN32
80 #include <sys/utime.h>
81 #else
82 #include <utime.h>
83 #endif /* _WIN32 */
84
85 #include <sys/stat.h>
86
87 #ifdef __ANDROID__
88 #define timezone 0
89 #endif
90 /* END specific definitions */
91
92 /* Windows might be missing va_copy */
93 #ifdef _WIN32
94 #ifndef va_copy
95 #define va_copy(dst, src) ((dst) = (src))
96 #endif
97 #endif
98
/* Debug stream; only compiled in when _HTS_WIDE is non-zero */
#if _HTS_WIDE
FILE *DEBUG_fp = NULL;
#endif

/* Global variables */
FILE *ioinfo;
int _DEBUG_HEAD;
int IPV6_resolver = 0;

/* OpenSSL context; only compiled in when HTS_USEOPENSSL is non-zero */
#if HTS_USEOPENSSL
SSL_CTX *openssl_ctx = NULL;
#endif
112
/* Additional detection.
   The list is terminated by an empty string. */
const char *hts_detect[] = {
  "archive", "background",
  "data",                       /* OBJECT */
  "dynsrc", "lowsrc",
  "profile",                    /* META element */
  "src", "swurl", "url", "usemap",
  "longdesc",                   /* accessibility */
  "xlink:href",                 /* xml/svg tag */
  "poster",                     /* HTML5 */
  ""
};
130
/* Detection on the beginning of the name (hotspot1=.., hotspot2=..).
   The list is terminated by an empty string. */
const char *hts_detectbeg[] = { "hotspot", "" };
136
/* Do not detect links inside these.
   The list is terminated by an empty string. */
const char *hts_nodetect[] = {
  "accept-charset", "accesskey", "action", "align", "alt",
  "axes", "axis", "char", "charset", "cite",
  "class", "classid", "code", "color", "datetime",
  "dir", "enctype", "face", "height", "id",
  "lang", "language", "media", "method", "name",
  "prompt", "scheme", "size", "style", "target",
  "title", "type", "valign", "version", "width",
  ""
};
176
/* Detection of inline javascript snippets.
   ALSO USED: detection based on the name: onXXX="<tag>" where XXX starts
   with upper case letter.
   The list is terminated by an empty string. */
const char *hts_detect_js[] = {
  "onAbort", "onBlur", "onChange", "onClick", "onDblClick", "onDragDrop",
  "onError", "onFocus",
  "onKeyDown", "onKeyPress", "onKeyUp",
  "onLoad",
  "onMouseDown", "onMouseMove", "onMouseOut", "onMouseOver", "onMouseUp",
  "onMove", "onReset", "onResize", "onSelect", "onSubmit", "onUnload",
  "style",                      /* hack for CSS code data */
  ""
};
206
/* Main MIME type names.
   The list is terminated by an empty string. */
const char *hts_main_mime[] = {
  "application", "audio", "image", "message", "multipart", "text", "video",
  ""
};
217
/* Detection of "...URL=<url>".
   The list is terminated by an empty string. */
const char *hts_detectURL[] = { "content", "" };
223
/* Tags where the URL must be rewritten but not captured.
   The list is terminated by an empty string. */
const char *hts_detectandleave[] = { "action", "" };
229
/* Do not rename the returned types (often unknown types).
   The list is terminated by an empty string. */
const char *hts_mime_keep[] = {
  "application/octet-stream", "text/plain",
  "application/xml", "text/xml",
  ""
};
238
/* Bogus servers return these MIME types when the extension is seen within
   the filename.
   The list is terminated by an empty string. */
const char *hts_mime_bogus_multiple[] = {
  "application/x-wais-source",  /* src (src.rpm) */
  ""
};
244
/* No known MIME type, but known extension ("exe" is left disabled).
   The list is terminated by an empty string. */
const char *hts_ext_dynamic[] = {
  "php3", "php", "php4", "php2",
  "cgi", "asp", "jsp", "pl", "cfm",
  "nsf",                        /* lotus */
  ""
};
260
/* MIME types: each row is { MIME type, file extension }.
   The table is terminated by a { "", "" } row.

   note: application/octet-stream should not be used here

   Not listed (these rows were commented out): application/octet-stream
   (dms, lzh, lha, bin), application/x-sh (sh), application/x-ksh (ksh),
   application/x-httpd-cgi (cgi).

   A given type or extension may appear on several rows; a wrong row placed
   before the right one hides it from a first-match search, so keep each
   pair exact. */
const char *hts_mime[][2] = {
  {"application/acad", "dwg"},
  {"application/arj", "arj"},
  {"application/clariscad", "ccad"},
  {"application/drafting", "drw"},
  {"application/dxf", "dxf"},
  {"application/excel", "xls"},
  {"application/i-deas", "unv"},
  {"application/iges", "igs"},
  {"application/iges", "iges"},
  {"application/mac-binhex40", "hqx"},
  {"application/mac-compactpro", "cpt"},
  {"application/msword", "doc"},
  {"application/msword", "w6w"},
  {"application/msword", "word"},
  {"application/mswrite", "wri"},
  {"application/oda", "oda"},
  {"application/pdf", "pdf"},
  {"application/postscript", "ps"},
  {"application/postscript", "ai"},
  {"application/postscript", "eps"},
  {"application/powerpoint", "ppt"},
  {"application/pro_eng", "prt"},
  {"application/pro_eng", "part"},
  {"application/rtf", "rtf"},
  {"application/set", "set"},
  {"application/sla", "stl"},
  {"application/smil", "smi"},
  {"application/smil", "smil"},
  {"application/smil", "sml"},
  {"application/solids", "sol"},
  {"application/STEP", "stp"},
  {"application/STEP", "step"},
  {"application/vda", "vda"},
  {"application/x-authorware-map", "aam"},
  {"application/x-authorware-seg", "aas"},
  {"application/x-authorware-bin", "aab"},
  {"application/x-bzip2", "bz2"},
  {"application/x-cocoa", "cco"},
  {"application/x-csh", "csh"},
  {"application/x-director", "dir"},
  {"application/x-director", "dcr"},
  {"application/x-director", "dxr"},
  {"application/x-mif", "mif"},
  {"application/x-dvi", "dvi"},
  {"application/x-gzip", "gz"},
  {"application/x-gzip", "gzip"},
  {"application/x-hdf", "hdf"},
  {"application/x-javascript", "js"},
  {"application/x-koan", "skp"},
  {"application/x-koan", "skd"},
  {"application/x-koan", "skt"},
  {"application/x-koan", "skm"},
  {"application/x-latex", "latex"},
  {"application/x-netcdf", "nc"},
  {"application/x-netcdf", "cdf"},
  {"application/x-shar", "shar"},
  {"application/x-stuffit", "sit"},
  {"application/x-tcl", "tcl"},
  {"application/x-tex", "tex"},
  {"application/x-texinfo", "texinfo"},
  {"application/x-texinfo", "texi"},
  {"application/x-troff", "t"},
  {"application/x-troff", "tr"},
  {"application/x-troff", "roff"},
  {"application/x-troff-man", "man"},
  {"application/x-troff-ms", "ms"},
  {"application/x-wais-source", "src"},
  {"application/zip", "zip"},
  {"application/x-zip-compressed", "zip"},
  {"application/x-bcpio", "bcpio"},
  {"application/x-cdlink", "vcd"},
  {"application/x-cpio", "cpio"},
  {"application/x-gtar", "tgz"},
  {"application/x-gtar", "gtar"},
  {"application/x-shockwave-flash", "swf"},
  {"application/x-sv4cpio", "sv4cpio"},
  {"application/x-sv4crc", "sv4crc"},
  {"application/x-tar", "tar"},
  {"application/x-ustar", "ustar"},
  {"application/x-winhelp", "hlp"},
  {"application/xml", "xml"},
  {"audio/midi", "mid"},
  {"audio/midi", "midi"},
  {"audio/midi", "kar"},
  {"audio/mpeg", "mp3"},
  {"audio/mpeg", "mpga"},
  {"audio/mpeg", "mp2"},
  {"audio/basic", "au"},
  {"audio/basic", "snd"},
  {"audio/x-aiff", "aif"},
  {"audio/x-aiff", "aiff"},
  {"audio/x-aiff", "aifc"},
  {"audio/x-pn-realaudio", "rm"},
  {"audio/x-pn-realaudio", "ram"},
  {"audio/x-pn-realaudio", "ra"},
  {"audio/x-pn-realaudio-plugin", "rpm"},
  {"audio/x-wav", "wav"},
  {"chemical/x-pdb", "pdb"},
  {"chemical/x-pdb", "xyz"},
  {"drawing/x-dwf", "dwf"},
  {"image/gif", "gif"},
  {"image/ief", "ief"},
  {"image/jpeg", "jpg"},
  {"image/jpeg", "jpe"},
  {"image/jpeg", "jpeg"},
  {"image/pict", "pict"},
  {"image/png", "png"},
  {"image/tiff", "tiff"},
  {"image/tiff", "tif"},
  {"image/svg+xml", "svg"},
  {"image/svg-xml", "svg"},
  {"image/x-cmu-raster", "ras"},
  {"image/x-freehand", "fh4"},
  {"image/x-freehand", "fh7"},
  {"image/x-freehand", "fh5"},
  {"image/x-freehand", "fhc"},
  {"image/x-freehand", "fh"},
  {"image/x-portable-anymap", "pnm"},
  {"image/x-portable-graymap", "pgm"},
  {"image/x-portable-pixmap", "ppm"},
  {"image/x-rgb", "rgb"},
  {"image/x-xbitmap", "xbm"},
  {"image/x-xpixmap", "xpm"},
  {"image/x-xwindowdump", "xwd"},
  {"model/mesh", "msh"},
  {"model/mesh", "mesh"},
  {"model/mesh", "silo"},
  {"multipart/x-zip", "zip"},
  {"multipart/x-gzip", "gzip"},
  {"text/css", "css"},
  {"text/html", "html"},
  {"text/html", "htm"},
  {"text/plain", "txt"},
  {"text/plain", "g"},
  {"text/plain", "h"},
  {"text/plain", "c"},
  {"text/plain", "cc"},
  {"text/plain", "hh"},
  {"text/plain", "m"},
  {"text/plain", "f90"},
  {"text/richtext", "rtx"},
  {"text/tab-separated-values", "tsv"},
  {"text/x-setext", "etx"},
  {"text/x-sgml", "sgml"},
  {"text/x-sgml", "sgm"},
  {"text/xml", "xml"},
  {"text/xml", "dtd"},
  {"video/mpeg", "mpeg"},
  {"video/mpeg", "mpg"},
  {"video/mpeg", "mpe"},
  {"video/quicktime", "qt"},
  {"video/quicktime", "mov"},
  {"video/x-msvideo", "avi"},
  {"video/x-sgi-movie", "movie"},
  {"x-conference/x-cooltalk", "ice"},
  {"x-world/x-vrml", "wrl"},

  /* More from w3schools.com */
  {"application/envoy", "evy"},
  {"application/fractals", "fif"},
  {"application/futuresplash", "spl"},
  {"application/hta", "hta"},
  {"application/internet-property-stream", "acx"},
  {"application/msword", "dot"},
  {"application/olescript", "axs"},
  {"application/pics-rules", "prf"},
  {"application/pkcs10", "p10"},
  {"application/pkix-crl", "crl"},
  {"application/set-payment-initiation", "setpay"},
  {"application/set-registration-initiation", "setreg"},
  {"application/vnd.ms-excel", "xls"},
  {"application/vnd.ms-excel", "xla"},
  {"application/vnd.ms-excel", "xlc"},
  {"application/vnd.ms-excel", "xlm"},
  {"application/vnd.ms-excel", "xlt"},
  {"application/vnd.ms-excel", "xlw"},
  {"application/vnd.ms-pkicertstore", "sst"},
  {"application/vnd.ms-pkiseccat", "cat"},
  {"application/vnd.ms-powerpoint", "ppt"},
  {"application/vnd.ms-powerpoint", "pot"},
  {"application/vnd.ms-powerpoint", "pps"},
  {"application/vnd.ms-project", "mpp"},
  {"application/vnd.ms-works", "wcm"},
  {"application/vnd.ms-works", "wdb"},
  {"application/vnd.ms-works", "wks"},
  {"application/vnd.ms-works", "wps"},
  {"application/x-compress", "z"},
  {"application/x-compressed", "tgz"},
  {"application/x-internet-signup", "ins"},
  {"application/x-internet-signup", "isp"},
  {"application/x-iphone", "iii"},
  {"application/x-msaccess", "mdb"},
  {"application/x-mscardfile", "crd"},
  {"application/x-msclip", "clp"},
  {"application/x-msmediaview", "m13"},
  {"application/x-msmediaview", "m14"},
  {"application/x-msmediaview", "mvb"},
  {"application/x-msmetafile", "wmf"},
  {"application/x-msmoney", "mny"},
  {"application/x-mspublisher", "pub"},
  {"application/x-msschedule", "scd"},
  {"application/x-msterminal", "trm"},
  {"application/x-perfmon", "pma"},
  {"application/x-perfmon", "pmc"},
  {"application/x-perfmon", "pml"},
  {"application/x-perfmon", "pmr"},
  {"application/x-perfmon", "pmw"},
  {"application/x-pkcs12", "p12"},
  {"application/x-pkcs12", "pfx"},
  {"application/x-pkcs7-certificates", "p7b"},
  {"application/x-pkcs7-certificates", "spc"},
  {"application/x-pkcs7-certreqresp", "p7r"},
  {"application/x-pkcs7-mime", "p7c"},
  {"application/x-pkcs7-mime", "p7m"},
  {"application/x-pkcs7-signature", "p7s"},
  {"application/x-troff-me", "me"},
  {"application/x-x509-ca-cert", "cer"},
  {"application/x-x509-ca-cert", "crt"},
  {"application/x-x509-ca-cert", "der"},
  {"application/vnd.ms-pkipko", "pko"},
  {"audio/mid", "mid"},
  {"audio/mid", "rmi"},
  {"audio/x-mpegurl", "m3u"},
  {"image/bmp", "bmp"},
  {"image/cis-cod", "cod"},
  {"image/pipeg", "jfif"},
  {"image/x-cmx", "cmx"},
  {"image/x-icon", "ico"},
  {"image/x-portable-bitmap", "pbm"},
  {"message/rfc822", "mht"},
  {"message/rfc822", "mhtml"},
  {"message/rfc822", "nws"},
  {"text/h323", "323"},
  {"text/html", "stm"},
  {"text/iuls", "uls"},
  {"text/plain", "bas"},
  {"text/scriptlet", "sct"},
  {"text/webviewhtml", "htt"},
  {"text/x-component", "htc"},
  {"text/x-vcard", "vcf"},
  {"video/mpeg", "mp2"},
  {"video/mpeg", "mpa"},
  {"video/mpeg", "mpv2"},
  {"video/x-la-asf", "lsf"},
  {"video/x-la-asf", "lsx"},
  {"video/x-ms-asf", "asf"},
  {"video/x-ms-asf", "asr"},
  {"video/x-ms-asf", "asx"},
  {"video/x-ms-wmv", "wmv"},
  {"x-world/x-vrml", "flr"},
  {"x-world/x-vrml", "vrml"},
  {"x-world/x-vrml", "wrz"},
  {"x-world/x-vrml", "xaf"},
  {"x-world/x-vrml", "xof"},

  /* Various */
  {"application/ogg", "ogg"},

  {"application/x-java-vm", "class"},
  {"application/x-bittorrent", "torrent"},

  {"", ""}
};
540
// Character classes (RFC2396)

// CIS: true when c, taken as an unsigned char, is the character ch
#define CIS(c,ch) ( ((unsigned char)(c)) == (ch) )

// CHAR_IN_SET: true when c, taken as an unsigned char, is one of the
// characters of the string literal 'set'. The search length leaves out the
// terminating \0 of the literal, so a \0 character never matches (which a
// strchr() based test would get wrong).
#define CHAR_IN_SET(c,set) \
  ( memchr((set), (unsigned char)(c), sizeof(set) - 1) != NULL )

// Reserved (RFC2396)
#define CHAR_RESERVED(c)  CHAR_IN_SET(c, ";/?:@&=+$,")

// Delimiters (RFC2396)
#define CHAR_DELIM(c)     CHAR_IN_SET(c, "<>#%\"")

// Unwise (RFC2396)
#define CHAR_UNWISE(c)    CHAR_IN_SET(c, "{}|\\^[]`")

// Special (escape chars) (RFC2396 + >127 )
#define CHAR_LOW(c)       ( ((unsigned char)(c)) < 32 )
#define CHAR_HIG(c)       ( ((unsigned char)(c)) > 126 )
#define CHAR_SPECIAL(c)   ( CHAR_LOW(c) || CHAR_HIG(c) )

// We try to avoid them and encode them instead
#define CHAR_XXAVOID(c)   CHAR_IN_SET(c, " *'\"&!")

// Marks
#define CHAR_MARK(c)      CHAR_IN_SET(c, "-_.!~*'()")
592 //#define CHAR_MARK(c) ( strchr("-_.!~*'()",(unsigned char)(c)) != 0 )
593
// Convert every '/' of s into a backslash (Windows only).
// s is first copied into catbuff with strcpybuff(); the conversion is then
// done in place in a single pass over the copy.
// Returns catbuff.
#ifdef _WIN32
char *antislash(char *catbuff, const char *s) {
  char *a;

  strcpybuff(catbuff, s);
  for (a = catbuff; *a != '\0'; a++) {
    if (*a == '/') {
      *a = '\\';
    }
  }
  return catbuff;
}
#endif
605
606 // Initialize a htsblk structure
hts_init_htsblk(htsblk * r)607 void hts_init_htsblk(htsblk * r) {
608 memset(r, 0, sizeof(htsblk)); // effacer
609 r->soc = INVALID_SOCKET;
610 r->msg[0] = '\0';
611 r->statuscode = STATUSCODE_INVALID;
612 r->totalsize = -1;
613 }
614
615 // ouvre une liaison http, envoie une requète GET et réceptionne le header
616 // retour: socket
http_fopen(httrackp * opt,const char * adr,const char * fil,htsblk * retour)617 T_SOC http_fopen(httrackp * opt, const char *adr, const char *fil, htsblk * retour) {
618 // / GET, traiter en-tête
619 return http_xfopen(opt, 0, 1, 1, NULL, adr, fil, retour);
620 }
621
622 // ouverture d'une liaison http, envoi d'une requète
623 // mode: 0 GET 1 HEAD [2 POST]
624 // treat: traiter header?
625 // waitconnect: attendre le connect()
626 // note: dans retour, on met les params du proxy
http_xfopen(httrackp * opt,int mode,int treat,int waitconnect,const char * xsend,const char * adr,const char * fil,htsblk * retour)627 T_SOC http_xfopen(httrackp * opt, int mode, int treat, int waitconnect,
628 const char *xsend, const char *adr, const char *fil, htsblk * retour) {
629 //htsblk retour;
630 //int bufl=TAILLE_BUFFER; // 8Ko de buffer
631 T_SOC soc = INVALID_SOCKET;
632 char BIGSTK tempo_fil[HTS_URLMAXSIZE * 2];
633
634 //char *p,*q;
635
636 // retour prédéfini: erreur
637 if (retour) {
638 retour->adr = NULL;
639 retour->size = 0;
640 retour->msg[0] = '\0';
641 retour->statuscode = STATUSCODE_NON_FATAL; // a priori erreur non fatale
642 }
643 #if HDEBUG
644 printf("adr=%s\nfichier=%s\n", adr, fil);
645 #endif
646
647 // ouvrir liaison
648 #if HDEBUG
649 printf("Création d'une socket sur %s\n", adr);
650 #endif
651
652 #if CNXDEBUG
653 printf("..newhttp\n");
654 #endif
655
656 /* connexion */
657 if (retour) {
658 if ((!(retour->req.proxy.active))
659 || ((strcmp(adr, "file://") == 0)
660 || (strncmp(adr, "https://", 8) == 0)
661 )
662 ) { /* pas de proxy, ou non utilisable ici */
663 soc = newhttp(opt, adr, retour, -1, waitconnect);
664 } else {
665 soc = newhttp(opt, retour->req.proxy.name, retour, retour->req.proxy.port, waitconnect); // ouvrir sur le proxy à la place
666 }
667 } else {
668 soc = newhttp(opt, adr, NULL, -1, waitconnect);
669 }
670
671 // copier index socket retour
672 if (retour)
673 retour->soc = soc;
674
675 /* Check for errors */
676 if (soc == INVALID_SOCKET) {
677 if (retour) {
678 if (retour->msg) {
679 if (!strnotempty(retour->msg)) {
680 #ifdef _WIN32
681 int last_errno = WSAGetLastError();
682
683 sprintf(retour->msg, "Connect error: %s", strerror(last_errno));
684 #else
685 int last_errno = errno;
686
687 sprintf(retour->msg, "Connect error: %s", strerror(last_errno));
688 #endif
689 }
690 }
691 }
692 }
693 // --------------------
694 // court-circuit (court circuite aussi le proxy..)
695 // LOCAL_SOCKET_ID est une pseudo-socket locale
696 if (soc == LOCAL_SOCKET_ID) {
697 retour->is_file = 1; // fichier local
698 if (mode == 0) { // GET
699
700 // Test en cas de file:///C|...
701 if (!fexist
702 (fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
703 unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil))))
704 if (fexist
705 (fconv
706 (OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
707 unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil + 1)))) {
708 strcpybuff(tempo_fil, fil + 1);
709 fil = tempo_fil;
710 }
711 // Ouvrir
712 retour->totalsize = fsize(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
713 unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil))); // taille du fichier
714 retour->msg[0] = '\0';
715 soc = INVALID_SOCKET;
716 if (retour->totalsize < 0)
717 strcpybuff(retour->msg, "Unable to open local file");
718 else {
719 // Note: On passe par un FILE* (plus propre)
720 //soc=open(fil,O_RDONLY,0); // en lecture seule!
721 retour->fp = FOPEN(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
722 unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil)), "rb"); // ouvrir
723 if (retour->fp == NULL)
724 soc = INVALID_SOCKET;
725 else
726 soc = LOCAL_SOCKET_ID;
727 }
728 retour->soc = soc;
729 if (soc != INVALID_SOCKET) {
730 retour->statuscode = HTTP_OK; // OK
731 strcpybuff(retour->msg, "OK");
732 guess_httptype(opt, retour->contenttype, fil);
733 } else if (strnotempty(retour->msg) == 0)
734 strcpybuff(retour->msg, "Unable to open local file");
735 return soc; // renvoyer
736 } else { // HEAD ou POST : interdit sur un local!!!! (c'est idiot!)
737 strcpybuff(retour->msg, "Unexpected Head/Post local request");
738 soc = INVALID_SOCKET; // erreur
739 retour->soc = soc;
740 return soc;
741 }
742 }
743 // --------------------
744
745 if (soc != INVALID_SOCKET) {
746 char rcvd[1100];
747
748 rcvd[0] = '\0';
749 #if HDEBUG
750 printf("Ok, connexion réussie, id=%d\n", soc);
751 #endif
752
753 // connecté?
754 if (waitconnect) {
755 http_sendhead(opt, NULL, mode, xsend, adr, fil, NULL, NULL, retour);
756 }
757
758 if (soc != INVALID_SOCKET) {
759
760 #if HDEBUG
761 printf("Attente de la réponse:\n");
762 #endif
763
764 // si GET (réception d'un fichier), réceptionner en-tête d'abord,
765 // et ensuite le corps
766 // si POST on ne réceptionne rien du tout, c'est après que l'on fera
767 // une réception standard pour récupérer l'en tête
768 if ((treat) && (waitconnect)) { // traiter (attendre!) en-tête
769 // Réception de la status line et de l'en-tête (norme RFC1945)
770
771 // status-line à récupérer
772 finput(soc, rcvd, 1024);
773 if (strnotempty(rcvd) == 0)
774 finput(soc, rcvd, 1024); // "certains serveurs buggés envoient un \n au début" (RFC)
775
776 // traiter status-line
777 treatfirstline(retour, rcvd);
778
779 #if HDEBUG
780 printf("Status-Code=%d\n", retour->statuscode);
781 #endif
782
783 // en-tête
784
785 // header // ** !attention! HTTP/0.9 non supporté
786 do {
787 finput(soc, rcvd, 1024);
788 #if HDEBUG
789 printf(">%s\n", rcvd);
790 #endif
791 if (strnotempty(rcvd))
792 treathead(NULL, NULL, NULL, retour, rcvd); // traiter
793
794 } while(strnotempty(rcvd));
795
796 //rcvsize=-1; // forCER CHARGEMENT INCONNU
797
798 //if (retour)
799 // retour->totalsize=rcvsize;
800
801 } else { // si GET, on recevra l'en tête APRES
802 //rcvsize=-1; // on ne connait pas la taille de l'en-tête
803 if (retour)
804 retour->totalsize = -1;
805 }
806
807 }
808
809 }
810
811 return soc;
812 }
813
814 /* Buffer printing */
815 typedef struct buff_struct {
816 /** Buffer **/
817 char *buffer;
818 /** Buffer capacity in bytes **/
819 size_t capacity;
820 /** Buffer write position ; MUST point to a valid \0. **/
821 size_t pos;
822 } buff_struct;
823
824 static void print_buffer(buff_struct*const str, const char *format, ...)
825 HTS_PRINTF_FUN(2, 3);
826
827 /* Prints on a static buffer. asserts in case of overflow. */
print_buffer(buff_struct * const str,const char * format,...)828 static void print_buffer(buff_struct*const str, const char *format, ...) {
829 size_t result;
830 va_list args;
831 size_t remaining;
832 char *position;
833
834 /* Security check. */
835 assertf(str != NULL);
836 assertf(str->pos < str->capacity);
837
838 /* Print */
839 position = &str->buffer[str->pos];
840 remaining = str->capacity - str->pos;
841 va_start(args, format);
842 result = (size_t) vsnprintf(position, remaining, format, args);
843 va_end(args);
844 assertf(result < remaining);
845
846 /* Increment. */
847 str->pos += strlen(position);
848 assertf(str->pos < str->capacity);
849 }
850
851 // envoi d'une requète
http_sendhead(httrackp * opt,t_cookie * cookie,int mode,const char * xsend,const char * adr,const char * fil,const char * referer_adr,const char * referer_fil,htsblk * retour)852 int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
853 const char *xsend, const char *adr, const char *fil,
854 const char *referer_adr, const char *referer_fil,
855 htsblk * retour) {
856 char BIGSTK buffer_head_request[8192];
857 buff_struct bstr = { buffer_head_request, sizeof(buffer_head_request), 0 };
858
859 //int use_11=0; // HTTP 1.1 utilisé
860 int direct_url = 0; // ne pas analyser l'url (exemple: ftp://)
861 const char *search_tag = NULL;
862
863 // Initialize buffer
864 buffer_head_request[0] = '\0';
865
866 // header Date
867 //strcatbuff(buff,"Date: ");
868 //time_gmt_rfc822(buff); // obtenir l'heure au format rfc822
869 //sendc("\n");
870 //strcatbuff(buff,buff);
871
872 // possibilité non documentée: >post: et >postfile:
873 // si présence d'un tag >post: alors executer un POST
874 // exemple: http://www.someweb.com/test.cgi?foo>post:posteddata=10&foo=5
875 // si présence d'un tag >postfile: alors envoyer en tête brut contenu dans le fichier en question
876 // exemple: http://www.someweb.com/test.cgi?foo>postfile:post0.txt
877 search_tag = strstr(fil, POSTTOK ":");
878 if (!search_tag) {
879 search_tag = strstr(fil, POSTTOK "file:");
880 if (search_tag) { // postfile
881 if (mode == 0) { // GET!
882 FILE *fp =
883 FOPEN(unescape_http(OPT_GET_BUFF(opt),
884 OPT_GET_BUFF_SIZE(opt), search_tag + strlen(POSTTOK) + 5), "rb");
885 if (fp) {
886 char BIGSTK line[1100];
887 char BIGSTK protocol[256], url[HTS_URLMAXSIZE * 2], method[256];
888
889 linput(fp, line, 1000);
890 if (sscanf(line, "%s %s %s", method, url, protocol) == 3) {
891 size_t ret;
892 // selon que l'on a ou pas un proxy
893 if (retour->req.proxy.active) {
894 print_buffer(&bstr,
895 "%s http://%s%s %s\r\n", method, adr, url,
896 protocol);
897 } else {
898 print_buffer(&bstr,
899 "%s %s %s\r\n", method, url, protocol);
900 }
901 // lire le reste en brut
902 ret = fread(&bstr.buffer[bstr.pos],
903 bstr.capacity - bstr.pos, 1, fp);
904 if ((int) ret < 0) {
905 return -1;
906 }
907 bstr.pos += strlen(&bstr.buffer[bstr.pos]);
908 }
909 fclose(fp);
910 }
911 }
912 }
913 }
914 // Fin postfile
915
916 if (bstr.pos == 0) { // PAS POSTFILE
917 // Type de requète?
918 if ((search_tag) && (mode == 0)) {
919 print_buffer(&bstr, "POST ");
920 } else if (mode == 0) { // GET
921 print_buffer(&bstr, "GET ");
922 } else { // if (mode==1) {
923 if (!retour->req.http11) // forcer HTTP/1.0
924 print_buffer(&bstr, "GET "); // certains serveurs (cgi) buggent avec HEAD
925 else
926 print_buffer(&bstr, "HEAD ");
927 }
928
929 // si on gère un proxy, il faut une Absolute URI: on ajoute avant http://www.adr.dom
930 if (retour->req.proxy.active && (strncmp(adr, "https://", 8) != 0)) {
931 if (!link_has_authority(adr)) { // default http
932 #if HDEBUG
933 printf("Proxy Use: for %s%s proxy %d port %d\n", adr, fil,
934 retour->req.proxy.name, retour->req.proxy.port);
935 #endif
936 print_buffer(&bstr, "http://%s", jump_identification_const(adr));
937 } else { // ftp:// en proxy http
938 #if HDEBUG
939 printf("Proxy Use for ftp: for %s%s proxy %d port %d\n", adr, fil,
940 retour->req.proxy.name, retour->req.proxy.port);
941 #endif
942 direct_url = 1; // ne pas analyser user/pass
943 print_buffer(&bstr, "%s", adr);
944 }
945 }
946 // NOM DU FICHIER
947 // on slash doit être présent en début, sinon attention aux bad request! (400)
948 if (*fil != '/')
949 print_buffer(&bstr, "/");
950
951 {
952 char BIGSTK tempo[HTS_URLMAXSIZE * 2];
953
954 tempo[0] = '\0';
955 if (search_tag)
956 strncatbuff(tempo, fil, (int) (search_tag - fil));
957 else
958 strcpybuff(tempo, fil);
959 inplace_escape_check_url(tempo, sizeof(tempo));
960 print_buffer(&bstr, "%s", tempo); // avec échappement
961 }
962
963 // protocole
964 if (!retour->req.http11) { // forcer HTTP/1.0
965 //use_11=0;
966 print_buffer(&bstr, " HTTP/1.0\x0d\x0a");
967 } else { // Requète 1.1
968 //use_11=1;
969 print_buffer(&bstr, " HTTP/1.1\x0d\x0a");
970 }
971
972 /* supplemental data */
973 if (xsend)
974 print_buffer(&bstr, "%s", xsend); // éventuelles autres lignes
975
976 // tester proxy authentication
977 if (retour->req.proxy.active) {
978 if (link_has_authorization(retour->req.proxy.name)) { // et hop, authentification proxy!
979 const char *a = jump_identification_const(retour->req.proxy.name);
980 const char *astart = jump_protocol_const(retour->req.proxy.name);
981 char autorisation[1100];
982 char user_pass[256];
983
984 autorisation[0] = user_pass[0] = '\0';
985 //
986 strncatbuff(user_pass, astart, (int) (a - astart) - 1);
987 strcpybuff(user_pass, unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), user_pass));
988 code64((unsigned char *) user_pass, (int) strlen(user_pass),
989 (unsigned char *) autorisation, 0);
990 print_buffer(&bstr, "Proxy-Authorization: Basic %s"H_CRLF,
991 autorisation);
992 #if HDEBUG
993 printf("Proxy-Authenticate, %s (code: %s)\n", user_pass, autorisation);
994 #endif
995 }
996 }
997 // Referer?
998 if (referer_adr != NULL && referer_fil != NULL && strnotempty(referer_adr)
999 && strnotempty(referer_fil)
1000 ) { // non vide
1001 if ((strcmp(referer_adr, "file://") != 0)
1002 && ( /* no https referer to http urls */
1003 (strncmp(referer_adr, "https://", 8) != 0) /* referer is not https */
1004 ||(strncmp(adr, "https://", 8) == 0) /* or referer AND addresses are https */
1005 )
1006 ) { // PAS file://
1007 print_buffer(&bstr, "Referer: http://%s%s"H_CRLF,
1008 jump_identification_const(referer_adr), referer_fil);
1009 }
1010 }
1011 // HTTP field: referer
1012 else if (strnotempty(retour->req.referer)) {
1013 print_buffer(&bstr, "Referer: %s"H_CRLF, retour->req.referer);
1014 }
1015 // POST?
1016 if (mode == 0) { // GET!
1017 if (search_tag) {
1018 print_buffer(&bstr, "Content-length: %d" H_CRLF,
1019 (int) (strlen
1020 (unescape_http
1021 (OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
1022 search_tag + strlen(POSTTOK) + 1))));
1023 }
1024 }
1025 // gestion cookies?
1026 if (cookie) {
1027 char buffer[8192];
1028 char *b = cookie->data;
1029 int cook = 0;
1030 int max_cookies = 8;
1031
1032 do {
1033 b = cookie_find(b, "", jump_identification_const(adr), fil); // prochain cookie satisfaisant aux conditions
1034 if (b != NULL) {
1035 max_cookies--;
1036 if (!cook) {
1037 print_buffer(&bstr, "Cookie: $Version=1; ");
1038 cook = 1;
1039 } else
1040 print_buffer(&bstr, "; ");
1041 print_buffer(&bstr, "%s", cookie_get(buffer, b, 5));
1042 print_buffer(&bstr, "=%s", cookie_get(buffer, b, 6));
1043 print_buffer(&bstr, "; $Path=%s", cookie_get(buffer, b, 2));
1044 b = cookie_nextfield(b);
1045 }
1046 } while(b != NULL && max_cookies > 0);
1047 if (cook) { // on a envoyé un (ou plusieurs) cookie?
1048 print_buffer(&bstr, H_CRLF);
1049 #if DEBUG_COOK
1050 printf("Header:\n%s\n", bstr.buffer);
1051 #endif
1052 }
1053 }
1054 // gérer le keep-alive (garder socket)
1055 if (retour->req.http11 && !retour->req.nokeepalive) {
1056 print_buffer(&bstr, "Connection: keep-alive" H_CRLF);
1057 } else {
1058 print_buffer(&bstr, "Connection: close" H_CRLF);
1059 }
1060
1061 {
1062 const char *real_adr = jump_identification_const(adr);
1063
1064 // Mandatory per RFC2616
1065 if (!direct_url) { // pas ftp:// par exemple
1066 print_buffer(&bstr, "Host: %s"H_CRLF, real_adr);
1067 }
1068
1069 // HTTP field: from
1070 if (strnotempty(retour->req.from)) { // HTTP from
1071 print_buffer(&bstr, "From: %s" H_CRLF, retour->req.from);
1072 }
1073
1074 // Présence d'un user-agent?
1075 if (retour->req.user_agent_send
1076 && strnotempty(retour->req.user_agent)) {
1077 print_buffer(&bstr, "User-Agent: %s" H_CRLF, retour->req.user_agent);
1078 }
1079
1080 // Accept
1081 if (strnotempty(retour->req.accept)) {
1082 print_buffer(&bstr, "Accept: %s" H_CRLF, retour->req.accept);
1083 }
1084
1085 // Accept-language
1086 if (strnotempty(retour->req.lang_iso)) {
1087 print_buffer(&bstr, "Accept-Language: %s"H_CRLF, retour->req.lang_iso);
1088 }
1089
1090 // Compression accepted ?
1091 if (retour->req.http11) {
1092 #if HTS_USEZLIB
1093 if ((!retour->req.range_used)
1094 && (!retour->req.nocompression))
1095 print_buffer(&bstr, "Accept-Encoding: " "gzip" /* gzip if the preffered encoding */
1096 ", " "identity;q=0.9" H_CRLF);
1097 else
1098 print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF); /* no compression */
1099 #else
1100 print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF); /* no compression */
1101 #endif
1102 }
1103
1104 /* Authentification */
1105 {
1106 char autorisation[1100];
1107 const char *a;
1108
1109 autorisation[0] = '\0';
1110 if (link_has_authorization(adr)) { // ohh une authentification!
1111 const char *a = jump_identification_const(adr);
1112 const char *astart = jump_protocol_const(adr);
1113
1114 if (!direct_url) { // pas ftp:// par exemple
1115 char user_pass[256];
1116
1117 user_pass[0] = '\0';
1118 strncatbuff(user_pass, astart, (int) (a - astart) - 1);
1119 strcpybuff(user_pass,
1120 unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), user_pass));
1121 code64((unsigned char *) user_pass, (int) strlen(user_pass),
1122 (unsigned char *) autorisation, 0);
1123 if (strcmp(fil, "/robots.txt")) /* pas robots.txt */
1124 bauth_add(cookie, astart, fil, autorisation);
1125 }
1126 } else if ((a = bauth_check(cookie, real_adr, fil)))
1127 strcpybuff(autorisation, a);
1128 /* On a une autorisation a donner? */
1129 if (strnotempty(autorisation)) {
1130 print_buffer(&bstr, "Authorization: Basic %s"H_CRLF, autorisation);
1131 }
1132 }
1133
1134 }
1135 //strcatbuff(buff,"Accept-Charset: iso-8859-1,*,utf-8\n");
1136
1137 // Custom header(s)
1138 if (strnotempty(retour->req.headers)) {
1139 print_buffer(&bstr, "%s", retour->req.headers);
1140 }
1141
1142 // CRLF de fin d'en tête
1143 print_buffer(&bstr, H_CRLF);
1144
1145 // données complémentaires?
1146 if (search_tag)
1147 if (mode == 0) // GET!
1148 print_buffer(&bstr, "%s",
1149 unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
1150 search_tag + strlen(POSTTOK) + 1));
1151 }
1152 #if HDEBUG
1153 #endif
1154 if (_DEBUG_HEAD) {
1155 if (ioinfo) {
1156 fprintf(ioinfo, "[%d] request for %s%s:\r\n", retour->debugid,
1157 jump_identification_const(adr), fil);
1158 fprintfio(ioinfo, bstr.buffer, "<<< ");
1159 fprintf(ioinfo, "\r\n");
1160 fflush(ioinfo);
1161 }
1162 } // Fin test pas postfile
1163 //
1164
1165 // Callback
1166 {
1167 int test_head =
1168 RUN_CALLBACK6(opt, sendhead, bstr.buffer, adr, fil, referer_adr, referer_fil,
1169 retour);
1170 if (test_head != 1) {
1171 deletesoc_r(retour);
1172 strcpybuff(retour->msg, "Header refused by external wrapper");
1173 retour->soc = INVALID_SOCKET;
1174 }
1175 }
1176
1177 // Envoi
1178 HTS_STAT.last_request = mtime_local();
1179 if (sendc(retour, bstr.buffer) < 0) { // ERREUR, socket rompue?...
1180 deletesoc_r(retour); // fermer tout de même
1181 // et tenter de reconnecter
1182
1183 strcpybuff(retour->msg, "Write error");
1184 retour->soc = INVALID_SOCKET;
1185 }
1186
1187 // RX'98
1188 return 0;
1189 }
1190
1191 // traiter 1ere ligne d'en tête
treatfirstline(htsblk * retour,const char * rcvd)1192 void treatfirstline(htsblk * retour, const char *rcvd) {
1193 const char *a = rcvd;
1194
1195 // exemple:
1196 // HTTP/1.0 200 OK
1197 if (*a) {
1198 // note: certains serveurs buggés renvoient HTTP/1.0\n200 OK ou " HTTP/1.0 200 OK"
1199 while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
1200 a++; // épurer espaces au début
1201 if (strfield(a, "HTTP/")) {
1202 // sauter HTTP/1.x
1203 while((*a != ' ') && (*a != '\0') && (*a != 10) && (*a != 13)
1204 && (*a != 9))
1205 a++;
1206 if (*a != '\0') {
1207 while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
1208 a++; // épurer espaces
1209 if ((*a >= '0') && (*a <= '9')) {
1210 sscanf(a, "%d", &(retour->statuscode));
1211 // sauter 200
1212 while((*a != ' ') && (*a != '\0') && (*a != 10) && (*a != 13)
1213 && (*a != 9))
1214 a++;
1215 while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
1216 a++; // épurer espaces
1217 if ((strlen(a) > 1) && (strlen(a) < 64)) // message retour
1218 strcpybuff(retour->msg, a);
1219 else
1220 infostatuscode(retour->msg, retour->statuscode);
1221 // type MIME par défaut2
1222 strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1223 } else { // pas de code!
1224 retour->statuscode = STATUSCODE_INVALID;
1225 strcpybuff(retour->msg, "Unknown response structure");
1226 }
1227 } else { // euhh??
1228 retour->statuscode = STATUSCODE_INVALID;
1229 strcpybuff(retour->msg, "Unknown response structure");
1230 }
1231 } else {
1232 if (*a == '<') {
1233 /* This is dirty .. */
1234 retour->statuscode = HTTP_OK;
1235 retour->keep_alive = 0;
1236 strcpybuff(retour->msg, "Unknown, assuming junky server");
1237 strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1238 } else if (strnotempty(a)) {
1239 retour->statuscode = STATUSCODE_INVALID;
1240 strcpybuff(retour->msg, "Unknown (not HTTP/xx) response structure");
1241 } else {
1242 /* This is dirty .. */
1243 retour->statuscode = HTTP_OK;
1244 retour->keep_alive = 0;
1245 strcpybuff(retour->msg, "Unknown, assuming junky server");
1246 strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1247 }
1248 }
1249 } else { // vide!
1250 /*
1251 retour->statuscode=STATUSCODE_INVALID;
1252 strcpybuff(retour->msg,"Empty reponse or internal error");
1253 */
1254 /* This is dirty .. */
1255 retour->statuscode = HTTP_OK;
1256 strcpybuff(retour->msg, "Unknown, assuming junky server");
1257 strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1258 }
1259 }
1260
1261 // traiter ligne par ligne l'en tête
1262 // gestion des cookies
treathead(t_cookie * cookie,const char * adr,const char * fil,htsblk * retour,char * rcvd)1263 void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * retour,
1264 char *rcvd) {
1265 int p;
1266
1267 if ((p = strfield(rcvd, "Content-length:")) != 0) {
1268 #if HDEBUG
1269 printf("ok, Content-length: détecté\n");
1270 #endif
1271 if (sscanf(rcvd + p, LLintP, &(retour->totalsize)) == 1) {
1272 if (retour->totalsize == 0) {
1273 retour->empty = 1;
1274 }
1275 }
1276 } else if ((p = strfield(rcvd, "Content-Disposition:")) != 0) {
1277 while(is_realspace(*(rcvd + p)))
1278 p++; // sauter espaces
1279 if ((int) strlen(rcvd + p) < 250) { // pas trop long?
1280 char tmp[256];
1281 char *a = NULL, *b = NULL;
1282
1283 strcpybuff(tmp, rcvd + p);
1284 a = strstr(tmp, "filename=");
1285 if (a) {
1286 a += strlen("filename=");
1287 while(is_space(*a))
1288 a++;
1289 //a=strchr(a,'"');
1290 if (a) {
1291 char *c = NULL;
1292
1293 //a++; /* jump " */
1294 while((c = strchr(a, '/'))) /* skip all / (see RFC2616) */
1295 a = c + 1;
1296 //b=strchr(a+1,'"');
1297 b = a + strlen(a) - 1;
1298 while(is_space(*b))
1299 b--;
1300 b++;
1301 if (b) {
1302 *b = '\0';
1303 if ((int) strlen(a) < 200) { // pas trop long?
1304 strcpybuff(retour->cdispo, a);
1305 }
1306 }
1307 }
1308 }
1309 }
1310 } else if ((p = strfield(rcvd, "Last-Modified:")) != 0) {
1311 while(is_realspace(*(rcvd + p)))
1312 p++; // sauter espaces
1313 if ((int) strlen(rcvd + p) < 64) { // pas trop long?
1314 //struct tm* tm_time=convert_time_rfc822(rcvd+p);
1315 strcpybuff(retour->lastmodified, rcvd + p);
1316 }
1317 } else if ((p = strfield(rcvd, "Date:")) != 0) {
1318 if (strnotempty(retour->lastmodified) == 0) { /* pas encore de last-modified */
1319 while(is_realspace(*(rcvd + p)))
1320 p++; // sauter espaces
1321 if ((int) strlen(rcvd + p) < 64) { // pas trop long?
1322 //struct tm* tm_time=convert_time_rfc822(rcvd+p);
1323 strcpybuff(retour->lastmodified, rcvd + p);
1324 }
1325 }
1326 } else if ((p = strfield(rcvd, "Etag:")) != 0) { /* Etag */
1327 if (retour) {
1328 while(is_realspace(*(rcvd + p)))
1329 p++; // sauter espaces
1330 if ((int) strlen(rcvd + p) < 64) // pas trop long?
1331 strcpybuff(retour->etag, rcvd + p);
1332 else // erreur.. ignorer
1333 retour->etag[0] = '\0';
1334 }
1335 }
1336 // else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) { // chunk!
1337 else if ((p = strfield(rcvd, "Transfer-Encoding:")) != 0) { // chunk!
1338 while(is_realspace(*(rcvd + p)))
1339 p++; // sauter espaces
1340 if (strfield(rcvd + p, "chunked")) {
1341 retour->is_chunk = 1; // chunked
1342 //retour->http11=2; // chunked
1343 #if HDEBUG
1344 printf("ok, Transfer-Encoding: détecté\n");
1345 #endif
1346 }
1347 } else if ((p = strfield(rcvd, "Content-type:")) != 0) {
1348 if (retour) {
1349 char tempo[1100];
1350
1351 // éviter les text/html; charset=foo
1352 {
1353 char *a = strchr(rcvd + p, ';');
1354
1355 if (a) { // extended information
1356 *a = '\0';
1357 a++;
1358 while(is_space(*a))
1359 a++;
1360 if (strfield(a, "charset")) {
1361 a += 7;
1362 while(is_space(*a))
1363 a++;
1364 if (*a == '=') {
1365 a++;
1366 while(is_space(*a))
1367 a++;
1368 if (*a == '\"')
1369 a++;
1370 while(is_space(*a))
1371 a++;
1372 if (*a) {
1373 char *chs = a;
1374
1375 while(*a && !is_space(*a) && *a != '\"' && *a != ';')
1376 a++;
1377 *a = '\0';
1378 if (*chs) {
1379 if (strlen(chs) < sizeof(retour->charset) - 2) {
1380 strcpybuff(retour->charset, chs);
1381 }
1382 }
1383 }
1384 }
1385 }
1386 }
1387 }
1388 sscanf(rcvd + p, "%s", tempo);
1389 if (strlen(tempo) < sizeof(retour->contenttype) - 2) // pas trop long!!
1390 strcpybuff(retour->contenttype, tempo);
1391 else
1392 strcpybuff(retour->contenttype, "application/octet-stream-unknown"); // erreur
1393 }
1394 } else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
1395 // Content-Range: bytes 0-70870/70871
1396 const char *a;
1397
1398 for(a = rcvd + p; is_space(*a); a++) ;
1399 if (strncasecmp(a, "bytes ", 6) == 0) {
1400 for(a += 6; is_space(*a); a++) ;
1401 if (sscanf
1402 (a, LLintP "-" LLintP "/" LLintP, &retour->crange_start,
1403 &retour->crange_end, &retour->crange) != 3) {
1404 retour->crange_start = 0;
1405 retour->crange_end = 0;
1406 retour->crange = 0;
1407 a = strchr(rcvd + p, '/');
1408 if (a != NULL) {
1409 a++;
1410 if (sscanf(a, LLintP, &retour->crange) == 1) {
1411 retour->crange_start = 0;
1412 retour->crange_end = retour->crange - 1;
1413 } else {
1414 retour->crange = 0;
1415 }
1416 }
1417 }
1418 }
1419 } else if ((p = strfield(rcvd, "Connection:")) != 0) {
1420 char *a = rcvd + p;
1421
1422 while(is_space(*a))
1423 a++;
1424 if (*a) {
1425 if (strfield(a, "Keep-Alive")) {
1426 if (!retour->keep_alive) {
1427 retour->keep_alive_max = 10;
1428 retour->keep_alive_t = 15;
1429 }
1430 retour->keep_alive = 1;
1431 } else {
1432 retour->keep_alive = 0;
1433 }
1434 }
1435 } else if ((p = strfield(rcvd, "Keep-Alive:")) != 0) {
1436 char *a = rcvd + p;
1437
1438 while(is_space(*a))
1439 a++;
1440 if (*a) {
1441 char *p;
1442
1443 retour->keep_alive = 1;
1444 retour->keep_alive_max = 10;
1445 retour->keep_alive_t = 15;
1446 if ((p = strstr(a, "timeout="))) {
1447 p += strlen("timeout=");
1448 sscanf(p, "%d", &retour->keep_alive_t);
1449 }
1450 if ((p = strstr(a, "max="))) {
1451 p += strlen("max=");
1452 sscanf(p, "%d", &retour->keep_alive_max);
1453 }
1454 if (retour->keep_alive_max <= 1 || retour->keep_alive_t < 1) {
1455 retour->keep_alive = 0;
1456 }
1457 }
1458 } else if ((p = strfield(rcvd, "TE:")) != 0) {
1459 char *a = rcvd + p;
1460
1461 while(is_space(*a))
1462 a++;
1463 if (*a) {
1464 if (strfield(a, "trailers")) {
1465 retour->keep_alive_trailers = 1;
1466 }
1467 }
1468 } else if ((p = strfield(rcvd, "Content-Encoding:")) != 0) {
1469 if (retour) {
1470 char tempo[1100];
1471 char *a = rcvd + p;
1472
1473 while(is_space(*a))
1474 a++;
1475 {
1476 char *a = strchr(rcvd + p, ';');
1477
1478 if (a)
1479 *a = '\0';
1480 }
1481 sscanf(a, "%s", tempo);
1482 if (strlen(tempo) < 64) // pas trop long!!
1483 strcpybuff(retour->contentencoding, tempo);
1484 else
1485 retour->contentencoding[0] = '\0'; // erreur
1486 #if HTS_USEZLIB
1487 /* Check known encodings */
1488 if (retour->contentencoding[0]) {
1489 if ((strfield2(retour->contentencoding, "gzip"))
1490 || (strfield2(retour->contentencoding, "x-gzip"))
1491 /*
1492 || (strfield2(retour->contentencoding, "compress"))
1493 || (strfield2(retour->contentencoding, "x-compress"))
1494 */
1495 || (strfield2(retour->contentencoding, "deflate"))
1496 || (strfield2(retour->contentencoding, "x-deflate"))
1497 ) {
1498 retour->compressed = 1;
1499 }
1500 }
1501 #endif
1502 }
1503 } else if ((p = strfield(rcvd, "Location:")) != 0) {
1504 if (retour) {
1505 if (retour->location) {
1506 while(is_realspace(*(rcvd + p)))
1507 p++; // sauter espaces
1508 if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE) // pas trop long?
1509 strcpybuff(retour->location, rcvd + p);
1510 else // erreur.. ignorer
1511 retour->location[0] = '\0';
1512 }
1513 }
1514 } else if (((p = strfield(rcvd, "Set-Cookie:")) != 0) && (cookie)) { // ohh un cookie
1515 char *a = rcvd + p; // pointeur
1516 char domain[256]; // domaine cookie (.netscape.com)
1517 char path[256]; // chemin (/)
1518 char cook_name[256]; // nom cookie (MYCOOK)
1519 char BIGSTK cook_value[8192]; // valeur (ID=toto,S=1234)
1520
1521 #if DEBUG_COOK
1522 printf("set-cookie detected\n");
1523 #endif
1524 while(*a) {
1525 char *token_st, *token_end;
1526 char *value_st, *value_end;
1527 char name[256];
1528 char BIGSTK value[8192];
1529 int next = 0;
1530
1531 name[0] = value[0] = '\0';
1532 //
1533
1534 // initialiser cookie lu actuellement
1535 if (adr)
1536 strcpybuff(domain, jump_identification_const(adr)); // domaine
1537 strcpybuff(path, "/"); // chemin (/)
1538 strcpybuff(cook_name, ""); // nom cookie (MYCOOK)
1539 strcpybuff(cook_value, ""); // valeur (ID=toto,S=1234)
1540 // boucler jusqu'au prochain cookie ou la fin
1541 do {
1542 char *start_loop = a;
1543
1544 while(is_space(*a))
1545 a++; // sauter espaces
1546 token_st = a; // départ token
1547 while((!is_space(*a)) && (*a) && (*a != ';') && (*a != '='))
1548 a++; // arrêter si espace, point virgule
1549 token_end = a;
1550 while(is_space(*a))
1551 a++; // sauter espaces
1552 if (*a == '=') { // name=value
1553 a++;
1554 while(is_space(*a))
1555 a++; // sauter espaces
1556 value_st = a;
1557 while((*a != ';') && (*a))
1558 a++; // prochain ;
1559 //while( ((*a!='"') || (*(a-1)=='\\')) && (*a)) a++; // prochain " (et pas \")
1560 value_end = a;
1561 //if (*a==';') { // finit par un ;
1562 // vérifier débordements
1563 if ((((int) (token_end - token_st)) < 200)
1564 && (((int) (value_end - value_st)) < 8000)
1565 && (((int) (token_end - token_st)) > 0)
1566 && (((int) (value_end - value_st)) > 0)) {
1567 int name_len = (int) (token_end - token_st);
1568 int value_len = (int) (value_end - value_st);
1569
1570 name[0] = '\0';
1571 value[0] = '\0';
1572 strncatbuff(name, token_st, name_len);
1573 strncatbuff(value, value_st, value_len);
1574 #if DEBUG_COOK
1575 printf("detected cookie-av: name=\"%s\" value=\"%s\"\n", name,
1576 value);
1577 #endif
1578 if (strfield2(name, "domain")) {
1579 if (value_len < sizeof(domain) - 1) {
1580 strcpybuff(domain, value);
1581 } else {
1582 cook_name[0] = 0;
1583 break;
1584 }
1585 } else if (strfield2(name, "path")) {
1586 if (value_len < sizeof(path) - 1) {
1587 strcpybuff(path, value);
1588 } else {
1589 cook_name[0] = 0;
1590 break;
1591 }
1592 } else if (strfield2(name, "max-age")) {
1593 // ignoré..
1594 } else if (strfield2(name, "expires")) {
1595 // ignoré..
1596 } else if (strfield2(name, "version")) {
1597 // ignoré..
1598 } else if (strfield2(name, "comment")) {
1599 // ignoré
1600 } else if (strfield2(name, "secure")) { // ne devrait pas arriver ici
1601 // ignoré
1602 } else {
1603 if (value_len < sizeof(cook_value) - 1
1604 && name_len < sizeof(cook_name) - 1) {
1605 if (strnotempty(cook_name) == 0) { // noter premier: nom et valeur cookie
1606 strcpybuff(cook_name, name);
1607 strcpybuff(cook_value, value);
1608 } else { // prochain cookie
1609 a = start_loop; // on devra recommencer à cette position
1610 next = 1; // enregistrer
1611 }
1612 } else {
1613 cook_name[0] = 0;
1614 break;
1615 }
1616 }
1617 }
1618 }
1619 if (!next) {
1620 while((*a != ';') && (*a))
1621 a++; // prochain
1622 while(*a == ';')
1623 a++; // sauter ;
1624 }
1625 } while((*a) && (!next));
1626 if (strnotempty(cook_name)) { // cookie?
1627 #if DEBUG_COOK
1628 printf
1629 ("new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",
1630 cook_name, cook_value, domain, path);
1631 #endif
1632 cookie_add(cookie, cook_name, cook_value, domain, path);
1633 }
1634 }
1635 }
1636 }
1637
1638 // transforme le message statuscode en chaîne
infostatuscode(char * msg,int statuscode)1639 HTSEXT_API void infostatuscode(char *msg, int statuscode) {
1640 switch (statuscode) {
1641 // Erreurs HTTP, selon RFC
1642 case 100:
1643 strcpybuff(msg, "Continue");
1644 break;
1645 case 101:
1646 strcpybuff(msg, "Switching Protocols");
1647 break;
1648 case 200:
1649 strcpybuff(msg, "OK");
1650 break;
1651 case 201:
1652 strcpybuff(msg, "Created");
1653 break;
1654 case 202:
1655 strcpybuff(msg, "Accepted");
1656 break;
1657 case 203:
1658 strcpybuff(msg, "Non-Authoritative Information");
1659 break;
1660 case 204:
1661 strcpybuff(msg, "No Content");
1662 break;
1663 case 205:
1664 strcpybuff(msg, "Reset Content");
1665 break;
1666 case 206:
1667 strcpybuff(msg, "Partial Content");
1668 break;
1669 case 300:
1670 strcpybuff(msg, "Multiple Choices");
1671 break;
1672 case 301:
1673 strcpybuff(msg, "Moved Permanently");
1674 break;
1675 case 302:
1676 strcpybuff(msg, "Moved Temporarily");
1677 break;
1678 case 303:
1679 strcpybuff(msg, "See Other");
1680 break;
1681 case 304:
1682 strcpybuff(msg, "Not Modified");
1683 break;
1684 case 305:
1685 strcpybuff(msg, "Use Proxy");
1686 break;
1687 case 306:
1688 strcpybuff(msg, "Undefined 306 error");
1689 break;
1690 case 307:
1691 strcpybuff(msg, "Temporary Redirect");
1692 break;
1693 case 400:
1694 strcpybuff(msg, "Bad Request");
1695 break;
1696 case 401:
1697 strcpybuff(msg, "Unauthorized");
1698 break;
1699 case 402:
1700 strcpybuff(msg, "Payment Required");
1701 break;
1702 case 403:
1703 strcpybuff(msg, "Forbidden");
1704 break;
1705 case 404:
1706 strcpybuff(msg, "Not Found");
1707 break;
1708 case 405:
1709 strcpybuff(msg, "Method Not Allowed");
1710 break;
1711 case 406:
1712 strcpybuff(msg, "Not Acceptable");
1713 break;
1714 case 407:
1715 strcpybuff(msg, "Proxy Authentication Required");
1716 break;
1717 case 408:
1718 strcpybuff(msg, "Request Time-out");
1719 break;
1720 case 409:
1721 strcpybuff(msg, "Conflict");
1722 break;
1723 case 410:
1724 strcpybuff(msg, "Gone");
1725 break;
1726 case 411:
1727 strcpybuff(msg, "Length Required");
1728 break;
1729 case 412:
1730 strcpybuff(msg, "Precondition Failed");
1731 break;
1732 case 413:
1733 strcpybuff(msg, "Request Entity Too Large");
1734 break;
1735 case 414:
1736 strcpybuff(msg, "Request-URI Too Large");
1737 break;
1738 case 415:
1739 strcpybuff(msg, "Unsupported Media Type");
1740 break;
1741 case 416:
1742 strcpybuff(msg, "Requested Range Not Satisfiable");
1743 break;
1744 case 417:
1745 strcpybuff(msg, "Expectation Failed");
1746 break;
1747 case 500:
1748 strcpybuff(msg, "Internal Server Error");
1749 break;
1750 case 501:
1751 strcpybuff(msg, "Not Implemented");
1752 break;
1753 case 502:
1754 strcpybuff(msg, "Bad Gateway");
1755 break;
1756 case 503:
1757 strcpybuff(msg, "Service Unavailable");
1758 break;
1759 case 504:
1760 strcpybuff(msg, "Gateway Time-out");
1761 break;
1762 case 505:
1763 strcpybuff(msg, "HTTP Version Not Supported");
1764 break;
1765 //
1766 default:
1767 if (strnotempty(msg) == 0)
1768 strcpybuff(msg, "Unknown error");
1769 break;
1770 }
1771 }
1772
1773 // check if data is available
check_readinput(htsblk * r)1774 int check_readinput(htsblk * r) {
1775 if (r->soc != INVALID_SOCKET) {
1776 fd_set fds; // poll structures
1777 struct timeval tv; // structure for select
1778 const int soc = (int) r->soc;
1779
1780 assertf(soc == r->soc);
1781 FD_ZERO(&fds);
1782 FD_SET(soc, &fds);
1783 tv.tv_sec = 0;
1784 tv.tv_usec = 0;
1785 select(soc + 1, &fds, NULL, NULL, &tv);
1786 if (FD_ISSET(soc, &fds))
1787 return 1;
1788 else
1789 return 0;
1790 } else
1791 return 0;
1792 }
1793
1794 // check if data is available
check_readinput_t(T_SOC soc,int timeout)1795 int check_readinput_t(T_SOC soc, int timeout) {
1796 if (soc != INVALID_SOCKET) {
1797 fd_set fds; // poll structures
1798 struct timeval tv; // structure for select
1799 const int isoc = (int) soc;
1800
1801 assertf(isoc == soc);
1802 FD_ZERO(&fds);
1803 FD_SET(isoc, &fds);
1804 tv.tv_sec = timeout;
1805 tv.tv_usec = 0;
1806 select(isoc + 1, &fds, NULL, NULL, &tv);
1807 if (FD_ISSET(isoc, &fds))
1808 return 1;
1809 else
1810 return 0;
1811 } else
1812 return 0;
1813 }
1814
1815 // idem, sauf qu'ici on peut choisir la taille max de données à recevoir
1816 // SI bufl==0 alors le buffer est censé être de 8kos, et on recoit par bloc de lignes
1817 // en éliminant les cr (ex: header), arrêt si double-lf
1818 // SI bufl==-1 alors le buffer est censé être de 8kos, et on recoit ligne par ligne
1819 // en éliminant les cr (ex: header), arrêt si double-lf
1820 // Note: les +1 dans les malloc sont dûs à l'octet nul rajouté en fin de fichier
http_xfread1(htsblk * r,int bufl)1821 LLint http_xfread1(htsblk * r, int bufl) {
1822 int nl = -1;
1823
1824 // EOF
1825 if (r->totalsize >= 0 && r->size == r->totalsize) {
1826 return READ_EOF;
1827 }
1828
1829 if (bufl > 0) {
1830 if (!r->is_write) { // stocker en mémoire
1831 if (r->totalsize >= 0) { // totalsize déterminé ET ALLOUE
1832 if (r->adr == NULL) {
1833 r->adr = (char *) malloct((size_t) r->totalsize + 1);
1834 r->size = 0;
1835 }
1836 if (r->adr != NULL) {
1837 // lecture
1838 const size_t req_size = r->totalsize - r->size;
1839
1840 nl = req_size > 0 ? hts_read(r, r->adr + ((int) r->size), (int) req_size) : 0; /* NO 32 bit overlow possible here (no 4GB html!) */
1841 // nouvelle taille
1842 if (nl >= 0)
1843 r->size += nl;
1844
1845 /*
1846 if (r->size >= r->totalsize)
1847 nl = -1; // break
1848 */
1849
1850 r->adr[r->size] = '\0'; // caractère NULL en fin au cas où l'on traite des HTML
1851 }
1852
1853 } else { // inconnu..
1854 // réserver de la mémoire?
1855 if (r->adr == NULL) {
1856 #if HDEBUG
1857 printf("..alloc xfread\n");
1858 #endif
1859 r->adr = (char *) malloct(bufl + 1);
1860 r->size = 0;
1861 } else {
1862 #if HDEBUG
1863 printf("..realloc xfread1\n");
1864 #endif
1865 r->adr = (char *) realloct(r->adr, (int) r->size + bufl + 1);
1866 }
1867
1868 if (r->adr != NULL) {
1869 // lecture
1870 nl = hts_read(r, r->adr + (int) r->size, bufl);
1871 if (nl > 0) {
1872 // resize
1873 r->adr = (char *) realloct(r->adr, (int) r->size + nl + 1);
1874 // nouvelle taille
1875 r->size += nl;
1876 // octet nul
1877 if (r->adr)
1878 r->adr[r->size] = '\0';
1879
1880 } // sinon on a fini
1881 #if HDEBUG
1882 else if (nl < 0)
1883 printf("..end read (%d)\n", nl);
1884 #endif
1885 }
1886 #if HDEBUG
1887 else
1888 printf("..-> error\n");
1889 #endif
1890 }
1891
1892 // pas de adr=erreur
1893 if (r->adr == NULL)
1894 nl = READ_ERROR;
1895
1896 } else { // stocker sur disque
1897 char *buff;
1898
1899 buff = (char *) malloct(bufl);
1900 if (buff != NULL) {
1901 // lecture
1902 nl = hts_read(r, buff, bufl);
1903 // nouvelle taille
1904 if (nl > 0) {
1905 r->size += nl;
1906 if (fwrite(buff, 1, nl, r->out) != nl) {
1907 r->statuscode = STATUSCODE_INVALID;
1908 strcpybuff(r->msg, "Write error on disk");
1909 nl = READ_ERROR;
1910 }
1911 }
1912 //if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize)))
1913 // nl=-1; // break
1914
1915 // libérer bloc tempo
1916 freet(buff);
1917 } else
1918 nl = READ_ERROR;
1919
1920 if ((nl < 0) && (r->out != NULL)) {
1921 fflush(r->out);
1922 }
1923
1924 } // stockage disque ou mémoire
1925
1926 } else if (bufl == -2) { // force reserve
1927 if (r->adr == NULL) {
1928 r->adr = (char *) malloct(8192);
1929 r->size = 0;
1930 return 0;
1931 }
1932 return -1;
1933 } else { // réception d'un en-tête octet par octet
1934 int count = 256;
1935 int tot_nl = 0;
1936 int lf_detected = 0;
1937 int at_beginning = 1;
1938
1939 do {
1940 nl = READ_INTERNAL_ERROR;
1941 count--;
1942 if (r->adr == NULL) {
1943 r->adr = (char *) malloct(8192);
1944 r->size = 0;
1945 }
1946 if (r->adr != NULL) {
1947 if (r->size < 8190) {
1948 // lecture
1949 nl = hts_read(r, r->adr + r->size, 1);
1950 if (nl > 0) {
1951 // exit if:
1952 // lf detected AND already detected before
1953 // or
1954 // lf detected AND first character read
1955 if (*(r->adr + r->size) == 10) {
1956 if (lf_detected || (at_beginning) || (bufl < 0))
1957 count = -1;
1958 lf_detected = 1;
1959 }
1960 if (*(r->adr + r->size) != 13) { // sauter caractères 13
1961 if ((*(r->adr + r->size) != 10)
1962 && (*(r->adr + r->size) != 13)
1963 ) {
1964 // restart for new line
1965 lf_detected = 0;
1966 }
1967 (r->size)++;
1968 at_beginning = 0;
1969 }
1970 *(r->adr + r->size) = '\0'; // terminer par octet nul
1971 }
1972 }
1973 }
1974 if (nl >= 0) {
1975 tot_nl += nl;
1976 if (!check_readinput(r))
1977 count = -1;
1978 }
1979 } while((nl >= 0) && (count > 0));
1980 if (nl >= 0) {
1981 nl = tot_nl;
1982 }
1983 }
1984 // EOF
1985 if (r->totalsize >= 0 && r->size == r->totalsize) {
1986 return READ_EOF;
1987 } else {
1988 return nl;
1989 }
1990 }
1991
1992 // teste si une URL (validité, header, taille)
1993 // retourne 200 ou le code d'erreur (404=NOT FOUND, etc)
1994 // en cas de moved xx, dans location
1995 // abandonne désormais au bout de 30 secondes (aurevoir les sites
1996 // qui nous font poireauter 5 heures..) -> -2=timeout
http_test(httrackp * opt,const char * adr,const char * fil,char * loc)1997 htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
1998 T_SOC soc;
1999 htsblk retour;
2000
2001 //int rcvsize=-1;
2002 //char* rcv=NULL; // adresse de retour
2003 //int bufl=TAILLE_BUFFER; // 8Ko de buffer
2004 TStamp tl;
2005 int timeout = 30; // timeout pour un check (arbitraire) // **
2006
2007 // pour abandonner un site trop lent
2008 tl = time_local();
2009
2010 loc[0] = '\0';
2011 hts_init_htsblk(&retour);
2012 //memset(&retour, 0, sizeof(htsblk)); // effacer
2013 retour.location = loc; // si non nul, contiendra l'adresse véritable en cas de moved xx
2014
2015 //soc=http_fopen(adr,fil,&retour,NULL); // ouvrir, + header
2016
2017 // on ouvre en head, et on traite l'en tête
2018 soc = http_xfopen(opt, 1, 0, 1, NULL, adr, fil, &retour); // ouvrir HEAD, + envoi header
2019
2020 if (soc != INVALID_SOCKET) {
2021 int e = 0;
2022
2023 // tant qu'on a des données, et qu'on ne recoit pas deux LF, et que le timeout n'arrie pas
2024 do {
2025 if (http_xfread1(&retour, 0) < 0)
2026 e = 1;
2027 else {
2028 if (retour.adr != NULL) {
2029 if ((retour.adr[retour.size - 1] != 10)
2030 || (retour.adr[retour.size - 2] != 10))
2031 e = 1;
2032 }
2033 }
2034
2035 if (!e) {
2036 if ((time_local() - tl) >= timeout) {
2037 e = -1;
2038 }
2039 }
2040
2041 } while(!e);
2042
2043 if (e == 1) {
2044 if (adr != NULL) {
2045 int ptr = 0;
2046 char rcvd[1100];
2047
2048 // note: en gros recopie du traitement de back_wait()
2049 //
2050
2051 // ----------------------------------------
2052 // traiter en-tête!
2053 // status-line à récupérer
2054 ptr += binput(retour.adr + ptr, rcvd, 1024);
2055 if (strnotempty(rcvd) == 0)
2056 ptr += binput(retour.adr + ptr, rcvd, 1024); // "certains serveurs buggés envoient un \n au début" (RFC)
2057
2058 // traiter status-line
2059 treatfirstline(&retour, rcvd);
2060
2061 #if HDEBUG
2062 printf("(Buffer) Status-Code=%d\n", retour.statuscode);
2063 #endif
2064
2065 // en-tête
2066
2067 // header // ** !attention! HTTP/0.9 non supporté
2068 do {
2069 ptr += binput(retour.adr + ptr, rcvd, 1024);
2070 #if HDEBUG
2071 printf("(buffer)>%s\n", rcvd);
2072 #endif
2073 if (strnotempty(rcvd))
2074 treathead(NULL, NULL, NULL, &retour, rcvd); // traiter
2075
2076 } while(strnotempty(rcvd));
2077 // ----------------------------------------
2078
2079 // libérer mémoire
2080 if (retour.adr != NULL) {
2081 freet(retour.adr);
2082 retour.adr = NULL;
2083 }
2084 }
2085 } else {
2086 retour.statuscode = STATUSCODE_TIMEOUT;
2087 strcpybuff(retour.msg, "Timeout While Testing");
2088 }
2089
2090 #if HTS_DEBUG_CLOSESOCK
2091 DEBUG_W("http_test: deletehttp\n");
2092 #endif
2093 deletehttp(&retour);
2094 retour.soc = INVALID_SOCKET;
2095 }
2096 return retour;
2097 }
2098
2099 // Crée un lien (http) vers une adresse internet iadr
2100 // retour: structure (adresse, taille, message si erreur (si !adr))
2101 // peut ouvrir avec des connect() non bloquants: waitconnect=0/1
newhttp(httrackp * opt,const char * _iadr,htsblk * retour,int port,int waitconnect)2102 T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
2103 int waitconnect) {
2104 T_SOC soc; // descipteur de la socket
2105
2106 if (strcmp(_iadr, "file://") != 0) { /* non fichier */
2107 SOCaddr server;
2108 const char *error = "unknown error";
2109
2110 // tester un éventuel id:pass et virer id:pass@ si détecté
2111 const char *const iadr = jump_identification_const(_iadr);
2112
2113 SOCaddr_clear(server);
2114
2115 #if HDEBUG
2116 printf("gethostbyname\n");
2117 #endif
2118
2119 // tester un éventuel port
2120 if (port == -1) {
2121 const char *a = jump_toport_const(iadr);
2122
2123 #if HTS_USEOPENSSL
2124 if (retour->ssl)
2125 port = 443;
2126 else
2127 port = 80; // port par défaut
2128 #else
2129 port = 80; // port par défaut
2130 #endif
2131
2132 if (a != NULL) {
2133 char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
2134 int i = -1;
2135
2136 iadr2[0] = '\0';
2137 sscanf(a + 1, "%d", &i);
2138 if (i != -1) {
2139 port = (unsigned short int) i;
2140 }
2141
2142 // adresse véritable (sans :xx)
2143 strncatbuff(iadr2, iadr, (int) (a - iadr));
2144
2145 // adresse sans le :xx
2146 hts_dns_resolve2(opt, iadr2, &server, &error);
2147
2148 } else {
2149
2150 // adresse normale (port par défaut par la suite)
2151 hts_dns_resolve2(opt, iadr, &server, &error);
2152 }
2153
2154 } else { // port défini
2155 hts_dns_resolve2(opt, iadr, &server, &error);
2156 }
2157
2158 if (!SOCaddr_is_valid(server)) {
2159 #if DEBUG
2160 printf("erreur gethostbyname\n");
2161 #endif
2162 if (retour && retour->msg) {
2163 #ifdef _WIN32
2164 snprintf(retour->msg, sizeof(retour->msg),
2165 "Unable to get server's address: %s", error);
2166 #else
2167 snprintf(retour->msg, sizeof(retour->msg),
2168 "Unable to get server's address: %s", error);
2169 #endif
2170 }
2171 return INVALID_SOCKET;
2172 }
2173
2174 // make a copy for external clients
2175 SOCaddr_copy_SOCaddr(retour->address, server);
2176 retour->address_size = SOCaddr_size(retour->address);
2177
2178 // créer ("attachement") une socket (point d'accès) internet,en flot
2179 #if HDEBUG
2180 printf("socket\n");
2181 #endif
2182 #if HTS_WIDE_DEBUG
2183 DEBUG_W("socket\n");
2184 #endif
2185 soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
2186 if (retour != NULL) {
2187 retour->debugid = HTS_STAT.stat_sockid++;
2188 }
2189 #if HTS_WIDE_DEBUG
2190 DEBUG_W("socket()=%d\n" _(int) soc);
2191 #endif
2192 if (soc == INVALID_SOCKET) {
2193 if (retour && retour->msg) {
2194 #ifdef _WIN32
2195 int last_errno = WSAGetLastError();
2196
2197 sprintf(retour->msg, "Unable to create a socket: %s",
2198 strerror(last_errno));
2199 #else
2200 int last_errno = errno;
2201
2202 sprintf(retour->msg, "Unable to create a socket: %s",
2203 strerror(last_errno));
2204 #endif
2205 }
2206 return INVALID_SOCKET; // erreur création socket impossible
2207 }
2208 // bind this address
2209 if (retour != NULL && strnotempty(retour->req.proxy.bindhost)) {
2210 const char *error = "unknown error";
2211 SOCaddr bind_addr;
2212
2213 if (hts_dns_resolve2(opt, retour->req.proxy.bindhost,
2214 &bind_addr, &error) == NULL
2215 || bind(soc, &SOCaddr_sockaddr(bind_addr),
2216 SOCaddr_size(bind_addr)) != 0) {
2217 if (retour && retour->msg) {
2218 #ifdef _WIN32
2219 snprintf(retour->msg, sizeof(retour->msg),
2220 "Unable to bind the specificied server address: %s",
2221 error);
2222 #else
2223 snprintf(retour->msg, sizeof(retour->msg),
2224 "Unable to bind the specificied server address: %s",
2225 error);
2226 #endif
2227 }
2228 deletesoc(soc);
2229 return INVALID_SOCKET;
2230 }
2231 }
2232 // structure: connexion au domaine internet, port 80 (ou autre)
2233 SOCaddr_initport(server, port);
2234 #if HDEBUG
2235 printf("==%d\n", soc);
2236 #endif
2237
2238 // connexion non bloquante?
2239 if (!waitconnect) {
2240 #ifdef _WIN32
2241 unsigned long p = 1; // non bloquant
2242 if (ioctlsocket(soc, FIONBIO, &p)) {
2243 const int last_errno = WSAGetLastError();
2244 snprintf(retour->msg, sizeof(retour->msg),
2245 "Non-blocking socket failed: %s", strerror(last_errno));
2246 deletesoc(soc);
2247 return INVALID_SOCKET;
2248 }
2249 #else
2250 const int flags = fcntl(soc, F_GETFL, 0);
2251 if (flags == -1 || fcntl(soc, F_SETFL, flags | O_NONBLOCK) == -1) {
2252 snprintf(retour->msg, sizeof(retour->msg),
2253 "Non-blocking socket failed: %s", strerror(errno));
2254 deletesoc(soc);
2255 return INVALID_SOCKET;
2256 }
2257 #endif
2258 }
2259 // Connexion au serveur lui même
2260 #if HDEBUG
2261 printf("connect\n");
2262 #endif
2263 HTS_STAT.last_connect = mtime_local();
2264
2265 #if HTS_WIDE_DEBUG
2266 DEBUG_W("connect\n");
2267 #endif
2268 if (connect(soc, &SOCaddr_sockaddr(server), SOCaddr_size(server)) != 0) {
2269 // bloquant
2270 if (waitconnect) {
2271 #if HDEBUG
2272 printf("unable to connect!\n");
2273 #endif
2274 if (retour != NULL && retour->msg) {
2275 #ifdef _WIN32
2276 const int last_errno = WSAGetLastError();
2277
2278 sprintf(retour->msg, "Unable to connect to the server: %s",
2279 strerror(last_errno));
2280 #else
2281 const int last_errno = errno;
2282
2283 sprintf(retour->msg, "Unable to connect to the server: %s",
2284 strerror(last_errno));
2285 #endif
2286 }
2287 /* Close the socket and notify the error!!! */
2288 deletesoc(soc);
2289 return INVALID_SOCKET;
2290 }
2291 }
2292 #if HTS_WIDE_DEBUG
2293 DEBUG_W("connect done\n");
2294 #endif
2295
2296 #if HDEBUG
2297 printf("connexion établie\n");
2298 #endif
2299
2300 // A partir de maintenant, on peut envoyer et recevoir des données
2301 // via le flot identifié par soc (socket): write(soc,adr,taille) et
2302 // read(soc,adr,taille)
2303
2304 } else { // on doit ouvrir un fichier local!
2305 // il sera géré de la même manière qu'une socket (c'est idem!)
2306
2307 soc = LOCAL_SOCKET_ID; // pseudo-socket locale..
2308 // soc sera remplacé lors d'un http_fopen() par un handle véritable!
2309
2310 } // teste fichier local ou http
2311
2312 return soc;
2313 }
2314
2315 // couper http://www.truc.fr/pub/index.html -> www.truc.fr /pub/index.html
2316 // retour=-1 si erreur.
2317 // si file://... alors adresse=file:// (et coupe le ?query dans ce cas)
/*
 * Split an absolute URL into an address part (adrfil->adr) and a path part
 * (adrfil->fil).
 *
 *   http://www.foo.com/pub/index.html -> adr="www.foo.com", fil="/pub/index.html"
 *
 * For "file:" URLs, adr is set to "file://" and the query string is removed
 * from fil. "ftp:" (and "https:" when HTS_USEOPENSSL is set) keep their
 * scheme prefix in adr.
 *
 * Returns 0 on success, -1 on error (unknown scheme, host part longer than
 * HTS_URLMAXSIZE, or empty address).
 */
int ident_url_absolute(const char *url, lien_adrfil *adrfil) {
  int pos = 0;
  int scheme = 0;

  // reset adrfil->adr and adrfil->fil
  adrfil->adr[0] = adrfil->fil[0] = '\0';

#if HDEBUG
  printf("protocol: %s\n", url);
#endif

  // Does the URL start with "<letters>:" (i.e. some scheme)?
  {
    const char *a = url;

    while(isalpha((unsigned char) *a))
      a++;
    if (*a == ':')
      scheme = 1;
  }

  // 1. optional scheme ":" (pos receives the offset just after the scheme)
  if ((pos = strfield(url, "file:"))) {         // local file (used for tests)
    strcpybuff(adrfil->adr, "file://");
  } else if ((pos = strfield(url, "http:"))) {  // HTTP: no prefix kept in adr
  } else if ((pos = strfield(url, "ftp:"))) {   // FTP
    strcpybuff(adrfil->adr, "ftp://");
#if HTS_USEOPENSSL
  } else if ((pos = strfield(url, "https:"))) { // HTTPS
    strcpybuff(adrfil->adr, "https://");
#endif
  } else if (scheme) {
    return -1;                  // scheme present but not recognized
  } else
    pos = 0;

  // 2. optional "//" authority
  if (strncmp(url + pos, "//", 2) == 0)
    pos += 2;

  // (url+pos) now points to the path (not net path)

  if (!strfield(adrfil->adr, "file:")) {        // anything but file://
    const char *p, *q;

    p = url + pos;

    // p points to the start of the address, e.g. www.foo.com/dir/index.html
    q = strchr(jump_identification_const(p), '/');
    if (q == 0)
      q = strchr(jump_identification_const(p), '?');    // http://www.foo.com?bar=1
    if (q == 0)
      q = p + strlen(p);        // points to the terminating \0
    // q points to the path, e.g. /index.html?query=search

    // host part too long
    if ((((int) (q - p))) > HTS_URLMAXSIZE) {
      return -1;
    }
    // copy the address part (appended after any scheme prefix)
    strncatbuff(adrfil->adr, p, ((int) (q - p)));
    // copy the path, inserting the default "/" when missing
    if (q[0] != '/')
      strcatbuff(adrfil->fil, "/");
    strcatbuff(adrfil->fil, q);
    // SECURITY: collapse ./ and ../ components
    fil_simplifie(adrfil->fil);
  } else {                      // file://
    const char *p;
    size_t i;
    char *a;

    p = url + pos;
    if (*p == '/' || *p == '\\') {      /* file:///.. */
      strcatbuff(adrfil->fil, p);
    } else {
      /* Check p[0] first: with an empty remainder ("file://"), p[1] lies
         beyond the string terminator and must not be read. */
      if (p[0] == '\0' || p[1] != ':') {
        strcatbuff(adrfil->fil, "//");  /* file://server/foo */
        strcatbuff(adrfil->fil, p);
      } else {
        strcatbuff(adrfil->fil, p);     // file://C:\..
      }
    }

    a = strchr(adrfil->fil, '?');
    if (a)
      *a = '\0';                /* cut the query (useless for file://) */
    // convert DOS-style backslashes to slashes
    for(i = 0; adrfil->fil[i] != '\0'; i++)
      if (adrfil->fil[i] == '\\')
        adrfil->fil[i] = '/';
  }

  // no hostname
  if (!strnotempty(adrfil->adr))
    return -1;

  // give the path a name if still empty (normally not reached)
  if (!strnotempty(adrfil->fil))
    strcpybuff(adrfil->fil, "default-index.html");

  // lower-case the address, starting where jump_identification() points
  {
    char *a = jump_identification(adrfil->adr);

    while(*a) {
      if ((*a >= 'A') && (*a <= 'Z'))
        *a += 'a' - 'A';
      a++;
    }
  }

  return 0;
}
2439
2440 /* simplify ../ and ./ */
/*
 * Collapse "./" and "../" path components of f, in place.
 *
 * Components are only simplified before the first '?'; the query part is
 * copied verbatim. A "../" that climbs above the root resets the output to
 * the beginning of the path (just after the leading '/' if there was one).
 * If 127 slashes have been recorded, the path is treated as an error and
 * reset. An empty result becomes "/" (absolute input) or "./" (relative).
 */
void fil_simplifie(char *f) {
  char *marks[128];             /* output position right after each copied '/' */
  int depth = 0;                /* number of valid entries in marks[] */
  char prev = '/';              /* last character copied to the output */
  int in_query = 0;
  const int absolute = (f[0] == '/');
  char *src = f;
  char *dst = f;

  while (*src != '\0') {
    int at_component;

    if (*src == '?')
      in_query = 1;
    at_component = (!in_query && prev == '/');

    /* foo/./bar or ./foo */
    if (at_component && src[0] == '.' && src[1] == '/') {
      src += 2;
      continue;
    }

    /* foo/../bar or ../foo or .. */
    if (at_component && src[0] == '.' && src[1] == '.'
        && (src[2] == '/' || src[2] == '\0')) {
      src += (src[2] == '\0') ? 2 : 3;
      if (depth > 1) {
        depth--;
        dst = marks[depth - 1];
      } else {                  /* too many ../ */
        depth = 0;
        dst = absolute ? f + 1 : f;
      }
      continue;
    }

    /* regular character: copy it */
    prev = *src;
    *dst++ = prev;
    if (prev == '/') {
      marks[depth++] = dst;
      if (depth >= 127) {
        *f = '\0';              /* ERROR */
        break;
      }
    }
    src++;
  }
  *dst = '\0';

  if (*f == '\0') {
    if (absolute) {
      f[0] = '/';
      f[1] = '\0';
    } else {
      f[0] = '.';
      f[1] = '/';
      f[2] = '\0';
    }
  }
}
2492
2493 // fermer liaison fichier ou socket
/*
 * Close the connection held by r: releases the SSL connection (when built
 * with HTS_USEOPENSSL), then closes either the local file handle or the
 * socket, and finally marks r->soc as INVALID_SOCKET.
 */
void deletehttp(htsblk * r) {
#if HTS_DEBUG_CLOSESOCK
  DEBUG_W("deletehttp: (htsblk*) 0x%p\n" _(void *)r);
#endif
#if HTS_USEOPENSSL
  /* Free OpenSSL structures */
  if (r->ssl_con != NULL) {
    SSL_shutdown(r->ssl_con);
    SSL_free(r->ssl_con);
    r->ssl_con = NULL;
  }
#endif
  /* nothing else to do if there is no handle */
  if (r->soc == INVALID_SOCKET)
    return;

  if (!r->is_file) {
    /* real socket: the local pseudo-socket has nothing to close */
    if (r->soc != LOCAL_SOCKET_ID)
      deletesoc_r(r);
  } else {
    if (r->fp != NULL)
      fclose(r->fp);
    r->fp = NULL;
  }
  r->soc = INVALID_SOCKET;
}
2518
2519 // free the addr buffer
2520 // always returns 1
/*
 * Release the r->adr and r->headers buffers (if set) and reset both
 * pointers to NULL. Always returns 1.
 */
int deleteaddr(htsblk * r) {
  const int has_body = (r->adr != NULL);

  if (has_body) {
    freet(r->adr);
    r->adr = NULL;
  }
  if (r->headers != NULL) {
    freet(r->headers);
    r->headers = NULL;
  }
  return 1;
}
2532
2533 // fermer une socket
/*
 * Close a socket. INVALID_SOCKET and LOCAL_SOCKET_ID are ignored.
 * A failure to close is reported on stderr; nothing is returned.
 */
void deletesoc(T_SOC soc) {
  if (soc != INVALID_SOCKET && soc != LOCAL_SOCKET_ID) {
#if HTS_WIDE_DEBUG
    DEBUG_W("close %d\n" _(int) soc);
#endif
#ifdef _WIN32
    if (closesocket(soc) != 0) {
      int err = WSAGetLastError();

      /* cast: "%d" requires an int, T_SOC may be a wider type */
      fprintf(stderr, "* error closing socket %d: %s\n", (int) soc,
              strerror(err));
    }
#else
    if (close(soc) != 0) {
      const int err = errno;    /* save errno before any other call */

      /* cast: "%d" requires an int, T_SOC may be a wider type */
      fprintf(stderr, "* error closing socket %d: %s\n", (int) soc,
              strerror(err));
    }
#endif
#if HTS_WIDE_DEBUG
    DEBUG_W(".. done\n");
#endif
  }
}
2557
2558 /* Will also clean other things */
/*
 * Close the socket of r and clean the related state: the SSL connection is
 * shut down and freed first (HTS_USEOPENSSL builds), then the socket is
 * closed through deletesoc() and r->soc is set to INVALID_SOCKET.
 */
void deletesoc_r(htsblk * r) {
#if HTS_USEOPENSSL
  if (r->ssl_con != NULL) {
    SSL_shutdown(r->ssl_con);
    SSL_free(r->ssl_con);
    r->ssl_con = NULL;
  }
#endif
  if (r->soc == INVALID_SOCKET)
    return;
  deletesoc(r->soc);
  r->soc = INVALID_SOCKET;
}
2573
2574 // renvoi le nombre de secondes depuis 1970
time_local(void)2575 TStamp time_local(void) {
2576 return ((TStamp) time(NULL));
2577 }
2578
2579 // number of millisec since 1970
mtime_local(void)2580 HTSEXT_API TStamp mtime_local(void) {
2581 #ifndef HTS_DO_NOT_USE_FTIME
2582 #ifndef _WIN32
2583 struct timeval tv;
2584 if (gettimeofday(&tv, NULL) != 0) {
2585 assert(! "gettimeofday");
2586 }
2587
2588 return (TStamp) (((TStamp) tv.tv_sec * (TStamp) 1000)
2589 + ((TStamp) tv.tv_usec / (TStamp) 1000000));
2590 #else
2591 struct timeb B;
2592 ftime(&B);
2593 return (TStamp) (((TStamp) B.time * (TStamp) 1000)
2594 + ((TStamp) B.millitm));
2595 #endif
2596 #else
2597 // not precise..
2598 return (TStamp) (((TStamp) time_local() * (TStamp) 1000)
2599 + ((TStamp) 0));
2600 #endif
2601 }
2602
2603 // convertit un nombre de secondes en temps (chaine)
/*
 * Format a duration of t seconds as text ("N days, N hours N minutes
 * N seconds"), omitting the leading units that are zero. The result is
 * written to st with sprintf(); no size limit is applied here.
 */
void sec2str(char *st, TStamp t) {
  int days, hours, minutes, seconds;

  days = (int) (t / (3600 * 24));
  t -= ((TStamp) days) * (3600 * 24);
  hours = (int) (t / (3600));
  t -= ((TStamp) hours) * 3600;
  minutes = (int) (t / 60);
  t -= ((TStamp) minutes) * 60;
  seconds = (int) t;

  if (days > 0) {
    sprintf(st, "%d days, %d hours %d minutes %d seconds", days, hours,
            minutes, seconds);
    return;
  }
  if (hours > 0) {
    sprintf(st, "%d hours %d minutes %d seconds", hours, minutes, seconds);
    return;
  }
  if (minutes > 0) {
    sprintf(st, "%d minutes %d seconds", minutes, seconds);
    return;
  }
  sprintf(st, "%d seconds", seconds);
}
2624
2625 // idem, plus court (chaine)
qsec2str(char * st,TStamp t)2626 HTSEXT_API void qsec2str(char *st, TStamp t) {
2627 int j, h, m, s;
2628
2629 j = (int) (t / (3600 * 24));
2630 t -= ((TStamp) j) * (3600 * 24);
2631 h = (int) (t / (3600));
2632 t -= ((TStamp) h) * 3600;
2633 m = (int) (t / 60);
2634 t -= ((TStamp) m) * 60;
2635 s = (int) t;
2636
2637 if (j > 0)
2638 sprintf(st, "%dd,%02dh,%02dmin%02ds", j, h, m, s);
2639 else if (h > 0)
2640 sprintf(st, "%dh,%02dmin%02ds", h, m, s);
2641 else if (m > 0)
2642 sprintf(st, "%dmin%02ds", m, s);
2643 else
2644 sprintf(st, "%ds", s);
2645 }
2646
2647 // heure actuelle, GMT, format rfc (taille buffer 256o)
/*
 * Write the current time into s through time_rfc822() (which formats into
 * a 256-byte buffer). gmtime() is used; if it fails, localtime() is tried
 * instead.
 */
void time_gmt_rfc822(char *s) {
  const time_t now = time(NULL);
  struct tm *broken = gmtime(&now);

  if (broken == NULL)
    broken = localtime(&now);
  time_rfc822(s, broken);
}
2658
2659 // heure actuelle, format rfc (taille buffer 256o)
/*
 * Write the current local time into s through time_rfc822_local() (which
 * formats into a 256-byte buffer).
 */
void time_local_rfc822(char *s) {
  const time_t now = time(NULL);
  struct tm *const broken = localtime(&now);

  time_rfc822_local(s, broken);
}
2668
2669 /* convertir une chaine en temps */
/*
 * Parse a textual date into *result.
 *
 * The string is lower-cased, '-', ':' and ',' are turned into spaces, and
 * the space-separated tokens are classified:
 *   - a three-letter month abbreviation sets the month
 *   - the first number is the day, the next four numbers are kept in order
 *   - anything else is ignored
 * Two layouts are recognized, depending on whether the last number is a
 * four-digit year ("Sun Nov  6 08:49:37 1994") or not
 * ("Sun, 06 Nov 1994 08:49:37 GMT", "Sunday, 06-Nov-94 08:49:37 GMT").
 *
 * Returns result on success, NULL if the string is longer than 200
 * characters or if a field is missing.
 */
struct tm *convert_time_rfc822(struct tm *result, const char *s) {
  static const char months[] =
    "jan feb mar apr may jun jul aug sep oct nov dec";
  char str[256];
  char *a;

  /* parsed fields, -1 while unset */
  int result_mm = -1;
  int result_dd = -1;
  int result_n1 = -1;
  int result_n2 = -1;
  int result_n3 = -1;
  int result_n4 = -1;

  if ((int) strlen(s) > 200)
    return NULL;
  strcpybuff(str, s);
  hts_lowcase(str);
  /* replace the separators - : , by spaces */
  while((a = strchr(str, '-')))
    *a = ' ';
  while((a = strchr(str, ':')))
    *a = ' ';
  while((a = strchr(str, ',')))
    *a = ' ';
  /* tokenize */
  a = str;
  while(*a) {
    char *first, *last;
    char tok[256];

    /* isolate the next word */
    while(*a == ' ')
      a++;                      /* skip spaces */
    first = a;
    while((*a) && (*a != ' '))
      a++;
    last = a;
    tok[0] = '\0';
    if (first != last) {
      const char *pos = NULL;

      strncatbuff(tok, first, (int) (last - first));
      /* A month is exactly one of the three-letter entries of the table.
         Shorter tokens (such as a single letter) are substrings of the
         table too, but must not be taken for a month. */
      if ((last - first) == 3)
        pos = strstr(months, tok);
      if (pos != NULL && ((pos - months) % 4) == 0) {
        result_mm = ((int) (pos - months)) / 4;
      } else {
        int number;

        if (sscanf(tok, "%d", &number) == 1) {  /* number token */
          if (result_dd < 0)    /* day always first number */
            result_dd = number;
          else if (result_n1 < 0)
            result_n1 = number;
          else if (result_n2 < 0)
            result_n2 = number;
          else if (result_n3 < 0)
            result_n3 = number;
          else if (result_n4 < 0)
            result_n4 = number;
        }                       /* otherwise: noise, ignored */
      }
    }
  }
  if ((result_n1 >= 0) && (result_mm >= 0) && (result_dd >= 0)
      && (result_n2 >= 0) && (result_n3 >= 0) && (result_n4 >= 0)) {
    if (result_n4 >= 1000) {    /* Sun Nov  6 08:49:37 1994 */
      result->tm_year = result_n4 - 1900;
      result->tm_hour = result_n1;
      result->tm_min = result_n2;
      result->tm_sec = max(result_n3, 0);
    } else {                    /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
      result->tm_hour = result_n2;
      result->tm_min = result_n3;
      result->tm_sec = max(result_n4, 0);
      if (result_n1 <= 50)      /* 00 means 2000 */
        result->tm_year = result_n1 + 100;
      else if (result_n1 < 1000)        /* 99 means 1999 */
        result->tm_year = result_n1;
      else                      /* 2000 */
        result->tm_year = result_n1 - 1900;
    }
    result->tm_isdst = 0;       /* assume GMT */
    result->tm_yday = -1;       /* don't know */
    result->tm_wday = -1;       /* don't know */
    result->tm_mon = result_mm;
    result->tm_mday = result_dd;
    return result;
  }
  return NULL;
}
2762
getGMT(struct tm * tm)2763 static time_t getGMT(struct tm *tm) { /* hey, time_t is local! */
2764 time_t t = mktime(tm);
2765
2766 if (t != (time_t) - 1 && t != (time_t) 0) {
2767 /* BSD does not have static "timezone" declared */
2768 #if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__))
2769 time_t now = time(NULL);
2770 time_t timezone = -localtime(&now)->tm_gmtoff;
2771 #endif
2772 return (time_t) (t - timezone);
2773 }
2774 return (time_t) - 1;
2775 }
2776
2777 /* sets file time. -1 if error */
2778 /* Note: utf-8 */
set_filetime(const char * file,struct tm * tm_time)2779 int set_filetime(const char *file, struct tm *tm_time) {
2780 time_t t = getGMT(tm_time);
2781
2782 if (t != (time_t) - 1) {
2783 STRUCT_UTIMBUF tim;
2784
2785 memset(&tim, 0, sizeof(tim));
2786 tim.actime = tim.modtime = t;
2787 return UTIME(file, &tim);
2788 }
2789 return -1;
2790 }
2791
2792 /* sets file time from RFC822 date+time, -1 if error*/
2793 /* Note: utf-8 */
/*
 * Set the time of file from a textual date parsed by
 * convert_time_rfc822(). Returns -1 if the date cannot be parsed,
 * otherwise the result of set_filetime().
 * Note: file name is utf-8.
 */
int set_filetime_rfc822(const char *file, const char *date) {
  struct tm parsed;

  if (convert_time_rfc822(&parsed, date) == NULL)
    return -1;
  return set_filetime(file, &parsed);
}
2803
2804 /* Note: utf-8 */
get_filetime_rfc822(const char * file,char * date)2805 int get_filetime_rfc822(const char *file, char *date) {
2806 STRUCT_STAT buf;
2807
2808 date[0] = '\0';
2809 if (STAT(file, &buf) == 0) {
2810 struct tm *A;
2811 time_t tt = buf.st_mtime;
2812
2813 A = gmtime(&tt);
2814 if (A == NULL)
2815 A = localtime(&tt);
2816 if (A != NULL) {
2817 time_rfc822(date, A);
2818 return 1;
2819 }
2820 }
2821 return 0;
2822 }
2823
2824 // heure au format rfc (taille buffer 256o)
/*
 * Format *A into s as "Day, DD Mon YYYY HH:MM:SS GMT" using strftime()
 * with a 256-byte limit (s must be at least that large).
 * A NULL A triggers assertf().
 */
void time_rfc822(char *s, struct tm *A) {
  static const char format[] = "%a, %d %b %Y %H:%M:%S GMT";

  if (A == NULL) {
    /* the variable name is what the assertion reports */
    int localtime_returned_null = 0;

    assertf(localtime_returned_null);
  }
  strftime(s, 256, format, A);
}
2833
2834 // heure locale au format rfc (taille buffer 256o)
/*
 * Format *A into s as "Day, DD Mon YYYY HH:MM:SS" (no zone suffix) using
 * strftime() with a 256-byte limit (s must be at least that large).
 * A NULL A triggers assertf().
 */
void time_rfc822_local(char *s, struct tm *A) {
  static const char format[] = "%a, %d %b %Y %H:%M:%S";

  if (A == NULL) {
    /* the variable name is what the assertion reports */
    int localtime_returned_null = 0;

    assertf(localtime_returned_null);
  }
  strftime(s, 256, format, A);
}
2843
2844 // conversion en b,Kb,Mb
int2bytes(strc_int2bytes2 * strc,LLint n)2845 HTSEXT_API char *int2bytes(strc_int2bytes2 * strc, LLint n) {
2846 char **a = int2bytes2(strc, n);
2847
2848 strcpybuff(strc->catbuff, a[0]);
2849 strcatbuff(strc->catbuff, a[1]);
2850 return strc->catbuff;
2851 }
2852
2853 // conversion en b/s,Kb/s,Mb/s
int2bytessec(strc_int2bytes2 * strc,long int n)2854 HTSEXT_API char *int2bytessec(strc_int2bytes2 * strc, long int n) {
2855 char buff[256];
2856 char **a = int2bytes2(strc, n);
2857
2858 strcpybuff(buff, a[0]);
2859 strcatbuff(buff, a[1]);
2860 return concat(strc->catbuff, sizeof(strc->catbuff), buff, "/s");
2861 }
int2char(strc_int2bytes2 * strc,int n)2862 HTSEXT_API char *int2char(strc_int2bytes2 * strc, int n) {
2863 sprintf(strc->buff2, "%d", n);
2864 return strc->buff2;
2865 }
2866
2867 // conversion en b,Kb,Mb, nombre et type séparés
2868 // limite: 2.10^9.10^6B
2869
2870 /* See http://physics.nist.gov/cuu/Units/binary.html */
2871 #define ToLLint(a) ((LLint)(a))
2872 #define ToLLintKiB (ToLLint(1024))
2873 #define ToLLintMiB (ToLLintKiB*ToLLintKiB)
2874 #ifdef HTS_LONGLONG
2875 #define ToLLintGiB (ToLLintKiB*ToLLintKiB*ToLLintKiB)
2876 #define ToLLintTiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
2877 #define ToLLintPiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
2878 #endif
int2bytes2(strc_int2bytes2 * strc,LLint n)2879 HTSEXT_API char **int2bytes2(strc_int2bytes2 * strc, LLint n) {
2880 if (n < ToLLintKiB) {
2881 sprintf(strc->buff1, "%d", (int) (LLint) n);
2882 strcpybuff(strc->buff2, "B");
2883 } else if (n < ToLLintMiB) {
2884 sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / ToLLintKiB)),
2885 (int) ((LLint) ((n % ToLLintKiB) * 100) / ToLLintKiB));
2886 strcpybuff(strc->buff2, "KiB");
2887 }
2888 #ifdef HTS_LONGLONG
2889 else if (n < ToLLintGiB) {
2890 sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintMiB))),
2891 (int) ((LLint) (((n % (ToLLintMiB)) * 100) / (ToLLintMiB))));
2892 strcpybuff(strc->buff2, "MiB");
2893 } else if (n < ToLLintTiB) {
2894 sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintGiB))),
2895 (int) ((LLint) (((n % (ToLLintGiB)) * 100) / (ToLLintGiB))));
2896 strcpybuff(strc->buff2, "GiB");
2897 } else if (n < ToLLintPiB) {
2898 sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintTiB))),
2899 (int) ((LLint) (((n % (ToLLintTiB)) * 100) / (ToLLintTiB))));
2900 strcpybuff(strc->buff2, "TiB");
2901 } else {
2902 sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintPiB))),
2903 (int) ((LLint) (((n % (ToLLintPiB)) * 100) / (ToLLintPiB))));
2904 strcpybuff(strc->buff2, "PiB");
2905 }
2906 #else
2907 else {
2908 sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintMiB))),
2909 (int) ((LLint) (((n % (ToLLintMiB)) * 100) / (ToLLintMiB))));
2910 strcpybuff(strc->buff2, "MiB");
2911 }
2912 #endif
2913 strc->buffadr[0] = strc->buff1;
2914 strc->buffadr[1] = strc->buff2;
2915 return strc->buffadr;
2916 }
2917
2918 #ifdef _WIN32
2919 #else
2920 // ignore sigpipe?
/*
 * Get, and optionally set, a process-wide flag kept in a function-local
 * static variable. A negative setflag only reads the flag; any other value
 * is stored first. Returns the (possibly updated) flag.
 */
int sig_ignore_flag(int setflag) {
  static int ignore_state = 0;  /* intentionally static: persists between calls */

  if (setflag < 0)
    return ignore_state;
  ignore_state = setflag;
  return ignore_state;
}
2928 #endif
2929
2930 // envoi de texte (en têtes généralement) sur la socket soc
/*
 * Send the string s on the connection of r, through SSL_write() when r->ssl
 * is set (HTS_USEOPENSSL builds) or send() otherwise. On non-Windows builds
 * sig_ignore_flag() is raised around the call.
 * Returns the number of bytes sent if the whole string was sent, else -1.
 */
int sendc(htsblk * r, const char *s) {
  const int ssz = (int) strlen(s);
  int n;

#ifndef _WIN32
  sig_ignore_flag(1);
#endif
#if HDEBUG
  write(0, s, ssz);
#endif

#if HTS_USEOPENSSL
  if (r->ssl) {
    n = SSL_write(r->ssl_con, s, ssz);
  } else
#endif
    n = send(r->soc, s, ssz, 0);

#ifndef _WIN32
  sig_ignore_flag(0);
#endif

  /* a partial send is reported as a failure */
  if (n != ssz)
    return -1;
  return n;
}
2956
2957 // Remplace read
/*
 * Read one line from the descriptor fd into s, one byte at a time.
 *
 * Reading stops at a line feed (not stored), at a NUL byte, when read()
 * fails or returns 0, or when max - 1 characters have been stored.
 * Carriage returns are skipped. s is always NUL-terminated when max > 0.
 *
 * max is the size of s. If max <= 0 nothing is read or written; if
 * max == 1 only the terminator is written.
 *
 * Returns the number of characters stored.
 */
int finput(T_SOC fd, char *s, int max) {
  int j = 0;

  if (max <= 0)
    return 0;

  /* check the remaining room BEFORE reading, so that nothing is ever
     stored past s[max - 2] and the terminator fits in s[max - 1] */
  while(j < max - 1) {
    char c;

    if (read((int) fd, &c, 1) <= 0)
      break;                    /* error or end of stream */
    if (c == 0 || c == 10)
      break;                    /* NUL or LF: end of line */
    if (c != 13)                /* skip CR */
      s[j++] = c;
  }
  s[j] = '\0';
  return j;
}
2983
2984 // Like linput, but in memory (optimized)
/*
 * In-memory line reader: copy one line from buff into s.
 *
 * Copying stops at a line feed, at the end of the string, or once max
 * characters have been copied; carriage returns are dropped. The
 * terminator is written after the copied characters, so s must have room
 * for max + 1 bytes. A NULL buff yields an empty line.
 *
 * Returns the number of source bytes consumed plus one (the offset to add
 * to buff to skip the line and its terminator; an empty input returns 1).
 */
int binput(char *buff, char *s, int max) {
  int in = 0;
  int out = 0;

  if (buff != NULL) {
    for (; out < max; in++) {
      const char ch = buff[in];

      if (ch == '\0' || ch == '\n')
        break;
      if (ch != '\r')
        s[out++] = ch;
    }
  }
  s[out] = '\0';

  // then return the supplemental jump offset
  return in + 1;
}
3002
3003 // Lecture d'une ligne (peut être unicode à priori)
/*
 * Read one line from fp into s.
 *
 * Reading stops at a line feed (not stored), at end of file, or when
 * max - 1 characters have been stored. Carriage returns, tabs and form
 * feeds are skipped. s is always NUL-terminated when max > 0.
 *
 * max is the size of s. If max <= 0 nothing is read or written; if
 * max == 1 only the terminator is written.
 *
 * Returns the number of characters stored.
 */
int linput(FILE * fp, char *s, int max) {
  int j = 0;

  if (max <= 0)
    return 0;

  /* check the remaining room BEFORE reading, so that nothing is ever
     stored past s[max - 2] and the terminator fits in s[max - 1] */
  while(j < max - 1) {
    const int c = fgetc(fp);

    if (c == EOF || c == 10)
      break;                    /* end of file or LF: end of line */
    if (c == 13 || c == 9 || c == 12)
      continue;                 /* skip CR, TAB and FF */
    s[j++] = (char) c;
  }
  s[j] = '\0';
  return j;
}
/*
 * Read one line from the socket soc into s, one byte at a time.
 *
 * Reading stops at a line feed (not stored), when recv() does not deliver
 * exactly one byte, or when max - 1 characters have been stored. Carriage
 * returns, tabs and form feeds are skipped. s is always NUL-terminated
 * when max > 0.
 *
 * max is the size of s. If max <= 0 nothing is read or written; if
 * max == 1 only the terminator is written.
 *
 * Returns the number of characters stored.
 */
int linputsoc(T_SOC soc, char *s, int max) {
  int j = 0;

  if (max <= 0)
    return 0;

  /* check the remaining room BEFORE reading, so that nothing is ever
     stored past s[max - 2] and the terminator fits in s[max - 1] */
  while(j < max - 1) {
    unsigned char ch;

    if (recv(soc, &ch, 1, 0) != 1)
      break;                    /* error or connection closed */
    if (ch == 10)
      break;                    /* LF: end of line */
    if (ch == 13 || ch == 9 || ch == 12)
      continue;                 /* skip CR, TAB and FF */
    s[j++] = (char) ch;
  }
  s[j] = '\0';
  return j;
}
/*
 * Same as linputsoc(), but the line is only read if
 * check_readinput_t(soc, timeout) reports success; otherwise -1 is
 * returned and s is left untouched.
 */
int linputsoc_t(T_SOC soc, char *s, int max, int timeout) {
  if (!check_readinput_t(soc, timeout))
    return -1;
  return linputsoc(soc, s, max);
}
/*
 * Read one line from fp with linput() and copy it into s without its
 * leading and trailing spaces and tabs.
 *
 * The line is read into a temporary buffer of max + 1 bytes. s is set to
 * the empty string first, and stays empty if the allocation fails or if
 * the trimmed line is empty.
 *
 * Returns the length of the trimmed line.
 */
int linput_trim(FILE * fp, char *s, int max) {
  int rlen = 0;
  char *line = (char *) malloct(max + 1);

  s[0] = '\0';
  if (line == NULL)
    return rlen;

  // read the raw line
  rlen = linput(fp, line, max);
  if (rlen) {
    char *start = line;

    // strip trailing spaces and tabs
    while(rlen > 0 && (line[rlen - 1] == ' ' || line[rlen - 1] == '\t'))
      line[--rlen] = '\0';
    // skip leading spaces and tabs
    while(rlen > 0 && (*start == ' ' || *start == '\t')) {
      start++;
      rlen--;
    }
    if (rlen > 0) {
      memcpy(s, start, rlen);   // can copy \0 chars
      s[rlen] = '\0';
    }
  }
  freet(line);
  return rlen;
}
/*
 * Read a logical line from fp, joining physical lines that end with a
 * backslash: the trailing backslash is removed and the next line (read
 * with linput_trim()) is appended, as long as the accumulated length stays
 * below max.
 *
 * Returns the total length stored in s.
 */
int linput_cpp(FILE * fp, char *s, int max) {
  int total = 0;

  s[0] = '\0';
  for (;;) {
    int got;

    // cut the final backslash of the previous chunk
    if (total > 0 && s[total - 1] == '\\')
      s[--total] = '\0';

    // read the next physical line right after what we already have
    got = linput_trim(fp, s + total, max - total);
    if (got > 0)
      total += got;

    // stop unless the line continues and there is still room
    if (s[total > 0 ? total - 1 : 0] != '\\' || total >= max)
      break;
  }
  return total;
}
3115
3116 // idem avec les car spéciaux
/*
 * Read one line from fp into s, keeping special characters: unlike
 * linput(), tabs and form feeds are stored. Only carriage returns are
 * skipped.
 *
 * Reading stops at a line feed (not stored), at end of file, or when
 * max - 1 characters have been stored. s is always NUL-terminated when
 * max > 0.
 *
 * max is the size of s. If max <= 0 nothing is read or written; if
 * max == 1 only the terminator is written.
 */
void rawlinput(FILE * fp, char *s, int max) {
  int j = 0;

  if (max <= 0)
    return;

  /* check the remaining room BEFORE reading, so that nothing is ever
     stored past s[max - 2] and the terminator fits in s[max - 1] */
  while(j < max - 1) {
    const int c = fgetc(fp);

    if (c == EOF || c == 10)
      break;                    /* end of file or LF: end of line */
    if (c == 13)
      continue;                 /* skip CR */
    s[j++] = (char) c;
  }
  s[j] = '\0';
}
3138
3139 //cherche chaine, case insensitive
/*
 * Search o inside s using strfield() as the matching test at each
 * position (the leading comment of this function calls it a
 * case-insensitive search).
 * Returns a pointer to the first position of s where strfield() reports a
 * match, or NULL if the end of s is reached without one.
 */
const char *strstrcase(const char *s, const char *o) {
  const char *cur;

  for (cur = s; *cur != '\0'; cur++) {
    if (strfield(cur, o) != 0)
      return cur;
  }
  return NULL;
}
3147
3148 // Unicode detector
3149 // See http://www.unicode.org/unicode/reports/tr28/
3150 // (sect Table 3.1B. Legal UTF-8 Byte Sequences)
/* Bytes of the UTF-8 sequence being decoded: data[0..pos] are filled. */
typedef struct {
  unsigned int pos;
  unsigned char data[4];
} t_auto_seq;

/*
 * The macros below are building blocks for the byte-sequence table in
 * is_unicode_utf8(). They rely on the local variables "seq", "inseq", "ok"
 * and "err" of that function, and on the left-to-right, short-circuit
 * evaluation of "&&".
 */

// true if c lies in the hexadecimal range [a, b] (a and b given without 0x)
#define CHAR_BETWEEN(c, a, b) ( (c) >= 0x##a ) && ( (c) <= 0x##b )
// true as long as no table row has accepted the first byte yet
#define SEQBEG ( inseq == 0 )
// byte n is available and lies in [a, b]; the range test result is stored
// in err. When byte n is not available yet, err keeps its previous value.
#define BLK(n,a, b) ( (seq.pos >= n) && ((err = CHAR_BETWEEN(seq.data[n], a, b))) )
// byte n is available and equals a
#define ELT(n,a) BLK(n,a,a)
// the whole sequence matched: sets ok (always true)
#define SEQEND ((ok = 1))
// the first byte matched this row: sets inseq (always true), which prevents
// the following rows from being tried
#define IN_SEQ ( (inseq = 1) )
// a row accepted the first byte, the sequence is not complete, and the last
// range test performed failed
#define BAD_SEQ ( (ok == 0) && (inseq != 0) && (!err) )
// no row accepted the first byte
#define NO_SEQ ( inseq == 0 )

/*
 * Check whether a buffer looks like UTF-8 text.
 *
 * Each input byte is appended to the current sequence, which is then
 * matched against the table of legal UTF-8 byte sequences below. The
 * sequence is reset once a row matches completely.
 *
 * Returns:
 *    0 : not UTF-8 (a byte does not fit any legal sequence)
 *    1 : no illegal byte was found, and at least one byte was accepted at
 *        a position other than the first of its sequence
 *   -1 : don't know (no illegal byte, but no multi-byte sequence either;
 *        this is also the result for an empty buffer)
 *
 * A sequence that is still incomplete when the buffer ends is not reported
 * as an error.
 */
int is_unicode_utf8(const char *buffer_, const size_t size) {
  const unsigned char *buffer = (const unsigned char *) buffer_;
  t_auto_seq seq;
  size_t i;
  int is_utf = -1;

  RUNTIME_TIME_CHECK_SIZE(size);

  seq.pos = 0;
  for(i = 0; i < size; i++) {
    /* per-byte state, set as side effects of the table macros */
    unsigned int ok = 0;        /* a row matched completely */
    unsigned int inseq = 0;     /* a row accepted the first byte */
    unsigned int err = 0;       /* result of the last range test performed */

    seq.data[seq.pos] = buffer[i];
    /* Table of legal sequences; the first row accepting byte 0 wins */
    /**/ if (SEQBEG && BLK(0, 00, 7F) && IN_SEQ && SEQEND) {
    } else if (SEQBEG && BLK(0, C2, DF) && IN_SEQ && BLK(1, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, E0) && IN_SEQ && BLK(1, A0, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, E1, EC) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, ED) && IN_SEQ && BLK(1, 80, 9F)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, EE, EF) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, F0) && IN_SEQ && BLK(1, 90, BF)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, F1, F3) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, F4) && IN_SEQ && BLK(1, 80, 8F)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (NO_SEQ) {        // bad, unknown first byte
      return 0;
    }

    /* Error: a following byte is out of range */
    if (BAD_SEQ) {
      return 0;
    }

    /* we are past the first byte of a sequence: multi-byte character */
    if (seq.pos > 0)
      is_utf = 1;

    /* Next: restart after a complete sequence, else wait for more bytes */
    if (ok)
      seq.pos = 0;
    else
      seq.pos++;

    /* Internal error: never more than 4 bytes in a sequence */
    if (seq.pos >= 4)
      return 0;

  }

  return is_utf;
}
3235
/*
 * Build a byte histogram: map[v] receives the number of bytes of value v
 * among the first size bytes of buffer. map must have 256 entries; it is
 * cleared first.
 */
void map_characters(unsigned char *buffer, unsigned int size, unsigned int *map) {
  const unsigned char *cursor = buffer;
  unsigned int remaining;

  memset(map, 0, sizeof(unsigned int) * 256);
  for (remaining = size; remaining != 0; remaining--) {
    map[*cursor] += 1;
    cursor++;
  }
}
3244
3245 // le fichier est-il un fichier html?
3246 // 0 : non
3247 // 1 : oui
3248 // -1 : on sait pas
3249 // -2 : on sait pas, pas d'extension
/*
 * Tell whether the file name fil designates an HTML file.
 *
 * The query string is ignored. User-defined MIME types
 * (get_userhttptype()) take precedence; otherwise the extension is checked
 * with ishtml_ext() and, if unknown there, with is_knowntype().
 *
 * Returns:
 *    1 : html
 *    0 : not html
 *   -1 : unknown
 *   -2 : unknown, no extension (or empty name)
 */
int ishtml(httrackp * opt, const char *fil) {
  char BIGSTK fil_noquery[HTS_URLMAXSIZE * 2];
  char mime[256];
  char *a;

  /* work on a copy without the query string */
  strcpybuff(fil_noquery, fil);
  a = strchr(fil_noquery, '?');
  if (a != NULL)
    *a = '\0';

  /* User-defined MIME types (overrides ishtml()) */
  if (get_userhttptype(opt, mime, fil_noquery))
    return is_html_mime_type(mime) ? 1 : 0;

  if (!strnotempty(fil_noquery))
    return -2;

  /* walk back to the last '.' or '/', or to the start of the name */
  a = fil_noquery + strlen(fil_noquery) - 1;
  while(*a != '.' && *a != '/' && a > fil_noquery)
    a--;
  if (*a != '.')
    return -2;                  // undetermined, for example /foo

  /* the name has an extension */
  {
    char BIGSTK ext[HTS_URLMAXSIZE * 2];
    char *const dotted = a;     /* extension including its dot */
    char *q;
    int ret;

    ext[0] = '\0';
    strncatbuff(ext, dotted + 1, HTS_URLMAXSIZE);
    q = strchr(ext, '?');
    if (q != NULL)
      *q = '\0';

    ret = ishtml_ext(ext);
    if (ret == -1) {
      const int known = is_knowntype(opt, dotted);

      if (known == 1)
        ret = 0;                // known, not html
      else if (known == 2)
        ret = 1;                // known, html
      else
        ret = -1;               // unknown
    }
    return ret;
  }
}
3305
3306 // idem, mais pour uniquement l'extension
/*
 * Same as ishtml(), but for a bare extension (without the dot).
 * Returns 1 if a is one of the HTML extensions listed below (compared with
 * strfield2()), -1 (unknown) otherwise.
 */
int ishtml_ext(const char *a) {
  static const char *const html_exts[] = {
    "html", "htm", "shtml", "phtml", "htmlx", "shtm", "phtm", "htmx"
  };
  size_t k;

  for (k = 0; k < sizeof(html_exts) / sizeof(html_exts[0]); k++) {
    if (strfield2(a, html_exts[k]))
      return 1;
  }
  return -1;                    // unknown
}
3349
3350 // error (404,500..)
/*
 * Tell whether an HTTP status code is an error: returns 1 for codes whose
 * hundreds digit is 4 or 5 (400..599), 0 otherwise.
 */
int ishttperror(int err) {
  const int status_class = err / 100;

  return (status_class == 4 || status_class == 5) ? 1 : 0;
}
3360
/*
 * Declare a non-const version of FUN.
 *
 * Expands to the definition of "char *FUN(char *source)", which calls
 * FUN_const(source) and converts the returned const pointer back into a
 * pointer inside the caller's writable buffer (same offset from source).
 * NULL is passed through unchanged.
 */
#define DECLARE_NON_CONST_VERSION(FUN) \
char *FUN(char *source) { \
  const char *const ret = FUN ##_const(source); \
  return ret != NULL ? source + ( ret - source ) : NULL; \
}
3367
3368 // retourne le pointeur ou le pointeur + offset si il existe dans la chaine un @ signifiant
3369 // une identification
jump_identification_const(const char * source)3370 HTSEXT_API const char *jump_identification_const(const char *source) {
3371 const char *a, *trytofind;
3372
3373 if (strcmp(source, "file://") == 0)
3374 return source;
3375 // rechercher dernier @ (car parfois email transmise dans adresse!)
3376 // mais sauter ftp:// éventuel
3377 a = jump_protocol_const(source);
3378 trytofind = strrchr_limit(a, '@', strchr(a, '/'));
3379 return trytofind != NULL ? trytofind : a;
3380 }
3381
DECLARE_NON_CONST_VERSION(jump_identification)3382 HTSEXT_API DECLARE_NON_CONST_VERSION(jump_identification)
3383
3384 HTSEXT_API const char *jump_normalized_const(const char *source) {
3385 if (strcmp(source, "file://") == 0)
3386 return source;
3387 source = jump_identification_const(source);
3388 if (strfield(source, "www") && source[3] != '\0') {
3389 if (source[3] == '.') { // www.foo.com -> foo.com
3390 source += 4;
3391 } else { // www-4.foo.com -> foo.com
3392 const char *a = source + 3;
3393
3394 while(*a && (isdigit(*a) || *a == '-'))
3395 a++;
3396 if (*a == '.') {
3397 source = a + 1;
3398 }
3399 }
3400 }
3401 return source;
3402 }
3403
DECLARE_NON_CONST_VERSION(jump_normalized)3404 HTSEXT_API DECLARE_NON_CONST_VERSION(jump_normalized)
3405
/* qsort() comparator used by fil_normalized(): each element is a pointer to a
   string whose first character is the separator slot; the text after it is
   compared with strcmp(). */
static int sortNormFnc(const void *a_, const void *b_) {
  const char *const lhs = *(const char *const *) a_;
  const char *const rhs = *(const char *const *) b_;

  return strcmp(lhs + 1, rhs + 1);
}
3412
/* Normalize a path (with optional query string) from 'source' into 'dest',
   and return 'dest':
   - before the query, runs of '/' are collapsed (foo//bar -> foo/bar);
   - when the query holds more than one argument, the arguments are sorted, so
     that ?foo=1&bar=2 and ?bar=2&foo=1 give the same result.
   At most strlen(source) + 1 bytes are written to 'dest'; its capacity is not
   checked here. */
HTSEXT_API char *fil_normalized(const char *source, char *dest) {
  char lastc = 0;               /* previous source character */
  int gotquery = 0;             /* set once the first '?' has been seen */
  int ampargs = 0;              /* number of query arguments ('?' starts the first one) */
  size_t i, j;
  char *query = NULL;           /* position of the '?' inside dest */

  /* Pass 1: copy, dropping duplicated slashes and counting arguments */
  for(i = j = 0; source[i] != '\0'; i++) {
    if (!gotquery && source[i] == '?')
      gotquery = ampargs = 1;
    if ((!gotquery && lastc == '/' && source[i] == '/') // foo//bar -> foo/bar
      ) {
    } else {
      if (gotquery && source[i] == '&') {
        ampargs++;
      }
      dest[j++] = source[i];
    }
    lastc = source[i];
  }
  dest[j++] = '\0';

  /* Sort arguments (&foo=1&bar=2 == &bar=2&foo=1) */
  if (ampargs > 1) {
    char **amps = malloct(ampargs * sizeof(char *));
    char *copyBuff = NULL;
    size_t qLen = 0;

    assertf(amps != NULL);
    /* Pass 2: record where each argument starts, and turn every separator
       ('?' or '&') into a terminator so each argument is its own string */
    gotquery = 0;
    for(i = j = 0; dest[i] != '\0'; i++) {
      if ((gotquery && dest[i] == '&') || (!gotquery && dest[i] == '?')) {
        if (!gotquery) {
          gotquery = 1;
          query = &dest[i];
          /* measured before the '?' is overwritten: full query length */
          qLen = strlen(query);
        }
        assertf(j < ampargs);
        amps[j++] = &dest[i];
        dest[i] = '\0';
      }
    }
    assertf(gotquery);
    assertf(j == ampargs);

    /* Sort 'em all */
    qsort(amps, ampargs, sizeof(char *), sortNormFnc);

    /* Replace query by sorted query */
    copyBuff = malloct(qLen + 1);
    assertf(copyBuff != NULL);
    copyBuff[0] = '\0';
    for(i = 0; i < ampargs; i++) {
      if (i == 0)
        strcatbuff(copyBuff, "?");
      else
        strcatbuff(copyBuff, "&");
      /* + 1: skip the slot that used to hold the separator */
      strcatbuff(copyBuff, amps[i] + 1);
    }
    /* same arguments, same separators count: the length cannot change */
    assertf(strlen(copyBuff) == qLen);
    strcpybuff(query, copyBuff);

    /* Cleanup */
    freet(amps);
    freet(copyBuff);
  }

  return dest;
}
3482
3483 #define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 );
adr_normalized(const char * source,char * dest)3484 HTSEXT_API char *adr_normalized(const char *source, char *dest) {
3485 /* not yet too aggressive (no com<->net<->org checkings) */
3486 strcpybuff(dest, jump_normalized_const(source));
3487 return dest;
3488 }
3489
3490 #undef endwith
3491
3492 // find port (:80) or NULL if not found
3493 // can handle IPV6 addresses
jump_toport_const(const char * source)3494 HTSEXT_API const char *jump_toport_const(const char *source) {
3495 const char *a, *trytofind;
3496
3497 a = jump_identification_const(source);
3498 trytofind = strrchr_limit(a, ']', strchr(source, '/')); // find last ] (http://[3ffe:b80:1234::1]:80/foo.html)
3499 a = strchr((trytofind) ? trytofind : a, ':');
3500 return a;
3501 }
3502
DECLARE_NON_CONST_VERSION(jump_toport)3503 HTSEXT_API DECLARE_NON_CONST_VERSION(jump_toport)
3504
// strrchr, but not too far: returns a pointer just PAST the last occurrence
// of 'c' in 's' that is located strictly before 'limit', or NULL if there is
// none. With limit == NULL the whole string is searched.
const char *strrchr_limit(const char *s, char c, const char *limit) {
  const char *past_last = NULL;
  const char *hit;

  if (limit == NULL) {
    hit = strrchr(s, c);
    return hit != NULL ? hit + 1 : NULL;
  }
  // walk from match to match; NULL is tested before the relational
  // comparison, which is not defined for a null pointer
  for(hit = strchr(s, c); hit != NULL && hit < limit;
      hit = strchr(hit + 1, c)) {
    past_last = hit + 1;
  }
  return past_last;
}
3522
3523 // retourner adr sans ftp://
jump_protocol_const(const char * source)3524 const char *jump_protocol_const(const char *source) {
3525 int p;
3526
3527 // scheme
3528 // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
3529 if ((p = strfield(source, "http:")))
3530 source += p;
3531 else if ((p = strfield(source, "ftp:")))
3532 source += p;
3533 else if ((p = strfield(source, "https:")))
3534 source += p;
3535 else if ((p = strfield(source, "file:")))
3536 source += p;
3537 // net_path
3538 if (strncmp(source, "//", 2) == 0)
3539 source += 2;
3540 return source;
3541 }
3542
DECLARE_NON_CONST_VERSION(jump_protocol)3543 DECLARE_NON_CONST_VERSION(jump_protocol)
3544
// Base 64 encoding of the 'size_a' bytes at 'a' into 'b' (NUL-terminated,
// '=' padded). When 'crlf' is non-zero, "\r\n" is emitted after every 60
// input bytes (80 output characters).
// 'b' must be large enough; no capacity check is done here.
void code64(unsigned char *a, int size_a, unsigned char *b, int crlf) {
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  int remaining = size_a;
  int consumed = 0;

  while(remaining > 0) {
    const int chunk = remaining >= 3 ? 3 : remaining;
    unsigned long int triple = 0;
    int k;

    // pack up to 3 bytes in 24 bits, missing bytes being zero bits
    for(k = 0; k < 3; k++) {
      triple <<= 8;
      if (k < chunk)
        triple |= a[k];
    }
    a += chunk;
    remaining -= chunk;

    // 4 sextets, the unused ones being replaced by padding
    *b++ = alphabet[(triple >> 18) & 63];
    *b++ = alphabet[(triple >> 12) & 63];
    *b++ = chunk >= 2 ? alphabet[(triple >> 6) & 63] : '=';
    *b++ = chunk >= 3 ? alphabet[triple & 63] : '=';

    if (crlf) {
      consumed += 3;
      if (consumed % 60 == 0) {
        *b++ = '\r';
        *b++ = '\n';
      }
    }
  }
  *b = '\0';
}
3601
3602 // return the hex character value, or -1 on error.
ehexh(const char c)3603 static HTS_INLINE int ehexh(const char c) {
3604 if (c >= '0' && c <= '9')
3605 return c - '0';
3606 else if (c >= 'a' && c <= 'f')
3607 return (c - 'a' + 10);
3608 else if (c >= 'A' && c <= 'F')
3609 return (c - 'A' + 10);
3610 else
3611 return -1;
3612 }
3613
3614 // return the two-hex character value, or -1 on error.
ehex(const char * s)3615 static HTS_INLINE int ehex(const char *s) {
3616 const int c1 = ehexh(s[0]);
3617 if (c1 >= 0) {
3618 const int c2 = ehexh(s[1]);
3619 if (c2 >= 0) {
3620 return 16*c1 + c2;
3621 }
3622 }
3623 return -1;
3624 }
3625
// Decode entities in 's', in place, through hts_unescapeEntities();
// a non-zero result from it is treated as a fatal error.
void unescape_amp(char *s) {
  const size_t capacity = strlen(s) + 1;

  if (hts_unescapeEntities(s, s, capacity) != 0) {
    assertf(! "error escaping html entities");
  }
}
3631
3632 // remplacer %20 par ' ', etc..
3633 // buffer MAX 1Ko
unescape_http(char * const catbuff,const size_t size,const char * const s)3634 HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const char *const s) {
3635 size_t i, j;
3636
3637 RUNTIME_TIME_CHECK_SIZE(size);
3638
3639 for(i = 0, j = 0; s[i] != '\0' && j + 1 < size ; i++) {
3640 int h;
3641 if (s[i] == '%' && (h = ehex(&s[i + 1])) >= 0) {
3642 catbuff[j++] = (char) h;
3643 i += 2;
3644 }
3645 else
3646 catbuff[j++] = s[i];
3647 }
3648 catbuff[j++] = '\0';
3649 return catbuff;
3650 }
3651
// unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI
// DOES NOT DECODE %25 (part of CHAR_DELIM)
// A %XX sequence is kept escaped when the decoded character is reserved
// (except '+'), a delimiter, unwise, low, or to be avoided; everything else
// is decoded. 'no_high' tunes two cases:
// no_high & 1: do NOT decode high chars (CHAR_HIG), keep them escaped
// no_high & 2: decode space (%20 -> ' ')
// Output goes to 'catbuff' ('size' bytes, terminator included) and is
// truncated to fit. Returns 'catbuff'.
HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size,
                                      const char *s, const int no_high) {
  size_t i, j;

  RUNTIME_TIME_CHECK_SIZE(size);

  for(i = 0, j = 0; s[i] != '\0' && j + 1 < size ; i++) {
    if (s[i] == '%') {
      /* decoded value, or -1 if not followed by two hexadecimal digits */
      const int nchar = ehex(&s[i + 1]);

      /* non-zero when the sequence must stay escaped */
      const int test =
        ( CHAR_RESERVED(nchar) && nchar != '+' )  /* %2B => + (not in query!) */
        || CHAR_DELIM(nchar)
        || CHAR_UNWISE(nchar)
        || CHAR_LOW(nchar)        /* CHAR_SPECIAL */
        || ( CHAR_XXAVOID(nchar) && ( nchar != ' ' || ( no_high & 2) == 0 ) )
        || ( ( no_high & 1 ) && CHAR_HIG(nchar) )
        ;

      if (!test && nchar >= 0) {        /* can safely unescape */
        catbuff[j++] = (char) nchar;
        i += 2;
      } else {
        /* keep the '%'; the characters after it are copied as ordinary
           characters by the next iterations */
        catbuff[j++] = '%';
      }
    } else {
      catbuff[j++] = s[i];
    }
  }
  catbuff[j++] = '\0';
  return catbuff;
}
3688
// All the functions below forward to x_escape_http() with a fixed mode, and
// (escape_check_url_addr() excepted) return its result: the number of
// characters written, or 'size' upon overflow.

// escape spaces only (x_escape_http() mode 2)
HTSEXT_API size_t escape_spc_url(const char *const src,
                                 char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 2);
}

// smith / john -> smith%20%2f%20john
// (x_escape_http() mode 1)
HTSEXT_API size_t escape_in_url(const char *const src,
                                char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 1);
}

// smith / john -> smith%20/%20john
// (x_escape_http() mode 3)
HTSEXT_API size_t escape_uri(const char *const src,
                             char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 3);
}

// x_escape_http() mode 30: like mode 1 without the "mark" characters, and
// with '/' left as is
HTSEXT_API size_t escape_uri_utf(const char *const src,
                                 char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 30);
}

// x_escape_http() mode 0: escapes '"', ' ' and CHAR_SPECIAL characters
HTSEXT_API size_t escape_check_url(const char *const src,
                                   char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 0);
}

// same as escape_check_url, but returns char*
// (always 'dest'; the overflow information is dropped)
HTSEXT_API char *escape_check_url_addr(const char *const src,
                                       char *const dest, const size_t size) {
  escape_check_url(src, dest, size);
  return dest;
}
3724
// Same as above, but appending to "dest"
// DECLARE_APPEND_ESCAPE_VERSION(NAME) defines append_NAME(src, dest, size):
// 'size' is the total capacity of 'dest', which must already hold a
// terminated string shorter than 'size' (asserted). NAME() is then called on
// the terminator position with the remaining capacity, and its result (which
// is relative to that remaining capacity) is returned.
#undef DECLARE_APPEND_ESCAPE_VERSION
#define DECLARE_APPEND_ESCAPE_VERSION(NAME) \
HTSEXT_API size_t append_ ##NAME(const char *const src, char *const dest, const size_t size) { \
  const size_t len = strnlen(dest, size); \
  assertf(len < size); \
  return NAME(src, dest + len, size - len); \
}

DECLARE_APPEND_ESCAPE_VERSION(escape_in_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_spc_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_uri_utf)
DECLARE_APPEND_ESCAPE_VERSION(escape_check_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_uri)

#undef DECLARE_APPEND_ESCAPE_VERSION
3741
// Same as above, but in-place
// DECLARE_INPLACE_ESCAPE_VERSION(NAME) defines inplace_NAME(dest, size):
// the current content of 'dest' (which must be terminated within 'size'
// bytes, asserted) is first copied aside, in a 256-byte stack buffer when it
// fits and in a heap block otherwise; NAME() then escapes that copy back
// into 'dest'. Returns the result of NAME().
#undef DECLARE_INPLACE_ESCAPE_VERSION
#define DECLARE_INPLACE_ESCAPE_VERSION(NAME) \
HTSEXT_API size_t inplace_ ##NAME(char *const dest, const size_t size) { \
  char buffer[256]; \
  const size_t len = strnlen(dest, size); \
  const int in_buffer = len + 1 < sizeof(buffer); \
  char *src = in_buffer ? buffer : malloct(len + 1); \
  size_t ret; \
  assertf(src != NULL); \
  assertf(len < size); \
  memcpy(src, dest, len + 1); \
  ret = NAME(src, dest, size); \
  if (!in_buffer) { \
    freet(src); \
  } \
  return ret; \
}

DECLARE_INPLACE_ESCAPE_VERSION(escape_in_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_spc_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_uri_utf)
DECLARE_INPLACE_ESCAPE_VERSION(escape_check_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_uri)

#undef DECLARE_INPLACE_ESCAPE_VERSION
3768
3769
3770 HTSEXT_API size_t make_content_id(const char *const adr, const char *const fil,
3771 char *const dest, const size_t size) {
3772 char *a;
3773 size_t esc_size = escape_in_url(adr, dest, size);
3774 esc_size += escape_in_url(fil, dest + esc_size, size - esc_size);
3775 RUNTIME_TIME_CHECK_SIZE(size);
3776 for(a = dest ; (a = strchr(a, '%')) != NULL ; a++) {
3777 *a = 'X';
3778 }
3779 return esc_size;
3780 }
3781
3782 // strip all control characters
escape_remove_control(char * const s)3783 HTSEXT_API void escape_remove_control(char *const s) {
3784 size_t i, j;
3785 for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3786 const unsigned char c = (unsigned char) s[i];
3787 if (c >= 32) {
3788 if (i != j) {
3789 assertf(j < i);
3790 s[j] = s[i];
3791 }
3792 j++;
3793 }
3794 }
3795 }
3796
/* Append character C to dest[] at index j (names 'dest', 'j' and 'size' are
   taken from the calling function). When only the terminator slot is left,
   the string is terminated and the CALLING function returns 'size', which is
   the overflow indication. */
#undef ADD_CHAR
#define ADD_CHAR(C) do {                        \
    assertf(j < size);                          \
    if (j + 1 == size) {                        \
      dest[j] = '\0';                           \
      return size;                              \
    }                                           \
    dest[j++] = (C);                            \
  } while(0)
3806
3807 /* Returns the number of characters written (not taking in account the terminating \0), or 'size' upon overflow. */
x_escape_http(const char * const s,char * const dest,const size_t size,const int mode)3808 HTSEXT_API size_t x_escape_http(const char *const s, char *const dest,
3809 const size_t size, const int mode) {
3810 static const char hex[] = "0123456789abcdef";
3811 size_t i, j;
3812
3813 RUNTIME_TIME_CHECK_SIZE(size);
3814
3815 // Out-of-bound.
3816 // Previous character is supposed to be the terminating \0.
3817 if (size == 0) {
3818 return 0;
3819 }
3820
3821 for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3822 const unsigned char c = (unsigned char) s[i];
3823 int test = 0;
3824
3825 if (mode == 0)
3826 test = c == '"' || c == ' ' || CHAR_SPECIAL(c);
3827 else if (mode == 1)
3828 test = CHAR_RESERVED(c)
3829 || CHAR_DELIM(c)
3830 || CHAR_UNWISE(c)
3831 || CHAR_SPECIAL(c)
3832 || CHAR_XXAVOID(c)
3833 || CHAR_MARK(c);
3834 else if (mode == 2)
3835 test = c == ' '; // n'escaper que espace
3836 else if (mode == 3) // échapper que ce qui est nécessaire
3837 test = CHAR_SPECIAL(c)
3838 || CHAR_XXAVOID(c);
3839 else if (mode == 30) // échapper que ce qui est nécessaire
3840 test = (c != '/' && CHAR_RESERVED(c))
3841 || CHAR_DELIM(c)
3842 || CHAR_UNWISE(c)
3843 || CHAR_SPECIAL(c)
3844 || CHAR_XXAVOID(c);
3845
3846 if (!test) {
3847 ADD_CHAR(c);
3848 } else {
3849 ADD_CHAR('%');
3850 ADD_CHAR(hex[c / 16]);
3851 ADD_CHAR(hex[c % 16]);
3852 }
3853 }
3854
3855 assertf(j < size);
3856 dest[j] = '\0';
3857 return j;
3858 }
3859
escape_for_html_print(const char * const s,char * const dest,const size_t size)3860 HTSEXT_API size_t escape_for_html_print(const char *const s, char *const dest, const size_t size) {
3861 size_t i, j;
3862
3863 RUNTIME_TIME_CHECK_SIZE(size);
3864
3865 for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3866 const unsigned char c = (unsigned char) s[i];
3867 if (c == '&') {
3868 ADD_CHAR('&');
3869 ADD_CHAR('a');
3870 ADD_CHAR('m');
3871 ADD_CHAR('p');
3872 ADD_CHAR(';');
3873 } else {
3874 ADD_CHAR(c);
3875 }
3876 }
3877 assertf(j < size);
3878 dest[j] = '\0';
3879 return j;
3880 }
3881
escape_for_html_print_full(const char * const s,char * const dest,const size_t size)3882 HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const dest, const size_t size) {
3883 static const char hex[] = "0123456789abcdef";
3884 size_t i, j;
3885
3886 RUNTIME_TIME_CHECK_SIZE(size);
3887
3888 for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3889 const unsigned char c = (unsigned char) s[i];
3890 if (c == '&') {
3891 ADD_CHAR('&');
3892 ADD_CHAR('a');
3893 ADD_CHAR('m');
3894 ADD_CHAR('p');
3895 ADD_CHAR(';');
3896 } else if (CHAR_HIG(c)) {
3897 ADD_CHAR('&');
3898 ADD_CHAR('#');
3899 ADD_CHAR('x');
3900 ADD_CHAR(hex[c / 16]);
3901 ADD_CHAR(hex[c % 16]);
3902 ADD_CHAR(';');
3903 } else {
3904 ADD_CHAR(c);
3905 }
3906 }
3907 assertf(j < size);
3908 dest[j] = '\0';
3909 return j;
3910 }
3911
3912 #undef ADD_CHAR
3913
// lower case conversion, with buffer: copies 'a' into 'catbuff', converts the
// copy with hts_lowcase(), and returns 'catbuff'
char *convtolower(char *catbuff, const char *a) {
  strcpybuff(catbuff, a);
  hts_lowcase(catbuff);         // lower case
  return catbuff;
}
3920
// in-place lower case conversion: only the letters 'A'..'Z' are changed
void hts_lowcase(char *s) {
  char *p;

  for(p = s; *p != '\0'; p++) {
    if (*p >= 'A' && *p <= 'Z')
      *p = (char) (*p + ('a' - 'A'));
  }
}
3929
// replace, in place, every occurrence of character 'from' by 'to' in 's'
// Nothing is done when 'from' is '\0' (the terminator is never replaced).
void hts_replace(char *s, char from, char to) {
  char *a;

  if (from == '\0')
    return;
  // resume the search AFTER each replaced character: a single pass, which
  // also terminates when 'from' and 'to' are the same character
  for(a = s; (a = strchr(a, from)) != NULL; a++) {
    *a = to;
  }
}
3938
// guess the type of a local file..
// ex: fil="toto.gif" -> s="image/gif"
// Same as get_httptype() with flag set to 1 (always give a type).
void guess_httptype(httrackp * opt, char *s, const char *fil) {
  get_httptype(opt, s, fil, 1);
}
3944
3945 // idem
3946 // flag: 1 si toujours renvoyer un type
get_httptype(httrackp * opt,char * s,const char * fil,int flag)3947 HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil, int flag) {
3948 // userdef overrides get_httptype
3949 if (get_userhttptype(opt, s, fil)) {
3950 return;
3951 }
3952 // regular tests
3953 if (ishtml(opt, fil) == 1) {
3954 strcpybuff(s, "text/html");
3955 } else {
3956 /* Check html -> text/html */
3957 const char *a = fil + strlen(fil) - 1;
3958
3959 while((*a != '.') && (*a != '/') && (a > fil))
3960 a--;
3961 if (*a == '.' && strlen(a) < 32) {
3962 int j = 0;
3963
3964 a++;
3965 while(strnotempty(hts_mime[j][1])) {
3966 if (strfield2(hts_mime[j][1], a)) {
3967 if (hts_mime[j][0][0] != '*') { // Une correspondance existe
3968 strcpybuff(s, hts_mime[j][0]);
3969 return;
3970 }
3971 }
3972 j++;
3973 }
3974
3975 if (flag)
3976 sprintf(s, "application/%s", a);
3977 } else {
3978 if (flag)
3979 strcpybuff(s, "application/octet-stream");
3980 }
3981 }
3982 }
3983
// get type of fil (php), using the user definitions held in opt->mimedefs
// s: output buffer (receives e.g. "text/html"); it is emptied first.
//    NOTE(review): the former contract read "buffer or NULL", but with
//    s == NULL the whole lookup is skipped and 0 is returned.
// return: 1 if known by user ('s' filled), 0 otherwise
// NOTE(review): the copy into 's' is not bounded here; callers presumably
// provide a large enough buffer -- confirm.
int get_userhttptype(httrackp * opt, char *s, const char *fil) {
  if (s != NULL) {
    if (s)
      s[0] = '\0';
    if (fil == NULL || *fil == '\0')
      return 0;
#if 1
    if (StringLength(opt->mimedefs) > 0) {

      /* Check --assume foooo/foo/bar.cgi=text/html, then foo/bar.cgi=text/html, then bar.cgi=text/html */
      /* also: --assume baz,bar,foooo/foo/bar.cgi=text/html */
      /* start from path beginning */
      do {
        const char *next;
        const char *mimedefs = StringBuff(opt->mimedefs);       /* loop through mime definitions : \nfoo=bar\nzoo=baz\n.. */

        while(*mimedefs != '\0') {
          /* text compared with the definitions: 'fil' without its first
             character (presumably the leading '/' or '.') */
          const char *segment = fil + 1;

          if (*mimedefs == '\n') {
            mimedefs++;
          }
          /* compare current segment with user's definition */
          do {
            int i;

            /* check current item */
            for(i = 0; mimedefs[i] != '\0'      /* end of all defs */
                && mimedefs[i] != ' '   /* next item in left list */
                && mimedefs[i] != '='   /* end of left list */
                && mimedefs[i] != '\n'  /* end of this def (?) */
                && mimedefs[i] == segment[i]    /* same item */
                ; i++) ;
            /* success */
            if ((mimedefs[i] == '=' || mimedefs[i] == ' ')
                && segment[i] == '\0') {
              int i2;

              /* skip the rest of the left list, up to the '=' */
              while(mimedefs[i] != 0 && mimedefs[i] != '\n'
                    && mimedefs[i] != '=')
                i++;
              if (mimedefs[i] == '=') {
                i++;
                /* copy the right-hand side (the type) up to end of line */
                for(i2 = 0;
                    mimedefs[i + i2] != '\n' && mimedefs[i + i2] != '\0';
                    i2++) {
                  s[i2] = mimedefs[i + i2];
                }
                s[i2] = '\0';
                return 1;       /* SUCCESS! */
              }
            }
            /* next item in list */
            for(mimedefs += i;
                *mimedefs != '\0' && *mimedefs != '\n' && *mimedefs != '='
                && *mimedefs != ' '; mimedefs++) ;
            if (*mimedefs == ' ') {
              mimedefs++;
            }
          } while(*mimedefs != '\0' && *mimedefs != '\n' && *mimedefs != '=');
          /* next user-def */
          for(; *mimedefs != '\0' && *mimedefs != '\n'; mimedefs++) ;
        }
        /* shorten segment */
        next = strchr(fil + 1, '/');
        if (next == NULL) {
          /* ext tests */
          next = strchr(fil + 1, '.');
        }
        fil = next;
      } while(fil != NULL);
    }
#else
    /* former implementation, compiled out */
    if (*buffer) {
      char BIGSTK search[1024];
      char *detect;

      sprintf(search, "\n%s=", ext);    // php=text/html
      detect = strstr(*buffer, search);
      if (!detect) {
        sprintf(search, "\n%s\n", ext); // php\ncgi=text/html
        detect = strstr(*buffer, search);
      }
      if (detect) {
        detect = strchr(detect, '=');
        if (detect) {
          detect++;
          if (s) {
            char *a;

            a = strchr(detect, '\n');
            if (a) {
              strncatbuff(s, detect, (int) (a - detect));
            }
          }
          return 1;
        }
      }
    }
#endif
  }
  return 0;
}
4090
4091 // renvoyer extesion d'un type mime..
4092 // ex: "image/gif" -> gif
give_mimext(char * s,const char * st)4093 void give_mimext(char *s, const char *st) {
4094 int ok = 0;
4095 int j = 0;
4096
4097 s[0] = '\0';
4098 while((!ok) && (strnotempty(hts_mime[j][1]))) {
4099 if (strfield2(hts_mime[j][0], st)) {
4100 if (hts_mime[j][1][0] != '*') { // Une correspondance existe
4101 strcpybuff(s, hts_mime[j][1]);
4102 ok = 1;
4103 }
4104 }
4105 j++;
4106 }
4107 // wrap "x" mimetypes, such as:
4108 // application/x-mp3
4109 // or
4110 // application/mp3
4111 if (!ok) {
4112 int p;
4113 const char *a = NULL;
4114
4115 if ((p = strfield(st, "application/x-")))
4116 a = st + p;
4117 else if ((p = strfield(st, "application/")))
4118 a = st + p;
4119 if (a) {
4120 if ((int) strlen(a) >= 1) {
4121 if ((int) strlen(a) <= 4) {
4122 strcpybuff(s, a);
4123 ok = 1;
4124 }
4125 }
4126 }
4127 }
4128 }
4129
4130 // extension connue?..
4131 // 0 : non
4132 // 1 : oui
4133 // 2 : html
is_knowntype(httrackp * opt,const char * fil)4134 HTSEXT_API int is_knowntype(httrackp * opt, const char *fil) {
4135 char catbuff[CATBUFF_SIZE];
4136 const char *ext;
4137 int j = 0;
4138
4139 if (!fil)
4140 return 0;
4141 ext = get_ext(catbuff, sizeof(catbuff), fil);
4142 while(strnotempty(hts_mime[j][1])) {
4143 if (strfield2(hts_mime[j][1], ext)) {
4144 if (is_html_mime_type(hts_mime[j][0]))
4145 return 2;
4146 else
4147 return 1;
4148 }
4149 j++;
4150 }
4151
4152 // Known by user?
4153 return (is_userknowntype(opt, fil));
4154 }
4155
4156 // known type?..
4157 // 0 : no
4158 // 1 : yes
4159 // 2 : html
4160 // setdefs : set mime buffer:
4161 // file=(char*) "asp=text/html\nphp=text/html\n"
is_userknowntype(httrackp * opt,const char * fil)4162 HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil) {
4163 char BIGSTK mime[1024];
4164
4165 if (!fil)
4166 return 0;
4167 if (!strnotempty(fil))
4168 return 0;
4169 mime[0] = '\0';
4170 get_userhttptype(opt, mime, fil);
4171 if (!strnotempty(mime))
4172 return 0;
4173 else if (is_html_mime_type(mime))
4174 return 2;
4175 else
4176 return 1;
4177 }
4178
4179 // page dynamique?
4180 // is_dyntype(get_ext("foo.asp"))
is_dyntype(const char * fil)4181 HTSEXT_API int is_dyntype(const char *fil) {
4182 int j = 0;
4183
4184 if (!fil)
4185 return 0;
4186 if (!strnotempty(fil))
4187 return 0;
4188 while(strnotempty(hts_ext_dynamic[j])) {
4189 if (strfield2(hts_ext_dynamic[j], fil)) {
4190 return 1;
4191 }
4192 j++;
4193 }
4194 return 0;
4195 }
4196
4197 // types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne
4198 // connaissent pas le type
may_unknown(httrackp * opt,const char * st)4199 int may_unknown(httrackp * opt, const char *st) {
4200 int j = 0;
4201
4202 // types média
4203 if (may_be_hypertext_mime(opt, st, "")) {
4204 return 1;
4205 }
4206 while(strnotempty(hts_mime_keep[j])) {
4207 if (strfield2(hts_mime_keep[j], st)) { // trouvé
4208 return 1;
4209 }
4210 j++;
4211 }
4212 return 0;
4213 }
4214
4215 /* returns 1 if the mime/filename seems to be bogus because of badly recognized multiple extension
4216 ; such as "application/x-wais-source" for "httrack-3.42-1.el5.src.rpm"
4217 reported by Hippy Dave 08/2008 (3.43) */
may_bogus_multiple(httrackp * opt,const char * mime,const char * filename)4218 int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename) {
4219 int j;
4220
4221 for(j = 0; strnotempty(hts_mime_bogus_multiple[j]); j++) {
4222 if (strfield2(hts_mime_bogus_multiple[j], mime)) { /* found mime type in suspicious list */
4223 char ext[64];
4224
4225 ext[0] = '\0';
4226 give_mimext(ext, mime);
4227 if (ext[0] != 0) { /* we have an extension for that */
4228 const size_t ext_size = strlen(ext);
4229 const char *file = strrchr(filename, '/'); /* fetch terminal filename */
4230
4231 if (file != NULL) {
4232 int i;
4233
4234 for(i = 0; file[i] != 0; i++) {
4235 if (i > 0 && file[i - 1] == '.'
4236 && strncasecmp(&file[i], ext, ext_size) == 0
4237 && (file[i + ext_size] == 0 || file[i + ext_size] == '.'
4238 || file[i + ext_size] == '?')) {
4239 return 1; /* is ambiguous */
4240 }
4241 }
4242 }
4243 }
4244 return 0;
4245 }
4246 }
4247 return 0;
4248 }
4249
4250 /* filename extension should not be changed because potentially bogus ; replaces may_unknown() (3.43) */
may_unknown2(httrackp * opt,const char * mime,const char * filename)4251 int may_unknown2(httrackp * opt, const char *mime, const char *filename) {
4252 int ret = may_unknown(opt, mime);
4253
4254 if (ret == 0) {
4255 ret = may_bogus_multiple(opt, mime, filename);
4256 }
4257 return ret;
4258 }
4259
4260 // -- Utils fichiers
4261
// pretty print for i/o: writes 'buff' to 'fp', each non-empty line being
// preceded by 'prefix'; CR characters are dropped and each LF is written as
// CR LF
void fprintfio(FILE * fp, const char *buff, const char *prefix) {
  int at_line_start = 1;
  const char *p;

  for(p = buff; *p != '\0'; p++) {
    if (*p == 13)               // CR: dropped
      continue;
    if (*p == 10) {             // LF: end of line
      fprintf(fp, "\r\n");
      at_line_start = 1;
      continue;
    }
    if (at_line_start) {
      fprintf(fp, "%s", prefix);
      at_line_start = 0;
    }
    fputc(*p, fp);
  }
}
4283
4284 /* Le fichier existe-t-il? (ou est-il accessible?) */
4285 /* Note: NOT utf-8 */
4286 /* Note: preserve errno */
fexist(const char * s)4287 int fexist(const char *s) {
4288 char catbuff[CATBUFF_SIZE];
4289 const int err = errno;
4290 struct stat st;
4291
4292 memset(&st, 0, sizeof(st));
4293 if (stat(fconv(catbuff, sizeof(catbuff), s), &st) == 0) {
4294 if (S_ISREG(st.st_mode)) {
4295 return 1;
4296 } else {
4297 return 0;
4298 }
4299 }
4300 errno = err;
4301 return 0;
4302 }
4303
4304 /* Le fichier existe-t-il? (ou est-il accessible?) */
4305 /* Note: utf-8 */
4306 /* Note: preserve errno */
fexist_utf8(const char * s)4307 int fexist_utf8(const char *s) {
4308 char catbuff[CATBUFF_SIZE];
4309 const int err = errno;
4310 STRUCT_STAT st;
4311
4312 memset(&st, 0, sizeof(st));
4313 if (STAT(fconv(catbuff, sizeof(catbuff), s), &st) == 0) {
4314 if (S_ISREG(st.st_mode)) {
4315 return 1;
4316 } else {
4317 return 0;
4318 }
4319 }
4320 errno = err;
4321 return 0;
4322 }
4323
/* Size of a file; -1 if the name is empty, or if it is not an existing
   regular file */
/* Note: NOT utf-8 */
off_t fsize(const char *s) {
  struct stat st;

  if (!strnotempty(s))          // empty name: error
    return -1;
  if (stat(s, &st) != 0 || !S_ISREG(st.st_mode))
    return -1;
  return st.st_size;
}
4337
4338 /* Taille d'un fichier, -1 si n'existe pas */
4339 /* Note: utf-8 */
fsize_utf8(const char * s)4340 off_t fsize_utf8(const char *s) {
4341 STRUCT_STAT st;
4342
4343 if (!strnotempty(s)) // nom vide: erreur
4344 return -1;
4345 if (STAT(s, &st) == 0 && S_ISREG(st.st_mode)) {
4346 return st.st_size;
4347 } else {
4348 return -1;
4349 }
4350 }
4351
/* Size of an opened file: seeks to the end to read the size, then seeks back
   to the position the stream had on entry. Returns -1 if 'fp' is NULL. */
off_t fpsize(FILE * fp) {
  off_t saved_pos, end_pos;

  if (fp == NULL)
    return -1;
#ifdef HTS_FSEEKO
  saved_pos = ftello(fp);
  fseek(fp, 0, SEEK_END);
  end_pos = ftello(fp);
  fseeko(fp, saved_pos, SEEK_SET);
#else
  saved_pos = ftell(fp);
  fseek(fp, 0, SEEK_END);
  end_pos = ftell(fp);
  fseek(fp, saved_pos, SEEK_SET);
#endif
  return end_pos;
}
4372
4373 /* root dir, with ending / */
4374 typedef struct {
4375 char path[1024 + 4];
4376 int init;
4377 } hts_rootdir_strc;
hts_rootdir(char * file)4378 HTSEXT_API const char *hts_rootdir(char *file) {
4379 static hts_rootdir_strc strc = { "", 0 };
4380 if (file) {
4381 if (!strc.init) {
4382 strc.path[0] = '\0';
4383 strc.init = 1;
4384 if (strnotempty(file)) {
4385 const size_t file_len = strlen(file);
4386 char *a;
4387
4388 assertf(file_len < sizeof(strc.path));
4389 strcpybuff(strc.path, file);
4390 while((a = strrchr(strc.path, '\\')))
4391 *a = '/';
4392 if ((a = strrchr(strc.path, '/'))) {
4393 *(a + 1) = '\0';
4394 } else
4395 strc.path[0] = '\0';
4396 }
4397 if (!strnotempty(strc.path)) {
4398 if (getcwd(strc.path, sizeof(strc.path)) == NULL)
4399 strc.path[0] = '\0';
4400 else
4401 strcatbuff(strc.path, "/");
4402 }
4403 }
4404 return NULL;
4405 } else if (strc.init)
4406 return strc.path;
4407 else
4408 return "";
4409 }
4410
/* Global statistics; hts_read() adds the received bytes to HTS_TOTAL_RECV,
   and check_downloadable_bytes() reads it for the rate limiter */
HTSEXT_API hts_stat_struct HTS_STAT;
4412
4413 //
4414 // return number of downloadable bytes, depending on rate limiter
4415 // see engine_stats() routine, too
4416 // this routine works quite well for big files and regular ones, but apparently the rate limiter has
4417 // some problems with very small files (rate too high)
check_downloadable_bytes(int rate)4418 LLint check_downloadable_bytes(int rate) {
4419 if (rate > 0) {
4420 TStamp time_now;
4421 TStamp elapsed_useconds;
4422 LLint bytes_transferred_during_period;
4423 LLint left;
4424
4425 // get the older timer
4426 int id_timer = (HTS_STAT.istat_idlasttimer + 1) % 2;
4427
4428 time_now = mtime_local();
4429 elapsed_useconds = time_now - HTS_STAT.istat_timestart[id_timer];
4430 // NO totally stupid - elapsed_useconds+=1000; // for the next second, too
4431 bytes_transferred_during_period =
4432 (HTS_STAT.HTS_TOTAL_RECV - HTS_STAT.istat_bytes[id_timer]);
4433
4434 left = ((rate * elapsed_useconds) / 1000) - bytes_transferred_during_period;
4435 if (left <= 0)
4436 left = 0;
4437
4438 return left;
4439 } else
4440 return TAILLE_BUFFER;
4441 }
4442
//
// Disabled code (compiled out by "#if 0"), kept for reference.
// 0 : OK
// 1 : slow down
#if 0
int HTS_TOTAL_RECV_CHECK(int var) {
  if (HTS_STAT.HTS_TOTAL_RECV_STATE)
    return 1;
  /*
     {
     if (HTS_STAT.HTS_TOTAL_RECV_STATE==3) {
     var = min(var,32);
     Sleep(250);
     } else if (HTS_STAT.HTS_TOTAL_RECV_STATE==2) {
     var = min(var,256);
     Sleep(100);
     } else {
     var/=2;
     if (var<=0) var=1;
     Sleep(50);
     }
     }
   */
  return 0;
}
#endif
4468
// Read at most 'size' bytes into 'buff', from the source described by 'r'
// (htsblk structure): the file r->fp when r->is_file is set, else the socket
// r->soc (through SSL when r->ssl is set and HTS_USEOPENSSL is enabled)
// returns:
// >0 : data received
// == 0 : not yet data
// <0: error or no data: READ_ERROR, READ_EOF or READ_TIMEOUT
// Received bytes are added to HTS_STAT.HTS_TOTAL_RECV.
// NOTE(review): because of the way the braces interleave with "#if
// HTS_USEOPENSSL", file reads appear to be counted too when OpenSSL support
// is compiled out, but not when it is compiled in -- confirm intent.
int hts_read(htsblk * r, char *buff, int size) {
  int retour;

  //  return read(soc,buff,size);
  if (r->is_file) {
#if HTS_WIDE_DEBUG
    DEBUG_W("read(%p, %d, %d)\n" _(void *)buff _(int) size _(int) r->fp);
#endif
    if (r->fp) {
      retour = (int) fread(buff, 1, size, r->fp);
      if (retour == 0)          // can happen with directories (!)
        retour = READ_ERROR;
    } else
      retour = READ_ERROR;
  } else {
#if HTS_WIDE_DEBUG
    DEBUG_W("recv(%d, %p, %d)\n" _(int) r->soc _(void *)buff _(int) size);

    if (r->soc == INVALID_SOCKET)
      printf("!!WIDE_DEBUG ERROR, soc==INVALID hts_read\n");
#endif
    //HTS_TOTAL_RECV_CHECK(size);         // lower if needed, when too much data was received
#if HTS_USEOPENSSL
    if (r->ssl) {
      retour = SSL_read(r->ssl_con, buff, size);
      if (retour <= 0) {
        int err_code = SSL_get_error(r->ssl_con, retour);

        if ((err_code == SSL_ERROR_WANT_READ)
            || (err_code == SSL_ERROR_WANT_WRITE)
          ) {
          retour = 0;           /* no data yet (ssl cache) */
        } else if (err_code == SSL_ERROR_ZERO_RETURN) {
          retour = READ_EOF;    /* completed */
        } else {
          retour = READ_ERROR;  /* eof or error */
        }
      }
    } else {
#endif
      retour = recv(r->soc, buff, size, 0);
      if (retour == 0) {
        retour = READ_EOF;
      } else if (retour < 0) {
        retour = READ_ERROR;
      }
    }
    if (retour > 0)             // count incoming data
      HTS_STAT.HTS_TOTAL_RECV += retour;
#if HTS_USEOPENSSL
  }
#endif
#if HTS_WIDE_DEBUG
  DEBUG_W("recv/read done (%d bytes)\n" _(int) retour);
#endif
  return retour;
}
4531
4532 // -- Gestion cache DNS --
4533 // 'RX98
4534
4535 // 'capsule' contenant uniquement le cache
hts_cache(httrackp * opt)4536 t_dnscache *hts_cache(httrackp * opt) {
4537 assertf(opt != NULL);
4538 if (opt->state.dns_cache == NULL) {
4539 opt->state.dns_cache = (t_dnscache *) malloct(sizeof(t_dnscache));
4540 memset(opt->state.dns_cache, 0, sizeof(t_dnscache));
4541 }
4542 assertf(opt->state.dns_cache != NULL);
4543 /* first entry is NULL */
4544 assertf(opt->state.dns_cache->iadr == NULL);
4545 return opt->state.dns_cache;
4546 }
4547
4548 // Free DNS cache.
hts_cache_free(t_dnscache * const root)4549 void hts_cache_free(t_dnscache *const root) {
4550 if (root != NULL) {
4551 t_dnscache *cache;
4552 for(cache = root; cache != NULL; ) {
4553 t_dnscache *const next = cache->next;
4554 cache->next = NULL;
4555 freet(cache);
4556 cache = next;
4557 }
4558 }
4559 }
4560
// locks the DNS cache for any insertion
// safer when several threads may write into it..
// -1: status? 0: release 1: lock
4564
4565 // MUST BE LOCKED
4566 // routine pour le cache - retour optionnel à donner à chaque fois
4567 // NULL: nom non encore testé dans le cache
4568 // si h_length==0 alors le nom n'existe pas dans le dns
hts_ghbn(const t_dnscache * cache,const char * const iadr,SOCaddr * const addr)4569 static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCaddr *const addr) {
4570 assertf(addr != NULL);
4571 assertf(iadr != NULL);
4572 if (*iadr == '\0') {
4573 return NULL;
4574 }
4575 /* first entry is empty */
4576 if (cache->iadr == NULL) {
4577 cache = cache->next;
4578 }
4579 for(; cache != NULL; cache = cache->next) {
4580 assertf(cache != NULL);
4581 assertf(cache->iadr != NULL);
4582 assertf(cache->iadr == (const char*) cache + sizeof(t_dnscache));
4583 if (strcmp(cache->iadr, iadr) == 0) { // ok trouvé
4584 if (cache->host_length != 0) { // entrée valide
4585 assertf(cache->host_length <= sizeof(cache->host_addr));
4586 SOCaddr_copyaddr2(*addr, cache->host_addr, cache->host_length);
4587 return addr;
4588 } else { // erreur dans le dns, déja vérifié
4589 SOCaddr_clear(*addr);
4590 return addr;
4591 }
4592 }
4593 }
4594 return NULL;
4595 }
4596
// Resolve 'hostname' without consulting the DNS cache.
// On success 'addr' is filled and returned; otherwise 'addr' is left cleared
// or invalid and NULL is returned.
// 'error' (may be NULL) receives the gai_strerror() message when getaddrinfo()
// fails; it is not written by the IPv4-only (gethostbyname) variant.
static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
                                          SOCaddr *const addr,
                                          const char **error) {
  {
#if HTS_INET6==0
    /* IPv4 resolver */
    struct hostent *const hp = gethostbyname(hostname);

    if (hp != NULL) {
      /* 'addr' is a pointer: the SOCaddr_* macros take the object itself,
         and the function returns the pointer it was given */
      SOCaddr_copyaddr2(*addr, hp->h_addr_list[0], hp->h_length);
      return SOCaddr_is_valid(*addr) ? addr : NULL;
    } else {
      SOCaddr_clear(*addr);
    }
#else
    /* IPv6 resolver */
    struct addrinfo *res = NULL;
    struct addrinfo hints;
    int gerr;

    SOCaddr_clear(*addr);
    memset(&hints, 0, sizeof(hints));
    if (IPV6_resolver == 1)     // V4 only (for bogus V6 entries)
      hints.ai_family = PF_INET;
    else if (IPV6_resolver == 2)        // V6 only (for testing V6 only)
      hints.ai_family = PF_INET6;
    else                        // V4 + V6
      hints.ai_family = PF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;
    if ((gerr = getaddrinfo(hostname, NULL, &hints, &res)) == 0) {
      if (res != NULL) {
        /* only the first returned address is used */
        if (res->ai_addr != NULL && res->ai_addrlen != 0) {
          SOCaddr_copyaddr2(*addr, res->ai_addr, res->ai_addrlen);
        }
      }
    } else {
      if (error != NULL) {
        *error = gai_strerror(gerr);
      }
    }
    if (res) {
      freeaddrinfo(res);
    }
#endif
  }

  return SOCaddr_is_valid(*addr) ? addr : NULL;
}
4646
hts_dns_resolve_nocache2(const char * const hostname,SOCaddr * const addr,const char ** error)4647 HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,
4648 SOCaddr *const addr, const char **error) {
4649 /* Protection */
4650 if (!strnotempty(hostname)) {
4651 return NULL;
4652 }
4653
4654 /*
4655 Strip [] if any : [3ffe:b80:1234:1::1]
4656 The resolver doesn't seem to handle IP6 addresses in brackets
4657 */
4658 if ((hostname[0] == '[') && (hostname[strlen(hostname) - 1] == ']')) {
4659 SOCaddr *ret;
4660 size_t size = strlen(hostname);
4661 char *copy = malloct(size + 1);
4662 assertf(copy != NULL);
4663 copy[0] = '\0';
4664 strncat(copy, hostname + 1, size - 2);
4665 ret = hts_dns_resolve_nocache2_(copy, addr, error);
4666 freet(copy);
4667 return ret;
4668 } else {
4669 return hts_dns_resolve_nocache2_(hostname, addr, error);
4670 }
4671 }
4672
hts_dns_resolve_nocache(const char * const hostname,SOCaddr * const addr)4673 HTSEXT_API SOCaddr* hts_dns_resolve_nocache(const char *const hostname, SOCaddr *const addr) {
4674 return hts_dns_resolve_nocache2(hostname, addr, NULL);
4675 }
4676
check_hostname_dns(const char * const hostname)4677 HTSEXT_API int check_hostname_dns(const char *const hostname) {
4678 SOCaddr buffer;
4679 return hts_dns_resolve_nocache(hostname, &buffer) != NULL;
4680 }
4681
4682 // Needs locking
4683 // cache dns interne à HTS // ** FREE A FAIRE sur la chaine
hts_dns_resolve_(httrackp * opt,const char * _iadr,SOCaddr * const addr,const char ** error)4684 static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
4685 SOCaddr *const addr, const char **error) {
4686 char BIGSTK iadr[HTS_URLMAXSIZE * 2];
4687 t_dnscache *cache = hts_cache(opt); // adresse du cache
4688 SOCaddr *sa;
4689
4690 assertf(opt != NULL);
4691 assertf(_iadr != NULL);
4692 assertf(addr != NULL);
4693
4694 strcpybuff(iadr, jump_identification_const(_iadr));
4695 // couper éventuel :
4696 {
4697 char *a;
4698
4699 if ((a = jump_toport(iadr)))
4700 *a = '\0';
4701 }
4702
4703 /* get IP from the dns cache */
4704 sa = hts_ghbn(cache, iadr, addr);
4705 if (sa != NULL) {
4706 return SOCaddr_is_valid(*sa) ? sa : NULL;
4707 } else { // non présent dans le cache dns, tester
4708 const size_t iadr_len = strlen(iadr) + 1;
4709
4710 // find queue
4711 for(; cache->next != NULL; cache = cache->next) ;
4712
4713 #if DEBUGDNS
4714 printf("resolving (not cached) %s\n", iadr);
4715 #endif
4716
4717 sa = hts_dns_resolve_nocache2(iadr, addr, error); // calculer IP host
4718
4719 #if HTS_WIDE_DEBUG
4720 DEBUG_W("gethostbyname done\n");
4721 #endif
4722
4723 /* attempt to store new entry */
4724 cache->next = malloct(sizeof(t_dnscache) + iadr_len);
4725 if (cache->next != NULL) {
4726 t_dnscache *const next = cache->next;
4727 char *const block = (char*) cache->next;
4728 char *const str = block + sizeof(t_dnscache);
4729 memcpy(str, iadr, iadr_len);
4730 next->iadr = str;
4731 if (sa != NULL) {
4732 next->host_length = SOCaddr_size(*sa);
4733 assertf(next->host_length <= sizeof(next->host_addr));
4734 memcpy(next->host_addr, &SOCaddr_sockaddr(*sa), next->host_length);
4735 } else {
4736 next->host_length = 0; // non existant dans le dns
4737 }
4738 next->next = NULL;
4739 return sa;
4740 }
4741
4742 /* return result if any */
4743 return sa;
4744 } // retour hp du cache
4745 }
4746
hts_dns_resolve2(httrackp * opt,const char * _iadr,SOCaddr * const addr,const char ** error)4747 SOCaddr* hts_dns_resolve2(httrackp * opt, const char *_iadr, SOCaddr *const addr, const char **error) {
4748 SOCaddr *ret;
4749 hts_mutexlock(&opt->state.lock);
4750 ret = hts_dns_resolve_(opt, _iadr, addr, error);
4751 hts_mutexrelease(&opt->state.lock);
4752 return ret;
4753 }
4754
// Same as hts_dns_resolve2(), without retrieving any error message.
SOCaddr* hts_dns_resolve(httrackp * opt, const char *_iadr, SOCaddr *const addr) {
  const char **const no_error_message = NULL;

  return hts_dns_resolve2(opt, _iadr, addr, no_error_message);
}
4758
4759 // --- Tracage des mallocs() ---
4760 #ifdef HTS_TRACE_MALLOC
4761 //#define htsLocker(A, N) htsLocker(A, N)
4762 #define htsLocker(A, N) do {} while(0)
4763 static mlink trmalloc = { NULL, 0, 0, NULL };
4764
4765 static int trmalloc_id = 0;
4766 static htsmutex *mallocMutex = NULL;
/* Initialisation hook of the allocation tracer. Currently does nothing:
   the lazy creation of mallocMutex that used to live here is disabled. */
static void hts_meminit(void) {
  /* intentionally empty */
}
hts_malloc(size_t len)4773 void *hts_malloc(size_t len) {
4774 void *adr;
4775
4776 hts_meminit();
4777 htsLocker(mallocMutex, 1);
4778 assertf(len > 0);
4779 adr = hts_xmalloc(len, 0);
4780 htsLocker(mallocMutex, 0);
4781 return adr;
4782 }
hts_calloc(size_t len,size_t len2)4783 void *hts_calloc(size_t len, size_t len2) {
4784 void *adr;
4785
4786 hts_meminit();
4787 assertf(len > 0);
4788 assertf(len2 > 0);
4789 htsLocker(mallocMutex, 1);
4790 adr = hts_xmalloc(len, len2);
4791 htsLocker(mallocMutex, 0);
4792 memset(adr, 0, len * len2);
4793 return adr;
4794 }
/* Traced strdup(): returns a copy of 'str' allocated with hts_malloc().
   A NULL 'str' is duplicated as the empty string. */
void *hts_strdup(char *str) {
  const char *const source = (str != NULL) ? str : "";
  const size_t length = strlen(source);
  char *const copy = (char *) hts_malloc(length + 1);

  assertf(copy != NULL);
  /* length + 1: include the terminating NUL */
  memcpy(copy, source, length + 1);
  return copy;
}
hts_xmalloc(size_t len,size_t len2)4803 void *hts_xmalloc(size_t len, size_t len2) {
4804 mlink *lnk = (mlink *) calloc(1, sizeof(mlink));
4805
4806 assertf(lnk != NULL);
4807 assertf(len > 0);
4808 assertf(len2 >= 0);
4809 if (lnk) {
4810 void *r = NULL;
4811 int size, bsize = sizeof(t_htsboundary);
4812
4813 if (len2)
4814 size = len * len2;
4815 else
4816 size = len;
4817 size += ((bsize - (size % bsize)) % bsize); /* check alignement */
4818 r = malloc(size + bsize * 2);
4819 assertf(r != NULL);
4820 if (r) {
4821 *((t_htsboundary *) ((char *) r))
4822 = *((t_htsboundary *) ((char *) r + size + bsize))
4823 = htsboundary;
4824 ((char *) r) += bsize; /* boundary */
4825 lnk->adr = r;
4826 lnk->len = size;
4827 lnk->id = trmalloc_id++;
4828 lnk->next = trmalloc.next;
4829 trmalloc.next = lnk;
4830 return r;
4831 } else {
4832 free(lnk);
4833 }
4834 }
4835 return NULL;
4836 }
hts_free(void * adr)4837 void hts_free(void *adr) {
4838 mlink *lnk = &trmalloc;
4839 int bsize = sizeof(t_htsboundary);
4840
4841 assertf(adr != NULL);
4842 if (!adr) {
4843 return;
4844 }
4845 htsLocker(mallocMutex, 1);
4846 while(lnk->next != NULL) {
4847 if (lnk->next->adr == adr) {
4848 mlink *blk_free = lnk->next;
4849
4850 assertf(blk_free->id != -1);
4851 assertf(*((t_htsboundary *) ((char *) adr - bsize)) == htsboundary);
4852 assertf(*((t_htsboundary *) ((char *) adr + blk_free->len)) ==
4853 htsboundary);
4854 lnk->next = lnk->next->next;
4855 free((void *) blk_free);
4856 //blk_free->id=-1;
4857 free((char *) adr - bsize);
4858 htsLocker(mallocMutex, 0);
4859 return;
4860 }
4861 lnk = lnk->next;
4862 assertf(lnk->next != NULL);
4863 }
4864 free(adr);
4865 htsLocker(mallocMutex, 0);
4866 }
hts_realloc(void * adr,size_t len)4867 void *hts_realloc(void *adr, size_t len) {
4868 int bsize = sizeof(t_htsboundary);
4869
4870 len += ((bsize - (len % bsize)) % bsize); /* check alignement */
4871 if (adr != NULL) {
4872 mlink *lnk = &trmalloc;
4873
4874 htsLocker(mallocMutex, 1);
4875 while(lnk->next != NULL) {
4876 if (lnk->next->adr == adr) {
4877 {
4878 mlink *blk_free = lnk->next;
4879
4880 assertf(blk_free->id != -1);
4881 assertf(*((t_htsboundary *) ((char *) adr - bsize)) == htsboundary);
4882 assertf(*((t_htsboundary *) ((char *) adr + blk_free->len)) ==
4883 htsboundary);
4884 }
4885 adr = realloc((char *) adr - bsize, len + bsize * 2);
4886 assertf(adr != NULL);
4887 lnk->next->adr = (char *) adr + bsize;
4888 lnk->next->len = len;
4889 *((t_htsboundary *) ((char *) adr))
4890 = *((t_htsboundary *) ((char *) adr + len + bsize))
4891 = htsboundary;
4892 htsLocker(mallocMutex, 0);
4893 return (char *) adr + bsize;
4894 }
4895 lnk = lnk->next;
4896 assertf(lnk->next != NULL);
4897 }
4898 htsLocker(mallocMutex, 0);
4899 }
4900 return hts_malloc(len);
4901 }
hts_find(char * adr)4902 mlink *hts_find(char *adr) {
4903 char *stkframe = (char *) &stkframe;
4904 mlink *lnk = &trmalloc;
4905 int bsize = sizeof(t_htsboundary);
4906
4907 assertf(adr != NULL);
4908 if (!adr) {
4909 return NULL;
4910 }
4911 htsLocker(mallocMutex, 1);
4912 while(lnk->next != NULL) {
4913 if (adr >= lnk->next->adr && adr <= lnk->next->adr + lnk->next->len) { /* found */
4914 htsLocker(mallocMutex, 0);
4915 return lnk->next;
4916 }
4917 lnk = lnk->next;
4918 }
4919 htsLocker(mallocMutex, 0);
4920 {
4921 int depl = (int) (adr - stkframe);
4922
4923 if (depl < 0)
4924 depl = -depl;
4925 //assertf(depl < 512000); /* near the stack frame.. doesn't look like malloc but stack variable */
4926 return NULL;
4927 }
4928 }
4929
4930 // check the malloct() and calloct() trace stack
hts_freeall(void)4931 void hts_freeall(void) {
4932 int bsize = sizeof(t_htsboundary);
4933
4934 while(trmalloc.next) {
4935 #if MEMDEBUG
4936 printf("* block %d\t not released: at %d\t (%d\t bytes)\n",
4937 trmalloc.next->id, trmalloc.next->adr, trmalloc.next->len);
4938 #endif
4939 if (trmalloc.next->id != -1) {
4940 free((char *) trmalloc.next->adr - bsize);
4941 }
4942 }
4943 }
4944 #endif
4945
4946 // -- divers //
4947
// Split 'fullpath' into a leading directory part ('path') and a final
// component ('pname', the project name).
// 'fullpath' itself is patched in place: one trailing '/' or '\' is removed
// and, when more than one character remains, every '\' becomes '/'.
// Both outputs are left empty when 'fullpath' is empty or reduces to a single
// character.
void cut_path(char *fullpath, char *path, char *pname) {
  size_t len;
  char *cursor;

  path[0] = pname[0] = '\0';
  if (!strnotempty(fullpath)) {
    return;
  }

  /* drop one trailing separator */
  len = strlen(fullpath);
  if (fullpath[len - 1] == '/' || fullpath[len - 1] == '\\') {
    len--;
    fullpath[len] = '\0';
  }
  if (len <= 1) {
    return;
  }

  /* normalise separators to '/' */
  for(cursor = fullpath; *cursor != '\0'; cursor++) {
    if (*cursor == '\\') {
      *cursor = '/';
    }
  }

  /* search backwards for the last separator, starting before the final
     character */
  cursor = fullpath + len - 2;
  while(cursor > fullpath && *cursor != '/') {
    cursor--;
  }
  if (*cursor == '/') {
    cursor++;
  }

  strcpybuff(pname, cursor);
  strncatbuff(path, fullpath, (int) (cursor - fullpath));
}
4971
// -- FTP protocol handling --

// Tells whether FTP support is available: always 1, on Windows and on every
// other platform alike.
int ftp_available(void) {
  return 1;                     // ok!
}
4984
4985 static void hts_debug_log_print(const char *format, ...);
4986
4987 static int hts_dgb_init = 0;
4988 static FILE *hts_dgb_init_fp = NULL;
hts_debug(int level)4989 HTSEXT_API void hts_debug(int level) {
4990 hts_dgb_init = level;
4991 if (hts_dgb_init > 0) {
4992 hts_debug_log_print("hts_debug() called");
4993 }
4994 }
4995
hts_dgb_(void)4996 static FILE *hts_dgb_(void) {
4997 if (hts_dgb_init_fp == NULL) {
4998 if ((hts_dgb_init & 0x80) == 0) {
4999 hts_dgb_init_fp = stderr;
5000 } else {
5001 hts_dgb_init_fp = FOPEN("hts-debug.txt", "wb");
5002 if (hts_dgb_init_fp != NULL) {
5003 fprintf(hts_dgb_init_fp, "* Creating file\r\n");
5004 }
5005 }
5006 }
5007 return hts_dgb_init_fp;
5008 }
5009
hts_debug_log_print(const char * format,...)5010 static void hts_debug_log_print(const char *format, ...) {
5011 if (hts_dgb_init > 0) {
5012 const int error = errno;
5013 FILE *const fp = hts_dgb_();
5014 va_list args;
5015
5016 assertf(format != NULL);
5017 va_start(args, format);
5018 (void) vfprintf(fp, format, args);
5019 va_end(args);
5020 fputs("\n", fp);
5021 fflush(fp);
5022 errno = error;
5023 }
5024 }
5025
hts_version(void)5026 HTSEXT_API const char* hts_version(void) {
5027 return HTTRACK_VERSIONID;
5028 }
5029
/* Heartbleed (CVE-2014-0160) check on an OpenSSL version string such as
   "OpenSSL 1.0.1g 7 Apr 2014".
   Windows builds only: returns 1 for "OpenSSL 1.0.1" followed by a space or
   by a letter from 'a' to 'f'. Returns 0 in every other case, and always 0
   on other platforms. */
static int ssl_vulnerable(const char *version) {
#ifdef _WIN32
  static const char *const prefix = "OpenSSL 1.0.1";
  const size_t prefix_len = strlen(prefix);

  if (version == NULL || strncmp(version, prefix, prefix_len) != 0) {
    return 0;
  } else {
    /* character right after "1.0.1": patch letter, or space when none */
    const char patch = version[prefix_len];

    if (patch == ' ') {
      return 1;
    }
    return !(patch < 'a' || patch > 'f');
  }
#else
  (void) version;
  return 0;
#endif
}
5043
5044 /* user abort callback */
5045 htsErrorCallback htsCallbackErr = NULL;
5046
hts_set_error_callback(htsErrorCallback handler)5047 HTSEXT_API void hts_set_error_callback(htsErrorCallback handler) {
5048 htsCallbackErr = handler;
5049 }
5050
hts_get_error_callback(void)5051 HTSEXT_API htsErrorCallback hts_get_error_callback(void) {
5052 return htsCallbackErr;
5053 }
5054
/* Hashtable assertion handler: forwards the failed expression and its
   location to abortf_(). 'arg' is unused. */
static void default_coucal_asserthandler(void *arg, const char* exp, const char* file, int line) {
  (void) arg;
  abortf_(exp, file, line);
}
5058
/* Map a hashtable (coucal) log level to the matching LOG_* level.
   Unknown levels map to LOG_ERROR. */
static int get_loglevel_from_coucal(coucal_loglevel level) {
  int mapped = LOG_ERROR;

  switch(level) {
  case coucal_log_critical:
    mapped = LOG_PANIC;
    break;
  case coucal_log_warning:
    mapped = LOG_WARNING;
    break;
  case coucal_log_info:
    mapped = LOG_INFO;
    break;
  case coucal_log_debug:
    mapped = LOG_DEBUG;
    break;
  case coucal_log_trace:
    mapped = LOG_TRACE;
    break;
  default:
    break;
  }
  return mapped;
}
5081
5082 /* log to default console */
default_coucal_loghandler(void * arg,coucal_loglevel level,const char * format,va_list args)5083 static void default_coucal_loghandler(void *arg, coucal_loglevel level,
5084 const char* format, va_list args) {
5085
5086 if (level <= coucal_log_warning) {
5087 fprintf(stderr, "** warning: ");
5088 }
5089 vfprintf(stderr, format, args);
5090 fprintf(stderr, "\n");
5091 }
5092
5093 /* log to project log */
htsopt_coucal_loghandler(void * arg,coucal_loglevel level,const char * format,va_list args)5094 static void htsopt_coucal_loghandler(void *arg, coucal_loglevel level,
5095 const char* format, va_list args) {
5096 httrackp *const opt = (httrackp*) arg;
5097 if (opt != NULL && opt->log != NULL) {
5098 hts_log_vprint(opt, get_loglevel_from_coucal(level),
5099 format, args);
5100 } else {
5101 default_coucal_loghandler(NULL, level, format, args);
5102 }
5103 }
5104
5105 /* attach hashtable logger to project log */
hts_set_hash_handler(coucal hashtable,httrackp * opt)5106 void hts_set_hash_handler(coucal hashtable, httrackp *opt) {
5107 /* Init hashtable default assertion handler. */
5108 coucal_set_assert_handler(hashtable,
5109 htsopt_coucal_loghandler,
5110 default_coucal_asserthandler,
5111 opt);
5112 }
5113
5114 static int hts_init_ok = 0;
hts_init(void)5115 HTSEXT_API int hts_init(void) {
5116 const char *dbg_env;
5117
5118 /* */
5119 if (hts_init_ok)
5120 return 1;
5121 hts_init_ok = 1;
5122
5123 /* enable debugging ? */
5124 dbg_env = getenv("HTS_LOG");
5125 if (dbg_env != NULL && *dbg_env != 0) {
5126 int level = 0;
5127
5128 if (sscanf(dbg_env, "%d", &level) == 1) {
5129 hts_debug(level);
5130 }
5131 }
5132
5133 hts_debug_log_print("entering hts_init()"); /* debug */
5134
5135 /* Init hashtable default assertion handler. */
5136 coucal_set_global_assert_handler(default_coucal_loghandler,
5137 default_coucal_asserthandler);
5138
5139 /* Init threads (lazy init) */
5140 htsthread_init();
5141
5142 /* Ensure external modules are loaded */
5143 hts_debug_log_print("calling htspe_init()"); /* debug */
5144 htspe_init(); /* module load (lazy) */
5145
5146 /* MD5 Auto-test */
5147 {
5148 char digest[32 + 2];
5149 const char *atest = "MD5 Checksum Autotest";
5150
5151 digest[0] = '\0';
5152 domd5mem(atest, strlen(atest), digest, 1); /* a42ec44369da07ace5ec1d660ba4a69a */
5153 if (strcmp(digest, "a42ec44369da07ace5ec1d660ba4a69a") != 0) {
5154 int fatal_broken_md5 = 0;
5155
5156 assertf(fatal_broken_md5);
5157 }
5158 }
5159
5160 hts_debug_log_print("initializing SSL"); /* debug */
5161 #if HTS_USEOPENSSL
5162 /*
5163 Initialize the OpensSSL library
5164 */
5165 if (!openssl_ctx) {
5166 const char *version;
5167
5168 SSL_load_error_strings();
5169 SSL_library_init();
5170
5171 // Check CVE-2014-0160.
5172 version = SSLeay_version(SSLEAY_VERSION);
5173 if (ssl_vulnerable(version)) {
5174 fprintf(stderr,
5175 "SSLeay_version(SSLEAY_VERSION) == '%s'\n", version);
5176 abortLog("unable to initialize TLS: OpenSSL version seems vulnerable to heartbleed bug (CVE-2014-0160)");
5177 assertf("OpenSSL version seems vulnerable to heartbleed bug (CVE-2014-0160)" == NULL);
5178 }
5179
5180 // OpenSSL_add_all_algorithms();
5181 openssl_ctx = SSL_CTX_new(SSLv23_client_method());
5182 if (!openssl_ctx) {
5183 fprintf(stderr,
5184 "fatal: unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)\n");
5185 abortLog("unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)");
5186 assertf("unable to initialize TLS" == NULL);
5187 }
5188 }
5189 #endif
5190
5191 hts_debug_log_print("ending hts_init()"); /* debug */
5192 return 1;
5193 }
5194
5195 /* will not free thread env. */
hts_uninit(void)5196 HTSEXT_API int hts_uninit(void) {
5197 /* hts_init() is a lazy initializer, with limited a allocation (one or two mutexes) ;
5198 we won't free anything here as the .h semantic was never being very clear */
5199 return 1;
5200 }
5201
hts_uninit_module(void)5202 HTSEXT_API int hts_uninit_module(void) {
5203 if (!hts_init_ok)
5204 return 1;
5205 htsthread_uninit();
5206 htspe_uninit();
5207 hts_init_ok = 0;
5208 return 1;
5209 }
5210
5211 // legacy. do not use
hts_log(httrackp * opt,const char * prefix,const char * msg)5212 HTSEXT_API int hts_log(httrackp * opt, const char *prefix, const char *msg) {
5213 if (opt->log != NULL) {
5214 fspc(opt, opt->log, prefix);
5215 fprintf(opt->log, "%s" LF, msg);
5216 return 0;
5217 }
5218 return 1; /* Error */
5219 }
5220
5221 static void (*hts_log_print_callback)(httrackp * opt, int type, const char *format, va_list args) = NULL;
5222
hts_set_log_vprint_callback(void (* callback)(httrackp * opt,int type,const char * format,va_list args))5223 HTSEXT_API void hts_set_log_vprint_callback(void (*callback)(httrackp * opt,
5224 int type, const char *format, va_list args)) {
5225 hts_log_print_callback = callback;
5226 }
5227
hts_log_vprint(httrackp * opt,int type,const char * format,va_list args)5228 HTSEXT_API void hts_log_vprint(httrackp * opt, int type, const char *format, va_list args) {
5229 assertf(format != NULL);
5230 if (hts_log_print_callback != NULL) {
5231 va_list args_copy;
5232 va_copy(args_copy, args);
5233 hts_log_print_callback(opt, type, format, args);
5234 va_end(args_copy);
5235 }
5236 if (opt != NULL && opt->log != NULL) {
5237 const int save_errno = errno;
5238 const char *s_type = "unknown";
5239 const int level = type & 0xff;
5240
5241 // Check log level
5242 if (opt->debug < level) {
5243 return;
5244 }
5245
5246 switch (level) {
5247 case LOG_TRACE:
5248 s_type = "trace";
5249 break;
5250 case LOG_DEBUG:
5251 s_type = "debug";
5252 break;
5253 case LOG_INFO:
5254 s_type = "info";
5255 break;
5256 case LOG_NOTICE:
5257 case LOG_WARNING:
5258 s_type = "warning";
5259 break;
5260 case LOG_ERROR:
5261 s_type = "error";
5262 break;
5263 case LOG_PANIC:
5264 s_type = "panic";
5265 break;
5266 }
5267 fspc(opt, opt->log, s_type);
5268 (void) vfprintf(opt->log, format, args);
5269 if ((type & LOG_ERRNO) != 0) {
5270 fprintf(opt->log, ": %s", strerror(save_errno));
5271 }
5272 fputs(LF, opt->log);
5273 if (opt->flush) {
5274 fflush(opt->log);
5275 }
5276 errno = save_errno;
5277 }
5278 }
5279
hts_log_print(httrackp * opt,int type,const char * format,...)5280 HTSEXT_API void hts_log_print(httrackp * opt, int type, const char *format, ...) {
5281 va_list args;
5282 assertf(format != NULL);
5283 va_start(args, format);
5284 hts_log_vprint(opt, type, format, args);
5285 va_end(args);
5286 }
5287
set_wrappers(httrackp * opt)5288 HTSEXT_API void set_wrappers(httrackp * opt) { // LEGACY
5289 }
5290
plug_wrapper(httrackp * opt,const char * moduleName,const char * argv)5291 HTSEXT_API int plug_wrapper(httrackp * opt, const char *moduleName,
5292 const char *argv) {
5293 void *handle = openFunctionLib(moduleName);
5294
5295 if (handle != NULL) {
5296 t_hts_plug plug = (t_hts_plug) getFunctionPtr(handle, "hts_plug");
5297 t_hts_unplug unplug = (t_hts_unplug) getFunctionPtr(handle, "hts_unplug");
5298
5299 if (plug != NULL) {
5300 int ret = plug(opt, argv);
5301
5302 if (hts_dgb_init > 0 && opt->log != NULL) {
5303 hts_debug_log_print("plugged module '%s' (return code=%d)", moduleName,
5304 ret);
5305 }
5306 if (ret == 1) { /* Success! */
5307 opt->libHandles.handles =
5308 (htslibhandle *) realloct(opt->libHandles.handles,
5309 (opt->libHandles.count +
5310 1) * sizeof(htslibhandle));
5311 opt->libHandles.handles[opt->libHandles.count].handle = handle;
5312 opt->libHandles.handles[opt->libHandles.count].moduleName =
5313 strdupt(moduleName);
5314 opt->libHandles.count++;
5315 return 1;
5316 } else {
5317 hts_debug_log_print
5318 ("* note: error while running entry point 'hts_plug' in %s",
5319 moduleName);
5320 if (unplug)
5321 unplug(opt);
5322 }
5323 } else {
5324 int last_errno = errno;
5325
5326 hts_debug_log_print("* note: can't find entry point 'hts_plug' in %s: %s",
5327 moduleName, strerror(last_errno));
5328 }
5329 closeFunctionLib(handle);
5330 return 0;
5331 } else {
5332 int last_errno = errno;
5333
5334 hts_debug_log_print("* note: can't load %s: %s", moduleName,
5335 strerror(last_errno));
5336 }
5337 return -1;
5338 }
5339
/* Unplug every module recorded in opt->libHandles: call its 'hts_unplug'
   exit point when it exports one, close the library, free the stored module
   name, then release the handle array and reset the count. */
static void unplug_wrappers(httrackp * opt) {
  int i;

  if (opt->libHandles.handles == NULL) {
    return;
  }

  for(i = 0; i < opt->libHandles.count; i++) {
    if (opt->libHandles.handles[i].handle != NULL) {
      /* hts_unplug(), the dll exit point (finalizer) */
      const t_hts_unplug unplug =
        (t_hts_unplug) getFunctionPtr(opt->libHandles.handles[i].handle,
                                      "hts_unplug");

      if (unplug != NULL) {
        unplug(opt);
      }
      closeFunctionLib(opt->libHandles.handles[i].handle);
      opt->libHandles.handles[i].handle = NULL;
    }
    if (opt->libHandles.handles[i].moduleName != NULL) {
      freet(opt->libHandles.handles[i].moduleName);
      opt->libHandles.handles[i].moduleName = NULL;
    }
  }

  freet(opt->libHandles.handles);
  opt->libHandles.handles = NULL;
  opt->libHandles.count = 0;
}
5365
/* Returns 1 when the 'len' characters starting at 'seg' occur somewhere in
   the NUL-terminated string 's', 0 otherwise. 'len' must be > 0. */
static int multipleStringMatch_contains(const char *s, const char *seg,
                                        size_t len) {
  for(; *s != '\0'; s++) {
    /* strncmp stops at the end of 's', so a short tail never matches */
    if (strncmp(s, seg, len) == 0) {
      return 1;
    }
  }
  return 0;
}

/* 'match' is a list of patterns separated by '\n'. Returns 1 when at least
   one non-empty pattern occurs as a substring of 's', 0 otherwise (including
   when 's' or 'match' is NULL, or 's' is empty).
   The scan stops at the terminating NUL of 'match', even when the last
   pattern is not followed by '\n'. */
int multipleStringMatch(const char *s, const char *match) {
  if (match == NULL || s == NULL || *s == 0)
    return 0;

  while(*match != '\0') {
    const char *const begin = match;
    size_t len;

    /* delimit the current pattern */
    while(*match != '\0' && *match != '\n') {
      match++;
    }
    len = (size_t) (match - begin);

    if (len > 0 && multipleStringMatch_contains(s, begin, len)) {
      return 1;
    }

    /* skip the separator only; never step past the terminating NUL */
    if (*match == '\n') {
      match++;
    }
  }
  return 0;
}
5385
hts_create_opt(void)5386 HTSEXT_API httrackp *hts_create_opt(void) {
5387 #if ( defined(_WIN32) || defined(__ANDROID__) )
5388 static const char *defaultModules[] = {
5389 "htsswf", "htsjava", "httrack-plugin", NULL
5390 };
5391 #else
5392 static const char *defaultModules[] = {
5393 "libhtsswf.so.1", "libhtsjava.so.2", "httrack-plugin", NULL
5394 };
5395 #endif
5396 httrackp *opt = malloc(sizeof(httrackp));
5397
5398 /* default options */
5399 memset(opt, 0, sizeof(httrackp));
5400 opt->size_httrackp = sizeof(httrackp);
5401
5402 /* mutexes */
5403 hts_mutexinit(&opt->state.lock);
5404
5405 /* custom wrappers */
5406 opt->libHandles.count = 0;
5407
5408 /* default settings */
5409
5410 opt->wizard = 2; // wizard automatique
5411 opt->quiet = 0; // questions
5412 //
5413 opt->travel = 0; // même adresse
5414 opt->depth = 9999; // mirror total par défaut
5415 opt->extdepth = 0; // mais pas à l'extérieur
5416 opt->seeker = 1; // down
5417 opt->urlmode = 2; // relatif par défaut
5418 opt->no_type_change = 0; // change file types
5419 opt->debug = LOG_NOTICE; // small log
5420 opt->getmode = 3; // linear scan
5421 opt->maxsite = -1; // taille max site (aucune)
5422 opt->maxfile_nonhtml = -1; // taille max fichier non html
5423 opt->maxfile_html = -1; // idem pour html
5424 opt->maxsoc = 4; // nbre socket max
5425 opt->fragment = -1; // pas de fragmentation
5426 opt->nearlink = 0; // ne pas prendre les liens non-html "adjacents"
5427 opt->makeindex = 1; // faire un index
5428 opt->kindex = 0; // index 'keyword'
5429 opt->delete_old = 1; // effacer anciens fichiers
5430 opt->background_on_suspend = 1; // Background the process if Control Z calls signal suspend.
5431 opt->makestat = 0; // pas de fichier de stats
5432 opt->maketrack = 0; // ni de tracking
5433 opt->timeout = 120; // timeout par défaut (2 minutes)
5434 opt->cache = 1; // cache prioritaire
5435 opt->shell = 0; // pas de shell par defaut
5436 opt->proxy.active = 0; // pas de proxy
5437 opt->user_agent_send = 1; // envoyer un user-agent
5438 StringCopy(opt->user_agent,
5439 "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
5440 StringCopy(opt->referer, "");
5441 StringCopy(opt->from, "");
5442 opt->savename_83 = 0; // noms longs par défaut
5443 opt->savename_type = 0; // avec structure originale
5444 opt->savename_delayed = 2; // hard delayed type (default)
5445 opt->delayed_cached = 1; // cached delayed type (default)
5446 opt->mimehtml = 0; // pas MIME-html
5447 opt->parsejava = HTSPARSE_DEFAULT; // parser classes
5448 opt->hostcontrol = 0; // PAS de control host pour timeout et traffic jammer
5449 opt->retry = 2; // 2 retry par défaut
5450 opt->errpage = 1; // copier ou générer une page d'erreur en cas d'erreur (404 etc.)
5451 opt->check_type = 1; // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html
5452 opt->all_in_cache = 0; // ne pas tout stocker en cache
5453 opt->robots = 2; // traiter les robots.txt
5454 opt->external = 0; // liens externes normaux
5455 opt->passprivacy = 0; // mots de passe dans les fichiers
5456 opt->includequery = 1; // include query-string par défaut
5457 opt->mirror_first_page = 0; // pas mode mirror links
5458 opt->accept_cookie = 1; // gérer les cookies
5459 opt->cookie = NULL;
5460 opt->http10 = 0; // laisser http/1.1
5461 opt->nokeepalive = 0; // pas keep-alive
5462 opt->nocompression = 0; // pas de compression
5463 opt->tolerant = 0; // ne pas accepter content-length incorrect
5464 opt->parseall = 1; // tout parser (tags inconnus, par exemple)
5465 opt->parsedebug = 0; // pas de mode débuggage
5466 opt->norecatch = 0; // ne pas reprendre les fichiers effacés par l'utilisateur
5467 opt->verbosedisplay = 0; // pas d'animation texte
5468 opt->sizehack = 0; // size hack
5469 opt->urlhack = 1; // url hack (normalizer)
5470 StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
5471 opt->ftp_proxy = 1; // proxy http pour ftp
5472 opt->convert_utf8 = 1; // convert html to UTF-8
5473 StringCopy(opt->filelist, "");
5474 StringCopy(opt->lang_iso, "en, *");
5475 StringCopy(opt->accept,
5476 "text/html,image/png,image/jpeg,image/pjpeg,image/x-xbitmap,image/svg+xml,image/gif;q=0.9,*/*;q=0.1");
5477 StringCopy(opt->headers, "");
5478 StringCopy(opt->mimedefs, "\n"); // aucun filtre mime (\n IMPORTANT)
5479 StringClear(opt->mod_blacklist);
5480 //
5481 opt->log = stdout;
5482 opt->errlog = stderr;
5483 opt->flush = 1; // flush sur les fichiers log
5484 //opt->aff_progress=0;
5485 opt->keyboard = 0;
5486 //
5487 StringCopy(opt->path_html, "");
5488 StringCopy(opt->path_html_utf8, "");
5489 StringCopy(opt->path_log, "");
5490 StringCopy(opt->path_bin, "");
5491 //
5492 opt->maxlink = 100000; // 100,000 liens max par défaut
5493 opt->maxfilter = 200; // 200 filtres max par défaut
5494 opt->maxcache = 1048576 * 32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT --
5495 //opt->maxcache_anticipate=256; // maximum de liens à anticiper
5496 opt->maxtime = -1; // temps max en secondes
5497 opt->maxrate = 25000; // taux maxi
5498 opt->maxconn = 5.0; // nombre connexions/s
5499 opt->waittime = -1; // wait until.. hh*3600+mm*60+ss
5500 //
5501 opt->exec = "";
5502 opt->is_update = 0; // not an update (yet)
5503 opt->dir_topindex = 0; // do not built top index (yet)
5504 //
5505 opt->bypass_limits = 0; // enforce limits by default
5506 opt->state.stop = 0; // stopper
5507 opt->state.exit_xh = 0; // abort
5508 //
5509 opt->state.is_ended = 0;
5510
5511 /* Alocated buffers */
5512
5513 opt->callbacks_fun =
5514 (t_hts_htmlcheck_callbacks *) malloct(sizeof(t_hts_htmlcheck_callbacks));
5515 memset(opt->callbacks_fun, 0, sizeof(t_hts_htmlcheck_callbacks));
5516
5517 /* Preload callbacks : java and flash parser, and the automatic user-defined callback */
5518
5519 {
5520 int i;
5521
5522 for(i = 0; defaultModules[i] != NULL; i++) {
5523 int ret = plug_wrapper(opt, defaultModules[i], defaultModules[i]);
5524
5525 if (ret == 0) { /* Module aborted initialization */
5526 /* Ignored. */
5527 }
5528 }
5529 }
5530
5531 return opt;
5532 }
5533
hts_sizeof_opt(void)5534 HTSEXT_API size_t hts_sizeof_opt(void) {
5535 return sizeof(httrackp);
5536 }
5537
hts_free_opt(httrackp * opt)5538 HTSEXT_API void hts_free_opt(httrackp * opt) {
5539 if (opt != NULL) {
5540
5541 /* Alocated callbacks */
5542
5543 if (opt->callbacks_fun != NULL) {
5544 int i;
5545 t_hts_htmlcheck_callbacks_item *items =
5546 (t_hts_htmlcheck_callbacks_item *) opt->callbacks_fun;
5547 const int size =
5548 (int) sizeof(t_hts_htmlcheck_callbacks) /
5549 sizeof(t_hts_htmlcheck_callbacks_item);
5550 assertf(sizeof(t_hts_htmlcheck_callbacks_item) * size ==
5551 sizeof(t_hts_htmlcheck_callbacks));
5552
5553 /* Free all linked lists */
5554 for(i = 0; i < size; i++) {
5555 t_hts_callbackarg *carg, *next_carg;
5556
5557 for(carg = items[i].carg;
5558 carg != NULL && (next_carg = carg->prev.carg, carg != NULL);
5559 carg = next_carg) {
5560 hts_free(carg);
5561 }
5562 }
5563
5564 freet(opt->callbacks_fun);
5565 opt->callbacks_fun = NULL;
5566 }
5567
5568 /* Close library handles */
5569 unplug_wrappers(opt);
5570
5571 /* Cache */
5572 if (opt->state.dns_cache != NULL) {
5573 t_dnscache *root;
5574
5575 hts_mutexlock(&opt->state.lock);
5576 root = opt->state.dns_cache;
5577 opt->state.dns_cache = NULL;
5578 hts_mutexrelease(&opt->state.lock);
5579
5580 hts_cache_free(root);
5581 }
5582
5583 /* Cancel chain */
5584 if (opt->state.cancel != NULL) {
5585 htsoptstatecancel *cancel;
5586
5587 for(cancel = opt->state.cancel; cancel != NULL;) {
5588 htsoptstatecancel *next = cancel->next;
5589
5590 if (cancel->url != NULL) {
5591 freet(cancel->url);
5592 }
5593 freet(cancel);
5594 cancel = next;
5595 }
5596 opt->state.cancel = NULL;
5597 }
5598
5599 /* Free strings */
5600
5601 StringFree(opt->proxy.name);
5602 StringFree(opt->proxy.bindhost);
5603
5604 StringFree(opt->savename_userdef);
5605 StringFree(opt->user_agent);
5606 StringFree(opt->referer);
5607 StringFree(opt->from);
5608 StringFree(opt->lang_iso);
5609 StringFree(opt->sys_com);
5610 StringFree(opt->mimedefs);
5611 StringFree(opt->filelist);
5612 StringFree(opt->urllist);
5613 StringFree(opt->footer);
5614 StringFree(opt->mod_blacklist);
5615
5616 StringFree(opt->path_html);
5617 StringFree(opt->path_html_utf8);
5618 StringFree(opt->path_log);
5619 StringFree(opt->path_bin);
5620
5621 /* mutexes */
5622 hts_mutexfree(&opt->state.lock);
5623
5624 /* Free structure */
5625 free(opt);
5626 }
5627 }
5628
5629 // TEMPORARY - PUT THIS STRUCTURE INSIDE httrackp !
hts_get_stats(httrackp * opt)5630 const hts_stat_struct* hts_get_stats(httrackp * opt) {
5631 if (opt == NULL) {
5632 return NULL;
5633 }
5634
5635 HTS_STAT.stat_nsocket = 0;
5636 HTS_STAT.stat_errors = fspc(opt, NULL, "error");
5637 HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
5638 HTS_STAT.stat_infos = fspc(opt, NULL, "info");
5639 HTS_STAT.nbk = 0;
5640 HTS_STAT.nb = 0;
5641
5642 return &HTS_STAT;
5643 }
5644
// default wrappers
htsdefault_init(t_hts_callbackarg * carg)5646 static void __cdecl htsdefault_init(t_hts_callbackarg * carg) {
5647 }
htsdefault_uninit(t_hts_callbackarg * carg)5648 static void __cdecl htsdefault_uninit(t_hts_callbackarg * carg) {
5649 // hts_freevar();
5650 }
htsdefault_start(t_hts_callbackarg * carg,httrackp * opt)5651 static int __cdecl htsdefault_start(t_hts_callbackarg * carg, httrackp * opt) {
5652 return 1;
5653 }
htsdefault_chopt(t_hts_callbackarg * carg,httrackp * opt)5654 static int __cdecl htsdefault_chopt(t_hts_callbackarg * carg, httrackp * opt) {
5655 return 1;
5656 }
htsdefault_end(t_hts_callbackarg * carg,httrackp * opt)5657 static int __cdecl htsdefault_end(t_hts_callbackarg * carg, httrackp * opt) {
5658 return 1;
5659 }
htsdefault_preprocesshtml(t_hts_callbackarg * carg,httrackp * opt,char ** html,int * len,const char * url_adresse,const char * url_fichier)5660 static int __cdecl htsdefault_preprocesshtml(t_hts_callbackarg * carg,
5661 httrackp * opt, char **html,
5662 int *len, const char *url_adresse,
5663 const char *url_fichier) {
5664 return 1;
5665 }
htsdefault_postprocesshtml(t_hts_callbackarg * carg,httrackp * opt,char ** html,int * len,const char * url_adresse,const char * url_fichier)5666 static int __cdecl htsdefault_postprocesshtml(t_hts_callbackarg * carg,
5667 httrackp * opt, char **html,
5668 int *len, const char *url_adresse,
5669 const char *url_fichier) {
5670 return 1;
5671 }
htsdefault_checkhtml(t_hts_callbackarg * carg,httrackp * opt,char * html,int len,const char * url_adresse,const char * url_fichier)5672 static int __cdecl htsdefault_checkhtml(t_hts_callbackarg * carg,
5673 httrackp * opt, char *html, int len,
5674 const char *url_adresse,
5675 const char *url_fichier) {
5676 return 1;
5677 }
htsdefault_loop(t_hts_callbackarg * carg,httrackp * opt,lien_back * back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct * stats)5678 static int __cdecl htsdefault_loop(t_hts_callbackarg * carg, httrackp * opt, lien_back * back, int back_max, int back_index, int lien_n, int lien_tot, int stat_time, hts_stat_struct * stats) { // appelé à chaque boucle de HTTrack
5679 return 1;
5680 }
htsdefault_query(t_hts_callbackarg * carg,httrackp * opt,const char * question)5681 static const char *__cdecl htsdefault_query(t_hts_callbackarg * carg,
5682 httrackp * opt,
5683 const char *question) {
5684 return "";
5685 }
htsdefault_query2(t_hts_callbackarg * carg,httrackp * opt,const char * question)5686 static const char *__cdecl htsdefault_query2(t_hts_callbackarg * carg,
5687 httrackp * opt,
5688 const char *question) {
5689 return "";
5690 }
htsdefault_query3(t_hts_callbackarg * carg,httrackp * opt,const char * question)5691 static const char *__cdecl htsdefault_query3(t_hts_callbackarg * carg,
5692 httrackp * opt,
5693 const char *question) {
5694 return "";
5695 }
htsdefault_check(t_hts_callbackarg * carg,httrackp * opt,const char * adr,const char * fil,int status)5696 static int __cdecl htsdefault_check(t_hts_callbackarg * carg, httrackp * opt,
5697 const char *adr, const char *fil,
5698 int status) {
5699 return -1;
5700 }
htsdefault_check_mime(t_hts_callbackarg * carg,httrackp * opt,const char * adr,const char * fil,const char * mime,int status)5701 static int __cdecl htsdefault_check_mime(t_hts_callbackarg * carg,
5702 httrackp * opt, const char *adr,
5703 const char *fil, const char *mime,
5704 int status) {
5705 return -1;
5706 }
htsdefault_pause(t_hts_callbackarg * carg,httrackp * opt,const char * lockfile)5707 static void __cdecl htsdefault_pause(t_hts_callbackarg * carg, httrackp * opt,
5708 const char *lockfile) {
5709 while(fexist(lockfile)) {
5710 Sleep(1000);
5711 }
5712 }
htsdefault_filesave(t_hts_callbackarg * carg,httrackp * opt,const char * file)5713 static void __cdecl htsdefault_filesave(t_hts_callbackarg * carg,
5714 httrackp * opt, const char *file) {
5715 }
htsdefault_filesave2(t_hts_callbackarg * carg,httrackp * opt,const char * adr,const char * file,const char * sav,int is_new,int is_modified,int not_updated)5716 static void __cdecl htsdefault_filesave2(t_hts_callbackarg * carg,
5717 httrackp * opt, const char *adr,
5718 const char *file, const char *sav,
5719 int is_new, int is_modified,
5720 int not_updated) {
5721 }
htsdefault_linkdetected(t_hts_callbackarg * carg,httrackp * opt,char * link)5722 static int __cdecl htsdefault_linkdetected(t_hts_callbackarg * carg,
5723 httrackp * opt, char *link) {
5724 return 1;
5725 }
htsdefault_linkdetected2(t_hts_callbackarg * carg,httrackp * opt,char * link,const char * start_tag)5726 static int __cdecl htsdefault_linkdetected2(t_hts_callbackarg * carg,
5727 httrackp * opt, char *link,
5728 const char *start_tag) {
5729 return 1;
5730 }
htsdefault_xfrstatus(t_hts_callbackarg * carg,httrackp * opt,lien_back * back)5731 static int __cdecl htsdefault_xfrstatus(t_hts_callbackarg * carg,
5732 httrackp * opt, lien_back * back) {
5733 return 1;
5734 }
htsdefault_savename(t_hts_callbackarg * carg,httrackp * opt,const char * adr_complete,const char * fil_complete,const char * referer_adr,const char * referer_fil,char * save)5735 static int __cdecl htsdefault_savename(t_hts_callbackarg * carg, httrackp * opt,
5736 const char *adr_complete,
5737 const char *fil_complete,
5738 const char *referer_adr,
5739 const char *referer_fil, char *save) {
5740 return 1;
5741 }
htsdefault_sendhead(t_hts_callbackarg * carg,httrackp * opt,char * buff,const char * adr,const char * fil,const char * referer_adr,const char * referer_fil,htsblk * outgoing)5742 static int __cdecl htsdefault_sendhead(t_hts_callbackarg * carg, httrackp * opt,
5743 char *buff, const char *adr,
5744 const char *fil, const char *referer_adr,
5745 const char *referer_fil,
5746 htsblk * outgoing) {
5747 return 1;
5748 }
htsdefault_receivehead(t_hts_callbackarg * carg,httrackp * opt,char * buff,const char * adr,const char * fil,const char * referer_adr,const char * referer_fil,htsblk * incoming)5749 static int __cdecl htsdefault_receivehead(t_hts_callbackarg * carg,
5750 httrackp * opt, char *buff,
5751 const char *adr, const char *fil,
5752 const char *referer_adr,
5753 const char *referer_fil,
5754 htsblk * incoming) {
5755 return 1;
5756 }
htsdefault_detect(t_hts_callbackarg * carg,httrackp * opt,htsmoduleStruct * str)5757 static int __cdecl htsdefault_detect(t_hts_callbackarg * carg, httrackp * opt,
5758 htsmoduleStruct * str) {
5759 return 0;
5760 }
htsdefault_parse(t_hts_callbackarg * carg,httrackp * opt,htsmoduleStruct * str)5761 static int __cdecl htsdefault_parse(t_hts_callbackarg * carg, httrackp * opt,
5762 htsmoduleStruct * str) {
5763 return 0;
5764 }
5765
/* Default internal dummy callbacks.
 *
 * Table of the built-in htsdefault_* handlers. The initializers are
 * positional, so each entry lands in the member of
 * t_hts_htmlcheck_callbacks declared at the same position: do not reorder
 * entries without checking the structure declaration.
 * Each entry pairs a handler with a NULL second member.
 * NOTE(review): the pair is presumably {fun, carg}, as suggested by the
 * "->FUN.fun" access in CALLBACK_OP and the "items[i].carg" walk in
 * hts_free_opt() - confirm against the type declaration.
 * The first 23 entries follow the same order as the names listed in
 * DISPATCH_CALLBACK; the last two (detect, parse) have no name there. */
const t_hts_htmlcheck_callbacks default_callbacks = {
  {htsdefault_init, NULL},
  {htsdefault_uninit, NULL},
  {htsdefault_start, NULL},
  {htsdefault_end, NULL},
  {htsdefault_chopt, NULL},
  {htsdefault_preprocesshtml, NULL},
  {htsdefault_postprocesshtml, NULL},
  {htsdefault_checkhtml, NULL},
  {htsdefault_query, NULL},
  {htsdefault_query2, NULL},
  {htsdefault_query3, NULL},
  {htsdefault_loop, NULL},
  {htsdefault_check, NULL},
  {htsdefault_check_mime, NULL},
  {htsdefault_pause, NULL},
  {htsdefault_filesave, NULL},
  {htsdefault_filesave2, NULL},
  {htsdefault_linkdetected, NULL},
  {htsdefault_linkdetected2, NULL},
  {htsdefault_xfrstatus, NULL},
  {htsdefault_savename, NULL},
  {htsdefault_sendhead, NULL},
  {htsdefault_receivehead, NULL},
  {htsdefault_detect, NULL},
  {htsdefault_parse, NULL}
};
5794
/* Apply OPERATION to one callback slot when NAME equals the string S.
 *
 * CB        pointer to the callback table
 * NAME      callback name to look up (compared with strcmp)
 * OPERATION macro invoked as OPERATION(type, lvalue) on a match
 * S         string literal naming this slot
 * FUN       member name of the slot in the table
 *
 * On a match, OPERATION receives the pasted type token
 * t_hts_htmlcheck_<FUN> and the lvalue (CB)->FUN.fun. Nothing happens when
 * the names differ. NAME is evaluated once per expansion. */
#define CALLBACK_OP(CB, NAME, OPERATION, S, FUN) do { \
  if (strcmp(NAME, S) == 0) { \
    OPERATION(t_hts_htmlcheck_ ##FUN, (CB)->FUN.fun); \
  } \
} while(0)
5800
/* Map a textual callback name to its slot in the table CB and apply
 * OPERATION to that slot (see CALLBACK_OP).
 *
 * Every known name is tested in turn with its own CALLBACK_OP; the tests
 * are not chained with "else", so NAME is compared against all entries
 * unless OPERATION itself leaves the enclosing function. An unknown name
 * matches nothing. The "detect" and "parse" slots present in
 * default_callbacks are not reachable by name through this macro. */
#define DISPATCH_CALLBACK(CB, NAME, OPERATION) do { \
  CALLBACK_OP(CB, NAME, OPERATION, "init", init); \
  CALLBACK_OP(CB, NAME, OPERATION, "free", uninit); \
  CALLBACK_OP(CB, NAME, OPERATION, "start", start); \
  CALLBACK_OP(CB, NAME, OPERATION, "end", end); \
  CALLBACK_OP(CB, NAME, OPERATION, "change-options", chopt); \
  CALLBACK_OP(CB, NAME, OPERATION, "preprocess-html", preprocess); \
  CALLBACK_OP(CB, NAME, OPERATION, "postprocess-html", postprocess); \
  CALLBACK_OP(CB, NAME, OPERATION, "check-html", check_html); \
  CALLBACK_OP(CB, NAME, OPERATION, "query", query); \
  CALLBACK_OP(CB, NAME, OPERATION, "query2", query2); \
  CALLBACK_OP(CB, NAME, OPERATION, "query3", query3); \
  CALLBACK_OP(CB, NAME, OPERATION, "loop", loop); \
  CALLBACK_OP(CB, NAME, OPERATION, "check-link", check_link); \
  CALLBACK_OP(CB, NAME, OPERATION, "check-mime", check_mime); \
  CALLBACK_OP(CB, NAME, OPERATION, "pause", pause); \
  CALLBACK_OP(CB, NAME, OPERATION, "save-file", filesave); \
  CALLBACK_OP(CB, NAME, OPERATION, "save-file2", filesave2); \
  CALLBACK_OP(CB, NAME, OPERATION, "link-detected", linkdetected); \
  CALLBACK_OP(CB, NAME, OPERATION, "link-detected2", linkdetected2); \
  CALLBACK_OP(CB, NAME, OPERATION, "transfer-status", xfrstatus); \
  CALLBACK_OP(CB, NAME, OPERATION, "save-name", savename); \
  CALLBACK_OP(CB, NAME, OPERATION, "send-header", sendhead); \
  CALLBACK_OP(CB, NAME, OPERATION, "receive-header", receivehead); \
} while(0)
5826
/*
 * Install a callback function by name.
 *
 * callbacks: table to modify
 * name:      textual callback name, as listed in DISPATCH_CALLBACK
 *            ("init", "start", "check-link", ...)
 * function:  function pointer to store, cast to the slot's own type
 *
 * Returns 0 when the name was recognized and the slot updated, 1 otherwise.
 * A NULL table or a NULL name is reported as an error (1) instead of being
 * dereferenced or handed to strcmp().
 */
int hts_set_callback(t_hts_htmlcheck_callbacks * callbacks, const char *name,
                     void *function) {
  int error = 1;

  if (callbacks == NULL || name == NULL) {
    return error;
  }
  /* Operation applied to the matching slot: store the pointer, clear the
     error flag. */
#define CALLBACK_OPERATION(TYPE, FUNCTION) do { \
  FUNCTION = (TYPE) function; \
  error = 0; \
} while(0)
  DISPATCH_CALLBACK(callbacks, name, CALLBACK_OPERATION);
#undef CALLBACK_OPERATION
  return error;
}
5838
/*
 * Fetch a callback function by name.
 *
 * callbacks: table to read
 * name:      textual callback name, as listed in DISPATCH_CALLBACK
 *
 * Returns the function pointer stored in the matching slot, cast to void*,
 * or NULL when the name is not recognized. A NULL table or a NULL name
 * yields NULL instead of being dereferenced or handed to strcmp().
 */
void *hts_get_callback(t_hts_htmlcheck_callbacks * callbacks, const char *name) {
  if (callbacks == NULL || name == NULL) {
    return NULL;
  }
  /* Operation applied to the matching slot: return its content at once. */
#define CALLBACK_OPERATION(TYPE, FUNCTION) do { \
  return (void*) FUNCTION; \
} while(0)
  DISPATCH_CALLBACK(callbacks, name, CALLBACK_OPERATION);
#undef CALLBACK_OPERATION
  return NULL;
}
5847
// end default wrappers
5849
5850 /* libc stubs */
5851
hts_strdup(const char * str)5852 HTSEXT_API char *hts_strdup(const char *str) {
5853 return strdup(str);
5854 }
5855
/* Exported malloc() stub: returns whatever malloc(size) returns. */
HTSEXT_API void *hts_malloc(size_t size) {
  void *const block = malloc(size);

  return block;
}
5859
/* Exported realloc() stub: returns whatever realloc(data, size) returns. */
HTSEXT_API void *hts_realloc(void *const data, const size_t size) {
  void *const resized = realloc(data, size);

  return resized;
}
5863
/* Exported free() stub: hands data straight to free(). */
HTSEXT_API void hts_free(void *data) {
  void *const block = data;

  free(block);
}
5867
5868 /* Dummy functions */
/* Dummy exported function: has no effect and always returns 0. */
HTSEXT_API int hts_resetvar(void) {
  const int result = 0;

  return result;
}
5872
5873 #ifdef _WIN32
5874
5875 typedef struct dirent dirent;
opendir(const char * name)5876 DIR *opendir(const char *name) {
5877 WIN32_FILE_ATTRIBUTE_DATA st;
5878 DIR *dir;
5879 size_t len;
5880 int i;
5881
5882 if (name == NULL || *name == '\0') {
5883 errno = ENOENT;
5884 return NULL;
5885 }
5886 if (!GetFileAttributesEx(name, GetFileExInfoStandard, &st)
5887 || (st.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) {
5888 errno = ENOENT;
5889 return NULL;
5890 }
5891 dir = calloc(sizeof(DIR), 1);
5892 if (dir == NULL) {
5893 errno = ENOMEM;
5894 return NULL;
5895 }
5896 len = strlen(name);
5897 dir->h = INVALID_HANDLE_VALUE;
5898 dir->name = malloc(len + 2 + 1);
5899 strcpy(dir->name, name);
5900 for(i = 0; dir->name[i] != '\0'; i++) {
5901 if (dir->name[i] == '/') {
5902 dir->name[i] = '\\';
5903 }
5904 }
5905 strcat(dir->name, "\\*");
5906 return dir;
5907 }
5908
/*
 * Minimal readdir() replacement for Windows.
 *
 * dir: stream returned by opendir().
 *
 * Returns a pointer to the entry embedded in dir (overwritten by the next
 * call), or NULL with errno set:
 *   EBADF   dir is NULL
 *   ENOENT  no (more) entry could be read
 * The name is truncated to HTS_DIRENT_SIZE - 1 characters.
 *
 * NOTE(review): once the enumeration is exhausted the find handle is reset
 * to INVALID_HANDLE_VALUE, so a further call starts a new enumeration from
 * the first entry; callers are expected to stop at the first NULL.
 */
struct dirent *readdir(DIR * dir) {
  WIN32_FIND_DATAA find;

  if (dir == NULL) {
    errno = EBADF;
    return NULL;
  }
  if (dir->h == INVALID_HANDLE_VALUE) {
    /* First call: start the enumeration. */
    dir->h = FindFirstFileA(dir->name, &find);
  } else if (!FindNextFileA(dir->h, &find)) {
    /* ANSI variant, matching the WIN32_FIND_DATAA buffer. */
    FindClose(dir->h);
    dir->h = INVALID_HANDLE_VALUE;
  }
  if (dir->h != INVALID_HANDLE_VALUE) {
    dir->entry.d_name[0] = 0;
    strncat(dir->entry.d_name, find.cFileName, HTS_DIRENT_SIZE - 1);
    return &dir->entry;
  }
  errno = ENOENT;
  return NULL;
}
5928
/*
 * Minimal closedir() replacement for Windows.
 *
 * dir: stream returned by opendir().
 *
 * Closes the pending find handle, if any, and frees the stream.
 * Returns 0 on success, or -1 with errno set to EBADF when dir is NULL.
 */
int closedir(DIR * dir) {
  if (dir == NULL) {
    errno = EBADF;
    return -1;
  }
  if (dir->h != INVALID_HANDLE_VALUE) {
    /* The handle comes from FindFirstFileA(): it must be released with
       FindClose(), not CloseHandle(). */
    FindClose(dir->h);
  }
  free(dir->name);
  free(dir);
  return 0;
}
5943
5944 // UTF-8 aware FILE API
5945
/* Copy the NUL-terminated char string src into the wide buffer dest, one
   character per wide cell, and terminate it. No bound is checked: dest must
   have room for strlen(src) + 1 cells. */
static void copyWchar(LPWSTR dest, const char *src) {
  const char *in = src;
  LPWSTR out = dest;

  while(*in != '\0') {
    *out++ = *in++;
  }
  *out = '\0';
}
5954
hts_fopen_utf8(const char * path,const char * mode)5955 FILE *hts_fopen_utf8(const char *path, const char *mode) {
5956 WCHAR wmode[32];
5957 LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
5958
5959 assertf(strlen(mode) < sizeof(wmode) / sizeof(WCHAR));
5960 copyWchar(wmode, mode);
5961 if (wpath != NULL) {
5962 FILE *const fp = _wfopen(wpath, wmode);
5963
5964 free(wpath);
5965 return fp;
5966 } else {
5967 // Fallback on conversion error.
5968 return fopen(path, mode);
5969 }
5970 }
5971
/* stat() taking an UTF-8 path: uses _wstat() on the converted path, or
   _stat() on the original path when the conversion fails. */
int hts_stat_utf8(const char *path, STRUCT_STAT * buf) {
  LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
  int result;

  if (wpath == NULL) {
    // Fallback on conversion error.
    return _stat(path, buf);
  }
  result = _wstat(wpath, buf);
  free(wpath);
  return result;
}
5985
hts_unlink_utf8(const char * path)5986 int hts_unlink_utf8(const char *path) {
5987 LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
5988
5989 if (wpath != NULL) {
5990 const int result = _wunlink(wpath);
5991
5992 free(wpath);
5993 return result;
5994 } else {
5995 // Fallback on conversion error.
5996 return _unlink(path);
5997 }
5998 }
5999
hts_rename_utf8(const char * oldpath,const char * newpath)6000 int hts_rename_utf8(const char *oldpath, const char *newpath) {
6001 LPWSTR woldpath =
6002 hts_convertUTF8StringToUCS2(oldpath, (int) strlen(oldpath), NULL);
6003 LPWSTR wnewpath =
6004 hts_convertUTF8StringToUCS2(newpath, (int) strlen(newpath), NULL);
6005 if (woldpath != NULL && wnewpath != NULL) {
6006 const int result = _wrename(woldpath, wnewpath);
6007
6008 free(woldpath);
6009 free(wnewpath);
6010 return result;
6011 } else {
6012 if (woldpath != NULL)
6013 free(woldpath);
6014 if (wnewpath != NULL)
6015 free(wnewpath);
6016 // Fallback on conversion error.
6017 return rename(oldpath, newpath);
6018 }
6019 }
6020
hts_mkdir_utf8(const char * path)6021 int hts_mkdir_utf8(const char *path) {
6022 LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
6023
6024 if (wpath != NULL) {
6025 const int result = _wmkdir(wpath);
6026
6027 free(wpath);
6028 return result;
6029 } else {
6030 // Fallback on conversion error.
6031 return _mkdir(path);
6032 }
6033 }
6034
hts_utime_utf8(const char * path,const STRUCT_UTIMBUF * times)6035 HTSEXT_API int hts_utime_utf8(const char *path, const STRUCT_UTIMBUF * times) {
6036 STRUCT_UTIMBUF mtimes = *times;
6037 LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
6038
6039 if (wpath != NULL) {
6040 const int result = _wutime(wpath, &mtimes);
6041
6042 free(wpath);
6043 return result;
6044 } else {
6045 // Fallback on conversion error.
6046 return _utime(path, &mtimes);
6047 }
6048 }
6049
6050 #endif
6051
// End of file
6053