1 /* ------------------------------------------------------------ */
2 /*
3 HTTrack Website Copier, Offline Browser for Windows and Unix
4 Copyright (C) 1998-2017 Xavier Roche and other contributors
5 
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 
19 Important notes:
20 
21 - We hereby ask people using this source NOT to use it in purpose of grabbing
22 emails addresses, or collecting any other private information on persons.
23 This would disgrace our work, and spoil the many hours we spent on it.
24 
25 Please visit our Website: http://www.httrack.com
26 */
27 
28 /* ------------------------------------------------------------ */
29 /* File: Subroutines                                            */
30 /* Author: Xavier Roche                                         */
31 /* ------------------------------------------------------------ */
32 
33 /* Internal engine bytecode */
34 #define HTS_INTERNAL_BYTECODE
35 
// Library source file (.c)
37 
38 #include "htscore.h"
39 
40 /* specific definitions */
41 #include "htsbase.h"
42 #include "htsnet.h"
43 #include "htsbauth.h"
44 #include "htsthread.h"
45 #include "htsback.h"
46 #include "htswrap.h"
47 #include "htsmd5.h"
48 #include "htsmodules.h"
49 #include "htscharset.h"
50 #include "htsencoding.h"
51 
52 #ifdef _WIN32
53 #include <direct.h>
54 #else
55 #ifdef HAVE_SYS_TYPES_H
56 #include <sys/types.h>
57 #endif
58 #ifdef HAVE_SYS_STAT_H
59 #include <sys/stat.h>
60 #endif
61 #ifdef HAVE_UNISTD_H
62 #include <unistd.h>
63 #endif
64 #endif /* _WIN32 */
65 #include <stdarg.h>
66 
67 #include <string.h>
68 #include <time.h>
69 #include <stdarg.h>
70 
71 #ifndef _WIN32
72 #include <sys/time.h>
73 #else
74 #include <sys/timeb.h>
75 #endif
76 #include <fcntl.h>
77 
// for struct utimbuf
79 #ifdef _WIN32
80 #include <sys/utime.h>
81 #else
82 #include <utime.h>
83 #endif /* _WIN32 */
84 
85 #include <sys/stat.h>
86 
87 #ifdef __ANDROID__
88 #define timezone 0
89 #endif
90 /* END specific definitions */
91 
92 /* Windows might be missing va_copy */
93 #ifdef _WIN32
94 #ifndef va_copy
95 #define va_copy(dst, src) ((dst) = (src))
96 #endif
97 #endif
98 
99 // Debugging
100 #if _HTS_WIDE
101 FILE *DEBUG_fp = NULL;
102 #endif
103 
/* global variables */
105 int _DEBUG_HEAD;
106 FILE *ioinfo;
107 
108 #if HTS_USEOPENSSL
109 SSL_CTX *openssl_ctx = NULL;
110 #endif
111 int IPV6_resolver = 0;
112 
/* Additional detection.
   Entry order is unchanged; an empty string closes the list. */
const char *hts_detect[] = {
  "archive", "background",
  "data",                       /* OBJECT */
  "dynsrc", "lowsrc",
  "profile",                    /* META element */
  "src", "swurl", "url", "usemap",
  "longdesc",                   /* accessibility */
  "xlink:href",                 /* xml/svg tag */
  "poster",                     /* HTML5 */
  ""
};
130 
/* Detect on the beginning of the name.
   An empty string closes the list. */
const char *hts_detectbeg[] = {
  "hotspot",                    /* hotspot1=..,hotspot2=.. */
  ""
};
136 
/* Do not detect links inside these.
   Entries are kept in their original (alphabetical) order; an empty string
   closes the list. */
const char *hts_nodetect[] = {
  "accept-charset", "accesskey", "action", "align", "alt", "axes", "axis",
  "char", "charset", "cite", "class", "classid", "code", "color",
  "datetime", "dir",
  "enctype",
  "face",
  "height",
  "id",
  "lang", "language",
  "media", "method",
  "name",
  "prompt",
  "scheme", "size", "style",
  "target", "title", "type",
  "valign", "version",
  "width",
  ""
};
176 
/* Detection of small inline javascript code.
   ALSO USED: detection based on the name: onXXX="<tag>" where XXX starts
   with upper case letter.
   An empty string closes the list. */
const char *hts_detect_js[] = {
  "onAbort", "onBlur", "onChange", "onClick", "onDblClick", "onDragDrop",
  "onError", "onFocus",
  "onKeyDown", "onKeyPress", "onKeyUp",
  "onLoad",
  "onMouseDown", "onMouseMove", "onMouseOut", "onMouseOver", "onMouseUp",
  "onMove",
  "onReset", "onResize",
  "onSelect", "onSubmit",
  "onUnload",
  "style",                      /* hack for CSS code data */
  ""
};
206 
/* Top-level MIME type names; an empty string closes the list. */
const char *hts_main_mime[] = {
  "application", "audio", "image", "message", "multipart", "text", "video",
  ""
};
217 
/* Detection of "...URL=<url>".
   An empty string closes the list. */
const char *hts_detectURL[] = {
  "content",
  ""
};
223 
/* Tags where the URL must be rewritten but not captured.
   An empty string closes the list. */
const char *hts_detectandleave[] = {
  "action",
  ""
};
229 
/* Do not rename the returned types (often unknown types).
   An empty string closes the list. */
const char *hts_mime_keep[] = {
  "application/octet-stream", "text/plain",
  "application/xml", "text/xml",
  ""
};
238 
/* Bogus servers return these mime types when the extension is seen within
   the filename.
   An empty string closes the list. */
const char *hts_mime_bogus_multiple[] = {
  "application/x-wais-source",  /* src (src.rpm) */
  ""
};
244 
/* No known mime type, but known extension.
   Entry order is unchanged; an empty string closes the list. */
const char *hts_ext_dynamic[] = {
  "php3", "php", "php4", "php2",
  "cgi", "asp", "jsp", "pl",
  /*"exe", */
  "cfm",
  "nsf",                        /* lotus */
  ""
};
260 
/* MIME types
   Each row is { MIME type, file extension (no leading dot) }; the table is
   closed by an empty { "", "" } row.
   The same MIME type may be listed with several extensions, and the same
   extension with several MIME types, so row order matters for a first-match
   lookup (NOTE(review): the lookup code is outside this view -- confirm).
   Exact duplicate rows are not kept: they add nothing to a linear search.
   note: application/octet-stream should not be used here
*/
const char *hts_mime[][2] = {
  {"application/acad", "dwg"},
  {"application/arj", "arj"},
  {"application/clariscad", "ccad"},
  {"application/drafting", "drw"},
  {"application/dxf", "dxf"},
  {"application/excel", "xls"},
  {"application/i-deas", "unv"},
  {"application/iges", "igs"},
  {"application/iges", "iges"},
  {"application/mac-binhex40", "hqx"},
  {"application/mac-compactpro", "cpt"},
  {"application/msword", "doc"},
  {"application/msword", "w6w"},
  {"application/msword", "word"},
  {"application/mswrite", "wri"},
  /*{"application/octet-stream","dms"}, */
  /*{"application/octet-stream","lzh"}, */
  /*{"application/octet-stream","lha"}, */
  /*{"application/octet-stream","bin"}, */
  {"application/oda", "oda"},
  {"application/pdf", "pdf"},
  {"application/postscript", "ps"},
  {"application/postscript", "ai"},
  {"application/postscript", "eps"},
  {"application/powerpoint", "ppt"},
  {"application/pro_eng", "prt"},
  {"application/pro_eng", "part"},
  {"application/rtf", "rtf"},
  {"application/set", "set"},
  {"application/sla", "stl"},
  {"application/smil", "smi"},
  {"application/smil", "smil"},
  {"application/smil", "sml"},
  {"application/solids", "sol"},
  {"application/STEP", "stp"},
  {"application/STEP", "step"},
  {"application/vda", "vda"},
  {"application/x-authorware-map", "aam"},
  {"application/x-authorware-seg", "aas"},
  {"application/x-authorware-bin", "aab"},
  {"application/x-bzip2", "bz2"},
  {"application/x-cocoa", "cco"},
  {"application/x-csh", "csh"},
  {"application/x-director", "dir"},
  {"application/x-director", "dcr"},
  {"application/x-director", "dxr"},
  {"application/x-mif", "mif"},
  {"application/x-dvi", "dvi"},
  {"application/x-gzip", "gz"},
  {"application/x-gzip", "gzip"},
  {"application/x-hdf", "hdf"},
  {"application/x-javascript", "js"},
  {"application/x-koan", "skp"},
  {"application/x-koan", "skd"},
  {"application/x-koan", "skt"},
  {"application/x-koan", "skm"},
  {"application/x-latex", "latex"},
  {"application/x-netcdf", "nc"},
  {"application/x-netcdf", "cdf"},
  /* {"application/x-sh","sh"}, */
  /* {"application/x-csh","csh"}, */
  /* {"application/x-ksh","ksh"}, */
  {"application/x-shar", "shar"},
  {"application/x-stuffit", "sit"},
  {"application/x-tcl", "tcl"},
  {"application/x-tex", "tex"},
  {"application/x-texinfo", "texinfo"},
  {"application/x-texinfo", "texi"},
  {"application/x-troff", "t"},
  {"application/x-troff", "tr"},
  {"application/x-troff", "roff"},
  {"application/x-troff-man", "man"},
  {"application/x-troff-ms", "ms"},
  {"application/x-wais-source", "src"},
  {"application/zip", "zip"},
  {"application/x-zip-compressed", "zip"},
  {"application/x-bcpio", "bcpio"},
  {"application/x-cdlink", "vcd"},
  {"application/x-cpio", "cpio"},
  {"application/x-gtar", "tgz"},
  {"application/x-gtar", "gtar"},
  {"application/x-shockwave-flash", "swf"},
  {"application/x-sv4cpio", "sv4cpio"},
  {"application/x-sv4crc", "sv4crc"},
  {"application/x-tar", "tar"},
  {"application/x-ustar", "ustar"},
  {"application/x-winhelp", "hlp"},
  {"application/xml", "xml"},
  {"audio/midi", "mid"},
  {"audio/midi", "midi"},
  {"audio/midi", "kar"},
  {"audio/mpeg", "mp3"},
  {"audio/mpeg", "mpga"},
  {"audio/mpeg", "mp2"},
  {"audio/basic", "au"},
  {"audio/basic", "snd"},
  {"audio/x-aiff", "aif"},
  {"audio/x-aiff", "aiff"},
  {"audio/x-aiff", "aifc"},
  {"audio/x-pn-realaudio", "rm"},
  {"audio/x-pn-realaudio", "ram"},
  {"audio/x-pn-realaudio", "ra"},
  {"audio/x-pn-realaudio-plugin", "rpm"},
  {"audio/x-wav", "wav"},
  {"chemical/x-pdb", "pdb"},
  {"chemical/x-pdb", "xyz"},
  {"drawing/x-dwf", "dwf"},
  {"image/gif", "gif"},
  {"image/ief", "ief"},
  {"image/jpeg", "jpg"},
  {"image/jpeg", "jpe"},
  {"image/jpeg", "jpeg"},
  {"image/pict", "pict"},
  {"image/png", "png"},
  {"image/tiff", "tiff"},
  {"image/tiff", "tif"},
  {"image/svg+xml", "svg"},
  {"image/svg-xml", "svg"},
  {"image/x-cmu-raster", "ras"},
  {"image/x-freehand", "fh4"},
  {"image/x-freehand", "fh7"},
  {"image/x-freehand", "fh5"},
  {"image/x-freehand", "fhc"},
  {"image/x-freehand", "fh"},
  {"image/x-portable-anymap", "pnm"},
  {"image/x-portable-graymap", "pgm"},
  {"image/x-portable-pixmap", "ppm"},
  {"image/x-rgb", "rgb"},
  {"image/x-xbitmap", "xbm"},
  {"image/x-xpixmap", "xpm"},
  {"image/x-xwindowdump", "xwd"},
  {"model/mesh", "msh"},
  {"model/mesh", "mesh"},
  {"model/mesh", "silo"},
  {"multipart/x-zip", "zip"},
  {"multipart/x-gzip", "gzip"},
  {"text/css", "css"},
  {"text/html", "html"},
  {"text/html", "htm"},
  {"text/plain", "txt"},
  {"text/plain", "g"},
  {"text/plain", "h"},
  {"text/plain", "c"},
  {"text/plain", "cc"},
  {"text/plain", "hh"},
  {"text/plain", "m"},
  {"text/plain", "f90"},
  {"text/richtext", "rtx"},
  {"text/tab-separated-values", "tsv"},
  {"text/x-setext", "etx"},
  {"text/x-sgml", "sgml"},
  {"text/x-sgml", "sgm"},
  {"text/xml", "xml"},
  {"text/xml", "dtd"},
  {"video/mpeg", "mpeg"},
  {"video/mpeg", "mpg"},
  {"video/mpeg", "mpe"},
  {"video/quicktime", "qt"},
  {"video/quicktime", "mov"},
  {"video/x-msvideo", "avi"},
  {"video/x-sgi-movie", "movie"},
  {"x-conference/x-cooltalk", "ice"},
  /*{"application/x-httpd-cgi","cgi"}, */
  {"x-world/x-vrml", "wrl"},

  /* More from w3schools.com */
  {"application/envoy", "evy"},
  {"application/fractals", "fif"},
  {"application/futuresplash", "spl"},
  {"application/hta", "hta"},
  {"application/internet-property-stream", "acx"},
  {"application/msword", "dot"},
  {"application/olescript", "axs"},
  {"application/pics-rules", "prf"},
  {"application/pkcs10", "p10"},
  {"application/pkix-crl", "crl"},
  {"application/set-payment-initiation", "setpay"},
  {"application/set-registration-initiation", "setreg"},
  {"application/vnd.ms-excel", "xls"},
  {"application/vnd.ms-excel", "xla"},
  {"application/vnd.ms-excel", "xlc"},
  {"application/vnd.ms-excel", "xlm"},
  {"application/vnd.ms-excel", "xlt"},
  {"application/vnd.ms-excel", "xlw"},
  {"application/vnd.ms-pkicertstore", "sst"},
  {"application/vnd.ms-pkiseccat", "cat"},
  {"application/vnd.ms-powerpoint", "ppt"},
  {"application/vnd.ms-powerpoint", "pot"},
  {"application/vnd.ms-powerpoint", "pps"},
  {"application/vnd.ms-project", "mpp"},
  {"application/vnd.ms-works", "wcm"},
  {"application/vnd.ms-works", "wdb"},
  {"application/vnd.ms-works", "wks"},
  {"application/vnd.ms-works", "wps"},
  {"application/x-compress", "z"},
  {"application/x-compressed", "tgz"},
  {"application/x-internet-signup", "ins"},
  {"application/x-internet-signup", "isp"},
  {"application/x-iphone", "iii"},
  {"application/x-msaccess", "mdb"},
  {"application/x-mscardfile", "crd"},
  {"application/x-msclip", "clp"},
  {"application/x-msmediaview", "m13"},
  {"application/x-msmediaview", "m14"},
  {"application/x-msmediaview", "mvb"},
  {"application/x-msmetafile", "wmf"},
  {"application/x-msmoney", "mny"},
  {"application/x-mspublisher", "pub"},
  {"application/x-msschedule", "scd"},
  {"application/x-msterminal", "trm"},
  {"application/x-perfmon", "pma"},
  {"application/x-perfmon", "pmc"},
  {"application/x-perfmon", "pml"},
  {"application/x-perfmon", "pmr"},
  {"application/x-perfmon", "pmw"},
  {"application/x-pkcs12", "p12"},
  {"application/x-pkcs12", "pfx"},
  {"application/x-pkcs7-certificates", "p7b"},
  {"application/x-pkcs7-certificates", "spc"},
  {"application/x-pkcs7-certreqresp", "p7r"},
  {"application/x-pkcs7-mime", "p7c"},
  {"application/x-pkcs7-mime", "p7m"},
  {"application/x-pkcs7-signature", "p7s"},
  {"application/x-troff-me", "me"},
  {"application/x-x509-ca-cert", "cer"},
  {"application/x-x509-ca-cert", "crt"},
  {"application/x-x509-ca-cert", "der"},
  /* "ynd" (sic): spelling kept as found; NOTE(review): servers may really
     send this misspelled type -- confirm before correcting it */
  {"application/ynd.ms-pkipko", "pko"},
  {"audio/mid", "mid"},
  {"audio/mid", "rmi"},
  {"audio/x-mpegurl", "m3u"},
  {"image/bmp", "bmp"},
  {"image/cis-cod", "cod"},
  {"image/pipeg", "jfif"},
  {"image/x-cmx", "cmx"},
  {"image/x-icon", "ico"},
  {"image/x-portable-bitmap", "pbm"},
  {"message/rfc822", "mht"},
  {"message/rfc822", "mhtml"},
  {"message/rfc822", "nws"},
  {"text/h323", "323"},
  {"text/html", "stm"},
  {"text/iuls", "uls"},
  {"text/plain", "bas"},
  {"text/scriptlet", "sct"},
  {"text/webviewhtml", "htt"},
  {"text/x-component", "htc"},
  {"text/x-vcard", "vcf"},
  {"video/mpeg", "mp2"},
  {"video/mpeg", "mpa"},
  {"video/mpeg", "mpv2"},
  {"video/x-la-asf", "lsf"},
  {"video/x-la-asf", "lsx"},
  {"video/x-ms-asf", "asf"},
  {"video/x-ms-asf", "asr"},
  {"video/x-ms-asf", "asx"},
  {"video/x-ms-wmv", "wmv"},
  {"x-world/x-vrml", "flr"},
  {"x-world/x-vrml", "vrml"},
  {"x-world/x-vrml", "wrz"},
  {"x-world/x-vrml", "xaf"},
  {"x-world/x-vrml", "xof"},

  /* Various */
  {"application/ogg", "ogg"},

  {"application/x-java-vm", "class"},
  {"application/x-bittorrent","torrent"},

  {"", ""}
};
540 
// Character classes (RFC2396).
// Every macro converts its argument to unsigned char before comparing and
// yields non-zero on a match. The argument is expanded more than once, so do
// not pass an expression with side effects.
#define CIS(c,ch) ( ((unsigned char)(c)) == (ch) )

// Reserved (RFC2396):  ; / ? : @ & = + $ ,
#define CHAR_RESERVED(c)  ( CIS(c,';') || CIS(c,'/') || CIS(c,'?') \
                         || CIS(c,':') || CIS(c,'@') || CIS(c,'&') \
                         || CIS(c,'=') || CIS(c,'+') || CIS(c,'$') \
                         || CIS(c,',') )

// Delimiters (RFC2396):  < > # % and the double quote
#define CHAR_DELIM(c)     ( CIS(c,'<') || CIS(c,'>') || CIS(c,'#') \
                         || CIS(c,'%') || CIS(c,'\"') )

// Unwise (RFC2396):  { } | ^ [ ] ` and the backslash
#define CHAR_UNWISE(c)    ( CIS(c,'{') || CIS(c,'}') || CIS(c,'|') \
                         || CIS(c,'\\') || CIS(c,'^') || CIS(c,'[') \
                         || CIS(c,']') || CIS(c,'`') )

// Special (escape chars) (RFC2396 + >127): codes 0..31 and codes 127..255
#define CHAR_LOW(c)       ( ((unsigned char)(c) <= 31) )
#define CHAR_HIG(c)       ( ((unsigned char)(c) >= 127) )
#define CHAR_SPECIAL(c)   ( CHAR_LOW(c) || CHAR_HIG(c) )

// We try to avoid them and encode them instead:
// space * ' & ! and the double quote
#define CHAR_XXAVOID(c)   ( CIS(c,' ') || CIS(c,'*') || CIS(c,'\'') \
                         || CIS(c,'\"') || CIS(c,'&') || CIS(c,'!') )

// Marks:  - _ . ! ~ * ' ( )
#define CHAR_MARK(c)      ( CIS(c,'-') || CIS(c,'_') || CIS(c,'.') \
                         || CIS(c,'!') || CIS(c,'~') || CIS(c,'*') \
                         || CIS(c,'\'') || CIS(c,'(') || CIS(c,')') )
593 
// optional conversion of '/' to backslash
595 #ifdef _WIN32
// Copy s into catbuff, turning every '/' into a backslash.
// catbuff: caller-supplied destination; no size is passed in, so it has to be
//          able to hold s and its terminator.
// s:       source string.
// return:  catbuff
char *antislash(char *catbuff, const char *s) {
  char *a;

  strcpybuff(catbuff, s);
  // Each search resumes just after the previous hit, so the string is
  // scanned once instead of being rescanned from the start per replacement.
  for (a = strchr(catbuff, '/'); a != NULL; a = strchr(a + 1, '/'))
    *a = '\\';
  return catbuff;
}
604 #endif
605 
606 // Initialize a htsblk structure
hts_init_htsblk(htsblk * r)607 void hts_init_htsblk(htsblk * r) {
608   memset(r, 0, sizeof(htsblk)); // effacer
609   r->soc = INVALID_SOCKET;
610   r->msg[0] = '\0';
611   r->statuscode = STATUSCODE_INVALID;
612   r->totalsize = -1;
613 }
614 
615 // ouvre une liaison http, envoie une requète GET et réceptionne le header
616 // retour: socket
http_fopen(httrackp * opt,const char * adr,const char * fil,htsblk * retour)617 T_SOC http_fopen(httrackp * opt, const char *adr, const char *fil, htsblk * retour) {
618   //                / GET, traiter en-tête
619   return http_xfopen(opt, 0, 1, 1, NULL, adr, fil, retour);
620 }
621 
622 // ouverture d'une liaison http, envoi d'une requète
623 // mode: 0 GET  1 HEAD  [2 POST]
624 // treat: traiter header?
625 // waitconnect: attendre le connect()
626 // note: dans retour, on met les params du proxy
http_xfopen(httrackp * opt,int mode,int treat,int waitconnect,const char * xsend,const char * adr,const char * fil,htsblk * retour)627 T_SOC http_xfopen(httrackp * opt, int mode, int treat, int waitconnect,
628                   const char *xsend, const char *adr, const char *fil, htsblk * retour) {
629   //htsblk retour;
630   //int bufl=TAILLE_BUFFER;    // 8Ko de buffer
631   T_SOC soc = INVALID_SOCKET;
632   char BIGSTK tempo_fil[HTS_URLMAXSIZE * 2];
633 
634   //char *p,*q;
635 
636   // retour prédéfini: erreur
637   if (retour) {
638     retour->adr = NULL;
639     retour->size = 0;
640     retour->msg[0] = '\0';
641     retour->statuscode = STATUSCODE_NON_FATAL;  // a priori erreur non fatale
642   }
643 #if HDEBUG
644   printf("adr=%s\nfichier=%s\n", adr, fil);
645 #endif
646 
647   // ouvrir liaison
648 #if HDEBUG
649   printf("Création d'une socket sur %s\n", adr);
650 #endif
651 
652 #if CNXDEBUG
653   printf("..newhttp\n");
654 #endif
655 
656   /* connexion */
657   if (retour) {
658     if ((!(retour->req.proxy.active))
659         || ((strcmp(adr, "file://") == 0)
660             || (strncmp(adr, "https://", 8) == 0)
661         )
662       ) {                       /* pas de proxy, ou non utilisable ici */
663       soc = newhttp(opt, adr, retour, -1, waitconnect);
664     } else {
665       soc = newhttp(opt, retour->req.proxy.name, retour, retour->req.proxy.port, waitconnect);  // ouvrir sur le proxy à la place
666     }
667   } else {
668     soc = newhttp(opt, adr, NULL, -1, waitconnect);
669   }
670 
671   // copier index socket retour
672   if (retour)
673     retour->soc = soc;
674 
675   /* Check for errors */
676   if (soc == INVALID_SOCKET) {
677     if (retour) {
678       if (retour->msg) {
679         if (!strnotempty(retour->msg)) {
680 #ifdef _WIN32
681           int last_errno = WSAGetLastError();
682 
683           sprintf(retour->msg, "Connect error: %s", strerror(last_errno));
684 #else
685           int last_errno = errno;
686 
687           sprintf(retour->msg, "Connect error: %s", strerror(last_errno));
688 #endif
689         }
690       }
691     }
692   }
693   // --------------------
694   // court-circuit (court circuite aussi le proxy..)
695   // LOCAL_SOCKET_ID est une pseudo-socket locale
696   if (soc == LOCAL_SOCKET_ID) {
697     retour->is_file = 1;        // fichier local
698     if (mode == 0) {            // GET
699 
700       // Test en cas de file:///C|...
701       if (!fexist
702           (fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
703           unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil))))
704         if (fexist
705             (fconv
706              (OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
707              unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil + 1)))) {
708           strcpybuff(tempo_fil, fil + 1);
709           fil = tempo_fil;
710         }
711       // Ouvrir
712       retour->totalsize = fsize(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
713         unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil)));       // taille du fichier
714       retour->msg[0] = '\0';
715       soc = INVALID_SOCKET;
716       if (retour->totalsize < 0)
717         strcpybuff(retour->msg, "Unable to open local file");
718       else {
719         // Note: On passe par un FILE* (plus propre)
720         //soc=open(fil,O_RDONLY,0);    // en lecture seule!
721         retour->fp = FOPEN(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
722           unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil)), "rb");      // ouvrir
723         if (retour->fp == NULL)
724           soc = INVALID_SOCKET;
725         else
726           soc = LOCAL_SOCKET_ID;
727       }
728       retour->soc = soc;
729       if (soc != INVALID_SOCKET) {
730         retour->statuscode = HTTP_OK;   // OK
731         strcpybuff(retour->msg, "OK");
732         guess_httptype(opt, retour->contenttype, fil);
733       } else if (strnotempty(retour->msg) == 0)
734         strcpybuff(retour->msg, "Unable to open local file");
735       return soc;               // renvoyer
736     } else {                    // HEAD ou POST : interdit sur un local!!!! (c'est idiot!)
737       strcpybuff(retour->msg, "Unexpected Head/Post local request");
738       soc = INVALID_SOCKET;     // erreur
739       retour->soc = soc;
740       return soc;
741     }
742   }
743   // --------------------
744 
745   if (soc != INVALID_SOCKET) {
746     char rcvd[1100];
747 
748     rcvd[0] = '\0';
749 #if HDEBUG
750     printf("Ok, connexion réussie, id=%d\n", soc);
751 #endif
752 
753     // connecté?
754     if (waitconnect) {
755       http_sendhead(opt, NULL, mode, xsend, adr, fil, NULL, NULL, retour);
756     }
757 
758     if (soc != INVALID_SOCKET) {
759 
760 #if HDEBUG
761       printf("Attente de la réponse:\n");
762 #endif
763 
764       // si GET (réception d'un fichier), réceptionner en-tête d'abord,
765       // et ensuite le corps
766       // si POST on ne réceptionne rien du tout, c'est après que l'on fera
767       // une réception standard pour récupérer l'en tête
768       if ((treat) && (waitconnect)) {   // traiter (attendre!) en-tête
769         // Réception de la status line et de l'en-tête (norme RFC1945)
770 
771         // status-line à récupérer
772         finput(soc, rcvd, 1024);
773         if (strnotempty(rcvd) == 0)
774           finput(soc, rcvd, 1024);      // "certains serveurs buggés envoient un \n au début" (RFC)
775 
776         // traiter status-line
777         treatfirstline(retour, rcvd);
778 
779 #if HDEBUG
780         printf("Status-Code=%d\n", retour->statuscode);
781 #endif
782 
783         // en-tête
784 
785         // header // ** !attention! HTTP/0.9 non supporté
786         do {
787           finput(soc, rcvd, 1024);
788 #if HDEBUG
789           printf(">%s\n", rcvd);
790 #endif
791           if (strnotempty(rcvd))
792             treathead(NULL, NULL, NULL, retour, rcvd);  // traiter
793 
794         } while(strnotempty(rcvd));
795 
796         //rcvsize=-1;    // forCER CHARGEMENT INCONNU
797 
798         //if (retour)
799         //  retour->totalsize=rcvsize;
800 
801       } else {                  // si GET, on recevra l'en tête APRES
802         //rcvsize=-1;    // on ne connait pas la taille de l'en-tête
803         if (retour)
804           retour->totalsize = -1;
805       }
806 
807     }
808 
809   }
810 
811   return soc;
812 }
813 
/* Buffer printing: destination buffer descriptor used by print_buffer() */
typedef struct buff_struct {
  /** Start of the destination character buffer **/
  char *buffer;
  /** Total capacity of the buffer, in bytes **/
  size_t capacity;
  /** Current write offset in the buffer ; MUST point to a valid \0. **/
  size_t pos;
} buff_struct;
823 
824 static void print_buffer(buff_struct*const str, const char *format, ...)
825   HTS_PRINTF_FUN(2, 3);
826 
827 /* Prints on a static buffer. asserts in case of overflow. */
print_buffer(buff_struct * const str,const char * format,...)828 static void print_buffer(buff_struct*const str, const char *format, ...) {
829   size_t result;
830   va_list args;
831   size_t remaining;
832   char *position;
833 
834   /* Security check. */
835   assertf(str != NULL);
836   assertf(str->pos < str->capacity);
837 
838   /* Print */
839   position = &str->buffer[str->pos];
840   remaining = str->capacity - str->pos;
841   va_start(args, format);
842   result = (size_t) vsnprintf(position, remaining, format, args);
843   va_end(args);
844   assertf(result < remaining);
845 
846   /* Increment. */
847   str->pos += strlen(position);
848   assertf(str->pos < str->capacity);
849 }
850 
// send a request
http_sendhead(httrackp * opt,t_cookie * cookie,int mode,const char * xsend,const char * adr,const char * fil,const char * referer_adr,const char * referer_fil,htsblk * retour)852 int http_sendhead(httrackp * opt, t_cookie * cookie, int mode,
853                   const char *xsend, const char *adr, const char *fil,
854                   const char *referer_adr, const char *referer_fil,
855                   htsblk * retour) {
856   char BIGSTK buffer_head_request[8192];
857   buff_struct bstr = { buffer_head_request, sizeof(buffer_head_request), 0 };
858 
859   //int use_11=0;     // HTTP 1.1 utilisé
860   int direct_url = 0;           // ne pas analyser l'url (exemple: ftp://)
861   const char *search_tag = NULL;
862 
863   // Initialize buffer
864   buffer_head_request[0] = '\0';
865 
866   // header Date
867   //strcatbuff(buff,"Date: ");
868   //time_gmt_rfc822(buff);    // obtenir l'heure au format rfc822
869   //sendc("\n");
870   //strcatbuff(buff,buff);
871 
872   // possibilité non documentée: >post: et >postfile:
873   // si présence d'un tag >post: alors executer un POST
874   // exemple: http://www.someweb.com/test.cgi?foo>post:posteddata=10&foo=5
875   // si présence d'un tag >postfile: alors envoyer en tête brut contenu dans le fichier en question
876   // exemple: http://www.someweb.com/test.cgi?foo>postfile:post0.txt
877   search_tag = strstr(fil, POSTTOK ":");
878   if (!search_tag) {
879     search_tag = strstr(fil, POSTTOK "file:");
880     if (search_tag) {           // postfile
881       if (mode == 0) {          // GET!
882         FILE *fp =
883           FOPEN(unescape_http(OPT_GET_BUFF(opt),
884                 OPT_GET_BUFF_SIZE(opt), search_tag + strlen(POSTTOK) + 5), "rb");
885         if (fp) {
886           char BIGSTK line[1100];
887           char BIGSTK protocol[256], url[HTS_URLMAXSIZE * 2], method[256];
888 
889           linput(fp, line, 1000);
890           if (sscanf(line, "%s %s %s", method, url, protocol) == 3) {
891             size_t ret;
892             // selon que l'on a ou pas un proxy
893             if (retour->req.proxy.active) {
894               print_buffer(&bstr,
895                       "%s http://%s%s %s\r\n", method, adr, url,
896                       protocol);
897             } else {
898               print_buffer(&bstr,
899                        "%s %s %s\r\n", method, url, protocol);
900             }
901             // lire le reste en brut
902             ret = fread(&bstr.buffer[bstr.pos],
903                         bstr.capacity - bstr.pos, 1, fp);
904             if ((int) ret < 0) {
905               return -1;
906             }
907             bstr.pos += strlen(&bstr.buffer[bstr.pos]);
908           }
909           fclose(fp);
910         }
911       }
912     }
913   }
914   // Fin postfile
915 
916   if (bstr.pos == 0) { // PAS POSTFILE
917     // Type de requète?
918     if ((search_tag) && (mode == 0)) {
919       print_buffer(&bstr, "POST ");
920     } else if (mode == 0) {     // GET
921       print_buffer(&bstr, "GET ");
922     } else {                    // if (mode==1) {
923       if (!retour->req.http11)  // forcer HTTP/1.0
924         print_buffer(&bstr, "GET ");       // certains serveurs (cgi) buggent avec HEAD
925       else
926         print_buffer(&bstr, "HEAD ");
927     }
928 
929     // si on gère un proxy, il faut une Absolute URI: on ajoute avant http://www.adr.dom
930     if (retour->req.proxy.active && (strncmp(adr, "https://", 8) != 0)) {
931       if (!link_has_authority(adr)) {   // default http
932 #if HDEBUG
933         printf("Proxy Use: for %s%s proxy %d port %d\n", adr, fil,
934                retour->req.proxy.name, retour->req.proxy.port);
935 #endif
936         print_buffer(&bstr, "http://%s", jump_identification_const(adr));
937       } else {                  // ftp:// en proxy http
938 #if HDEBUG
939         printf("Proxy Use for ftp: for %s%s proxy %d port %d\n", adr, fil,
940                retour->req.proxy.name, retour->req.proxy.port);
941 #endif
942         direct_url = 1;         // ne pas analyser user/pass
943         print_buffer(&bstr, "%s", adr);
944       }
945     }
946     // NOM DU FICHIER
947     // on slash doit être présent en début, sinon attention aux bad request! (400)
948     if (*fil != '/')
949       print_buffer(&bstr, "/");
950 
951     {
952       char BIGSTK tempo[HTS_URLMAXSIZE * 2];
953 
954       tempo[0] = '\0';
955       if (search_tag)
956         strncatbuff(tempo, fil, (int) (search_tag - fil));
957       else
958         strcpybuff(tempo, fil);
959       inplace_escape_check_url(tempo, sizeof(tempo));
960       print_buffer(&bstr, "%s", tempo);  // avec échappement
961     }
962 
963     // protocole
964     if (!retour->req.http11) {  // forcer HTTP/1.0
965       //use_11=0;
966       print_buffer(&bstr, " HTTP/1.0\x0d\x0a");
967     } else {                    // Requète 1.1
968       //use_11=1;
969       print_buffer(&bstr, " HTTP/1.1\x0d\x0a");
970     }
971 
972     /* supplemental data */
973     if (xsend)
974       print_buffer(&bstr, "%s", xsend);  // éventuelles autres lignes
975 
976     // tester proxy authentication
977     if (retour->req.proxy.active) {
978       if (link_has_authorization(retour->req.proxy.name)) {     // et hop, authentification proxy!
979         const char *a = jump_identification_const(retour->req.proxy.name);
980         const char *astart = jump_protocol_const(retour->req.proxy.name);
981         char autorisation[1100];
982         char user_pass[256];
983 
984         autorisation[0] = user_pass[0] = '\0';
985         //
986         strncatbuff(user_pass, astart, (int) (a - astart) - 1);
987         strcpybuff(user_pass, unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), user_pass));
988         code64((unsigned char *) user_pass, (int) strlen(user_pass),
989                (unsigned char *) autorisation, 0);
990         print_buffer(&bstr, "Proxy-Authorization: Basic %s"H_CRLF,
991                      autorisation);
992 #if HDEBUG
993         printf("Proxy-Authenticate, %s (code: %s)\n", user_pass, autorisation);
994 #endif
995       }
996     }
997     // Referer?
998     if (referer_adr != NULL && referer_fil != NULL && strnotempty(referer_adr)
999         && strnotempty(referer_fil)
1000       ) {                       // non vide
1001       if ((strcmp(referer_adr, "file://") != 0)
1002           && (                  /* no https referer to http urls */
1003                (strncmp(referer_adr, "https://", 8) != 0)       /* referer is not https */
1004                ||(strncmp(adr, "https://", 8) == 0)     /* or referer AND addresses are https */
1005           )
1006         ) {                     // PAS file://
1007         print_buffer(&bstr, "Referer: http://%s%s"H_CRLF,
1008                      jump_identification_const(referer_adr), referer_fil);
1009       }
1010     }
1011     // HTTP field: referer
1012     else if (strnotempty(retour->req.referer)) {
1013       print_buffer(&bstr, "Referer: %s"H_CRLF, retour->req.referer);
1014     }
1015     // POST?
1016     if (mode == 0) {            // GET!
1017       if (search_tag) {
1018         print_buffer(&bstr, "Content-length: %d" H_CRLF,
1019                 (int) (strlen
1020                        (unescape_http
1021                         (OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
1022                          search_tag + strlen(POSTTOK) + 1))));
1023       }
1024     }
1025     // gestion cookies?
1026     if (cookie) {
1027       char buffer[8192];
1028       char *b = cookie->data;
1029       int cook = 0;
1030       int max_cookies = 8;
1031 
1032       do {
1033         b = cookie_find(b, "", jump_identification_const(adr), fil);  // prochain cookie satisfaisant aux conditions
1034         if (b != NULL) {
1035           max_cookies--;
1036           if (!cook) {
1037             print_buffer(&bstr, "Cookie: $Version=1; ");
1038             cook = 1;
1039           } else
1040             print_buffer(&bstr, "; ");
1041           print_buffer(&bstr, "%s", cookie_get(buffer, b, 5));
1042           print_buffer(&bstr, "=%s", cookie_get(buffer, b, 6));
1043           print_buffer(&bstr, "; $Path=%s", cookie_get(buffer, b, 2));
1044           b = cookie_nextfield(b);
1045         }
1046       } while(b != NULL && max_cookies > 0);
1047       if (cook) {               // on a envoyé un (ou plusieurs) cookie?
1048         print_buffer(&bstr, H_CRLF);
1049 #if DEBUG_COOK
1050         printf("Header:\n%s\n", bstr.buffer);
1051 #endif
1052       }
1053     }
1054     // gérer le keep-alive (garder socket)
1055     if (retour->req.http11 && !retour->req.nokeepalive) {
1056       print_buffer(&bstr, "Connection: keep-alive" H_CRLF);
1057     } else {
1058       print_buffer(&bstr, "Connection: close" H_CRLF);
1059     }
1060 
1061     {
1062       const char *real_adr = jump_identification_const(adr);
1063 
1064       // Mandatory per RFC2616
1065       if (!direct_url) {        // pas ftp:// par exemple
1066         print_buffer(&bstr, "Host: %s"H_CRLF, real_adr);
1067       }
1068 
1069       // HTTP field: from
1070       if (strnotempty(retour->req.from)) {        // HTTP from
1071         print_buffer(&bstr, "From: %s" H_CRLF, retour->req.from);
1072       }
1073 
1074       // Présence d'un user-agent?
1075       if (retour->req.user_agent_send
1076           && strnotempty(retour->req.user_agent)) {
1077         print_buffer(&bstr, "User-Agent: %s" H_CRLF, retour->req.user_agent);
1078       }
1079 
1080       // Accept
1081       if (strnotempty(retour->req.accept)) {
1082         print_buffer(&bstr, "Accept: %s" H_CRLF, retour->req.accept);
1083       }
1084 
1085       // Accept-language
1086       if (strnotempty(retour->req.lang_iso)) {
1087         print_buffer(&bstr, "Accept-Language: %s"H_CRLF, retour->req.lang_iso);
1088       }
1089 
1090       // Compression accepted ?
1091       if (retour->req.http11) {
1092 #if HTS_USEZLIB
1093         if ((!retour->req.range_used)
1094             && (!retour->req.nocompression))
1095           print_buffer(&bstr, "Accept-Encoding: " "gzip" /* gzip if the preffered encoding */
1096                      ", " "identity;q=0.9" H_CRLF);
1097         else
1098           print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF);       /* no compression */
1099 #else
1100         print_buffer(&bstr, "Accept-Encoding: identity" H_CRLF); /* no compression */
1101 #endif
1102       }
1103 
1104       /* Authentification */
1105       {
1106         char autorisation[1100];
1107         const char *a;
1108 
1109         autorisation[0] = '\0';
1110         if (link_has_authorization(adr)) {      // ohh une authentification!
1111           const char *a = jump_identification_const(adr);
1112           const char *astart = jump_protocol_const(adr);
1113 
1114           if (!direct_url) {    // pas ftp:// par exemple
1115             char user_pass[256];
1116 
1117             user_pass[0] = '\0';
1118             strncatbuff(user_pass, astart, (int) (a - astart) - 1);
1119             strcpybuff(user_pass,
1120               unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), user_pass));
1121             code64((unsigned char *) user_pass, (int) strlen(user_pass),
1122                    (unsigned char *) autorisation, 0);
1123             if (strcmp(fil, "/robots.txt"))     /* pas robots.txt */
1124               bauth_add(cookie, astart, fil, autorisation);
1125           }
1126         } else if ((a = bauth_check(cookie, real_adr, fil)))
1127           strcpybuff(autorisation, a);
1128         /* On a une autorisation a donner?  */
1129         if (strnotempty(autorisation)) {
1130           print_buffer(&bstr, "Authorization: Basic %s"H_CRLF, autorisation);
1131         }
1132       }
1133 
1134     }
1135     //strcatbuff(buff,"Accept-Charset: iso-8859-1,*,utf-8\n");
1136 
1137     // Custom header(s)
1138     if (strnotempty(retour->req.headers)) {
1139       print_buffer(&bstr, "%s", retour->req.headers);
1140     }
1141 
1142     // CRLF de fin d'en tête
1143     print_buffer(&bstr, H_CRLF);
1144 
1145     // données complémentaires?
1146     if (search_tag)
1147       if (mode == 0)            // GET!
1148         print_buffer(&bstr, "%s",
1149                    unescape_http(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
1150                                  search_tag + strlen(POSTTOK) + 1));
1151   }
1152 #if HDEBUG
1153 #endif
1154   if (_DEBUG_HEAD) {
1155     if (ioinfo) {
1156       fprintf(ioinfo, "[%d] request for %s%s:\r\n", retour->debugid,
1157               jump_identification_const(adr), fil);
1158       fprintfio(ioinfo, bstr.buffer, "<<< ");
1159       fprintf(ioinfo, "\r\n");
1160       fflush(ioinfo);
1161     }
1162   }                             // Fin test pas postfile
1163   //
1164 
1165   // Callback
1166   {
1167     int test_head =
1168       RUN_CALLBACK6(opt, sendhead, bstr.buffer, adr, fil, referer_adr, referer_fil,
1169                     retour);
1170     if (test_head != 1) {
1171       deletesoc_r(retour);
1172       strcpybuff(retour->msg, "Header refused by external wrapper");
1173       retour->soc = INVALID_SOCKET;
1174     }
1175   }
1176 
1177   // Envoi
1178   HTS_STAT.last_request = mtime_local();
1179   if (sendc(retour, bstr.buffer) < 0) {        // ERREUR, socket rompue?...
1180     deletesoc_r(retour);        // fermer tout de même
1181     // et tenter de reconnecter
1182 
1183     strcpybuff(retour->msg, "Write error");
1184     retour->soc = INVALID_SOCKET;
1185   }
1186 
1187   // RX'98
1188   return 0;
1189 }
1190 
1191 // traiter 1ere ligne d'en tête
treatfirstline(htsblk * retour,const char * rcvd)1192 void treatfirstline(htsblk * retour, const char *rcvd) {
1193   const char *a = rcvd;
1194 
1195   // exemple:
1196   // HTTP/1.0 200 OK
1197   if (*a) {
1198     // note: certains serveurs buggés renvoient HTTP/1.0\n200 OK ou " HTTP/1.0 200 OK"
1199     while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
1200       a++;                      // épurer espaces au début
1201     if (strfield(a, "HTTP/")) {
1202       // sauter HTTP/1.x
1203       while((*a != ' ') && (*a != '\0') && (*a != 10) && (*a != 13)
1204             && (*a != 9))
1205         a++;
1206       if (*a != '\0') {
1207         while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
1208           a++;                  // épurer espaces
1209         if ((*a >= '0') && (*a <= '9')) {
1210           sscanf(a, "%d", &(retour->statuscode));
1211           // sauter 200
1212           while((*a != ' ') && (*a != '\0') && (*a != 10) && (*a != 13)
1213                 && (*a != 9))
1214             a++;
1215           while((*a == ' ') || (*a == 10) || (*a == 13) || (*a == 9))
1216             a++;                // épurer espaces
1217           if ((strlen(a) > 1) && (strlen(a) < 64))      // message retour
1218             strcpybuff(retour->msg, a);
1219           else
1220             infostatuscode(retour->msg, retour->statuscode);
1221           // type MIME par défaut2
1222           strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1223         } else {                // pas de code!
1224           retour->statuscode = STATUSCODE_INVALID;
1225           strcpybuff(retour->msg, "Unknown response structure");
1226         }
1227       } else {                  // euhh??
1228         retour->statuscode = STATUSCODE_INVALID;
1229         strcpybuff(retour->msg, "Unknown response structure");
1230       }
1231     } else {
1232       if (*a == '<') {
1233         /* This is dirty .. */
1234         retour->statuscode = HTTP_OK;
1235         retour->keep_alive = 0;
1236         strcpybuff(retour->msg, "Unknown, assuming junky server");
1237         strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1238       } else if (strnotempty(a)) {
1239         retour->statuscode = STATUSCODE_INVALID;
1240         strcpybuff(retour->msg, "Unknown (not HTTP/xx) response structure");
1241       } else {
1242         /* This is dirty .. */
1243         retour->statuscode = HTTP_OK;
1244         retour->keep_alive = 0;
1245         strcpybuff(retour->msg, "Unknown, assuming junky server");
1246         strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1247       }
1248     }
1249   } else {                      // vide!
1250     /*
1251        retour->statuscode=STATUSCODE_INVALID;
1252        strcpybuff(retour->msg,"Empty reponse or internal error");
1253      */
1254     /* This is dirty .. */
1255     retour->statuscode = HTTP_OK;
1256     strcpybuff(retour->msg, "Unknown, assuming junky server");
1257     strcpybuff(retour->contenttype, HTS_HYPERTEXT_DEFAULT_MIME);
1258   }
1259 }
1260 
1261 // traiter ligne par ligne l'en tête
1262 // gestion des cookies
treathead(t_cookie * cookie,const char * adr,const char * fil,htsblk * retour,char * rcvd)1263 void treathead(t_cookie * cookie, const char *adr, const char *fil, htsblk * retour,
1264                char *rcvd) {
1265   int p;
1266 
1267   if ((p = strfield(rcvd, "Content-length:")) != 0) {
1268 #if HDEBUG
1269     printf("ok, Content-length: détecté\n");
1270 #endif
1271     if (sscanf(rcvd + p, LLintP, &(retour->totalsize)) == 1) {
1272       if (retour->totalsize == 0) {
1273         retour->empty = 1;
1274       }
1275     }
1276   } else if ((p = strfield(rcvd, "Content-Disposition:")) != 0) {
1277     while(is_realspace(*(rcvd + p)))
1278       p++;                      // sauter espaces
1279     if ((int) strlen(rcvd + p) < 250) { // pas trop long?
1280       char tmp[256];
1281       char *a = NULL, *b = NULL;
1282 
1283       strcpybuff(tmp, rcvd + p);
1284       a = strstr(tmp, "filename=");
1285       if (a) {
1286         a += strlen("filename=");
1287         while(is_space(*a))
1288           a++;
1289         //a=strchr(a,'"');
1290         if (a) {
1291           char *c = NULL;
1292 
1293           //a++;      /* jump " */
1294           while((c = strchr(a, '/')))   /* skip all / (see RFC2616) */
1295             a = c + 1;
1296           //b=strchr(a+1,'"');
1297           b = a + strlen(a) - 1;
1298           while(is_space(*b))
1299             b--;
1300           b++;
1301           if (b) {
1302             *b = '\0';
1303             if ((int) strlen(a) < 200) {        // pas trop long?
1304               strcpybuff(retour->cdispo, a);
1305             }
1306           }
1307         }
1308       }
1309     }
1310   } else if ((p = strfield(rcvd, "Last-Modified:")) != 0) {
1311     while(is_realspace(*(rcvd + p)))
1312       p++;                      // sauter espaces
1313     if ((int) strlen(rcvd + p) < 64) {  // pas trop long?
1314       //struct tm* tm_time=convert_time_rfc822(rcvd+p);
1315       strcpybuff(retour->lastmodified, rcvd + p);
1316     }
1317   } else if ((p = strfield(rcvd, "Date:")) != 0) {
1318     if (strnotempty(retour->lastmodified) == 0) {       /* pas encore de last-modified */
1319       while(is_realspace(*(rcvd + p)))
1320         p++;                    // sauter espaces
1321       if ((int) strlen(rcvd + p) < 64) {        // pas trop long?
1322         //struct tm* tm_time=convert_time_rfc822(rcvd+p);
1323         strcpybuff(retour->lastmodified, rcvd + p);
1324       }
1325     }
1326   } else if ((p = strfield(rcvd, "Etag:")) != 0) {      /* Etag */
1327     if (retour) {
1328       while(is_realspace(*(rcvd + p)))
1329         p++;                    // sauter espaces
1330       if ((int) strlen(rcvd + p) < 64)  // pas trop long?
1331         strcpybuff(retour->etag, rcvd + p);
1332       else                      // erreur.. ignorer
1333         retour->etag[0] = '\0';
1334     }
1335   }
1336   // else if ((p=strfield(rcvd,"Transfer-Encoding: chunked"))!=0) {  // chunk!
1337   else if ((p = strfield(rcvd, "Transfer-Encoding:")) != 0) {   // chunk!
1338     while(is_realspace(*(rcvd + p)))
1339       p++;                      // sauter espaces
1340     if (strfield(rcvd + p, "chunked")) {
1341       retour->is_chunk = 1;     // chunked
1342       //retour->http11=2;     // chunked
1343 #if HDEBUG
1344       printf("ok, Transfer-Encoding: détecté\n");
1345 #endif
1346     }
1347   } else if ((p = strfield(rcvd, "Content-type:")) != 0) {
1348     if (retour) {
1349       char tempo[1100];
1350 
1351       // éviter les text/html; charset=foo
1352       {
1353         char *a = strchr(rcvd + p, ';');
1354 
1355         if (a) {                // extended information
1356           *a = '\0';
1357           a++;
1358           while(is_space(*a))
1359             a++;
1360           if (strfield(a, "charset")) {
1361             a += 7;
1362             while(is_space(*a))
1363               a++;
1364             if (*a == '=') {
1365               a++;
1366               while(is_space(*a))
1367                 a++;
1368               if (*a == '\"')
1369                 a++;
1370               while(is_space(*a))
1371                 a++;
1372               if (*a) {
1373                 char *chs = a;
1374 
1375                 while(*a && !is_space(*a) && *a != '\"' && *a != ';')
1376                   a++;
1377                 *a = '\0';
1378                 if (*chs) {
1379                   if (strlen(chs) < sizeof(retour->charset) - 2) {
1380                     strcpybuff(retour->charset, chs);
1381                   }
1382                 }
1383               }
1384             }
1385           }
1386         }
1387       }
1388       sscanf(rcvd + p, "%s", tempo);
1389       if (strlen(tempo) < sizeof(retour->contenttype) - 2)      // pas trop long!!
1390         strcpybuff(retour->contenttype, tempo);
1391       else
1392         strcpybuff(retour->contenttype, "application/octet-stream-unknown");    // erreur
1393     }
1394   } else if ((p = strfield(rcvd, "Content-Range:")) != 0) {
1395     // Content-Range: bytes 0-70870/70871
1396     const char *a;
1397 
1398     for(a = rcvd + p; is_space(*a); a++) ;
1399     if (strncasecmp(a, "bytes ", 6) == 0) {
1400       for(a += 6; is_space(*a); a++) ;
1401       if (sscanf
1402           (a, LLintP "-" LLintP "/" LLintP, &retour->crange_start,
1403            &retour->crange_end, &retour->crange) != 3) {
1404         retour->crange_start = 0;
1405         retour->crange_end = 0;
1406         retour->crange = 0;
1407         a = strchr(rcvd + p, '/');
1408         if (a != NULL) {
1409           a++;
1410           if (sscanf(a, LLintP, &retour->crange) == 1) {
1411             retour->crange_start = 0;
1412             retour->crange_end = retour->crange - 1;
1413           } else {
1414             retour->crange = 0;
1415           }
1416         }
1417       }
1418     }
1419   } else if ((p = strfield(rcvd, "Connection:")) != 0) {
1420     char *a = rcvd + p;
1421 
1422     while(is_space(*a))
1423       a++;
1424     if (*a) {
1425       if (strfield(a, "Keep-Alive")) {
1426         if (!retour->keep_alive) {
1427           retour->keep_alive_max = 10;
1428           retour->keep_alive_t = 15;
1429         }
1430         retour->keep_alive = 1;
1431       } else {
1432         retour->keep_alive = 0;
1433       }
1434     }
1435   } else if ((p = strfield(rcvd, "Keep-Alive:")) != 0) {
1436     char *a = rcvd + p;
1437 
1438     while(is_space(*a))
1439       a++;
1440     if (*a) {
1441       char *p;
1442 
1443       retour->keep_alive = 1;
1444       retour->keep_alive_max = 10;
1445       retour->keep_alive_t = 15;
1446       if ((p = strstr(a, "timeout="))) {
1447         p += strlen("timeout=");
1448         sscanf(p, "%d", &retour->keep_alive_t);
1449       }
1450       if ((p = strstr(a, "max="))) {
1451         p += strlen("max=");
1452         sscanf(p, "%d", &retour->keep_alive_max);
1453       }
1454       if (retour->keep_alive_max <= 1 || retour->keep_alive_t < 1) {
1455         retour->keep_alive = 0;
1456       }
1457     }
1458   } else if ((p = strfield(rcvd, "TE:")) != 0) {
1459     char *a = rcvd + p;
1460 
1461     while(is_space(*a))
1462       a++;
1463     if (*a) {
1464       if (strfield(a, "trailers")) {
1465         retour->keep_alive_trailers = 1;
1466       }
1467     }
1468   } else if ((p = strfield(rcvd, "Content-Encoding:")) != 0) {
1469     if (retour) {
1470       char tempo[1100];
1471       char *a = rcvd + p;
1472 
1473       while(is_space(*a))
1474         a++;
1475       {
1476         char *a = strchr(rcvd + p, ';');
1477 
1478         if (a)
1479           *a = '\0';
1480       }
1481       sscanf(a, "%s", tempo);
1482       if (strlen(tempo) < 64)   // pas trop long!!
1483         strcpybuff(retour->contentencoding, tempo);
1484       else
1485         retour->contentencoding[0] = '\0';      // erreur
1486 #if HTS_USEZLIB
1487       /* Check known encodings */
1488       if (retour->contentencoding[0]) {
1489         if ((strfield2(retour->contentencoding, "gzip"))
1490             || (strfield2(retour->contentencoding, "x-gzip"))
1491             /*
1492                || (strfield2(retour->contentencoding, "compress"))
1493                || (strfield2(retour->contentencoding, "x-compress"))
1494              */
1495             || (strfield2(retour->contentencoding, "deflate"))
1496             || (strfield2(retour->contentencoding, "x-deflate"))
1497           ) {
1498           retour->compressed = 1;
1499         }
1500       }
1501 #endif
1502     }
1503   } else if ((p = strfield(rcvd, "Location:")) != 0) {
1504     if (retour) {
1505       if (retour->location) {
1506         while(is_realspace(*(rcvd + p)))
1507           p++;                  // sauter espaces
1508         if ((int) strlen(rcvd + p) < HTS_URLMAXSIZE)    // pas trop long?
1509           strcpybuff(retour->location, rcvd + p);
1510         else                    // erreur.. ignorer
1511           retour->location[0] = '\0';
1512       }
1513     }
1514   } else if (((p = strfield(rcvd, "Set-Cookie:")) != 0) && (cookie)) {  // ohh un cookie
1515     char *a = rcvd + p;         // pointeur
1516     char domain[256];           // domaine cookie (.netscape.com)
1517     char path[256];             // chemin (/)
1518     char cook_name[256];        // nom cookie (MYCOOK)
1519     char BIGSTK cook_value[8192];       // valeur (ID=toto,S=1234)
1520 
1521 #if DEBUG_COOK
1522     printf("set-cookie detected\n");
1523 #endif
1524     while(*a) {
1525       char *token_st, *token_end;
1526       char *value_st, *value_end;
1527       char name[256];
1528       char BIGSTK value[8192];
1529       int next = 0;
1530 
1531       name[0] = value[0] = '\0';
1532       //
1533 
1534       // initialiser cookie lu actuellement
1535       if (adr)
1536         strcpybuff(domain, jump_identification_const(adr));   // domaine
1537       strcpybuff(path, "/");    // chemin (/)
1538       strcpybuff(cook_name, "");        // nom cookie (MYCOOK)
1539       strcpybuff(cook_value, "");       // valeur (ID=toto,S=1234)
1540       // boucler jusqu'au prochain cookie ou la fin
1541       do {
1542         char *start_loop = a;
1543 
1544         while(is_space(*a))
1545           a++;                  // sauter espaces
1546         token_st = a;           // départ token
1547         while((!is_space(*a)) && (*a) && (*a != ';') && (*a != '='))
1548           a++;                  // arrêter si espace, point virgule
1549         token_end = a;
1550         while(is_space(*a))
1551           a++;                  // sauter espaces
1552         if (*a == '=') {        // name=value
1553           a++;
1554           while(is_space(*a))
1555             a++;                // sauter espaces
1556           value_st = a;
1557           while((*a != ';') && (*a))
1558             a++;                // prochain ;
1559           //while( ((*a!='"') || (*(a-1)=='\\')) && (*a)) a++;    // prochain " (et pas \")
1560           value_end = a;
1561           //if (*a==';') {  // finit par un ;
1562           // vérifier débordements
1563           if ((((int) (token_end - token_st)) < 200)
1564               && (((int) (value_end - value_st)) < 8000)
1565               && (((int) (token_end - token_st)) > 0)
1566               && (((int) (value_end - value_st)) > 0)) {
1567             int name_len = (int) (token_end - token_st);
1568             int value_len = (int) (value_end - value_st);
1569 
1570             name[0] = '\0';
1571             value[0] = '\0';
1572             strncatbuff(name, token_st, name_len);
1573             strncatbuff(value, value_st, value_len);
1574 #if DEBUG_COOK
1575             printf("detected cookie-av: name=\"%s\" value=\"%s\"\n", name,
1576                    value);
1577 #endif
1578             if (strfield2(name, "domain")) {
1579               if (value_len < sizeof(domain) - 1) {
1580                 strcpybuff(domain, value);
1581               } else {
1582                 cook_name[0] = 0;
1583                 break;
1584               }
1585             } else if (strfield2(name, "path")) {
1586               if (value_len < sizeof(path) - 1) {
1587                 strcpybuff(path, value);
1588               } else {
1589                 cook_name[0] = 0;
1590                 break;
1591               }
1592             } else if (strfield2(name, "max-age")) {
1593               // ignoré..
1594             } else if (strfield2(name, "expires")) {
1595               // ignoré..
1596             } else if (strfield2(name, "version")) {
1597               // ignoré..
1598             } else if (strfield2(name, "comment")) {
1599               // ignoré
1600             } else if (strfield2(name, "secure")) {     // ne devrait pas arriver ici
1601               // ignoré
1602             } else {
1603               if (value_len < sizeof(cook_value) - 1
1604                   && name_len < sizeof(cook_name) - 1) {
1605                 if (strnotempty(cook_name) == 0) {      // noter premier: nom et valeur cookie
1606                   strcpybuff(cook_name, name);
1607                   strcpybuff(cook_value, value);
1608                 } else {        // prochain cookie
1609                   a = start_loop;       // on devra recommencer à cette position
1610                   next = 1;     // enregistrer
1611                 }
1612               } else {
1613                 cook_name[0] = 0;
1614                 break;
1615               }
1616             }
1617           }
1618         }
1619         if (!next) {
1620           while((*a != ';') && (*a))
1621             a++;                // prochain
1622           while(*a == ';')
1623             a++;                // sauter ;
1624         }
1625       } while((*a) && (!next));
1626       if (strnotempty(cook_name)) {     // cookie?
1627 #if DEBUG_COOK
1628         printf
1629           ("new cookie: name=\"%s\" value=\"%s\" domain=\"%s\" path=\"%s\"\n",
1630            cook_name, cook_value, domain, path);
1631 #endif
1632         cookie_add(cookie, cook_name, cook_value, domain, path);
1633       }
1634     }
1635   }
1636 }
1637 
1638 // transforme le message statuscode en chaîne
infostatuscode(char * msg,int statuscode)1639 HTSEXT_API void infostatuscode(char *msg, int statuscode) {
1640   switch (statuscode) {
1641     // Erreurs HTTP, selon RFC
1642   case 100:
1643     strcpybuff(msg, "Continue");
1644     break;
1645   case 101:
1646     strcpybuff(msg, "Switching Protocols");
1647     break;
1648   case 200:
1649     strcpybuff(msg, "OK");
1650     break;
1651   case 201:
1652     strcpybuff(msg, "Created");
1653     break;
1654   case 202:
1655     strcpybuff(msg, "Accepted");
1656     break;
1657   case 203:
1658     strcpybuff(msg, "Non-Authoritative Information");
1659     break;
1660   case 204:
1661     strcpybuff(msg, "No Content");
1662     break;
1663   case 205:
1664     strcpybuff(msg, "Reset Content");
1665     break;
1666   case 206:
1667     strcpybuff(msg, "Partial Content");
1668     break;
1669   case 300:
1670     strcpybuff(msg, "Multiple Choices");
1671     break;
1672   case 301:
1673     strcpybuff(msg, "Moved Permanently");
1674     break;
1675   case 302:
1676     strcpybuff(msg, "Moved Temporarily");
1677     break;
1678   case 303:
1679     strcpybuff(msg, "See Other");
1680     break;
1681   case 304:
1682     strcpybuff(msg, "Not Modified");
1683     break;
1684   case 305:
1685     strcpybuff(msg, "Use Proxy");
1686     break;
1687   case 306:
1688     strcpybuff(msg, "Undefined 306 error");
1689     break;
1690   case 307:
1691     strcpybuff(msg, "Temporary Redirect");
1692     break;
1693   case 400:
1694     strcpybuff(msg, "Bad Request");
1695     break;
1696   case 401:
1697     strcpybuff(msg, "Unauthorized");
1698     break;
1699   case 402:
1700     strcpybuff(msg, "Payment Required");
1701     break;
1702   case 403:
1703     strcpybuff(msg, "Forbidden");
1704     break;
1705   case 404:
1706     strcpybuff(msg, "Not Found");
1707     break;
1708   case 405:
1709     strcpybuff(msg, "Method Not Allowed");
1710     break;
1711   case 406:
1712     strcpybuff(msg, "Not Acceptable");
1713     break;
1714   case 407:
1715     strcpybuff(msg, "Proxy Authentication Required");
1716     break;
1717   case 408:
1718     strcpybuff(msg, "Request Time-out");
1719     break;
1720   case 409:
1721     strcpybuff(msg, "Conflict");
1722     break;
1723   case 410:
1724     strcpybuff(msg, "Gone");
1725     break;
1726   case 411:
1727     strcpybuff(msg, "Length Required");
1728     break;
1729   case 412:
1730     strcpybuff(msg, "Precondition Failed");
1731     break;
1732   case 413:
1733     strcpybuff(msg, "Request Entity Too Large");
1734     break;
1735   case 414:
1736     strcpybuff(msg, "Request-URI Too Large");
1737     break;
1738   case 415:
1739     strcpybuff(msg, "Unsupported Media Type");
1740     break;
1741   case 416:
1742     strcpybuff(msg, "Requested Range Not Satisfiable");
1743     break;
1744   case 417:
1745     strcpybuff(msg, "Expectation Failed");
1746     break;
1747   case 500:
1748     strcpybuff(msg, "Internal Server Error");
1749     break;
1750   case 501:
1751     strcpybuff(msg, "Not Implemented");
1752     break;
1753   case 502:
1754     strcpybuff(msg, "Bad Gateway");
1755     break;
1756   case 503:
1757     strcpybuff(msg, "Service Unavailable");
1758     break;
1759   case 504:
1760     strcpybuff(msg, "Gateway Time-out");
1761     break;
1762   case 505:
1763     strcpybuff(msg, "HTTP Version Not Supported");
1764     break;
1765     //
1766   default:
1767     if (strnotempty(msg) == 0)
1768       strcpybuff(msg, "Unknown error");
1769     break;
1770   }
1771 }
1772 
1773 // check if data is available
check_readinput(htsblk * r)1774 int check_readinput(htsblk * r) {
1775   if (r->soc != INVALID_SOCKET) {
1776     fd_set fds;                 // poll structures
1777     struct timeval tv;          // structure for select
1778     const int soc = (int) r->soc;
1779 
1780     assertf(soc == r->soc);
1781     FD_ZERO(&fds);
1782     FD_SET(soc, &fds);
1783     tv.tv_sec = 0;
1784     tv.tv_usec = 0;
1785     select(soc + 1, &fds, NULL, NULL, &tv);
1786     if (FD_ISSET(soc, &fds))
1787       return 1;
1788     else
1789       return 0;
1790   } else
1791     return 0;
1792 }
1793 
1794 // check if data is available
check_readinput_t(T_SOC soc,int timeout)1795 int check_readinput_t(T_SOC soc, int timeout) {
1796   if (soc != INVALID_SOCKET) {
1797     fd_set fds;                 // poll structures
1798     struct timeval tv;          // structure for select
1799     const int isoc = (int) soc;
1800 
1801     assertf(isoc == soc);
1802     FD_ZERO(&fds);
1803     FD_SET(isoc, &fds);
1804     tv.tv_sec = timeout;
1805     tv.tv_usec = 0;
1806     select(isoc + 1, &fds, NULL, NULL, &tv);
1807     if (FD_ISSET(isoc, &fds))
1808       return 1;
1809     else
1810       return 0;
1811   } else
1812     return 0;
1813 }
1814 
1815 // idem, sauf qu'ici on peut choisir la taille max de données à recevoir
1816 // SI bufl==0 alors le buffer est censé être de 8kos, et on recoit par bloc de lignes
1817 // en éliminant les cr (ex: header), arrêt si double-lf
1818 // SI bufl==-1 alors le buffer est censé être de 8kos, et on recoit ligne par ligne
1819 // en éliminant les cr (ex: header), arrêt si double-lf
1820 // Note: les +1 dans les malloc sont dûs à l'octet nul rajouté en fin de fichier
http_xfread1(htsblk * r,int bufl)1821 LLint http_xfread1(htsblk * r, int bufl) {
1822   int nl = -1;
1823 
1824   // EOF
1825   if (r->totalsize >= 0 && r->size == r->totalsize) {
1826     return READ_EOF;
1827   }
1828 
1829   if (bufl > 0) {
1830     if (!r->is_write) {         // stocker en mémoire
1831       if (r->totalsize >= 0) {  // totalsize déterminé ET ALLOUE
1832         if (r->adr == NULL) {
1833           r->adr = (char *) malloct((size_t) r->totalsize + 1);
1834           r->size = 0;
1835         }
1836         if (r->adr != NULL) {
1837           // lecture
1838           const size_t req_size = r->totalsize - r->size;
1839 
1840           nl = req_size > 0 ? hts_read(r, r->adr + ((int) r->size), (int) req_size) : 0;        /* NO 32 bit overlow possible here (no 4GB html!) */
1841           // nouvelle taille
1842           if (nl >= 0)
1843             r->size += nl;
1844 
1845           /*
1846              if (r->size >= r->totalsize)
1847              nl = -1;  // break
1848            */
1849 
1850           r->adr[r->size] = '\0';       // caractère NULL en fin au cas où l'on traite des HTML
1851         }
1852 
1853       } else {                  // inconnu..
1854         // réserver de la mémoire?
1855         if (r->adr == NULL) {
1856 #if HDEBUG
1857           printf("..alloc xfread\n");
1858 #endif
1859           r->adr = (char *) malloct(bufl + 1);
1860           r->size = 0;
1861         } else {
1862 #if HDEBUG
1863           printf("..realloc xfread1\n");
1864 #endif
1865           r->adr = (char *) realloct(r->adr, (int) r->size + bufl + 1);
1866         }
1867 
1868         if (r->adr != NULL) {
1869           // lecture
1870           nl = hts_read(r, r->adr + (int) r->size, bufl);
1871           if (nl > 0) {
1872             // resize
1873             r->adr = (char *) realloct(r->adr, (int) r->size + nl + 1);
1874             // nouvelle taille
1875             r->size += nl;
1876             // octet nul
1877             if (r->adr)
1878               r->adr[r->size] = '\0';
1879 
1880           }                     // sinon on a fini
1881 #if HDEBUG
1882           else if (nl < 0)
1883             printf("..end read (%d)\n", nl);
1884 #endif
1885         }
1886 #if HDEBUG
1887         else
1888           printf("..-> error\n");
1889 #endif
1890       }
1891 
1892       // pas de adr=erreur
1893       if (r->adr == NULL)
1894         nl = READ_ERROR;
1895 
1896     } else {                    // stocker sur disque
1897       char *buff;
1898 
1899       buff = (char *) malloct(bufl);
1900       if (buff != NULL) {
1901         // lecture
1902         nl = hts_read(r, buff, bufl);
1903         // nouvelle taille
1904         if (nl > 0) {
1905           r->size += nl;
1906           if (fwrite(buff, 1, nl, r->out) != nl) {
1907             r->statuscode = STATUSCODE_INVALID;
1908             strcpybuff(r->msg, "Write error on disk");
1909             nl = READ_ERROR;
1910           }
1911         }
1912         //if ((nl < 0) || ((r->totalsize>0) && (r->size >= r->totalsize)))
1913         //  nl=-1;  // break
1914 
1915         // libérer bloc tempo
1916         freet(buff);
1917       } else
1918         nl = READ_ERROR;
1919 
1920       if ((nl < 0) && (r->out != NULL)) {
1921         fflush(r->out);
1922       }
1923 
1924     }                           // stockage disque ou mémoire
1925 
1926   } else if (bufl == -2) {      // force reserve
1927     if (r->adr == NULL) {
1928       r->adr = (char *) malloct(8192);
1929       r->size = 0;
1930       return 0;
1931     }
1932     return -1;
1933   } else {                      // réception d'un en-tête octet par octet
1934     int count = 256;
1935     int tot_nl = 0;
1936     int lf_detected = 0;
1937     int at_beginning = 1;
1938 
1939     do {
1940       nl = READ_INTERNAL_ERROR;
1941       count--;
1942       if (r->adr == NULL) {
1943         r->adr = (char *) malloct(8192);
1944         r->size = 0;
1945       }
1946       if (r->adr != NULL) {
1947         if (r->size < 8190) {
1948           // lecture
1949           nl = hts_read(r, r->adr + r->size, 1);
1950           if (nl > 0) {
1951             // exit if:
1952             // lf detected AND already detected before
1953             // or
1954             // lf detected AND first character read
1955             if (*(r->adr + r->size) == 10) {
1956               if (lf_detected || (at_beginning) || (bufl < 0))
1957                 count = -1;
1958               lf_detected = 1;
1959             }
1960             if (*(r->adr + r->size) != 13) {    // sauter caractères 13
1961               if ((*(r->adr + r->size) != 10)
1962                   && (*(r->adr + r->size) != 13)
1963                 ) {
1964                 // restart for new line
1965                 lf_detected = 0;
1966               }
1967               (r->size)++;
1968               at_beginning = 0;
1969             }
1970             *(r->adr + r->size) = '\0'; // terminer par octet nul
1971           }
1972         }
1973       }
1974       if (nl >= 0) {
1975         tot_nl += nl;
1976         if (!check_readinput(r))
1977           count = -1;
1978       }
1979     } while((nl >= 0) && (count > 0));
1980     if (nl >= 0) {
1981       nl = tot_nl;
1982     }
1983   }
1984   // EOF
1985   if (r->totalsize >= 0 && r->size == r->totalsize) {
1986     return READ_EOF;
1987   } else {
1988     return nl;
1989   }
1990 }
1991 
1992 // teste si une URL (validité, header, taille)
1993 // retourne 200 ou le code d'erreur (404=NOT FOUND, etc)
1994 // en cas de moved xx, dans location
1995 // abandonne désormais au bout de 30 secondes (aurevoir les sites
1996 // qui nous font poireauter 5 heures..) -> -2=timeout
http_test(httrackp * opt,const char * adr,const char * fil,char * loc)1997 htsblk http_test(httrackp * opt, const char *adr, const char *fil, char *loc) {
1998   T_SOC soc;
1999   htsblk retour;
2000 
2001   //int rcvsize=-1;
2002   //char* rcv=NULL;    // adresse de retour
2003   //int bufl=TAILLE_BUFFER;    // 8Ko de buffer
2004   TStamp tl;
2005   int timeout = 30;             // timeout pour un check (arbitraire) // **
2006 
2007   // pour abandonner un site trop lent
2008   tl = time_local();
2009 
2010   loc[0] = '\0';
2011   hts_init_htsblk(&retour);
2012   //memset(&retour, 0, sizeof(htsblk));    // effacer
2013   retour.location = loc;        // si non nul, contiendra l'adresse véritable en cas de moved xx
2014 
2015   //soc=http_fopen(adr,fil,&retour,NULL);  // ouvrir, + header
2016 
2017   // on ouvre en head, et on traite l'en tête
2018   soc = http_xfopen(opt, 1, 0, 1, NULL, adr, fil, &retour);     // ouvrir HEAD, + envoi header
2019 
2020   if (soc != INVALID_SOCKET) {
2021     int e = 0;
2022 
2023     // tant qu'on a des données, et qu'on ne recoit pas deux LF, et que le timeout n'arrie pas
2024     do {
2025       if (http_xfread1(&retour, 0) < 0)
2026         e = 1;
2027       else {
2028         if (retour.adr != NULL) {
2029           if ((retour.adr[retour.size - 1] != 10)
2030               || (retour.adr[retour.size - 2] != 10))
2031             e = 1;
2032         }
2033       }
2034 
2035       if (!e) {
2036         if ((time_local() - tl) >= timeout) {
2037           e = -1;
2038         }
2039       }
2040 
2041     } while(!e);
2042 
2043     if (e == 1) {
2044       if (adr != NULL) {
2045         int ptr = 0;
2046         char rcvd[1100];
2047 
2048         // note: en gros recopie du traitement de back_wait()
2049         //
2050 
2051         // ----------------------------------------
2052         // traiter en-tête!
2053         // status-line à récupérer
2054         ptr += binput(retour.adr + ptr, rcvd, 1024);
2055         if (strnotempty(rcvd) == 0)
2056           ptr += binput(retour.adr + ptr, rcvd, 1024);  // "certains serveurs buggés envoient un \n au début" (RFC)
2057 
2058         // traiter status-line
2059         treatfirstline(&retour, rcvd);
2060 
2061 #if HDEBUG
2062         printf("(Buffer) Status-Code=%d\n", retour.statuscode);
2063 #endif
2064 
2065         // en-tête
2066 
2067         // header // ** !attention! HTTP/0.9 non supporté
2068         do {
2069           ptr += binput(retour.adr + ptr, rcvd, 1024);
2070 #if HDEBUG
2071           printf("(buffer)>%s\n", rcvd);
2072 #endif
2073           if (strnotempty(rcvd))
2074             treathead(NULL, NULL, NULL, &retour, rcvd); // traiter
2075 
2076         } while(strnotempty(rcvd));
2077         // ----------------------------------------
2078 
2079         // libérer mémoire
2080         if (retour.adr != NULL) {
2081           freet(retour.adr);
2082           retour.adr = NULL;
2083         }
2084       }
2085     } else {
2086       retour.statuscode = STATUSCODE_TIMEOUT;
2087       strcpybuff(retour.msg, "Timeout While Testing");
2088     }
2089 
2090 #if HTS_DEBUG_CLOSESOCK
2091     DEBUG_W("http_test: deletehttp\n");
2092 #endif
2093     deletehttp(&retour);
2094     retour.soc = INVALID_SOCKET;
2095   }
2096   return retour;
2097 }
2098 
2099 // Crée un lien (http) vers une adresse internet iadr
2100 // retour: structure (adresse, taille, message si erreur (si !adr))
2101 // peut ouvrir avec des connect() non bloquants: waitconnect=0/1
newhttp(httrackp * opt,const char * _iadr,htsblk * retour,int port,int waitconnect)2102 T_SOC newhttp(httrackp * opt, const char *_iadr, htsblk * retour, int port,
2103               int waitconnect) {
2104   T_SOC soc;                    // descipteur de la socket
2105 
2106   if (strcmp(_iadr, "file://") != 0) {  /* non fichier */
2107     SOCaddr server;
2108     const char *error = "unknown error";
2109 
2110     // tester un éventuel id:pass et virer id:pass@ si détecté
2111     const char *const iadr = jump_identification_const(_iadr);
2112 
2113     SOCaddr_clear(server);
2114 
2115 #if HDEBUG
2116     printf("gethostbyname\n");
2117 #endif
2118 
2119     // tester un éventuel port
2120     if (port == -1) {
2121       const char *a = jump_toport_const(iadr);
2122 
2123 #if HTS_USEOPENSSL
2124       if (retour->ssl)
2125         port = 443;
2126       else
2127         port = 80;              // port par défaut
2128 #else
2129       port = 80;                // port par défaut
2130 #endif
2131 
2132       if (a != NULL) {
2133         char BIGSTK iadr2[HTS_URLMAXSIZE * 2];
2134         int i = -1;
2135 
2136         iadr2[0] = '\0';
2137         sscanf(a + 1, "%d", &i);
2138         if (i != -1) {
2139           port = (unsigned short int) i;
2140         }
2141 
2142         // adresse véritable (sans :xx)
2143         strncatbuff(iadr2, iadr, (int) (a - iadr));
2144 
2145         // adresse sans le :xx
2146         hts_dns_resolve2(opt, iadr2, &server, &error);
2147 
2148       } else {
2149 
2150         // adresse normale (port par défaut par la suite)
2151         hts_dns_resolve2(opt, iadr, &server, &error);
2152       }
2153 
2154     } else {                    // port défini
2155       hts_dns_resolve2(opt, iadr, &server, &error);
2156     }
2157 
2158     if (!SOCaddr_is_valid(server)) {
2159 #if DEBUG
2160       printf("erreur gethostbyname\n");
2161 #endif
2162       if (retour && retour->msg) {
2163 #ifdef _WIN32
2164         snprintf(retour->msg, sizeof(retour->msg),
2165                  "Unable to get server's address: %s", error);
2166 #else
2167         snprintf(retour->msg, sizeof(retour->msg),
2168                  "Unable to get server's address: %s", error);
2169 #endif
2170       }
2171       return INVALID_SOCKET;
2172     }
2173 
2174     // make a copy for external clients
2175     SOCaddr_copy_SOCaddr(retour->address, server);
2176     retour->address_size = SOCaddr_size(retour->address);
2177 
2178     // créer ("attachement") une socket (point d'accès) internet,en flot
2179 #if HDEBUG
2180     printf("socket\n");
2181 #endif
2182 #if HTS_WIDE_DEBUG
2183     DEBUG_W("socket\n");
2184 #endif
2185     soc = (T_SOC) socket(SOCaddr_sinfamily(server), SOCK_STREAM, 0);
2186     if (retour != NULL) {
2187       retour->debugid = HTS_STAT.stat_sockid++;
2188     }
2189 #if HTS_WIDE_DEBUG
2190     DEBUG_W("socket()=%d\n" _(int) soc);
2191 #endif
2192     if (soc == INVALID_SOCKET) {
2193       if (retour && retour->msg) {
2194 #ifdef _WIN32
2195         int last_errno = WSAGetLastError();
2196 
2197         sprintf(retour->msg, "Unable to create a socket: %s",
2198                 strerror(last_errno));
2199 #else
2200         int last_errno = errno;
2201 
2202         sprintf(retour->msg, "Unable to create a socket: %s",
2203                 strerror(last_errno));
2204 #endif
2205       }
2206       return INVALID_SOCKET;    // erreur création socket impossible
2207     }
2208     // bind this address
2209     if (retour != NULL && strnotempty(retour->req.proxy.bindhost)) {
2210       const char *error = "unknown error";
2211       SOCaddr bind_addr;
2212 
2213       if (hts_dns_resolve2(opt, retour->req.proxy.bindhost,
2214                              &bind_addr, &error) == NULL
2215           || bind(soc, &SOCaddr_sockaddr(bind_addr),
2216                   SOCaddr_size(bind_addr)) != 0) {
2217         if (retour && retour->msg) {
2218 #ifdef _WIN32
2219           snprintf(retour->msg, sizeof(retour->msg),
2220                    "Unable to bind the specificied server address: %s",
2221                    error);
2222 #else
2223           snprintf(retour->msg, sizeof(retour->msg),
2224                    "Unable to bind the specificied server address: %s",
2225                    error);
2226 #endif
2227         }
2228         deletesoc(soc);
2229         return INVALID_SOCKET;
2230       }
2231     }
2232     // structure: connexion au domaine internet, port 80 (ou autre)
2233     SOCaddr_initport(server, port);
2234 #if HDEBUG
2235     printf("==%d\n", soc);
2236 #endif
2237 
2238     // connexion non bloquante?
2239     if (!waitconnect) {
2240 #ifdef _WIN32
2241       unsigned long p = 1;      // non bloquant
2242       if (ioctlsocket(soc, FIONBIO, &p)) {
2243         const int last_errno = WSAGetLastError();
2244         snprintf(retour->msg, sizeof(retour->msg),
2245                  "Non-blocking socket failed: %s", strerror(last_errno));
2246         deletesoc(soc);
2247         return INVALID_SOCKET;
2248       }
2249 #else
2250       const int flags = fcntl(soc, F_GETFL, 0);
2251       if (flags == -1 || fcntl(soc, F_SETFL, flags | O_NONBLOCK) == -1) {
2252         snprintf(retour->msg, sizeof(retour->msg),
2253                  "Non-blocking socket failed: %s", strerror(errno));
2254         deletesoc(soc);
2255         return INVALID_SOCKET;
2256       }
2257 #endif
2258     }
2259     // Connexion au serveur lui même
2260 #if HDEBUG
2261     printf("connect\n");
2262 #endif
2263     HTS_STAT.last_connect = mtime_local();
2264 
2265 #if HTS_WIDE_DEBUG
2266     DEBUG_W("connect\n");
2267 #endif
2268     if (connect(soc, &SOCaddr_sockaddr(server), SOCaddr_size(server)) != 0) {
2269       // bloquant
2270       if (waitconnect) {
2271 #if HDEBUG
2272         printf("unable to connect!\n");
2273 #endif
2274         if (retour != NULL && retour->msg) {
2275 #ifdef _WIN32
2276           const int last_errno = WSAGetLastError();
2277 
2278           sprintf(retour->msg, "Unable to connect to the server: %s",
2279                   strerror(last_errno));
2280 #else
2281           const int last_errno = errno;
2282 
2283           sprintf(retour->msg, "Unable to connect to the server: %s",
2284                   strerror(last_errno));
2285 #endif
2286         }
2287         /* Close the socket and notify the error!!! */
2288         deletesoc(soc);
2289         return INVALID_SOCKET;
2290       }
2291     }
2292 #if HTS_WIDE_DEBUG
2293     DEBUG_W("connect done\n");
2294 #endif
2295 
2296 #if HDEBUG
2297     printf("connexion établie\n");
2298 #endif
2299 
2300     // A partir de maintenant, on peut envoyer et recevoir des données
2301     // via le flot identifié par soc (socket): write(soc,adr,taille) et
2302     // read(soc,adr,taille)
2303 
2304   } else {                      // on doit ouvrir un fichier local!
2305     // il sera géré de la même manière qu'une socket (c'est idem!)
2306 
2307     soc = LOCAL_SOCKET_ID;      // pseudo-socket locale..
2308     // soc sera remplacé lors d'un http_fopen() par un handle véritable!
2309 
2310   }                             // teste fichier local ou http
2311 
2312   return soc;
2313 }
2314 
2315 // couper http://www.truc.fr/pub/index.html -> www.truc.fr /pub/index.html
2316 // retour=-1 si erreur.
2317 // si file://... alors adresse=file:// (et coupe le ?query dans ce cas)
ident_url_absolute(const char * url,lien_adrfil * adrfil)2318 int ident_url_absolute(const char *url, lien_adrfil *adrfil) {
2319   int pos = 0;
2320   int scheme = 0;
2321 
2322   // effacer adrfil->adr et adrfil->fil
2323   adrfil->adr[0] = adrfil->fil[0] = '\0';
2324 
2325 #if HDEBUG
2326   printf("protocol: %s\n", url);
2327 #endif
2328 
2329   // Scheme?
2330   {
2331     const char *a = url;
2332 
2333     while(isalpha((unsigned char) *a))
2334       a++;
2335     if (*a == ':')
2336       scheme = 1;
2337   }
2338 
2339   // 1. optional scheme ":"
2340   if ((pos = strfield(url, "file:"))) { // fichier local!! (pour les tests)
2341     //!!p+=3;
2342     strcpybuff(adrfil->adr, "file://");
2343   } else if ((pos = strfield(url, "http:"))) {  // HTTP
2344     //!!p+=3;
2345   } else if ((pos = strfield(url, "ftp:"))) {   // FTP
2346     strcpybuff(adrfil->adr, "ftp://");  // FTP!!
2347     //!!p+=3;
2348 #if HTS_USEOPENSSL
2349   } else if ((pos = strfield(url, "https:"))) {     // HTTPS
2350     strcpybuff(adrfil->adr, "https://");
2351 #endif
2352   } else if (scheme) {
2353     return -1;                  // erreur non reconnu
2354   } else
2355     pos = 0;
2356 
2357   // 2. optional "//" authority
2358   if (strncmp(url + pos, "//", 2) == 0)
2359     pos += 2;
2360 
2361   // (url+pos) now points to the path (not net path)
2362 
2363   //## if (adrfil->adr[0]!=lOCAL_CHAR) {    // adrfil->adresse normale http
2364   if (!strfield(adrfil->adr, "file:")) {        // PAS adrfil->file://
2365     const char *p, *q;
2366 
2367     p = url + pos;
2368 
2369     // p pointe sur le début de l'adrfil->adresse, ex: www.truc.fr/sommaire/index.html
2370     q = strchr(jump_identification_const(p), '/');
2371     if (q == 0)
2372       q = strchr(jump_identification_const(p), '?');  // http://www.foo.com?bar=1
2373     if (q == 0)
2374       q = p + strlen(p);        // pointe sur \0
2375     // q pointe sur le chemin, ex: index.html?query=recherche
2376 
2377     // chemin www... trop long!!
2378     if ((((int) (q - p))) > HTS_URLMAXSIZE) {
2379       //strcpybuff(retour.msg,"Path too long");
2380       return -1;                // erreur
2381     }
2382     // recopier adrfil->adresse www..
2383     strncatbuff(adrfil->adr, p, ((int) (q - p)));
2384     // *( adrfil->adr+( ((int) q) - ((int) p) ) )=0;  // faut arrêter la fumette!
2385     // recopier chemin /pub/..
2386     if (q[0] != '/')            // page par défaut (/)
2387       strcatbuff(adrfil->fil, "/");
2388     strcatbuff(adrfil->fil, q);
2389     // SECURITE:
2390     // simplifier url pour les ../
2391     fil_simplifie(adrfil->fil);
2392   } else {                      // localhost adrfil->file://
2393     const char *p;
2394     size_t i;
2395     char *a;
2396 
2397     p = url + pos;
2398     if (*p == '/' || *p == '\\') {      /* adrfil->file:///.. */
2399       strcatbuff(adrfil->fil, p);       // fichier local ; adrfil->adr="#"
2400     } else {
2401       if (p[1] != ':') {
2402         strcatbuff(adrfil->fil, "//");  /* adrfil->file://server/foo */
2403         strcatbuff(adrfil->fil, p);
2404       } else {
2405         strcatbuff(adrfil->fil, p);     // adrfil->file://C:\..
2406       }
2407     }
2408 
2409     a = strchr(adrfil->fil, '?');
2410     if (a)
2411       *a = '\0';                /* couper query (inutile pour adrfil->file:// lors de la requête) */
2412     // adrfil->filtrer les \\ -> / pour les fichiers DOS
2413     for(i = 0; adrfil->fil[i] != '\0'; i++)
2414       if (adrfil->fil[i] == '\\')
2415         adrfil->fil[i] = '/';
2416   }
2417 
2418   // no hostname
2419   if (!strnotempty(adrfil->adr))
2420     return -1;                  // erreur non reconnu
2421 
2422   // nommer au besoin.. (non utilisé normalement)
2423   if (!strnotempty(adrfil->fil))
2424     strcpybuff(adrfil->fil, "default-index.html");
2425 
2426   // case insensitive pour adrfil->adresse
2427   {
2428     char *a = jump_identification(adrfil->adr);
2429 
2430     while(*a) {
2431       if ((*a >= 'A') && (*a <= 'Z'))
2432         *a += 'a' - 'A';
2433       a++;
2434     }
2435   }
2436 
2437   return 0;
2438 }
2439 
/* Simplifies a path in place: removes "./" segments and resolves "../"
   segments (or a final "..") against the preceding segment.
   - nothing is simplified once a '?' has been met (query string)
   - going above the top just restarts at the top (after the leading '/'
     for an absolute path)
   - a path holding too many '/' (the 127th one) is treated as an error and
     emptied
   - an empty result becomes "/" for an absolute path, "./" otherwise */
void fil_simplifie(char *f) {
  char *marks[128];             /* position following each '/' written */
  int depth = 0;                /* number of entries in marks[] */
  const int absolute = (f[0] == '/');
  int in_query = 0;
  char prev = '/';              /* last character written (start counts as '/') */
  char *src = f;
  char *dst = f;

  while(*src != '\0') {
    int segment_start;

    if (*src == '?')
      in_query = 1;
    segment_start = (!in_query && prev == '/');

    if (segment_start && src[0] == '.' && src[1] == '/') {
      /* foo/./bar or ./foo : skip the segment */
      src += 2;
      continue;
    }

    if (segment_start && src[0] == '.' && src[1] == '.'
        && (src[2] == '/' || src[2] == '\0')) {
      /* foo/../bar or ../foo or .. : drop the previous segment */
      src += (src[2] == '\0') ? 2 : 3;
      if (depth > 1) {
        depth--;
        dst = marks[depth - 1];
      } else {                  /* too many ../ */
        depth = 0;
        dst = absolute ? f + 1 : f;     /* after the leading / if any */
      }
      continue;
    }

    /* ordinary character: copy it */
    prev = *src++;
    *dst++ = prev;
    if (prev == '/') {
      marks[depth++] = dst;
      if (depth >= 127) {
        *f = '\0';              /* ERROR */
        break;
      }
    }
  }
  *dst = '\0';

  /* never leave an empty path */
  if (*f == '\0') {
    if (absolute) {
      f[0] = '/';
      f[1] = '\0';
    } else {
      f[0] = '.';
      f[1] = '/';
      f[2] = '\0';
    }
  }
}
2492 
2493 // fermer liaison fichier ou socket
deletehttp(htsblk * r)2494 void deletehttp(htsblk * r) {
2495 #if HTS_DEBUG_CLOSESOCK
2496   DEBUG_W("deletehttp: (htsblk*) 0x%p\n" _(void *)r);
2497 #endif
2498 #if HTS_USEOPENSSL
2499   /* Free OpenSSL structures */
2500   if (r->ssl_con) {
2501     SSL_shutdown(r->ssl_con);
2502     SSL_free(r->ssl_con);
2503     r->ssl_con = NULL;
2504   }
2505 #endif
2506   if (r->soc != INVALID_SOCKET) {
2507     if (r->is_file) {
2508       if (r->fp)
2509         fclose(r->fp);
2510       r->fp = NULL;
2511     } else {
2512       if (r->soc != LOCAL_SOCKET_ID)
2513         deletesoc_r(r);
2514     }
2515     r->soc = INVALID_SOCKET;
2516   }
2517 }
2518 
2519 // free the addr buffer
2520 // always returns 1
deleteaddr(htsblk * r)2521 int deleteaddr(htsblk * r) {
2522   if (r->adr != NULL) {
2523     freet(r->adr);
2524     r->adr = NULL;
2525   }
2526   if (r->headers != NULL) {
2527     freet(r->headers);
2528     r->headers = NULL;
2529   }
2530   return 1;
2531 }
2532 
2533 // fermer une socket
deletesoc(T_SOC soc)2534 void deletesoc(T_SOC soc) {
2535   if (soc != INVALID_SOCKET && soc != LOCAL_SOCKET_ID) {
2536 #if HTS_WIDE_DEBUG
2537     DEBUG_W("close %d\n" _(int) soc);
2538 #endif
2539 #ifdef _WIN32
2540     if (closesocket(soc) != 0) {
2541       int err = WSAGetLastError();
2542 
2543       fprintf(stderr, "* error closing socket %d: %s\n", soc, strerror(err));
2544     }
2545 #else
2546     if (close(soc) != 0) {
2547       const int err = errno;
2548 
2549       fprintf(stderr, "* error closing socket %d: %s\n", soc, strerror(err));
2550     }
2551 #endif
2552 #if HTS_WIDE_DEBUG
2553     DEBUG_W(".. done\n");
2554 #endif
2555   }
2556 }
2557 
2558 /* Will also clean other things */
deletesoc_r(htsblk * r)2559 void deletesoc_r(htsblk * r) {
2560 #if HTS_USEOPENSSL
2561   if (r->ssl_con) {
2562     SSL_shutdown(r->ssl_con);
2563     // SSL_CTX_set_quiet_shutdown(r->ssl_con->ctx, 1);
2564     SSL_free(r->ssl_con);
2565     r->ssl_con = NULL;
2566   }
2567 #endif
2568   if (r->soc != INVALID_SOCKET) {
2569     deletesoc(r->soc);
2570     r->soc = INVALID_SOCKET;
2571   }
2572 }
2573 
2574 // renvoi le nombre de secondes depuis 1970
time_local(void)2575 TStamp time_local(void) {
2576   return ((TStamp) time(NULL));
2577 }
2578 
2579 // number of millisec since 1970
mtime_local(void)2580 HTSEXT_API TStamp mtime_local(void) {
2581 #ifndef HTS_DO_NOT_USE_FTIME
2582 #ifndef _WIN32
2583   struct timeval tv;
2584   if (gettimeofday(&tv, NULL) != 0) {
2585     assert(! "gettimeofday");
2586   }
2587 
2588   return (TStamp) (((TStamp) tv.tv_sec * (TStamp) 1000)
2589                    + ((TStamp) tv.tv_usec / (TStamp) 1000000));
2590 #else
2591   struct timeb B;
2592   ftime(&B);
2593   return (TStamp) (((TStamp) B.time * (TStamp) 1000)
2594                    + ((TStamp) B.millitm));
2595 #endif
2596 #else
2597   // not precise..
2598   return (TStamp) (((TStamp) time_local() * (TStamp) 1000)
2599                    + ((TStamp) 0));
2600 #endif
2601 }
2602 
2603 // convertit un nombre de secondes en temps (chaine)
sec2str(char * st,TStamp t)2604 void sec2str(char *st, TStamp t) {
2605   int j, h, m, s;
2606 
2607   j = (int) (t / (3600 * 24));
2608   t -= ((TStamp) j) * (3600 * 24);
2609   h = (int) (t / (3600));
2610   t -= ((TStamp) h) * 3600;
2611   m = (int) (t / 60);
2612   t -= ((TStamp) m) * 60;
2613   s = (int) t;
2614 
2615   if (j > 0)
2616     sprintf(st, "%d days, %d hours %d minutes %d seconds", j, h, m, s);
2617   else if (h > 0)
2618     sprintf(st, "%d hours %d minutes %d seconds", h, m, s);
2619   else if (m > 0)
2620     sprintf(st, "%d minutes %d seconds", m, s);
2621   else
2622     sprintf(st, "%d seconds", s);
2623 }
2624 
2625 // idem, plus court (chaine)
qsec2str(char * st,TStamp t)2626 HTSEXT_API void qsec2str(char *st, TStamp t) {
2627   int j, h, m, s;
2628 
2629   j = (int) (t / (3600 * 24));
2630   t -= ((TStamp) j) * (3600 * 24);
2631   h = (int) (t / (3600));
2632   t -= ((TStamp) h) * 3600;
2633   m = (int) (t / 60);
2634   t -= ((TStamp) m) * 60;
2635   s = (int) t;
2636 
2637   if (j > 0)
2638     sprintf(st, "%dd,%02dh,%02dmin%02ds", j, h, m, s);
2639   else if (h > 0)
2640     sprintf(st, "%dh,%02dmin%02ds", h, m, s);
2641   else if (m > 0)
2642     sprintf(st, "%dmin%02ds", m, s);
2643   else
2644     sprintf(st, "%ds", s);
2645 }
2646 
// Writes the current time into s through time_rfc822() (GMT layout).
// gmtime() is used, with localtime() as a fallback when it returns NULL.
// s must hold 256 bytes.
void time_gmt_rfc822(char *s) {
  const time_t now = time(NULL);
  struct tm *broken_down = gmtime(&now);

  if (broken_down == NULL)
    broken_down = localtime(&now);
  time_rfc822(s, broken_down);
}
2658 
// Writes the current local time into s through time_rfc822_local().
// s must hold 256 bytes.
void time_local_rfc822(char *s) {
  const time_t now = time(NULL);
  struct tm *const broken_down = localtime(&now);

  time_rfc822_local(s, broken_down);
}
2668 
/* Parses a textual date into *result. Accepted layouts:
     Sun, 06 Nov 1994 08:49:37 GMT
     Sunday, 06-Nov-94 08:49:37 GMT
     Sun Nov  6 08:49:37 1994
   The string is lower-cased, ':' ',' '-' become spaces, and each word is
   then classified: a three-letter month name sets the month, numbers fill
   day / n1 / n2 / n3 / n4 in order of appearance, anything else is ignored.
   Two-digit years up to 50 mean 20xx, the others 19xx.
   Returns result, or NULL when s is longer than 200 characters or when the
   month or one of the five numbers is missing. Field ranges are not
   checked. */
struct tm *convert_time_rfc822(struct tm *result, const char *s) {
  static const char months[] =
    "jan feb mar apr may jun jul aug sep oct nov dec";
  char str[256];
  char *a;

  /* */
  int result_mm = -1;
  int result_dd = -1;
  int result_n1 = -1;
  int result_n2 = -1;
  int result_n3 = -1;
  int result_n4 = -1;

  /* */

  if ((int) strlen(s) > 200)
    return NULL;
  strcpybuff(str, s);
  hts_lowcase(str);
  /* replace - : , by spaces */
  while((a = strchr(str, '-')))
    *a = ' ';
  while((a = strchr(str, ':')))
    *a = ' ';
  while((a = strchr(str, ',')))
    *a = ' ';
  /* tokenize */
  a = str;
  while(*a) {
    char *first, *last;
    char tok[256];

    /* isolate one word */
    while(*a == ' ')
      a++;                      /* skip spaces */
    first = a;
    while((*a) && (*a != ' '))
      a++;
    last = a;
    tok[0] = '\0';
    if (first != last) {
      const char *pos;

      strncatbuff(tok, first, (int) (last - first));
      /* A month is always given in letters. Only whole three-letter names
         are accepted: a word holds no space, so a three-letter match in the
         table is necessarily one complete month, whereas shorter fragments
         ("a", "ct", ...) used to be mistaken for a month */
      if ((last - first) == 3 && (pos = strstr(months, tok)) != NULL) {
        result_mm = ((int) (pos - months)) / 4;
      } else {
        int number;

        if (sscanf(tok, "%d", &number) == 1) {  /* number token */
          if (result_dd < 0)    /* day always first number */
            result_dd = number;
          else if (result_n1 < 0)
            result_n1 = number;
          else if (result_n2 < 0)
            result_n2 = number;
          else if (result_n3 < 0)
            result_n3 = number;
          else if (result_n4 < 0)
            result_n4 = number;
        }                       /* otherwise: noise (day name, GMT, ...) */
      }
    }
  }
  if ((result_n1 >= 0) && (result_mm >= 0) && (result_dd >= 0)
      && (result_n2 >= 0) && (result_n3 >= 0) && (result_n4 >= 0)) {
    if (result_n4 >= 1000) {    /* Sun Nov  6 08:49:37 1994 */
      result->tm_year = result_n4 - 1900;
      result->tm_hour = result_n1;
      result->tm_min = result_n2;
      result->tm_sec = max(result_n3, 0);
    } else {                    /* Sun, 06 Nov 1994 08:49:37 GMT or Sunday, 06-Nov-94 08:49:37 GMT */
      result->tm_hour = result_n2;
      result->tm_min = result_n3;
      result->tm_sec = max(result_n4, 0);
      if (result_n1 <= 50)      /* 00 means 2000 */
        result->tm_year = result_n1 + 100;
      else if (result_n1 < 1000)        /* 99 means 1999 */
        result->tm_year = result_n1;
      else                      /* 2000 */
        result->tm_year = result_n1 - 1900;
    }
    result->tm_isdst = 0;       /* assume GMT */
    result->tm_yday = -1;       /* don't know */
    result->tm_wday = -1;       /* don't know */
    result->tm_mon = result_mm;
    result->tm_mday = result_dd;
    return result;
  }
  return NULL;
}
2762 
getGMT(struct tm * tm)2763 static time_t getGMT(struct tm *tm) {   /* hey, time_t is local! */
2764   time_t t = mktime(tm);
2765 
2766   if (t != (time_t) - 1 && t != (time_t) 0) {
2767     /* BSD does not have static "timezone" declared */
2768 #if (defined(BSD) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD_kernel__))
2769     time_t now = time(NULL);
2770     time_t timezone = -localtime(&now)->tm_gmtoff;
2771 #endif
2772     return (time_t) (t - timezone);
2773   }
2774   return (time_t) - 1;
2775 }
2776 
2777 /* sets file time. -1 if error */
2778 /* Note: utf-8 */
set_filetime(const char * file,struct tm * tm_time)2779 int set_filetime(const char *file, struct tm *tm_time) {
2780   time_t t = getGMT(tm_time);
2781 
2782   if (t != (time_t) - 1) {
2783     STRUCT_UTIMBUF tim;
2784 
2785     memset(&tim, 0, sizeof(tim));
2786     tim.actime = tim.modtime = t;
2787     return UTIME(file, &tim);
2788   }
2789   return -1;
2790 }
2791 
/* Sets the time of a file from a textual date parsed by
   convert_time_rfc822().
   Returns the result of set_filetime(), or -1 when the date is rejected. */
/* Note: utf-8 */
int set_filetime_rfc822(const char *file, const char *date) {
  struct tm buffer;
  struct tm *const parsed = convert_time_rfc822(&buffer, date);

  if (parsed == NULL)
    return -1;
  return set_filetime(file, parsed);
}
2803 
2804 /* Note: utf-8 */
get_filetime_rfc822(const char * file,char * date)2805 int get_filetime_rfc822(const char *file, char *date) {
2806   STRUCT_STAT buf;
2807 
2808   date[0] = '\0';
2809   if (STAT(file, &buf) == 0) {
2810     struct tm *A;
2811     time_t tt = buf.st_mtime;
2812 
2813     A = gmtime(&tt);
2814     if (A == NULL)
2815       A = localtime(&tt);
2816     if (A != NULL) {
2817       time_rfc822(date, A);
2818       return 1;
2819     }
2820   }
2821   return 0;
2822 }
2823 
// Formats *A as an RFC-style date followed by a literal "GMT"
// (the destination buffer must hold 256 bytes)
void time_rfc822(char *s, struct tm *A) {
  static const char gmt_format[] = "%a, %d %b %Y %H:%M:%S GMT";

  if (!A) {
    /* named flag so that the failed assertion is self-explanatory */
    int localtime_returned_null = 0;

    assertf(localtime_returned_null);
  }
  strftime(s, 256, gmt_format, A);
}
2833 
// Formats *A as an RFC-style date without any timezone suffix
// (the destination buffer must hold 256 bytes)
void time_rfc822_local(char *s, struct tm *A) {
  static const char local_format[] = "%a, %d %b %Y %H:%M:%S";

  if (!A) {
    /* named flag so that the failed assertion is self-explanatory */
    int localtime_returned_null = 0;

    assertf(localtime_returned_null);
  }
  strftime(s, 256, local_format, A);
}
2843 
2844 // conversion en b,Kb,Mb
int2bytes(strc_int2bytes2 * strc,LLint n)2845 HTSEXT_API char *int2bytes(strc_int2bytes2 * strc, LLint n) {
2846   char **a = int2bytes2(strc, n);
2847 
2848   strcpybuff(strc->catbuff, a[0]);
2849   strcatbuff(strc->catbuff, a[1]);
2850   return strc->catbuff;
2851 }
2852 
2853 // conversion en b/s,Kb/s,Mb/s
int2bytessec(strc_int2bytes2 * strc,long int n)2854 HTSEXT_API char *int2bytessec(strc_int2bytes2 * strc, long int n) {
2855   char buff[256];
2856   char **a = int2bytes2(strc, n);
2857 
2858   strcpybuff(buff, a[0]);
2859   strcatbuff(buff, a[1]);
2860   return concat(strc->catbuff, sizeof(strc->catbuff), buff, "/s");
2861 }
int2char(strc_int2bytes2 * strc,int n)2862 HTSEXT_API char *int2char(strc_int2bytes2 * strc, int n) {
2863   sprintf(strc->buff2, "%d", n);
2864   return strc->buff2;
2865 }
2866 
// conversion to B, KiB, MiB, ... with the number and the unit kept separate
// limit: 2.10^9.10^6B

/* See http://physics.nist.gov/cuu/Units/binary.html */
/* Binary unit sizes, expressed as LLint so that comparisons and divisions
   against an LLint byte count stay in that type. The GiB/TiB/PiB units are
   only defined when HTS_LONGLONG is set. */
#define ToLLint(a) ((LLint)(a))
#define ToLLintKiB (ToLLint(1024))
#define ToLLintMiB (ToLLintKiB*ToLLintKiB)
#ifdef HTS_LONGLONG
#define ToLLintGiB (ToLLintKiB*ToLLintKiB*ToLLintKiB)
#define ToLLintTiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
#define ToLLintPiB (ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB*ToLLintKiB)
#endif
int2bytes2(strc_int2bytes2 * strc,LLint n)2879 HTSEXT_API char **int2bytes2(strc_int2bytes2 * strc, LLint n) {
2880   if (n < ToLLintKiB) {
2881     sprintf(strc->buff1, "%d", (int) (LLint) n);
2882     strcpybuff(strc->buff2, "B");
2883   } else if (n < ToLLintMiB) {
2884     sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / ToLLintKiB)),
2885             (int) ((LLint) ((n % ToLLintKiB) * 100) / ToLLintKiB));
2886     strcpybuff(strc->buff2, "KiB");
2887   }
2888 #ifdef HTS_LONGLONG
2889   else if (n < ToLLintGiB) {
2890     sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintMiB))),
2891             (int) ((LLint) (((n % (ToLLintMiB)) * 100) / (ToLLintMiB))));
2892     strcpybuff(strc->buff2, "MiB");
2893   } else if (n < ToLLintTiB) {
2894     sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintGiB))),
2895             (int) ((LLint) (((n % (ToLLintGiB)) * 100) / (ToLLintGiB))));
2896     strcpybuff(strc->buff2, "GiB");
2897   } else if (n < ToLLintPiB) {
2898     sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintTiB))),
2899             (int) ((LLint) (((n % (ToLLintTiB)) * 100) / (ToLLintTiB))));
2900     strcpybuff(strc->buff2, "TiB");
2901   } else {
2902     sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintPiB))),
2903             (int) ((LLint) (((n % (ToLLintPiB)) * 100) / (ToLLintPiB))));
2904     strcpybuff(strc->buff2, "PiB");
2905   }
2906 #else
2907   else {
2908     sprintf(strc->buff1, "%d,%02d", (int) ((LLint) (n / (ToLLintMiB))),
2909             (int) ((LLint) (((n % (ToLLintMiB)) * 100) / (ToLLintMiB))));
2910     strcpybuff(strc->buff2, "MiB");
2911   }
2912 #endif
2913   strc->buffadr[0] = strc->buff1;
2914   strc->buffadr[1] = strc->buff2;
2915   return strc->buffadr;
2916 }
2917 
2918 #ifdef _WIN32
2919 #else
2920 // ignore sigpipe?
/* Gets or sets the process-wide "ignore" flag.
   setflag >= 0 stores the new value; a negative value only queries.
   Returns the (possibly updated) flag. */
int sig_ignore_flag(int setflag) {
  static int ignore_state = 0;  /* intentionally a true static */

  if (setflag < 0) {
    return ignore_state;        /* query only */
  }
  ignore_state = setflag;
  return ignore_state;
}
2928 #endif
2929 
2930 // envoi de texte (en têtes généralement) sur la socket soc
sendc(htsblk * r,const char * s)2931 int sendc(htsblk * r, const char *s) {
2932   int n, ssz = (int) strlen(s);
2933 
2934 #ifdef _WIN32
2935 #else
2936   sig_ignore_flag(1);
2937 #endif
2938 #if HDEBUG
2939   write(0, s, ssz);
2940 #endif
2941 
2942 #if HTS_USEOPENSSL
2943   if (r->ssl) {
2944     n = SSL_write(r->ssl_con, s, ssz);
2945   } else
2946 #endif
2947     n = send(r->soc, s, ssz, 0);
2948 
2949 #ifdef _WIN32
2950 #else
2951   sig_ignore_flag(0);
2952 #endif
2953 
2954   return (n == ssz) ? n : -1;
2955 }
2956 
2957 // Remplace read
finput(T_SOC fd,char * s,int max)2958 int finput(T_SOC fd, char *s, int max) {
2959   char c;
2960   int j = 0;
2961 
2962   do {
2963     //c=fgetc(fp);
2964     if (read((int) fd, &c, 1) <= 0) {
2965       c = 0;
2966     }
2967     if (c != 0) {
2968       switch (c) {
2969       case 10:
2970         c = 0;
2971         break;
2972       case 13:
2973         break;                  // sauter ces caractères
2974       default:
2975         s[j++] = c;
2976         break;
2977       }
2978     }
2979   } while((c != 0) && (j < max - 1));
2980   s[j] = '\0';
2981   return j;
2982 }
2983 
// In-memory line reader: copies the line starting at buff into s, dropping
// CR characters, and stops at LF, at \0, or once max characters are stored.
// Returns the offset to add to buff to continue after this line, that is
// the number of source characters consumed plus one.
// Note: an empty or NULL buff therefore returns 1.
int binput(char *buff, char *s, int max) {
  int consumed = 0;
  int stored = 0;

  if (buff != NULL) {
    for(; stored < max; consumed++) {
      const char ch = buff[consumed];

      if (ch == '\0' || ch == '\n') {
        break;
      }
      if (ch != '\r') {
        s[stored++] = ch;
      }
    }
  }
  s[stored] = '\0';

  return consumed + 1;
}
3002 
// Reads one line from a stream.
// - fp:  stream to read from
// - s:   destination buffer of at least 'max' bytes
// - max: capacity of s, including the terminating \0
// LF ends the line (and is not stored); CR, TAB and FF are skipped.
// Reading also stops at end of file or once max - 1 characters are stored.
// Returns the number of characters stored (0 when max <= 0, in which case
// s is left untouched).
int linput(FILE * fp, char *s, int max) {
  int j = 0;

  // no room at all, not even for the terminator
  if (max <= 0) {
    return 0;
  }

  // the bound is checked BEFORE storing, so that s[max - 1] is always
  // available for the terminating \0 (even when max == 1)
  while(j < max - 1) {
    const int c = fgetc(fp);

    if (c == EOF || c == 10) {  // end of file or LF: end of line
      break;
    }
    if (c == 13 || c == 9 || c == 12) { // skip CR, TAB, FF
      continue;
    }
    s[j++] = (char) c;
  }
  s[j] = '\0';
  return j;
}
// Reads one line from a socket, one byte at a time.
// - soc: socket to receive from
// - s:   destination buffer of at least 'max' bytes
// - max: capacity of s, including the terminating \0
// LF ends the line (and is not stored); CR, TAB and FF are skipped.
// Reading also stops when recv() does not deliver a byte, or once max - 1
// characters are stored.
// Returns the number of characters stored (0 when max <= 0, in which case
// s is left untouched).
int linputsoc(T_SOC soc, char *s, int max) {
  int j = 0;

  // no room at all, not even for the terminator
  if (max <= 0) {
    return 0;
  }

  // the bound is checked BEFORE storing, so that s[max - 1] is always
  // available for the terminating \0 (even when max == 1)
  while(j < max - 1) {
    unsigned char ch;

    if (recv(soc, &ch, 1, 0) != 1) {    // error or connection closed
      break;
    }
    if (ch == 10) {             // LF: end of line
      break;
    }
    if (ch == 13 || ch == 9 || ch == 12) {      // skip CR, TAB, FF
      continue;
    }
    s[j++] = (char) ch;
  }
  s[j] = '\0';
  return j;
}
// Same as linputsoc(), but only reads if check_readinput_t() reports that
// input is available within 'timeout'; returns -1 otherwise.
int linputsoc_t(T_SOC soc, char *s, int max, int timeout) {
  return check_readinput_t(soc, timeout) ? linputsoc(soc, s, max) : -1;
}
// Reads a line with linput() and copies it into s with leading and trailing
// spaces/tabs removed. s is emptied first.
// Returns the length of the trimmed line (0 if the temporary buffer could
// not be allocated).
int linput_trim(FILE * fp, char *s, int max) {
  char *line = (char *) malloct(max + 1);
  char *start;
  int len;

  s[0] = '\0';
  if (line == NULL) {
    return 0;
  }

  // read the raw line
  len = linput(fp, line, max);

  // strip trailing spaces and tabs
  while(len > 0 && (line[len - 1] == ' ' || line[len - 1] == '\t')) {
    line[--len] = '\0';
  }

  // skip leading spaces and tabs
  for(start = line; len > 0 && (*start == ' ' || *start == '\t'); start++) {
    len--;
  }

  if (len > 0) {
    memcpy(s, start, len);      // can copy \0 chars
    s[len] = '\0';
  }

  freet(line);
  return len;
}
// Reads a logical line where a trailing backslash continues on the next
// physical line (each physical line is read through linput_trim()).
// The continuation backslash is removed. Returns the total length stored.
int linput_cpp(FILE * fp, char *s, int max) {
  int total = 0;

  s[0] = '\0';
  for(;;) {
    int got;

    // remove the trailing backslash of the previous physical line
    if (total > 0 && s[total - 1] == '\\') {
      s[--total] = '\0';
    }

    // read the next physical line right after what we already have
    got = linput_trim(fp, s + total, max - total);
    if (got > 0) {
      total += got;
    }

    // go on only while the line ends with a backslash and room remains
    if (s[total > 0 ? total - 1 : 0] != '\\' || total >= max) {
      break;
    }
  }
  return total;
}
3115 
// Same as linput(), but keeps special characters (TAB, FF): only CR is
// skipped and LF ends the line.
// - fp:  stream to read from
// - s:   destination buffer of at least 'max' bytes
// - max: capacity of s, including the terminating \0
// When max <= 0 nothing is read and s is left untouched.
void rawlinput(FILE * fp, char *s, int max) {
  int j = 0;

  // no room at all, not even for the terminator
  if (max <= 0) {
    return;
  }

  // the bound is checked BEFORE storing, so that s[max - 1] is always
  // available for the terminating \0 (even when max == 1)
  while(j < max - 1) {
    const int c = fgetc(fp);

    if (c == EOF || c == 10) {  // end of file or LF: end of line
      break;
    }
    if (c == 13) {              // skip CR
      continue;
    }
    s[j++] = (char) c;
  }
  s[j] = '\0';
}
3138 
// Searches for the first position in s where strfield(position, o) matches
// (case-insensitive search); returns that position, or NULL if none.
const char *strstrcase(const char *s, const char *o) {
  const char *at;

  for(at = s; *at != '\0'; at++) {
    if (strfield(at, o) != 0) {
      return at;
    }
  }
  return NULL;
}
3147 
// Unicode detector
// See http://www.unicode.org/unicode/reports/tr28/
// (sect Table 3.1B. Legal UTF-8 Byte Sequences)
// Bytes of the UTF-8 sequence currently being decoded:
// 'pos' is the index of the last byte stored in 'data'.
typedef struct {
  unsigned int pos;
  unsigned char data[4];
} t_auto_seq;

// The macros below are building blocks for is_unicode_utf8() and rely on
// its local variables 'seq', 'inseq', 'err' and 'ok'.

// char between a and b (a and b are hexadecimal digits, without the 0x prefix)
#define CHAR_BETWEEN(c, a, b)       ( (c) >= 0x##a ) && ( (c) <= 0x##b )
// sequence start
#define SEQBEG                      ( inseq == 0 )
// in this block: byte #n has been received and lies within [a, b]
// ('err' receives the result of the range test; it is left untouched when
// byte #n has not been received yet)
#define BLK(n,a, b)                 ( (seq.pos >= n) && ((err = CHAR_BETWEEN(seq.data[n], a, b))) )
#define ELT(n,a)                    BLK(n,a,a)
// end: the sequence is complete (always true)
#define SEQEND                      ((ok = 1))
// sequence started, character will fail if error (always true)
#define IN_SEQ                      ( (inseq = 1) )
// decoding error: a sequence was started, is not complete, and the last
// range test failed
#define BAD_SEQ                     ( (ok == 0) && (inseq != 0) && (!err) )
// no sequence started
#define NO_SEQ                      ( inseq == 0 )
3171 
// is this block an UTF unicode textfile?
// 0 : no
// 1 : yes
// -1: don't know
// The buffer is scanned byte by byte; the bytes of the sequence being
// decoded accumulate in 'seq', and the whole sequence is re-matched against
// the table of legal UTF-8 byte sequences each time a byte is added.
// 0 is returned as soon as a byte cannot start or continue a legal sequence.
// 1 is returned if at least one continuation byte was accepted, and -1 if
// only single-byte (00..7F) characters were seen (or if size is 0).
int is_unicode_utf8(const char *buffer_, const size_t size) {
  const unsigned char *buffer = (const unsigned char *) buffer_;
  t_auto_seq seq;
  size_t i;
  int is_utf = -1;

  RUNTIME_TIME_CHECK_SIZE(size);

  seq.pos = 0;
  for(i = 0; i < size; i++) {
    unsigned int ok = 0;        // set when the sequence is complete
    unsigned int inseq = 0;     // set when a leading byte has been recognized
    unsigned int err = 0;       // result of the last range test performed

    seq.data[seq.pos] = buffer[i];
    // Each branch describes one legal sequence; branch bodies are empty as
    // all the work is done by the side effects of the macros. Once a leading
    // byte matched (IN_SEQ), SEQBEG is false for all following branches.
     /**/ if (SEQBEG && BLK(0, 00, 7F) && IN_SEQ && SEQEND) {
    } else if (SEQBEG && BLK(0, C2, DF) && IN_SEQ && BLK(1, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, E0) && IN_SEQ && BLK(1, A0, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, E1, EC) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, ED) && IN_SEQ && BLK(1, 80, 9F)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, EE, EF) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, F0) && IN_SEQ && BLK(1, 90, BF)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (SEQBEG && BLK(0, F1, F3) && IN_SEQ && BLK(1, 80, BF)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (SEQBEG && ELT(0, F4) && IN_SEQ && BLK(1, 80, 8F)
               && BLK(2, 80, BF) && BLK(3, 80, BF) && SEQEND) {
    } else if (NO_SEQ) {        // bad, unknown
      return 0;
    }
    /* */

    /* Error */
    if (BAD_SEQ) {
      return 0;
    }

    /* unicode character */
    /* (a byte beyond the first one of a sequence was accepted) */
    if (seq.pos > 0)
      is_utf = 1;

    /* Next */
    /* (restart on a complete sequence, otherwise wait for one more byte) */
    if (ok)
      seq.pos = 0;
    else
      seq.pos++;

    /* Internal error */
    if (seq.pos >= 4)
      return 0;

  }

  return is_utf;
}
3235 
// Builds a histogram of byte values: after the call, map[v] is the number
// of bytes equal to v among the first 'size' bytes of buffer.
// map must have room for 256 counters; it is reset first.
void map_characters(unsigned char *buffer, unsigned int size, unsigned int *map) {
  const unsigned char *cursor = buffer;
  unsigned int remaining;
  unsigned int slot;

  for(slot = 0; slot < 256; slot++) {
    map[slot] = 0;
  }
  for(remaining = size; remaining > 0; remaining--) {
    map[*cursor++]++;
  }
}
3244 
3245 // le fichier est-il un fichier html?
3246 //  0 : non
3247 //  1 : oui
3248 // -1 : on sait pas
3249 // -2 : on sait pas, pas d'extension
ishtml(httrackp * opt,const char * fil)3250 int ishtml(httrackp * opt, const char *fil) {
3251   /* User-defined MIME types (overrides ishtml()) */
3252   char BIGSTK fil_noquery[HTS_URLMAXSIZE * 2];
3253   char mime[256];
3254   char *a;
3255 
3256   strcpybuff(fil_noquery, fil);
3257   if ((a = strchr(fil_noquery, '?')) != NULL) {
3258     *a = '\0';
3259   }
3260   if (get_userhttptype(opt, mime, fil_noquery)) {
3261     if (is_html_mime_type(mime)) {
3262       return 1;
3263     } else {
3264       return 0;
3265     }
3266   }
3267 
3268   if (!strnotempty(fil_noquery)) {
3269     return -2;
3270   }
3271 
3272   /* Search for known ext */
3273   for(a = fil_noquery + strlen(fil_noquery) - 1;
3274       *a != '.' && *a != '/' && a > fil_noquery; a--) ;
3275   if (*a == '.') {              // a une extension
3276     char BIGSTK fil_noquery[HTS_URLMAXSIZE * 2];
3277     char *b;
3278     int ret;
3279     char *dotted = a;
3280 
3281     fil_noquery[0] = '\0';
3282     a++;                        // pointer sur extension
3283     strncatbuff(fil_noquery, a, HTS_URLMAXSIZE);
3284     b = strchr(fil_noquery, '?');
3285     if (b)
3286       *b = '\0';
3287     ret = ishtml_ext(fil_noquery);      // retour
3288     if (ret == -1) {
3289       switch (is_knowntype(opt, dotted)) {
3290       case 1:
3291         ret = 0;                // connu, non html
3292         break;
3293       case 2:
3294         ret = 1;                // connu, html
3295         break;
3296       default:
3297         ret = -1;               // inconnu..
3298         break;
3299       }
3300     }
3301     return ret;
3302   } else
3303     return -2;                  // indéterminé, par exemple /truc
3304 }
3305 
// same as ishtml(), but for the extension only (without the dot)
// returns 1 if the extension is one of the html extensions, -1 (unknown)
// otherwise
int ishtml_ext(const char *a) {
  static const char *const html_exts[] = {
    "html", "htm", "shtml", "phtml", "htmlx", "shtm", "phtm", "htmx"
  };
  size_t k;

  for(k = 0; k < sizeof(html_exts) / sizeof(html_exts[0]); k++) {
    if (strfield2(a, html_exts[k])) {
      return 1;
    }
  }
  return -1;                    // unknown..
}
3349 
// Tells whether an HTTP status code is an error (4xx or 5xx): 1 if so,
// 0 otherwise
int ishttperror(int err) {
  const int status_class = err / 100;

  return (status_class == 4 || status_class == 5) ? 1 : 0;
}
3360 
/* Declare a non-const version of FUN */
/* Defines 'char *FUN(char *source)' on top of an existing
   'FUN_const(const char *)' function: the const result is turned back into
   a pointer into the caller's (non-const) string by re-applying its offset
   to 'source'; a NULL result is passed through unchanged. */
#define DECLARE_NON_CONST_VERSION(FUN) \
char *FUN(char *source) { \
  const char *const ret = FUN ##_const(source); \
  return ret != NULL ? source + ( ret - source ) : NULL; \
}
3367 
3368 // retourne le pointeur ou le pointeur + offset si il existe dans la chaine un @ signifiant
3369 // une identification
jump_identification_const(const char * source)3370 HTSEXT_API const char *jump_identification_const(const char *source) {
3371   const char *a, *trytofind;
3372 
3373   if (strcmp(source, "file://") == 0)
3374     return source;
3375   // rechercher dernier @ (car parfois email transmise dans adresse!)
3376   // mais sauter ftp:// éventuel
3377   a = jump_protocol_const(source);
3378   trytofind = strrchr_limit(a, '@', strchr(a, '/'));
3379   return trytofind != NULL ? trytofind : a;
3380 }
3381 
DECLARE_NON_CONST_VERSION(jump_identification)3382 HTSEXT_API DECLARE_NON_CONST_VERSION(jump_identification)
3383 
3384 HTSEXT_API const char *jump_normalized_const(const char *source) {
3385   if (strcmp(source, "file://") == 0)
3386     return source;
3387   source = jump_identification_const(source);
3388   if (strfield(source, "www") && source[3] != '\0') {
3389     if (source[3] == '.') {     // www.foo.com -> foo.com
3390       source += 4;
3391     } else {                    // www-4.foo.com -> foo.com
3392       const char *a = source + 3;
3393 
3394       while(*a && (isdigit(*a) || *a == '-'))
3395         a++;
3396       if (*a == '.') {
3397         source = a + 1;
3398       }
3399     }
3400   }
3401   return source;
3402 }
3403 
DECLARE_NON_CONST_VERSION(jump_normalized)3404 HTSEXT_API DECLARE_NON_CONST_VERSION(jump_normalized)
3405 
/* qsort() comparator for an array of strings: compares the two strings
   while ignoring their first character (the '?' or '&' separator). */
static int sortNormFnc(const void *a_, const void *b_) {
  const char *const lhs = *(const char *const *) a_;
  const char *const rhs = *(const char *const *) b_;

  return strcmp(lhs + 1, rhs + 1);
}
3412 
/* Writes a normalized copy of the path 'source' into 'dest' and returns
   'dest':
   - before the query string, consecutive slashes are collapsed
     (foo//bar -> foo/bar);
   - the query arguments ('?' then '&' separated) are sorted, so that two
     paths differing only by the order of their arguments normalize
     identically.
   'dest' must be large enough to hold a copy of 'source'. */
HTSEXT_API char *fil_normalized(const char *source, char *dest) {
  char lastc = 0;
  int gotquery = 0;
  int ampargs = 0;              /* number of query arguments ('?' + each '&') */
  size_t i, j;
  char *query = NULL;

  /* First pass: copy while collapsing slashes, and count the arguments */
  for(i = j = 0; source[i] != '\0'; i++) {
    if (!gotquery && source[i] == '?')
      gotquery = ampargs = 1;
    if ((!gotquery && lastc == '/' && source[i] == '/') // foo//bar -> foo/bar
      ) {
    } else {
      if (gotquery && source[i] == '&') {
        ampargs++;
      }
      dest[j++] = source[i];
    }
    lastc = source[i];
  }
  dest[j++] = '\0';

  /* Sort arguments (&foo=1&bar=2 == &bar=2&foo=1) */
  if (ampargs > 1) {
    char **amps = malloct(ampargs * sizeof(char *));
    char *copyBuff = NULL;
    size_t qLen = 0;

    assertf(amps != NULL);
    gotquery = 0;
    /* Split the query in place: each separator ('?' or '&') is replaced by
       a \0, and amps[] records where each argument (separator included)
       starts */
    for(i = j = 0; dest[i] != '\0'; i++) {
      if ((gotquery && dest[i] == '&') || (!gotquery && dest[i] == '?')) {
        if (!gotquery) {
          gotquery = 1;
          query = &dest[i];
          /* measured before the first split, so this is the whole query */
          qLen = strlen(query);
        }
        assertf(j < ampargs);
        amps[j++] = &dest[i];
        dest[i] = '\0';
      }
    }
    assertf(gotquery);
    assertf(j == ampargs);

    /* Sort 'em all */
    qsort(amps, ampargs, sizeof(char *), sortNormFnc);

    /* Replace query by sorted query */
    copyBuff = malloct(qLen + 1);
    assertf(copyBuff != NULL);
    copyBuff[0] = '\0';
    for(i = 0; i < ampargs; i++) {
      if (i == 0)
        strcatbuff(copyBuff, "?");
      else
        strcatbuff(copyBuff, "&");
      /* + 1: skip the position of the former separator */
      strcatbuff(copyBuff, amps[i] + 1);
    }
    /* same arguments, same number of separators: the length is unchanged */
    assertf(strlen(copyBuff) == qLen);
    strcpybuff(query, copyBuff);

    /* Cleanup */
    freet(amps);
    freet(copyBuff);
  }

  return dest;
}
3482 
3483 #define endwith(a) ( (len >= (sizeof(a)-1)) ? ( strncmp(dest, a+len-(sizeof(a)-1), sizeof(a)-1) == 0 ) : 0 );
adr_normalized(const char * source,char * dest)3484 HTSEXT_API char *adr_normalized(const char *source, char *dest) {
3485   /* not yet too aggressive (no com<->net<->org checkings) */
3486   strcpybuff(dest, jump_normalized_const(source));
3487   return dest;
3488 }
3489 
3490 #undef endwith
3491 
3492 // find port (:80) or NULL if not found
3493 // can handle IPV6 addresses
jump_toport_const(const char * source)3494 HTSEXT_API const char *jump_toport_const(const char *source) {
3495   const char *a, *trytofind;
3496 
3497   a = jump_identification_const(source);
3498   trytofind = strrchr_limit(a, ']', strchr(source, '/'));       // find last ] (http://[3ffe:b80:1234::1]:80/foo.html)
3499   a = strchr((trytofind) ? trytofind : a, ':');
3500   return a;
3501 }
3502 
DECLARE_NON_CONST_VERSION(jump_toport)3503 HTSEXT_API DECLARE_NON_CONST_VERSION(jump_toport)
3504 
// strrchr, but not too far
// Searches for the last occurrence of c in s located before 'limit', and
// returns the position right AFTER it (or NULL if there is none).
// A NULL limit means "no limit" (whole string).
const char *strrchr_limit(const char *s, char c, const char *limit) {
  const char *after_last = NULL;
  const char *p;

  if (limit == NULL) {
    p = strrchr(s, c);
    return p != NULL ? (p + 1) : NULL;
  }

  // walk through the successive occurrences while they remain before the
  // limit; NULL is tested first, so that only valid pointers are compared
  for(p = strchr(s, c); p != NULL && p < limit; p = strchr(after_last, c)) {
    after_last = p + 1;
  }
  return after_last;
}
3522 
3523 // retourner adr sans ftp://
jump_protocol_const(const char * source)3524 const char *jump_protocol_const(const char *source) {
3525   int p;
3526 
3527   // scheme
3528   // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)
3529   if ((p = strfield(source, "http:")))
3530     source += p;
3531   else if ((p = strfield(source, "ftp:")))
3532     source += p;
3533   else if ((p = strfield(source, "https:")))
3534     source += p;
3535   else if ((p = strfield(source, "file:")))
3536     source += p;
3537   // net_path
3538   if (strncmp(source, "//", 2) == 0)
3539     source += 2;
3540   return source;
3541 }
3542 
DECLARE_NON_CONST_VERSION(jump_protocol)3543 DECLARE_NON_CONST_VERSION(jump_protocol)
3544 
// base 64 encoding of the size_a bytes of a into b (\0-terminated)
// Input is consumed by groups of up to 3 bytes, each producing 4 output
// characters ('=' padded for an incomplete final group).
// If crlf is set, a CR LF pair is emitted after every 20th group.
void code64(unsigned char *a, int size_a, unsigned char *b, int crlf) {
  static const char alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  int remaining = size_a;
  int counted = 0;              // input bytes accounted for (3 per group)

  while(remaining > 0) {
    const int n = (remaining >= 3) ? 3 : remaining;
    unsigned long int group = 0;
    int k;

    // gather the group, then left-align it on 24 bits
    for(k = 0; k < n; k++) {
      group = (group << 8) | *a++;
    }
    group <<= 8 * (3 - n);
    remaining -= n;

    // four 6-bit digits; the missing ones are replaced by padding
    *b++ = alphabet[(group >> 18) & 63];
    *b++ = alphabet[(group >> 12) & 63];
    *b++ = (n >= 2) ? alphabet[(group >> 6) & 63] : '=';
    *b++ = (n >= 3) ? alphabet[group & 63] : '=';

    if (crlf && ((counted += 3) % 60) == 0) {
      *b++ = '\r';
      *b++ = '\n';
    }
  }
  *b = '\0';
}
3601 
3602 // return the hex character value, or -1 on error.
ehexh(const char c)3603 static HTS_INLINE int ehexh(const char c) {
3604   if (c >= '0' && c <= '9')
3605     return c - '0';
3606   else if (c >= 'a' && c <= 'f')
3607     return (c - 'a' + 10);
3608   else if (c >= 'A' && c <= 'F')
3609     return (c - 'A' + 10);
3610   else
3611     return -1;
3612 }
3613 
3614 // return the two-hex character value, or -1 on error.
ehex(const char * s)3615 static HTS_INLINE int ehex(const char *s) {
3616   const int c1 = ehexh(s[0]);
3617   if (c1 >= 0) {
3618     const int c2 = ehexh(s[1]);
3619     if (c2 >= 0) {
3620       return 16*c1 + c2;
3621     }
3622   }
3623   return -1;
3624 }
3625 
// Decodes the HTML entities of s in place (through hts_unescapeEntities());
// a failure is treated as a fatal assertion.
void unescape_amp(char *s) {
  const size_t capacity = strlen(s) + 1;        // current string, with its \0

  if (hts_unescapeEntities(s, s, capacity) != 0) {
    assertf(! "error escaping html entities");
  }
}
3631 
3632 // remplacer %20 par ' ', etc..
3633 // buffer MAX 1Ko
unescape_http(char * const catbuff,const size_t size,const char * const s)3634 HTSEXT_API char *unescape_http(char *const catbuff, const size_t size, const char *const s) {
3635   size_t i, j;
3636 
3637   RUNTIME_TIME_CHECK_SIZE(size);
3638 
3639   for(i = 0, j = 0; s[i] != '\0' && j + 1 < size ; i++) {
3640     int h;
3641     if (s[i] == '%' && (h = ehex(&s[i + 1])) >= 0) {
3642       catbuff[j++] = (char) h;
3643       i += 2;
3644     }
3645     else
3646       catbuff[j++] = s[i];
3647   }
3648   catbuff[j++] = '\0';
3649   return catbuff;
3650 }
3651 
3652 // unescape in URL/URI ONLY what has to be escaped, to form a standard URL/URI
3653 // DOES NOT DECODE %25 (part of CHAR_DELIM)
3654 // no_high & 1: decode high chars
3655 // no_high & 2: decode space
unescape_http_unharm(char * const catbuff,const size_t size,const char * s,const int no_high)3656 HTSEXT_API char *unescape_http_unharm(char *const catbuff, const size_t size,
3657                                       const char *s, const int no_high) {
3658   size_t i, j;
3659 
3660   RUNTIME_TIME_CHECK_SIZE(size);
3661 
3662   for(i = 0, j = 0; s[i] != '\0' && j + 1 < size ; i++) {
3663     if (s[i] == '%') {
3664       const int nchar = ehex(&s[i + 1]);
3665 
3666       const int test =
3667         ( CHAR_RESERVED(nchar) && nchar != '+' )        /* %2B => + (not in query!) */
3668         || CHAR_DELIM(nchar)
3669         || CHAR_UNWISE(nchar)
3670         || CHAR_LOW(nchar)    /* CHAR_SPECIAL */
3671         || ( CHAR_XXAVOID(nchar) && ( nchar != ' ' || ( no_high & 2) == 0 ) )
3672         || ( ( no_high & 1 ) && CHAR_HIG(nchar) )
3673         ;
3674 
3675       if (!test && nchar >= 0) {  /* can safely unescape */
3676         catbuff[j++] = (char) nchar;
3677         i += 2;
3678       } else {
3679         catbuff[j++] = '%';
3680       }
3681     } else {
3682       catbuff[j++] = s[i];
3683     }
3684   }
3685   catbuff[j++] = '\0';
3686   return catbuff;
3687 }
3688 
// replaces spaces by %20 (x_escape_http() mode 2: only the space is escaped)
// src is escaped into dest, whose capacity is 'size'
// returns the result of x_escape_http(): the number of characters written,
// or 'size' upon overflow
HTSEXT_API size_t escape_spc_url(const char *const src,
                                 char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 2);
}
3695 
// smith / john -> smith%20%2f%20john
// (x_escape_http() mode 1: reserved, delimiter, unwise, special, "avoid" and
// mark characters are all escaped)
// returns the result of x_escape_http(): the number of characters written,
// or 'size' upon overflow
HTSEXT_API size_t escape_in_url(const char *const src,
                                char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 1);
}
3701 
// smith / john -> smith%20/%20john
// (x_escape_http() mode 3: only special and "avoid" characters are escaped)
// returns the result of x_escape_http(): the number of characters written,
// or 'size' upon overflow
HTSEXT_API size_t escape_uri(const char *const src,
                             char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 3);
}
3707 
// x_escape_http() mode 30: reserved characters (except '/'), delimiter,
// unwise, special and "avoid" characters are escaped
// returns the result of x_escape_http(): the number of characters written,
// or 'size' upon overflow
HTSEXT_API size_t escape_uri_utf(const char *const src,
                                 char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 30);
}
3712 
// x_escape_http() mode 0: double quotes, spaces and special characters are
// escaped
// returns the result of x_escape_http(): the number of characters written,
// or 'size' upon overflow
HTSEXT_API size_t escape_check_url(const char *const src,
                                   char *const dest, const size_t size) {
  return x_escape_http(src, dest, size, 0);
}
3717 
// same as escape_check_url, but returns char*
// (the destination buffer; the length returned by escape_check_url(), and
// with it the overflow indication, is dropped)
HTSEXT_API char *escape_check_url_addr(const char *const src,
                                       char *const dest, const size_t size) {
  escape_check_url(src, dest, size);
  return dest;
}
3724 
// Same as above, but appending to "dest"
// For each escape function NAME, defines append_NAME(src, dest, size):
// 'dest' (total capacity 'size') must already hold a \0-terminated string
// (asserted); 'src' is escaped right after it, within the remaining room.
// The value returned is the one of NAME() for the appended part only.
#undef DECLARE_APPEND_ESCAPE_VERSION
#define DECLARE_APPEND_ESCAPE_VERSION(NAME) \
HTSEXT_API size_t append_ ##NAME(const char *const src, char *const dest, const size_t size) { \
  const size_t len = strnlen(dest, size); \
  assertf(len < size); \
  return NAME(src, dest + len, size - len); \
}

DECLARE_APPEND_ESCAPE_VERSION(escape_in_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_spc_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_uri_utf)
DECLARE_APPEND_ESCAPE_VERSION(escape_check_url)
DECLARE_APPEND_ESCAPE_VERSION(escape_uri)

#undef DECLARE_APPEND_ESCAPE_VERSION
3741 
// Same as above, but in-place
// For each escape function NAME, defines inplace_NAME(dest, size):
// the \0-terminated string held by 'dest' (total capacity 'size', asserted)
// is first copied aside, then escaped back into 'dest'.
// The copy lives in a 256-byte local buffer when it fits, and in a
// temporary allocation (released before returning) otherwise.
// The value returned is the one of NAME().
#undef DECLARE_INPLACE_ESCAPE_VERSION
#define DECLARE_INPLACE_ESCAPE_VERSION(NAME) \
HTSEXT_API size_t inplace_ ##NAME(char *const dest, const size_t size) { \
  char buffer[256]; \
  const size_t len = strnlen(dest, size); \
  const int in_buffer = len + 1 < sizeof(buffer); \
  char *src = in_buffer ? buffer : malloct(len + 1); \
  size_t ret; \
  assertf(src != NULL); \
  assertf(len < size); \
  memcpy(src, dest, len + 1); \
  ret = NAME(src, dest, size); \
  if (!in_buffer) { \
    freet(src); \
  } \
  return ret; \
}

DECLARE_INPLACE_ESCAPE_VERSION(escape_in_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_spc_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_uri_utf)
DECLARE_INPLACE_ESCAPE_VERSION(escape_check_url)
DECLARE_INPLACE_ESCAPE_VERSION(escape_uri)

#undef DECLARE_INPLACE_ESCAPE_VERSION
3768 
3769 
3770 HTSEXT_API size_t make_content_id(const char *const adr, const char *const fil,
3771                                   char *const dest, const size_t size) {
3772   char *a;
3773   size_t esc_size = escape_in_url(adr, dest, size);
3774   esc_size += escape_in_url(fil, dest + esc_size, size - esc_size);
3775   RUNTIME_TIME_CHECK_SIZE(size);
3776   for(a = dest ; (a = strchr(a, '%')) != NULL ; a++) {
3777     *a = 'X';
3778   }
3779   return esc_size;
3780 }
3781 
3782 // strip all control characters
escape_remove_control(char * const s)3783 HTSEXT_API void escape_remove_control(char *const s) {
3784   size_t i, j;
3785   for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3786     const unsigned char c = (unsigned  char) s[i];
3787     if (c >= 32) {
3788       if (i != j) {
3789         assertf(j < i);
3790         s[j] = s[i];
3791       }
3792       j++;
3793     }
3794   }
3795 }
3796 
/* Appends the character C to 'dest' at index 'j' (both taken from the
   enclosing function, as well as 'size', the capacity of 'dest').
   When only the room for the terminating \0 is left, the string is
   terminated and the ENCLOSING FUNCTION returns 'size' (overflow). */
#undef ADD_CHAR
#define ADD_CHAR(C) do {   \
      assertf(j < size);    \
      if (j + 1 == size) { \
        dest[j] = '\0';    \
        return size;       \
      }                    \
      dest[j++] = (C);     \
  } while(0)
3806 
3807 /* Returns the number of characters written (not taking in account the terminating \0), or 'size' upon overflow. */
x_escape_http(const char * const s,char * const dest,const size_t size,const int mode)3808 HTSEXT_API size_t x_escape_http(const char *const s, char *const dest,
3809                                 const size_t size, const int mode) {
3810   static const char hex[] = "0123456789abcdef";
3811   size_t i, j;
3812 
3813   RUNTIME_TIME_CHECK_SIZE(size);
3814 
3815   // Out-of-bound.
3816   // Previous character is supposed to be the terminating \0.
3817   if (size == 0) {
3818     return 0;
3819   }
3820 
3821   for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3822     const unsigned char c = (unsigned char) s[i];
3823     int test = 0;
3824 
3825     if (mode == 0)
3826       test = c == '"' || c == ' ' || CHAR_SPECIAL(c);
3827     else if (mode == 1)
3828       test = CHAR_RESERVED(c)
3829              || CHAR_DELIM(c)
3830              || CHAR_UNWISE(c)
3831              || CHAR_SPECIAL(c)
3832              || CHAR_XXAVOID(c)
3833              || CHAR_MARK(c);
3834     else if (mode == 2)
3835       test = c == ' ';       // n'escaper que espace
3836     else if (mode == 3)         // échapper que ce qui est nécessaire
3837       test = CHAR_SPECIAL(c)
3838              || CHAR_XXAVOID(c);
3839     else if (mode == 30)      // échapper que ce qui est nécessaire
3840       test = (c != '/' && CHAR_RESERVED(c))
3841         || CHAR_DELIM(c)
3842         || CHAR_UNWISE(c)
3843         || CHAR_SPECIAL(c)
3844         || CHAR_XXAVOID(c);
3845 
3846     if (!test) {
3847       ADD_CHAR(c);
3848     } else {
3849       ADD_CHAR('%');
3850       ADD_CHAR(hex[c / 16]);
3851       ADD_CHAR(hex[c % 16]);
3852     }
3853   }
3854 
3855   assertf(j < size);
3856   dest[j] = '\0';
3857   return j;
3858 }
3859 
escape_for_html_print(const char * const s,char * const dest,const size_t size)3860 HTSEXT_API size_t escape_for_html_print(const char *const s, char *const dest, const size_t size) {
3861   size_t i, j;
3862 
3863   RUNTIME_TIME_CHECK_SIZE(size);
3864 
3865   for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3866     const unsigned char c = (unsigned char) s[i];
3867     if (c == '&') {
3868       ADD_CHAR('&');
3869       ADD_CHAR('a');
3870       ADD_CHAR('m');
3871       ADD_CHAR('p');
3872       ADD_CHAR(';');
3873     } else {
3874       ADD_CHAR(c);
3875     }
3876   }
3877   assertf(j < size);
3878   dest[j] = '\0';
3879   return j;
3880 }
3881 
escape_for_html_print_full(const char * const s,char * const dest,const size_t size)3882 HTSEXT_API size_t escape_for_html_print_full(const char *const s, char *const dest, const size_t size) {
3883   static const char hex[] = "0123456789abcdef";
3884   size_t i, j;
3885 
3886   RUNTIME_TIME_CHECK_SIZE(size);
3887 
3888   for(i = 0, j = 0 ; s[i] != '\0' ; i++) {
3889     const unsigned char c = (unsigned char) s[i];
3890     if (c == '&') {
3891       ADD_CHAR('&');
3892       ADD_CHAR('a');
3893       ADD_CHAR('m');
3894       ADD_CHAR('p');
3895       ADD_CHAR(';');
3896     } else if (CHAR_HIG(c)) {
3897       ADD_CHAR('&');
3898       ADD_CHAR('#');
3899       ADD_CHAR('x');
3900       ADD_CHAR(hex[c / 16]);
3901       ADD_CHAR(hex[c % 16]);
3902       ADD_CHAR(';');
3903     } else {
3904       ADD_CHAR(c);
3905     }
3906   }
3907   assertf(j < size);
3908   dest[j] = '\0';
3909   return j;
3910 }
3911 
3912 #undef ADD_CHAR
3913 
// Lowercase conversion, using a caller-supplied buffer:
// copies 'a' into 'catbuff' (through strcpybuff), lowercases the copy with
// hts_lowcase() and returns 'catbuff'. 'a' itself is not modified.
char *convtolower(char *catbuff, const char *a) {
  strcpybuff(catbuff, a);
  hts_lowcase(catbuff);         // lower case
  return catbuff;
}
3920 
// In-place lowercase conversion.
// Only the letters 'A'..'Z' are changed; every other byte is left as is.
void hts_lowcase(char *s) {
  char *p;

  for(p = s; *p != '\0'; p++) {
    if (*p >= 'A' && *p <= 'Z') {
      *p = (char) (*p + ('a' - 'A'));
    }
  }
}
3929 
// Replace every occurrence of character 'from' by 'to' in string 's', in place.
// - from == '\0' or from == to: nothing to do (the string is left untouched);
// - to == '\0': the string is cut at the first occurrence of 'from'.
void hts_replace(char *s, char from, char to) {
  char *a;

  // Searching for the terminator, or replacing a character by itself, would
  // never make progress.
  if (from == '\0' || from == to) {
    return;
  }
  // Resume the search right after the previous hit instead of rescanning
  // the whole string.
  for(a = strchr(s, from); a != NULL; a = strchr(a + 1, from)) {
    *a = to;
    if (to == '\0') {           // the string now ends here
      break;
    }
  }
}
3938 
// Guess the MIME type of a local file.
// ex: fil="toto.gif" -> s="image/gif"
// Thin wrapper around get_httptype() with flag=1 (always produce a type).
void guess_httptype(httrackp * opt, char *s, const char *fil) {
  get_httptype(opt, s, fil, 1);
}
3944 
3945 // idem
3946 // flag: 1 si toujours renvoyer un type
get_httptype(httrackp * opt,char * s,const char * fil,int flag)3947 HTSEXT_API void get_httptype(httrackp * opt, char *s, const char *fil, int flag) {
3948   // userdef overrides get_httptype
3949   if (get_userhttptype(opt, s, fil)) {
3950     return;
3951   }
3952   // regular tests
3953   if (ishtml(opt, fil) == 1) {
3954     strcpybuff(s, "text/html");
3955   } else {
3956     /* Check html -> text/html */
3957     const char *a = fil + strlen(fil) - 1;
3958 
3959     while((*a != '.') && (*a != '/') && (a > fil))
3960       a--;
3961     if (*a == '.' && strlen(a) < 32) {
3962       int j = 0;
3963 
3964       a++;
3965       while(strnotempty(hts_mime[j][1])) {
3966         if (strfield2(hts_mime[j][1], a)) {
3967           if (hts_mime[j][0][0] != '*') {       // Une correspondance existe
3968             strcpybuff(s, hts_mime[j][0]);
3969             return;
3970           }
3971         }
3972         j++;
3973       }
3974 
3975       if (flag)
3976         sprintf(s, "application/%s", a);
3977     } else {
3978       if (flag)
3979         strcpybuff(s, "application/octet-stream");
3980     }
3981   }
3982 }
3983 
3984 // get type of fil (php)
3985 // s: buffer (text/html) or NULL
3986 // return: 1 if known by user
get_userhttptype(httrackp * opt,char * s,const char * fil)3987 int get_userhttptype(httrackp * opt, char *s, const char *fil) {
3988   if (s != NULL) {
3989     if (s)
3990       s[0] = '\0';
3991     if (fil == NULL || *fil == '\0')
3992       return 0;
3993 #if 1
3994     if (StringLength(opt->mimedefs) > 0) {
3995 
3996       /* Check --assume foooo/foo/bar.cgi=text/html, then foo/bar.cgi=text/html, then bar.cgi=text/html */
3997       /* also: --assume baz,bar,foooo/foo/bar.cgi=text/html */
3998       /* start from path beginning */
3999       do {
4000         const char *next;
4001         const char *mimedefs = StringBuff(opt->mimedefs);       /* loop through mime definitions : \nfoo=bar\nzoo=baz\n.. */
4002 
4003         while(*mimedefs != '\0') {
4004           const char *segment = fil + 1;
4005 
4006           if (*mimedefs == '\n') {
4007             mimedefs++;
4008           }
4009           /* compare current segment with user's definition */
4010           do {
4011             int i;
4012 
4013             /* check current item */
4014             for(i = 0; mimedefs[i] != '\0'      /* end of all defs */
4015                 && mimedefs[i] != ' '   /* next item in left list */
4016                 && mimedefs[i] != '='   /* end of left list */
4017                 && mimedefs[i] != '\n'  /* end of this def (?) */
4018                 && mimedefs[i] == segment[i]    /* same item */
4019                 ; i++) ;
4020             /* success */
4021             if ((mimedefs[i] == '=' || mimedefs[i] == ' ')
4022                 && segment[i] == '\0') {
4023               int i2;
4024 
4025               while(mimedefs[i] != 0 && mimedefs[i] != '\n'
4026                     && mimedefs[i] != '=')
4027                 i++;
4028               if (mimedefs[i] == '=') {
4029                 i++;
4030                 for(i2 = 0;
4031                     mimedefs[i + i2] != '\n' && mimedefs[i + i2] != '\0';
4032                     i2++) {
4033                   s[i2] = mimedefs[i + i2];
4034                 }
4035                 s[i2] = '\0';
4036                 return 1;       /* SUCCESS! */
4037               }
4038             }
4039             /* next item in list */
4040             for(mimedefs += i;
4041                 *mimedefs != '\0' && *mimedefs != '\n' && *mimedefs != '='
4042                 && *mimedefs != ' '; mimedefs++) ;
4043             if (*mimedefs == ' ') {
4044               mimedefs++;
4045             }
4046           } while(*mimedefs != '\0' && *mimedefs != '\n' && *mimedefs != '=');
4047           /* next user-def */
4048           for(; *mimedefs != '\0' && *mimedefs != '\n'; mimedefs++) ;
4049         }
4050         /* shorten segment */
4051         next = strchr(fil + 1, '/');
4052         if (next == NULL) {
4053           /* ext tests */
4054           next = strchr(fil + 1, '.');
4055         }
4056         fil = next;
4057       } while(fil != NULL);
4058     }
4059 #else
4060     if (*buffer) {
4061       char BIGSTK search[1024];
4062       char *detect;
4063 
4064       sprintf(search, "\n%s=", ext);    // php=text/html
4065       detect = strstr(*buffer, search);
4066       if (!detect) {
4067         sprintf(search, "\n%s\n", ext); // php\ncgi=text/html
4068         detect = strstr(*buffer, search);
4069       }
4070       if (detect) {
4071         detect = strchr(detect, '=');
4072         if (detect) {
4073           detect++;
4074           if (s) {
4075             char *a;
4076 
4077             a = strchr(detect, '\n');
4078             if (a) {
4079               strncatbuff(s, detect, (int) (a - detect));
4080             }
4081           }
4082           return 1;
4083         }
4084       }
4085     }
4086 #endif
4087   }
4088   return 0;
4089 }
4090 
4091 // renvoyer extesion d'un type mime..
4092 // ex: "image/gif" -> gif
give_mimext(char * s,const char * st)4093 void give_mimext(char *s, const char *st) {
4094   int ok = 0;
4095   int j = 0;
4096 
4097   s[0] = '\0';
4098   while((!ok) && (strnotempty(hts_mime[j][1]))) {
4099     if (strfield2(hts_mime[j][0], st)) {
4100       if (hts_mime[j][1][0] != '*') {   // Une correspondance existe
4101         strcpybuff(s, hts_mime[j][1]);
4102         ok = 1;
4103       }
4104     }
4105     j++;
4106   }
4107   // wrap "x" mimetypes, such as:
4108   // application/x-mp3
4109   // or
4110   // application/mp3
4111   if (!ok) {
4112     int p;
4113     const char *a = NULL;
4114 
4115     if ((p = strfield(st, "application/x-")))
4116       a = st + p;
4117     else if ((p = strfield(st, "application/")))
4118       a = st + p;
4119     if (a) {
4120       if ((int) strlen(a) >= 1) {
4121         if ((int) strlen(a) <= 4) {
4122           strcpybuff(s, a);
4123           ok = 1;
4124         }
4125       }
4126     }
4127   }
4128 }
4129 
4130 // extension connue?..
4131 //  0 : non
4132 //  1 : oui
4133 //  2 : html
is_knowntype(httrackp * opt,const char * fil)4134 HTSEXT_API int is_knowntype(httrackp * opt, const char *fil) {
4135   char catbuff[CATBUFF_SIZE];
4136   const char *ext;
4137   int j = 0;
4138 
4139   if (!fil)
4140     return 0;
4141   ext = get_ext(catbuff, sizeof(catbuff), fil);
4142   while(strnotempty(hts_mime[j][1])) {
4143     if (strfield2(hts_mime[j][1], ext)) {
4144       if (is_html_mime_type(hts_mime[j][0]))
4145         return 2;
4146       else
4147         return 1;
4148     }
4149     j++;
4150   }
4151 
4152   // Known by user?
4153   return (is_userknowntype(opt, fil));
4154 }
4155 
4156 // known type?..
4157 //  0 : no
4158 //  1 : yes
4159 //  2 : html
4160 // setdefs : set mime buffer:
4161 //   file=(char*) "asp=text/html\nphp=text/html\n"
is_userknowntype(httrackp * opt,const char * fil)4162 HTSEXT_API int is_userknowntype(httrackp * opt, const char *fil) {
4163   char BIGSTK mime[1024];
4164 
4165   if (!fil)
4166     return 0;
4167   if (!strnotempty(fil))
4168     return 0;
4169   mime[0] = '\0';
4170   get_userhttptype(opt, mime, fil);
4171   if (!strnotempty(mime))
4172     return 0;
4173   else if (is_html_mime_type(mime))
4174     return 2;
4175   else
4176     return 1;
4177 }
4178 
4179 // page dynamique?
4180 // is_dyntype(get_ext("foo.asp"))
is_dyntype(const char * fil)4181 HTSEXT_API int is_dyntype(const char *fil) {
4182   int j = 0;
4183 
4184   if (!fil)
4185     return 0;
4186   if (!strnotempty(fil))
4187     return 0;
4188   while(strnotempty(hts_ext_dynamic[j])) {
4189     if (strfield2(hts_ext_dynamic[j], fil)) {
4190       return 1;
4191     }
4192     j++;
4193   }
4194   return 0;
4195 }
4196 
4197 // types critiques qui ne doivent pas être changés car renvoyés par des serveurs qui ne
4198 // connaissent pas le type
may_unknown(httrackp * opt,const char * st)4199 int may_unknown(httrackp * opt, const char *st) {
4200   int j = 0;
4201 
4202   // types média
4203   if (may_be_hypertext_mime(opt, st, "")) {
4204     return 1;
4205   }
4206   while(strnotempty(hts_mime_keep[j])) {
4207     if (strfield2(hts_mime_keep[j], st)) {      // trouvé
4208       return 1;
4209     }
4210     j++;
4211   }
4212   return 0;
4213 }
4214 
4215 /* returns 1 if the mime/filename seems to be bogus because of badly recognized multiple extension
4216   ; such as "application/x-wais-source" for "httrack-3.42-1.el5.src.rpm"
4217   reported by Hippy Dave 08/2008 (3.43) */
may_bogus_multiple(httrackp * opt,const char * mime,const char * filename)4218 int may_bogus_multiple(httrackp * opt, const char *mime, const char *filename) {
4219   int j;
4220 
4221   for(j = 0; strnotempty(hts_mime_bogus_multiple[j]); j++) {
4222     if (strfield2(hts_mime_bogus_multiple[j], mime)) {  /* found mime type in suspicious list */
4223       char ext[64];
4224 
4225       ext[0] = '\0';
4226       give_mimext(ext, mime);
4227       if (ext[0] != 0) {        /* we have an extension for that */
4228         const size_t ext_size = strlen(ext);
4229         const char *file = strrchr(filename, '/');      /* fetch terminal filename */
4230 
4231         if (file != NULL) {
4232           int i;
4233 
4234           for(i = 0; file[i] != 0; i++) {
4235             if (i > 0 && file[i - 1] == '.'
4236                 && strncasecmp(&file[i], ext, ext_size) == 0
4237                 && (file[i + ext_size] == 0 || file[i + ext_size] == '.'
4238                     || file[i + ext_size] == '?')) {
4239               return 1;         /* is ambiguous */
4240             }
4241           }
4242         }
4243       }
4244       return 0;
4245     }
4246   }
4247   return 0;
4248 }
4249 
4250 /* filename extension should not be changed because potentially bogus ; replaces may_unknown() (3.43) */
may_unknown2(httrackp * opt,const char * mime,const char * filename)4251 int may_unknown2(httrackp * opt, const char *mime, const char *filename) {
4252   int ret = may_unknown(opt, mime);
4253 
4254   if (ret == 0) {
4255     ret = may_bogus_multiple(opt, mime, filename);
4256   }
4257   return ret;
4258 }
4259 
// -- File utilities

// Pretty print for i/o: write 'buff' to 'fp', starting every non-empty line
// with 'prefix'. CR (13) characters are dropped and every LF (10) is written
// as "\r\n".
void fprintfio(FILE * fp, const char *buff, const char *prefix) {
  int at_line_start = 1;
  const char *p;

  for(p = buff; *p != '\0'; p++) {
    if (*p == 13) {
      continue;
    }
    if (*p == 10) {
      fprintf(fp, "\r\n");
      at_line_start = 1;
      continue;
    }
    if (at_line_start) {
      fprintf(fp, "%s", prefix);
      at_line_start = 0;
    }
    fputc(*p, fp);
  }
}
4283 
4284 /* Le fichier existe-t-il? (ou est-il accessible?) */
4285 /* Note: NOT utf-8 */
4286 /* Note: preserve errno */
fexist(const char * s)4287 int fexist(const char *s) {
4288   char catbuff[CATBUFF_SIZE];
4289   const int err = errno;
4290   struct stat st;
4291 
4292   memset(&st, 0, sizeof(st));
4293   if (stat(fconv(catbuff, sizeof(catbuff), s), &st) == 0) {
4294     if (S_ISREG(st.st_mode)) {
4295       return 1;
4296     } else {
4297       return 0;
4298     }
4299   }
4300   errno = err;
4301   return 0;
4302 }
4303 
4304 /* Le fichier existe-t-il? (ou est-il accessible?) */
4305 /* Note: utf-8 */
4306 /* Note: preserve errno */
fexist_utf8(const char * s)4307 int fexist_utf8(const char *s) {
4308   char catbuff[CATBUFF_SIZE];
4309   const int err = errno;
4310   STRUCT_STAT st;
4311 
4312   memset(&st, 0, sizeof(st));
4313   if (STAT(fconv(catbuff, sizeof(catbuff), s), &st) == 0) {
4314     if (S_ISREG(st.st_mode)) {
4315       return 1;
4316     } else {
4317       return 0;
4318     }
4319   }
4320   errno = err;
4321   return 0;
4322 }
4323 
/* Size of a file; -1 if the name is empty, if stat() fails, or if the entry
   is not a regular file */
/* Note: NOT utf-8 */
off_t fsize(const char *s) {
  struct stat st;

  if (!strnotempty(s)) {        // empty name: error
    return -1;
  }
  if (stat(s, &st) != 0 || !S_ISREG(st.st_mode)) {
    return -1;
  }
  return st.st_size;
}
4337 
4338 /* Taille d'un fichier, -1 si n'existe pas */
4339 /* Note: utf-8 */
fsize_utf8(const char * s)4340 off_t fsize_utf8(const char *s) {
4341   STRUCT_STAT st;
4342 
4343   if (!strnotempty(s))          // nom vide: erreur
4344     return -1;
4345   if (STAT(s, &st) == 0 && S_ISREG(st.st_mode)) {
4346     return st.st_size;
4347   } else {
4348     return -1;
4349   }
4350 }
4351 
/* Size of an opened file: the end-of-file offset of 'fp'.
   The current position is saved first and restored afterwards.
   Returns -1 when 'fp' is NULL. The results of the seek calls are not
   checked. */
off_t fpsize(FILE * fp) {
  off_t saved_pos, total;

  if (fp == NULL) {
    return -1;
  }
#ifdef HTS_FSEEKO
  saved_pos = ftello(fp);
  fseek(fp, 0, SEEK_END);
  total = ftello(fp);
  fseeko(fp, saved_pos, SEEK_SET);
#else
  saved_pos = ftell(fp);
  fseek(fp, 0, SEEK_END);
  total = ftell(fp);
  fseek(fp, saved_pos, SEEK_SET);
#endif
  return total;
}
4372 
4373 /* root dir, with ending / */
4374 typedef struct {
4375   char path[1024 + 4];
4376   int init;
4377 } hts_rootdir_strc;
hts_rootdir(char * file)4378 HTSEXT_API const char *hts_rootdir(char *file) {
4379   static hts_rootdir_strc strc = { "", 0 };
4380   if (file) {
4381     if (!strc.init) {
4382       strc.path[0] = '\0';
4383       strc.init = 1;
4384       if (strnotempty(file)) {
4385         const size_t file_len = strlen(file);
4386         char *a;
4387 
4388         assertf(file_len < sizeof(strc.path));
4389         strcpybuff(strc.path, file);
4390         while((a = strrchr(strc.path, '\\')))
4391           *a = '/';
4392         if ((a = strrchr(strc.path, '/'))) {
4393           *(a + 1) = '\0';
4394         } else
4395           strc.path[0] = '\0';
4396       }
4397       if (!strnotempty(strc.path)) {
4398         if (getcwd(strc.path, sizeof(strc.path)) == NULL)
4399           strc.path[0] = '\0';
4400         else
4401           strcatbuff(strc.path, "/");
4402       }
4403     }
4404     return NULL;
4405   } else if (strc.init)
4406     return strc.path;
4407   else
4408     return "";
4409 }
4410 
/* Global statistics structure. In this file, check_downloadable_bytes() reads
   its istat_* fields and HTS_TOTAL_RECV, and hts_read() adds the received
   bytes to HTS_TOTAL_RECV. */
HTSEXT_API hts_stat_struct HTS_STAT;
4412 
4413 //
4414 // return  number of downloadable bytes, depending on rate limiter
4415 // see engine_stats() routine, too
4416 // this routine works quite well for big files and regular ones, but apparently the rate limiter has
4417 // some problems with very small files (rate too high)
check_downloadable_bytes(int rate)4418 LLint check_downloadable_bytes(int rate) {
4419   if (rate > 0) {
4420     TStamp time_now;
4421     TStamp elapsed_useconds;
4422     LLint bytes_transferred_during_period;
4423     LLint left;
4424 
4425     // get the older timer
4426     int id_timer = (HTS_STAT.istat_idlasttimer + 1) % 2;
4427 
4428     time_now = mtime_local();
4429     elapsed_useconds = time_now - HTS_STAT.istat_timestart[id_timer];
4430     // NO totally stupid - elapsed_useconds+=1000;      // for the next second, too
4431     bytes_transferred_during_period =
4432       (HTS_STAT.HTS_TOTAL_RECV - HTS_STAT.istat_bytes[id_timer]);
4433 
4434     left = ((rate * elapsed_useconds) / 1000) - bytes_transferred_during_period;
4435     if (left <= 0)
4436       left = 0;
4437 
4438     return left;
4439   } else
4440     return TAILLE_BUFFER;
4441 }
4442 
//
// 0 : OK
// 1 : slow down
// Disabled code (#if 0): kept for reference only, never compiled.
// As written, it returns 1 whenever HTS_STAT.HTS_TOTAL_RECV_STATE is set and
// 0 otherwise; the throttling logic itself is commented out.
#if 0
int HTS_TOTAL_RECV_CHECK(int var) {
  if (HTS_STAT.HTS_TOTAL_RECV_STATE)
    return 1;
  /*
     {
     if (HTS_STAT.HTS_TOTAL_RECV_STATE==3) {
     var = min(var,32);
     Sleep(250);
     } else if (HTS_STAT.HTS_TOTAL_RECV_STATE==2) {
     var = min(var,256);
     Sleep(100);
     } else {
     var/=2;
     if (var<=0) var=1;
     Sleep(50);
     }
     }
   */
  return 0;
}
#endif
4468 
// Read at most 'size' bytes into 'buff', using the source described by 'r'
// (htsblk structure): the FILE r->fp when r->is_file is set, otherwise the
// socket r->soc (through SSL_read when HTS_USEOPENSSL is enabled and r->ssl
// is set, through recv() otherwise).
// returns:
// >0 : data received
// == 0 : not yet data
// <0: error or no data: READ_ERROR, READ_EOF or READ_TIMEOUT
int hts_read(htsblk * r, char *buff, int size) {
  int retour;

  //  return read(soc,buff,size);
  if (r->is_file) {
#if HTS_WIDE_DEBUG
    DEBUG_W("read(%p, %d, %d)\n" _(void *)buff _(int) size _(int) r->fp);
#endif
    if (r->fp) {
      retour = (int) fread(buff, 1, size, r->fp);
      // a read of 0 bytes (including a plain end of file) is reported as an error
      if (retour == 0)  // can happen with directories (!)
        retour = READ_ERROR;
    } else
      retour = READ_ERROR;
  } else {
#if HTS_WIDE_DEBUG
    DEBUG_W("recv(%d, %p, %d)\n" _(int) r->soc _(void *)buff _(int) size);

    if (r->soc == INVALID_SOCKET)
      printf("!!WIDE_DEBUG ERROR, soc==INVALID hts_read\n");
#endif
    //HTS_TOTAL_RECV_CHECK(size);         // Reduce if needed when too much data was received
#if HTS_USEOPENSSL
    if (r->ssl) {
      retour = SSL_read(r->ssl_con, buff, size);
      if (retour <= 0) {
        int err_code = SSL_get_error(r->ssl_con, retour);

        if ((err_code == SSL_ERROR_WANT_READ)
            || (err_code == SSL_ERROR_WANT_WRITE)
          ) {
          retour = 0;           /* no data yet (ssl cache) */
        } else if (err_code == SSL_ERROR_ZERO_RETURN) {
          retour = READ_EOF;    /* completed */
        } else {
          retour = READ_ERROR;  /* eof or error */
        }
      }
    } else {
#endif
      // plain socket: 0 means the peer closed the connection
      retour = recv(r->soc, buff, size, 0);
      if (retour == 0) {
        retour = READ_EOF;
      } else if (retour < 0) {
        retour = READ_ERROR;
      }
    }
    // NOTE(review): the brace above closes the non-SSL 'else' when
    // HTS_USEOPENSSL is enabled, but the whole socket branch when it is not.
    // The counter below therefore also runs for file reads in non-OpenSSL
    // builds - confirm whether this is intended.
    if (retour > 0)             // account for incoming bytes
      HTS_STAT.HTS_TOTAL_RECV += retour;
#if HTS_USEOPENSSL
  }
#endif
#if HTS_WIDE_DEBUG
  DEBUG_W("recv/read done (%d bytes)\n" _(int) retour);
#endif
  return retour;
}
4531 
4532 // -- Gestion cache DNS --
4533 // 'RX98
4534 
4535 // 'capsule' contenant uniquement le cache
hts_cache(httrackp * opt)4536 t_dnscache *hts_cache(httrackp * opt) {
4537   assertf(opt != NULL);
4538   if (opt->state.dns_cache == NULL) {
4539     opt->state.dns_cache = (t_dnscache *) malloct(sizeof(t_dnscache));
4540     memset(opt->state.dns_cache, 0, sizeof(t_dnscache));
4541   }
4542   assertf(opt->state.dns_cache != NULL);
4543   /* first entry is NULL */
4544   assertf(opt->state.dns_cache->iadr == NULL);
4545   return opt->state.dns_cache;
4546 }
4547 
4548 // Free DNS cache.
hts_cache_free(t_dnscache * const root)4549 void hts_cache_free(t_dnscache *const root) {
4550   if (root != NULL) {
4551     t_dnscache *cache;
4552     for(cache = root; cache != NULL; ) {
4553       t_dnscache *const next = cache->next;
4554       cache->next = NULL;
4555       freet(cache);
4556       cache = next;
4557     }
4558   }
4559 }
4560 
4561 // lock le cache dns pour tout opération d'ajout
4562 // plus prudent quand plusieurs threads peuvent écrire dedans..
4563 // -1: status? 0: libérer 1:locker
4564 
4565 // MUST BE LOCKED
4566 // routine pour le cache - retour optionnel à donner à chaque fois
4567 // NULL: nom non encore testé dans le cache
4568 // si h_length==0 alors le nom n'existe pas dans le dns
hts_ghbn(const t_dnscache * cache,const char * const iadr,SOCaddr * const addr)4569 static SOCaddr* hts_ghbn(const t_dnscache *cache, const char *const iadr, SOCaddr *const addr) {
4570   assertf(addr != NULL);
4571   assertf(iadr != NULL);
4572   if (*iadr == '\0') {
4573     return NULL;
4574   }
4575   /* first entry is empty */
4576   if (cache->iadr == NULL) {
4577     cache = cache->next;
4578   }
4579   for(; cache != NULL; cache = cache->next) {
4580     assertf(cache != NULL);
4581     assertf(cache->iadr != NULL);
4582     assertf(cache->iadr == (const char*) cache + sizeof(t_dnscache));
4583     if (strcmp(cache->iadr, iadr) == 0) {       // ok trouvé
4584       if (cache->host_length != 0) {     // entrée valide
4585         assertf(cache->host_length <= sizeof(cache->host_addr));
4586         SOCaddr_copyaddr2(*addr, cache->host_addr, cache->host_length);
4587         return addr;
4588       } else {                  // erreur dans le dns, déja vérifié
4589         SOCaddr_clear(*addr);
4590         return addr;
4591       }
4592     }
4593   }
4594   return NULL;
4595 }
4596 
/* Resolve 'hostname' without using the DNS cache.
   '*addr' receives the address (it is cleared on failure).
   Returns 'addr' when '*addr' ends up valid, NULL otherwise.
   error: (IPv6 build only) when not NULL, receives the gai_strerror() message
   if getaddrinfo() fails. */
static SOCaddr* hts_dns_resolve_nocache2_(const char *const hostname,
                                          SOCaddr *const addr,
                                          const char **error) {
#if HTS_INET6==0
  /* IPv4 resolver */
  struct hostent *const hp = gethostbyname(hostname);

  /* 'addr' is a pointer: the SOCaddr_* macros take the object itself, as
     everywhere else in this file */
  if (hp != NULL) {
    SOCaddr_copyaddr2(*addr, hp->h_addr_list[0], hp->h_length);
  } else {
    SOCaddr_clear(*addr);
  }
#else
  /* IPv6 resolver */
  struct addrinfo *res = NULL;
  struct addrinfo hints;
  int gerr;

  SOCaddr_clear(*addr);
  memset(&hints, 0, sizeof(hints));
  if (IPV6_resolver == 1)     // V4 only (for bogus V6 entries)
    hints.ai_family = PF_INET;
  else if (IPV6_resolver == 2)        // V6 only (for testing V6 only)
    hints.ai_family = PF_INET6;
  else                        // V4 + V6
    hints.ai_family = PF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;
  hints.ai_protocol = IPPROTO_TCP;
  if ( ( gerr = getaddrinfo(hostname, NULL, &hints, &res) ) == 0) {
    /* only the first result is used */
    if (res != NULL) {
      if (res->ai_addr != NULL && res->ai_addrlen != 0) {
        SOCaddr_copyaddr2(*addr, res->ai_addr, res->ai_addrlen);
      }
    }
  } else {
    if (error != NULL) {
      *error = gai_strerror(gerr);
    }
  }
  if (res) {
    freeaddrinfo(res);
  }
#endif

  return SOCaddr_is_valid(*addr) ? addr : NULL;
}
4646 
hts_dns_resolve_nocache2(const char * const hostname,SOCaddr * const addr,const char ** error)4647 HTSEXT_API SOCaddr* hts_dns_resolve_nocache2(const char *const hostname,
4648                                      SOCaddr *const addr, const char **error) {
4649   /* Protection */
4650   if (!strnotempty(hostname)) {
4651     return NULL;
4652   }
4653 
4654   /*
4655      Strip [] if any : [3ffe:b80:1234:1::1]
4656      The resolver doesn't seem to handle IP6 addresses in brackets
4657    */
4658   if ((hostname[0] == '[') && (hostname[strlen(hostname) - 1] == ']')) {
4659     SOCaddr *ret;
4660     size_t size = strlen(hostname);
4661     char *copy = malloct(size + 1);
4662     assertf(copy != NULL);
4663     copy[0] = '\0';
4664     strncat(copy, hostname + 1, size - 2);
4665     ret =  hts_dns_resolve_nocache2_(copy, addr, error);
4666     freet(copy);
4667     return ret;
4668   } else {
4669     return hts_dns_resolve_nocache2_(hostname, addr, error);
4670   }
4671 }
4672 
/* Same as hts_dns_resolve_nocache2(), without error message reporting
   (the 'error' argument is NULL). */
HTSEXT_API SOCaddr* hts_dns_resolve_nocache(const char *const hostname, SOCaddr *const addr) {
  return hts_dns_resolve_nocache2(hostname, addr, NULL);
}
4676 
/* Returns 1 when 'hostname' can be resolved (hts_dns_resolve_nocache returned
   an address), 0 otherwise. The resolved address itself is discarded. */
HTSEXT_API int check_hostname_dns(const char *const hostname) {
  SOCaddr buffer;
  return hts_dns_resolve_nocache(hostname, &buffer) != NULL;
}
4681 
4682 // Needs locking
4683 // cache dns interne à HTS // ** FREE A FAIRE sur la chaine
hts_dns_resolve_(httrackp * opt,const char * _iadr,SOCaddr * const addr,const char ** error)4684 static SOCaddr* hts_dns_resolve_(httrackp * opt, const char *_iadr,
4685                                  SOCaddr *const addr, const char **error) {
4686   char BIGSTK iadr[HTS_URLMAXSIZE * 2];
4687   t_dnscache *cache = hts_cache(opt);  // adresse du cache
4688   SOCaddr *sa;
4689 
4690   assertf(opt != NULL);
4691   assertf(_iadr != NULL);
4692   assertf(addr != NULL);
4693 
4694   strcpybuff(iadr, jump_identification_const(_iadr));
4695   // couper éventuel :
4696   {
4697     char *a;
4698 
4699     if ((a = jump_toport(iadr)))
4700       *a = '\0';
4701   }
4702 
4703   /* get IP from the dns cache */
4704   sa = hts_ghbn(cache, iadr, addr);
4705   if (sa != NULL) {
4706     return SOCaddr_is_valid(*sa) ? sa : NULL;
4707   } else {                      // non présent dans le cache dns, tester
4708     const size_t iadr_len = strlen(iadr) + 1;
4709 
4710     // find queue
4711     for(; cache->next != NULL; cache = cache->next) ;
4712 
4713 #if DEBUGDNS
4714     printf("resolving (not cached) %s\n", iadr);
4715 #endif
4716 
4717     sa = hts_dns_resolve_nocache2(iadr, addr, error);     // calculer IP host
4718 
4719 #if HTS_WIDE_DEBUG
4720     DEBUG_W("gethostbyname done\n");
4721 #endif
4722 
4723     /* attempt to store new entry */
4724     cache->next = malloct(sizeof(t_dnscache) + iadr_len);
4725     if (cache->next != NULL) {
4726       t_dnscache *const next = cache->next;
4727       char *const block = (char*) cache->next;
4728       char *const str = block + sizeof(t_dnscache);
4729       memcpy(str, iadr, iadr_len);
4730       next->iadr = str;
4731       if (sa != NULL) {
4732         next->host_length = SOCaddr_size(*sa);
4733         assertf(next->host_length <= sizeof(next->host_addr));
4734         memcpy(next->host_addr, &SOCaddr_sockaddr(*sa), next->host_length);
4735       } else {
4736         next->host_length = 0;      // non existant dans le dns
4737       }
4738       next->next = NULL;
4739       return sa;
4740     }
4741 
4742     /* return result if any */
4743     return sa;
4744   }                             // retour hp du cache
4745 }
4746 
/* Resolve '_iadr' through the DNS cache of 'opt': calls hts_dns_resolve_()
   while holding opt->state.lock, and returns its result. */
SOCaddr* hts_dns_resolve2(httrackp * opt, const char *_iadr, SOCaddr *const addr, const char **error) {
  SOCaddr *ret;
  hts_mutexlock(&opt->state.lock);
  ret = hts_dns_resolve_(opt, _iadr, addr, error);
  hts_mutexrelease(&opt->state.lock);
  return ret;
}
4754 
/* Same as hts_dns_resolve2(), without error message reporting
   (the 'error' argument is NULL). */
SOCaddr* hts_dns_resolve(httrackp * opt, const char *_iadr, SOCaddr *const addr) {
  return hts_dns_resolve2(opt, _iadr, addr, NULL);
}
4758 
4759 // --- Tracage des mallocs() ---
4760 #ifdef HTS_TRACE_MALLOC
// htsLocker() is compiled out here: the calls made by the tracing allocators
// expand to nothing.
//#define htsLocker(A, N) htsLocker(A, N)
#define htsLocker(A, N) do {} while(0)
// Head of the list of traced allocations; hts_xmalloc() links each new record
// in front of it (trmalloc.next).
static mlink trmalloc = { NULL, 0, 0, NULL };

// Identifier given to the next traced allocation (incremented by hts_xmalloc()).
static int trmalloc_id = 0;
// Passed to htsLocker(); never allocated, since hts_meminit() is empty.
static htsmutex *mallocMutex = NULL;
// Initialization hook called by the tracing allocators.
// Currently a no-op: the mutex creation is commented out.
static void hts_meminit(void) {
  //if (mallocMutex == NULL) {
  //  mallocMutex = calloc(sizeof(*mallocMutex), 1);
  //  htsLocker(mallocMutex, -999);
  //}
}
hts_malloc(size_t len)4773 void *hts_malloc(size_t len) {
4774   void *adr;
4775 
4776   hts_meminit();
4777   htsLocker(mallocMutex, 1);
4778   assertf(len > 0);
4779   adr = hts_xmalloc(len, 0);
4780   htsLocker(mallocMutex, 0);
4781   return adr;
4782 }
hts_calloc(size_t len,size_t len2)4783 void *hts_calloc(size_t len, size_t len2) {
4784   void *adr;
4785 
4786   hts_meminit();
4787   assertf(len > 0);
4788   assertf(len2 > 0);
4789   htsLocker(mallocMutex, 1);
4790   adr = hts_xmalloc(len, len2);
4791   htsLocker(mallocMutex, 0);
4792   memset(adr, 0, len * len2);
4793   return adr;
4794 }
/* Traced strdup(): returns a copy of 'str' allocated with hts_malloc().
   A NULL 'str' is duplicated as the empty string. */
void *hts_strdup(char *str) {
  const char *const source = (str != NULL) ? str : "";
  char *adr = (char *) hts_malloc(strlen(source) + 1);

  assertf(adr != NULL);
  strcpy(adr, source);
  return adr;
}
hts_xmalloc(size_t len,size_t len2)4803 void *hts_xmalloc(size_t len, size_t len2) {
4804   mlink *lnk = (mlink *) calloc(1, sizeof(mlink));
4805 
4806   assertf(lnk != NULL);
4807   assertf(len > 0);
4808   assertf(len2 >= 0);
4809   if (lnk) {
4810     void *r = NULL;
4811     int size, bsize = sizeof(t_htsboundary);
4812 
4813     if (len2)
4814       size = len * len2;
4815     else
4816       size = len;
4817     size += ((bsize - (size % bsize)) % bsize); /* check alignement */
4818     r = malloc(size + bsize * 2);
4819     assertf(r != NULL);
4820     if (r) {
4821       *((t_htsboundary *) ((char *) r))
4822         = *((t_htsboundary *) ((char *) r + size + bsize))
4823         = htsboundary;
4824       ((char *) r) += bsize;    /* boundary */
4825       lnk->adr = r;
4826       lnk->len = size;
4827       lnk->id = trmalloc_id++;
4828       lnk->next = trmalloc.next;
4829       trmalloc.next = lnk;
4830       return r;
4831     } else {
4832       free(lnk);
4833     }
4834   }
4835   return NULL;
4836 }
hts_free(void * adr)4837 void hts_free(void *adr) {
4838   mlink *lnk = &trmalloc;
4839   int bsize = sizeof(t_htsboundary);
4840 
4841   assertf(adr != NULL);
4842   if (!adr) {
4843     return;
4844   }
4845   htsLocker(mallocMutex, 1);
4846   while(lnk->next != NULL) {
4847     if (lnk->next->adr == adr) {
4848       mlink *blk_free = lnk->next;
4849 
4850       assertf(blk_free->id != -1);
4851       assertf(*((t_htsboundary *) ((char *) adr - bsize)) == htsboundary);
4852       assertf(*((t_htsboundary *) ((char *) adr + blk_free->len)) ==
4853               htsboundary);
4854       lnk->next = lnk->next->next;
4855       free((void *) blk_free);
4856       //blk_free->id=-1;
4857       free((char *) adr - bsize);
4858       htsLocker(mallocMutex, 0);
4859       return;
4860     }
4861     lnk = lnk->next;
4862     assertf(lnk->next != NULL);
4863   }
4864   free(adr);
4865   htsLocker(mallocMutex, 0);
4866 }
hts_realloc(void * adr,size_t len)4867 void *hts_realloc(void *adr, size_t len) {
4868   int bsize = sizeof(t_htsboundary);
4869 
4870   len += ((bsize - (len % bsize)) % bsize);     /* check alignement */
4871   if (adr != NULL) {
4872     mlink *lnk = &trmalloc;
4873 
4874     htsLocker(mallocMutex, 1);
4875     while(lnk->next != NULL) {
4876       if (lnk->next->adr == adr) {
4877         {
4878           mlink *blk_free = lnk->next;
4879 
4880           assertf(blk_free->id != -1);
4881           assertf(*((t_htsboundary *) ((char *) adr - bsize)) == htsboundary);
4882           assertf(*((t_htsboundary *) ((char *) adr + blk_free->len)) ==
4883                   htsboundary);
4884         }
4885         adr = realloc((char *) adr - bsize, len + bsize * 2);
4886         assertf(adr != NULL);
4887         lnk->next->adr = (char *) adr + bsize;
4888         lnk->next->len = len;
4889         *((t_htsboundary *) ((char *) adr))
4890           = *((t_htsboundary *) ((char *) adr + len + bsize))
4891           = htsboundary;
4892         htsLocker(mallocMutex, 0);
4893         return (char *) adr + bsize;
4894       }
4895       lnk = lnk->next;
4896       assertf(lnk->next != NULL);
4897     }
4898     htsLocker(mallocMutex, 0);
4899   }
4900   return hts_malloc(len);
4901 }
hts_find(char * adr)4902 mlink *hts_find(char *adr) {
4903   char *stkframe = (char *) &stkframe;
4904   mlink *lnk = &trmalloc;
4905   int bsize = sizeof(t_htsboundary);
4906 
4907   assertf(adr != NULL);
4908   if (!adr) {
4909     return NULL;
4910   }
4911   htsLocker(mallocMutex, 1);
4912   while(lnk->next != NULL) {
4913     if (adr >= lnk->next->adr && adr <= lnk->next->adr + lnk->next->len) {      /* found */
4914       htsLocker(mallocMutex, 0);
4915       return lnk->next;
4916     }
4917     lnk = lnk->next;
4918   }
4919   htsLocker(mallocMutex, 0);
4920   {
4921     int depl = (int) (adr - stkframe);
4922 
4923     if (depl < 0)
4924       depl = -depl;
4925     //assertf(depl < 512000);   /* near the stack frame.. doesn't look like malloc but stack variable */
4926     return NULL;
4927   }
4928 }
4929 
4930 // check the malloct() and calloct() trace stack
hts_freeall(void)4931 void hts_freeall(void) {
4932   int bsize = sizeof(t_htsboundary);
4933 
4934   while(trmalloc.next) {
4935 #if MEMDEBUG
4936     printf("* block %d\t not released: at %d\t (%d\t bytes)\n",
4937            trmalloc.next->id, trmalloc.next->adr, trmalloc.next->len);
4938 #endif
4939     if (trmalloc.next->id != -1) {
4940       free((char *) trmalloc.next->adr - bsize);
4941     }
4942   }
4943 }
4944 #endif
4945 
// -- miscellaneous --
4947 
/* Splits 'fullpath' into a directory part and a project name.
   - 'fullpath' is modified in place: one trailing '/' or '\\' is removed
     and every '\\' is replaced by '/'.
   - 'pname' receives the last path component.
   - 'path' receives everything before it (trailing '/' included).
   Both outputs are left empty when 'fullpath' is empty or is reduced to
   at most one character. The caller provides large enough buffers. */
void cut_path(char *fullpath, char *path, char *pname) {
  path[0] = pname[0] = '\0';
  if (strnotempty(fullpath)) {
    size_t len = strlen(fullpath);

    /* drop a single trailing separator */
    if (fullpath[len - 1] == '/' || fullpath[len - 1] == '\\') {
      fullpath[--len] = '\0';
    }
    if (len > 1) {
      char *a;

      /* normalize separators in a single pass */
      for(a = fullpath; *a != '\0'; a++) {
        if (*a == '\\') {
          *a = '/';
        }
      }

      /* scan backwards, starting before the last character */
      a = fullpath + len - 2;
      while((*a != '/') && (a > fullpath))
        a--;
      if (*a == '/')
        a++;
      strcpybuff(pname, a);
      strncatbuff(path, fullpath, (int) (a - fullpath));
    }
  }
}
4971 
// -- FTP protocol handling --
4973 
/* Tells whether FTP transfers are available: always 1, on Windows and
   non-Windows builds alike. */
int ftp_available(void) {
  return 1;                     // ok!
}
4984 
4985 static void hts_debug_log_print(const char *format, ...);
4986 
4987 static int hts_dgb_init = 0;
4988 static FILE *hts_dgb_init_fp = NULL;
hts_debug(int level)4989 HTSEXT_API void hts_debug(int level) {
4990   hts_dgb_init = level;
4991   if (hts_dgb_init > 0) {
4992     hts_debug_log_print("hts_debug() called");
4993   }
4994 }
4995 
hts_dgb_(void)4996 static FILE *hts_dgb_(void) {
4997   if (hts_dgb_init_fp == NULL) {
4998     if ((hts_dgb_init & 0x80) == 0) {
4999       hts_dgb_init_fp = stderr;
5000     } else {
5001       hts_dgb_init_fp = FOPEN("hts-debug.txt", "wb");
5002       if (hts_dgb_init_fp != NULL) {
5003         fprintf(hts_dgb_init_fp, "* Creating file\r\n");
5004       }
5005     }
5006   }
5007   return hts_dgb_init_fp;
5008 }
5009 
hts_debug_log_print(const char * format,...)5010 static void hts_debug_log_print(const char *format, ...) {
5011   if (hts_dgb_init > 0) {
5012     const int error = errno;
5013     FILE *const fp = hts_dgb_();
5014     va_list args;
5015 
5016     assertf(format != NULL);
5017     va_start(args, format);
5018     (void) vfprintf(fp, format, args);
5019     va_end(args);
5020     fputs("\n", fp);
5021     fflush(fp);
5022     errno = error;
5023   }
5024 }
5025 
hts_version(void)5026 HTSEXT_API const char* hts_version(void) {
5027   return HTTRACK_VERSIONID;
5028 }
5029 
/* Heartbleed (CVE-2014-0160) version check.
   On _WIN32 builds, returns 1 when 'version' starts with "OpenSSL 1.0.1"
   immediately followed by a space or by a letter 'a'..'f'
   (e.g. "OpenSSL 1.0.1g 7 Apr 2014" is not flagged), 0 otherwise.
   On other builds, always returns 0. */
static int ssl_vulnerable(const char *version) {
#ifdef _WIN32
  static const char prefix[] = "OpenSSL 1.0.1";
  const size_t prefix_len = sizeof(prefix) - 1;

  if (version != NULL && strncmp(version, prefix, prefix_len) == 0) {
    /* character right after the "1.0.1" part */
    const char letter = version[prefix_len];

    if (letter == ' ') {
      return 1;
    }
    return letter >= 'a' && letter <= 'f';
  }
  return 0;
#else
  (void) version;
  return 0;
#endif
}
5043 
5044 /* user abort callback */
5045 htsErrorCallback htsCallbackErr = NULL;
5046 
hts_set_error_callback(htsErrorCallback handler)5047 HTSEXT_API void hts_set_error_callback(htsErrorCallback handler) {
5048   htsCallbackErr = handler;
5049 }
5050 
hts_get_error_callback(void)5051 HTSEXT_API htsErrorCallback hts_get_error_callback(void) {
5052   return htsCallbackErr;
5053 }
5054 
/* Hashtable assertion hook: forwards the failed expression and its
   location to abortf_(). 'arg' is not used. */
static void default_coucal_asserthandler(void *arg, const char* exp, const char* file, int line) {
  (void) arg;
  abortf_(exp, file, line);
}
5058 
/* Maps a hashtable (coucal) log level to the matching LOG_* level;
   any level not listed below maps to LOG_ERROR. */
static int get_loglevel_from_coucal(coucal_loglevel level) {
  int mapped = LOG_ERROR;

  switch(level) {
  case coucal_log_critical:
    mapped = LOG_PANIC;
    break;
  case coucal_log_warning:
    mapped = LOG_WARNING;
    break;
  case coucal_log_info:
    mapped = LOG_INFO;
    break;
  case coucal_log_debug:
    mapped = LOG_DEBUG;
    break;
  case coucal_log_trace:
    mapped = LOG_TRACE;
    break;
  default:
    break;
  }
  return mapped;
}
5081 
5082 /* log to default console */
default_coucal_loghandler(void * arg,coucal_loglevel level,const char * format,va_list args)5083 static void default_coucal_loghandler(void *arg, coucal_loglevel level,
5084                                        const char* format, va_list args) {
5085 
5086   if (level <= coucal_log_warning) {
5087     fprintf(stderr, "** warning: ");
5088   }
5089   vfprintf(stderr, format, args);
5090   fprintf(stderr, "\n");
5091 }
5092 
5093 /* log to project log */
htsopt_coucal_loghandler(void * arg,coucal_loglevel level,const char * format,va_list args)5094 static void htsopt_coucal_loghandler(void *arg, coucal_loglevel level,
5095                                       const char* format, va_list args) {
5096   httrackp *const opt = (httrackp*) arg;
5097   if (opt != NULL && opt->log != NULL) {
5098     hts_log_vprint(opt, get_loglevel_from_coucal(level),
5099       format, args);
5100   } else {
5101     default_coucal_loghandler(NULL, level, format, args);
5102   }
5103 }
5104 
5105 /* attach hashtable logger to project log */
hts_set_hash_handler(coucal hashtable,httrackp * opt)5106 void hts_set_hash_handler(coucal hashtable, httrackp *opt) {
5107   /* Init hashtable default assertion handler. */
5108   coucal_set_assert_handler(hashtable,
5109     htsopt_coucal_loghandler,
5110     default_coucal_asserthandler,
5111     opt);
5112 }
5113 
5114 static int hts_init_ok = 0;
hts_init(void)5115 HTSEXT_API int hts_init(void) {
5116   const char *dbg_env;
5117 
5118   /* */
5119   if (hts_init_ok)
5120     return 1;
5121   hts_init_ok = 1;
5122 
5123   /* enable debugging ? */
5124   dbg_env = getenv("HTS_LOG");
5125   if (dbg_env != NULL && *dbg_env != 0) {
5126     int level = 0;
5127 
5128     if (sscanf(dbg_env, "%d", &level) == 1) {
5129       hts_debug(level);
5130     }
5131   }
5132 
5133   hts_debug_log_print("entering hts_init()");   /* debug */
5134 
5135   /* Init hashtable default assertion handler. */
5136   coucal_set_global_assert_handler(default_coucal_loghandler,
5137     default_coucal_asserthandler);
5138 
5139   /* Init threads (lazy init) */
5140   htsthread_init();
5141 
5142   /* Ensure external modules are loaded */
5143   hts_debug_log_print("calling htspe_init()");  /* debug */
5144   htspe_init();                 /* module load (lazy) */
5145 
5146   /* MD5 Auto-test */
5147   {
5148     char digest[32 + 2];
5149     const char *atest = "MD5 Checksum Autotest";
5150 
5151     digest[0] = '\0';
5152     domd5mem(atest, strlen(atest), digest, 1);  /* a42ec44369da07ace5ec1d660ba4a69a */
5153     if (strcmp(digest, "a42ec44369da07ace5ec1d660ba4a69a") != 0) {
5154       int fatal_broken_md5 = 0;
5155 
5156       assertf(fatal_broken_md5);
5157     }
5158   }
5159 
5160   hts_debug_log_print("initializing SSL");      /* debug */
5161 #if HTS_USEOPENSSL
5162   /*
5163      Initialize the OpensSSL library
5164    */
5165   if (!openssl_ctx) {
5166     const char *version;
5167 
5168     SSL_load_error_strings();
5169     SSL_library_init();
5170 
5171     // Check CVE-2014-0160.
5172     version = SSLeay_version(SSLEAY_VERSION);
5173     if (ssl_vulnerable(version)) {
5174       fprintf(stderr,
5175               "SSLeay_version(SSLEAY_VERSION) == '%s'\n", version);
5176       abortLog("unable to initialize TLS: OpenSSL version seems vulnerable to heartbleed bug (CVE-2014-0160)");
5177       assertf("OpenSSL version seems vulnerable to heartbleed bug (CVE-2014-0160)" == NULL);
5178     }
5179 
5180     // OpenSSL_add_all_algorithms();
5181     openssl_ctx = SSL_CTX_new(SSLv23_client_method());
5182     if (!openssl_ctx) {
5183       fprintf(stderr,
5184               "fatal: unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)\n");
5185       abortLog("unable to initialize TLS: SSL_CTX_new(SSLv23_client_method)");
5186       assertf("unable to initialize TLS" == NULL);
5187     }
5188   }
5189 #endif
5190 
5191   hts_debug_log_print("ending hts_init()");     /* debug */
5192   return 1;
5193 }
5194 
5195 /* will not free thread env. */
hts_uninit(void)5196 HTSEXT_API int hts_uninit(void) {
5197   /* hts_init() is a lazy initializer, with limited a allocation (one or two mutexes) ;
5198      we won't free anything here as the .h semantic was never being very clear */
5199   return 1;
5200 }
5201 
hts_uninit_module(void)5202 HTSEXT_API int hts_uninit_module(void) {
5203   if (!hts_init_ok)
5204     return 1;
5205   htsthread_uninit();
5206   htspe_uninit();
5207   hts_init_ok = 0;
5208   return 1;
5209 }
5210 
5211 // legacy. do not use
hts_log(httrackp * opt,const char * prefix,const char * msg)5212 HTSEXT_API int hts_log(httrackp * opt, const char *prefix, const char *msg) {
5213   if (opt->log != NULL) {
5214     fspc(opt, opt->log, prefix);
5215     fprintf(opt->log, "%s" LF, msg);
5216     return 0;
5217   }
5218   return 1;                     /* Error */
5219 }
5220 
5221 static void (*hts_log_print_callback)(httrackp * opt, int type, const char *format, va_list args) = NULL;
5222 
hts_set_log_vprint_callback(void (* callback)(httrackp * opt,int type,const char * format,va_list args))5223 HTSEXT_API void hts_set_log_vprint_callback(void (*callback)(httrackp * opt,
5224                                             int type, const char *format, va_list args)) {
5225   hts_log_print_callback = callback;
5226 }
5227 
hts_log_vprint(httrackp * opt,int type,const char * format,va_list args)5228 HTSEXT_API void hts_log_vprint(httrackp * opt, int type, const char *format, va_list args) {
5229   assertf(format != NULL);
5230   if (hts_log_print_callback != NULL) {
5231     va_list args_copy;
5232     va_copy(args_copy, args);
5233     hts_log_print_callback(opt, type, format, args);
5234     va_end(args_copy);
5235   }
5236   if (opt != NULL && opt->log != NULL) {
5237     const int save_errno = errno;
5238     const char *s_type = "unknown";
5239     const int level = type & 0xff;
5240 
5241     // Check log level
5242     if (opt->debug < level) {
5243       return;
5244     }
5245 
5246     switch (level) {
5247     case LOG_TRACE:
5248       s_type = "trace";
5249       break;
5250     case LOG_DEBUG:
5251       s_type = "debug";
5252       break;
5253     case LOG_INFO:
5254       s_type = "info";
5255       break;
5256     case LOG_NOTICE:
5257     case LOG_WARNING:
5258       s_type = "warning";
5259       break;
5260     case LOG_ERROR:
5261       s_type = "error";
5262       break;
5263     case LOG_PANIC:
5264       s_type = "panic";
5265       break;
5266     }
5267     fspc(opt, opt->log, s_type);
5268     (void) vfprintf(opt->log, format, args);
5269     if ((type & LOG_ERRNO) != 0) {
5270       fprintf(opt->log, ": %s", strerror(save_errno));
5271     }
5272     fputs(LF, opt->log);
5273     if (opt->flush) {
5274       fflush(opt->log);
5275     }
5276     errno = save_errno;
5277   }
5278 }
5279 
hts_log_print(httrackp * opt,int type,const char * format,...)5280 HTSEXT_API void hts_log_print(httrackp * opt, int type, const char *format, ...) {
5281   va_list args;
5282   assertf(format != NULL);
5283   va_start(args, format);
5284   hts_log_vprint(opt, type, format, args);
5285   va_end(args);
5286 }
5287 
set_wrappers(httrackp * opt)5288 HTSEXT_API void set_wrappers(httrackp * opt) {  // LEGACY
5289 }
5290 
plug_wrapper(httrackp * opt,const char * moduleName,const char * argv)5291 HTSEXT_API int plug_wrapper(httrackp * opt, const char *moduleName,
5292                             const char *argv) {
5293   void *handle = openFunctionLib(moduleName);
5294 
5295   if (handle != NULL) {
5296     t_hts_plug plug = (t_hts_plug) getFunctionPtr(handle, "hts_plug");
5297     t_hts_unplug unplug = (t_hts_unplug) getFunctionPtr(handle, "hts_unplug");
5298 
5299     if (plug != NULL) {
5300       int ret = plug(opt, argv);
5301 
5302       if (hts_dgb_init > 0 && opt->log != NULL) {
5303         hts_debug_log_print("plugged module '%s' (return code=%d)", moduleName,
5304                             ret);
5305       }
5306       if (ret == 1) {           /* Success! */
5307         opt->libHandles.handles =
5308           (htslibhandle *) realloct(opt->libHandles.handles,
5309                                     (opt->libHandles.count +
5310                                      1) * sizeof(htslibhandle));
5311         opt->libHandles.handles[opt->libHandles.count].handle = handle;
5312         opt->libHandles.handles[opt->libHandles.count].moduleName =
5313           strdupt(moduleName);
5314         opt->libHandles.count++;
5315         return 1;
5316       } else {
5317         hts_debug_log_print
5318           ("* note: error while running entry point 'hts_plug' in %s",
5319            moduleName);
5320         if (unplug)
5321           unplug(opt);
5322       }
5323     } else {
5324       int last_errno = errno;
5325 
5326       hts_debug_log_print("* note: can't find entry point 'hts_plug' in %s: %s",
5327                           moduleName, strerror(last_errno));
5328     }
5329     closeFunctionLib(handle);
5330     return 0;
5331   } else {
5332     int last_errno = errno;
5333 
5334     hts_debug_log_print("* note: can't load %s: %s", moduleName,
5335                         strerror(last_errno));
5336   }
5337   return -1;
5338 }
5339 
/* Unplugs and closes every module recorded in opt->libHandles: calls
   each module's "hts_unplug" entry point when present, closes the
   library, frees the stored module name, then frees the table itself
   and resets the count to 0. Does nothing when the table is NULL. */
static void unplug_wrappers(httrackp * opt) {
  int i;

  if (opt->libHandles.handles == NULL) {
    return;
  }

  for(i = 0; i < opt->libHandles.count; i++) {
    if (opt->libHandles.handles[i].handle != NULL) {
      /* hts_unplug(), the dll exit point (finalizer) */
      const t_hts_unplug unplug =
        (t_hts_unplug) getFunctionPtr(opt->libHandles.handles[i].handle,
                                      "hts_unplug");

      if (unplug != NULL) {
        unplug(opt);
      }
      closeFunctionLib(opt->libHandles.handles[i].handle);
      opt->libHandles.handles[i].handle = NULL;
    }
    if (opt->libHandles.handles[i].moduleName != NULL) {
      freet(opt->libHandles.handles[i].moduleName);
      opt->libHandles.handles[i].moduleName = NULL;
    }
  }

  freet(opt->libHandles.handles);
  opt->libHandles.handles = NULL;
  opt->libHandles.count = 0;
}
5365 
multipleStringMatch(const char * s,const char * match)5366 int multipleStringMatch(const char *s, const char *match) {
5367   int ret = 0;
5368   String name = STRING_EMPTY;
5369 
5370   if (match == NULL || s == NULL || *s == 0)
5371     return 0;
5372   for(; *match != 0; match++) {
5373     StringClear(name);
5374     for(; *match != 0 && *match != '\n'; match++) {
5375       StringAddchar(name, *match);
5376     }
5377     if (StringLength(name) > 0 && strstr(s, StringBuff(name)) != NULL) {
5378       ret = 1;
5379       break;
5380     }
5381   }
5382   StringFree(name);
5383   return ret;
5384 }
5385 
hts_create_opt(void)5386 HTSEXT_API httrackp *hts_create_opt(void) {
5387 #if ( defined(_WIN32) || defined(__ANDROID__) )
5388   static const char *defaultModules[] = {
5389     "htsswf", "htsjava", "httrack-plugin", NULL
5390   };
5391 #else
5392   static const char *defaultModules[] = {
5393     "libhtsswf.so.1", "libhtsjava.so.2", "httrack-plugin", NULL
5394   };
5395 #endif
5396   httrackp *opt = malloc(sizeof(httrackp));
5397 
5398   /* default options */
5399   memset(opt, 0, sizeof(httrackp));
5400   opt->size_httrackp = sizeof(httrackp);
5401 
5402   /* mutexes */
5403   hts_mutexinit(&opt->state.lock);
5404 
5405   /* custom wrappers */
5406   opt->libHandles.count = 0;
5407 
5408   /* default settings */
5409 
5410   opt->wizard = 2;              // wizard automatique
5411   opt->quiet = 0;               // questions
5412   //
5413   opt->travel = 0;              // même adresse
5414   opt->depth = 9999;            // mirror total par défaut
5415   opt->extdepth = 0;            // mais pas à l'extérieur
5416   opt->seeker = 1;              // down
5417   opt->urlmode = 2;             // relatif par défaut
5418   opt->no_type_change = 0;      // change file types
5419   opt->debug = LOG_NOTICE;      // small log
5420   opt->getmode = 3;             // linear scan
5421   opt->maxsite = -1;            // taille max site (aucune)
5422   opt->maxfile_nonhtml = -1;    // taille max fichier non html
5423   opt->maxfile_html = -1;       // idem pour html
5424   opt->maxsoc = 4;              // nbre socket max
5425   opt->fragment = -1;           // pas de fragmentation
5426   opt->nearlink = 0;            // ne pas prendre les liens non-html "adjacents"
5427   opt->makeindex = 1;           // faire un index
5428   opt->kindex = 0;              // index 'keyword'
5429   opt->delete_old = 1;          // effacer anciens fichiers
5430   opt->background_on_suspend = 1;       // Background the process if Control Z calls signal suspend.
5431   opt->makestat = 0;            // pas de fichier de stats
5432   opt->maketrack = 0;           // ni de tracking
5433   opt->timeout = 120;           // timeout par défaut (2 minutes)
5434   opt->cache = 1;               // cache prioritaire
5435   opt->shell = 0;               // pas de shell par defaut
5436   opt->proxy.active = 0;        // pas de proxy
5437   opt->user_agent_send = 1;     // envoyer un user-agent
5438   StringCopy(opt->user_agent,
5439              "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)");
5440   StringCopy(opt->referer, "");
5441   StringCopy(opt->from, "");
5442   opt->savename_83 = 0;         // noms longs par défaut
5443   opt->savename_type = 0;       // avec structure originale
5444   opt->savename_delayed = 2;    // hard delayed type (default)
5445   opt->delayed_cached = 1;      // cached delayed type (default)
5446   opt->mimehtml = 0;            // pas MIME-html
5447   opt->parsejava = HTSPARSE_DEFAULT;    // parser classes
5448   opt->hostcontrol = 0;         // PAS de control host pour timeout et traffic jammer
5449   opt->retry = 2;               // 2 retry par défaut
5450   opt->errpage = 1;             // copier ou générer une page d'erreur en cas d'erreur (404 etc.)
5451   opt->check_type = 1;          // vérifier type si inconnu (cgi,asp..) SAUF / considéré comme html
5452   opt->all_in_cache = 0;        // ne pas tout stocker en cache
5453   opt->robots = 2;              // traiter les robots.txt
5454   opt->external = 0;            // liens externes normaux
5455   opt->passprivacy = 0;         // mots de passe dans les fichiers
5456   opt->includequery = 1;        // include query-string par défaut
5457   opt->mirror_first_page = 0;   // pas mode mirror links
5458   opt->accept_cookie = 1;       // gérer les cookies
5459   opt->cookie = NULL;
5460   opt->http10 = 0;              // laisser http/1.1
5461   opt->nokeepalive = 0;         // pas keep-alive
5462   opt->nocompression = 0;       // pas de compression
5463   opt->tolerant = 0;            // ne pas accepter content-length incorrect
5464   opt->parseall = 1;            // tout parser (tags inconnus, par exemple)
5465   opt->parsedebug = 0;          // pas de mode débuggage
5466   opt->norecatch = 0;           // ne pas reprendre les fichiers effacés par l'utilisateur
5467   opt->verbosedisplay = 0;      // pas d'animation texte
5468   opt->sizehack = 0;            // size hack
5469   opt->urlhack = 1;             // url hack (normalizer)
5470   StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
5471   opt->ftp_proxy = 1;           // proxy http pour ftp
5472   opt->convert_utf8 = 1;        // convert html to UTF-8
5473   StringCopy(opt->filelist, "");
5474   StringCopy(opt->lang_iso, "en, *");
5475   StringCopy(opt->accept,
5476     "text/html,image/png,image/jpeg,image/pjpeg,image/x-xbitmap,image/svg+xml,image/gif;q=0.9,*/*;q=0.1");
5477   StringCopy(opt->headers, "");
5478   StringCopy(opt->mimedefs, "\n");      // aucun filtre mime (\n IMPORTANT)
5479   StringClear(opt->mod_blacklist);
5480   //
5481   opt->log = stdout;
5482   opt->errlog = stderr;
5483   opt->flush = 1;               // flush sur les fichiers log
5484   //opt->aff_progress=0;
5485   opt->keyboard = 0;
5486   //
5487   StringCopy(opt->path_html, "");
5488   StringCopy(opt->path_html_utf8, "");
5489   StringCopy(opt->path_log, "");
5490   StringCopy(opt->path_bin, "");
5491   //
5492   opt->maxlink = 100000;        // 100,000 liens max par défaut
5493   opt->maxfilter = 200;         // 200 filtres max par défaut
5494   opt->maxcache = 1048576 * 32; // a peu près 32Mo en cache max -- OPTION NON PARAMETRABLE POUR L'INSTANT --
5495   //opt->maxcache_anticipate=256;  // maximum de liens à anticiper
5496   opt->maxtime = -1;            // temps max en secondes
5497   opt->maxrate = 25000;         // taux maxi
5498   opt->maxconn = 5.0;           // nombre connexions/s
5499   opt->waittime = -1;           // wait until.. hh*3600+mm*60+ss
5500   //
5501   opt->exec = "";
5502   opt->is_update = 0;           // not an update (yet)
5503   opt->dir_topindex = 0;        // do not built top index (yet)
5504   //
5505   opt->bypass_limits = 0;       // enforce limits by default
5506   opt->state.stop = 0;          // stopper
5507   opt->state.exit_xh = 0;       // abort
5508   //
5509   opt->state.is_ended = 0;
5510 
5511   /* Alocated buffers */
5512 
5513   opt->callbacks_fun =
5514     (t_hts_htmlcheck_callbacks *) malloct(sizeof(t_hts_htmlcheck_callbacks));
5515   memset(opt->callbacks_fun, 0, sizeof(t_hts_htmlcheck_callbacks));
5516 
5517   /* Preload callbacks : java and flash parser, and the automatic user-defined callback */
5518 
5519   {
5520     int i;
5521 
5522     for(i = 0; defaultModules[i] != NULL; i++) {
5523       int ret = plug_wrapper(opt, defaultModules[i], defaultModules[i]);
5524 
5525       if (ret == 0) {           /* Module aborted initialization */
5526         /* Ignored. */
5527       }
5528     }
5529   }
5530 
5531   return opt;
5532 }
5533 
hts_sizeof_opt(void)5534 HTSEXT_API size_t hts_sizeof_opt(void) {
5535   return sizeof(httrackp);
5536 }
5537 
hts_free_opt(httrackp * opt)5538 HTSEXT_API void hts_free_opt(httrackp * opt) {
5539   if (opt != NULL) {
5540 
5541     /* Alocated callbacks */
5542 
5543     if (opt->callbacks_fun != NULL) {
5544       int i;
5545       t_hts_htmlcheck_callbacks_item *items =
5546         (t_hts_htmlcheck_callbacks_item *) opt->callbacks_fun;
5547       const int size =
5548         (int) sizeof(t_hts_htmlcheck_callbacks) /
5549         sizeof(t_hts_htmlcheck_callbacks_item);
5550       assertf(sizeof(t_hts_htmlcheck_callbacks_item) * size ==
5551               sizeof(t_hts_htmlcheck_callbacks));
5552 
5553       /* Free all linked lists */
5554       for(i = 0; i < size; i++) {
5555         t_hts_callbackarg *carg, *next_carg;
5556 
5557         for(carg = items[i].carg;
5558             carg != NULL && (next_carg = carg->prev.carg, carg != NULL);
5559             carg = next_carg) {
5560           hts_free(carg);
5561         }
5562       }
5563 
5564       freet(opt->callbacks_fun);
5565       opt->callbacks_fun = NULL;
5566     }
5567 
5568     /* Close library handles */
5569     unplug_wrappers(opt);
5570 
5571     /* Cache */
5572     if (opt->state.dns_cache != NULL) {
5573       t_dnscache *root;
5574 
5575       hts_mutexlock(&opt->state.lock);
5576       root = opt->state.dns_cache;
5577       opt->state.dns_cache = NULL;
5578       hts_mutexrelease(&opt->state.lock);
5579 
5580       hts_cache_free(root);
5581     }
5582 
5583     /* Cancel chain */
5584     if (opt->state.cancel != NULL) {
5585       htsoptstatecancel *cancel;
5586 
5587       for(cancel = opt->state.cancel; cancel != NULL;) {
5588         htsoptstatecancel *next = cancel->next;
5589 
5590         if (cancel->url != NULL) {
5591           freet(cancel->url);
5592         }
5593         freet(cancel);
5594         cancel = next;
5595       }
5596       opt->state.cancel = NULL;
5597     }
5598 
5599     /* Free strings */
5600 
5601     StringFree(opt->proxy.name);
5602     StringFree(opt->proxy.bindhost);
5603 
5604     StringFree(opt->savename_userdef);
5605     StringFree(opt->user_agent);
5606     StringFree(opt->referer);
5607     StringFree(opt->from);
5608     StringFree(opt->lang_iso);
5609     StringFree(opt->sys_com);
5610     StringFree(opt->mimedefs);
5611     StringFree(opt->filelist);
5612     StringFree(opt->urllist);
5613     StringFree(opt->footer);
5614     StringFree(opt->mod_blacklist);
5615 
5616     StringFree(opt->path_html);
5617     StringFree(opt->path_html_utf8);
5618     StringFree(opt->path_log);
5619     StringFree(opt->path_bin);
5620 
5621     /* mutexes */
5622     hts_mutexfree(&opt->state.lock);
5623 
5624     /* Free structure */
5625     free(opt);
5626   }
5627 }
5628 
5629 // TEMPORARY - PUT THIS STRUCTURE INSIDE httrackp !
hts_get_stats(httrackp * opt)5630 const hts_stat_struct* hts_get_stats(httrackp * opt) {
5631   if (opt == NULL) {
5632     return NULL;
5633   }
5634 
5635   HTS_STAT.stat_nsocket = 0;
5636   HTS_STAT.stat_errors = fspc(opt, NULL, "error");
5637   HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
5638   HTS_STAT.stat_infos = fspc(opt, NULL, "info");
5639   HTS_STAT.nbk = 0;
5640   HTS_STAT.nb = 0;
5641 
5642   return &HTS_STAT;
5643 }
5644 
5645 // defaut wrappers
htsdefault_init(t_hts_callbackarg * carg)5646 static void __cdecl htsdefault_init(t_hts_callbackarg * carg) {
5647 }
htsdefault_uninit(t_hts_callbackarg * carg)5648 static void __cdecl htsdefault_uninit(t_hts_callbackarg * carg) {
5649   // hts_freevar();
5650 }
htsdefault_start(t_hts_callbackarg * carg,httrackp * opt)5651 static int __cdecl htsdefault_start(t_hts_callbackarg * carg, httrackp * opt) {
5652   return 1;
5653 }
htsdefault_chopt(t_hts_callbackarg * carg,httrackp * opt)5654 static int __cdecl htsdefault_chopt(t_hts_callbackarg * carg, httrackp * opt) {
5655   return 1;
5656 }
htsdefault_end(t_hts_callbackarg * carg,httrackp * opt)5657 static int __cdecl htsdefault_end(t_hts_callbackarg * carg, httrackp * opt) {
5658   return 1;
5659 }
htsdefault_preprocesshtml(t_hts_callbackarg * carg,httrackp * opt,char ** html,int * len,const char * url_adresse,const char * url_fichier)5660 static int __cdecl htsdefault_preprocesshtml(t_hts_callbackarg * carg,
5661                                              httrackp * opt, char **html,
5662                                              int *len, const char *url_adresse,
5663                                              const char *url_fichier) {
5664   return 1;
5665 }
htsdefault_postprocesshtml(t_hts_callbackarg * carg,httrackp * opt,char ** html,int * len,const char * url_adresse,const char * url_fichier)5666 static int __cdecl htsdefault_postprocesshtml(t_hts_callbackarg * carg,
5667                                               httrackp * opt, char **html,
5668                                               int *len, const char *url_adresse,
5669                                               const char *url_fichier) {
5670   return 1;
5671 }
htsdefault_checkhtml(t_hts_callbackarg * carg,httrackp * opt,char * html,int len,const char * url_adresse,const char * url_fichier)5672 static int __cdecl htsdefault_checkhtml(t_hts_callbackarg * carg,
5673                                         httrackp * opt, char *html, int len,
5674                                         const char *url_adresse,
5675                                         const char *url_fichier) {
5676   return 1;
5677 }
htsdefault_loop(t_hts_callbackarg * carg,httrackp * opt,lien_back * back,int back_max,int back_index,int lien_n,int lien_tot,int stat_time,hts_stat_struct * stats)5678 static int __cdecl htsdefault_loop(t_hts_callbackarg * carg, httrackp * opt, lien_back * back, int back_max, int back_index, int lien_n, int lien_tot, int stat_time, hts_stat_struct * stats) {        // appelé à chaque boucle de HTTrack
5679   return 1;
5680 }
htsdefault_query(t_hts_callbackarg * carg,httrackp * opt,const char * question)5681 static const char *__cdecl htsdefault_query(t_hts_callbackarg * carg,
5682                                             httrackp * opt,
5683                                             const char *question) {
5684   return "";
5685 }
htsdefault_query2(t_hts_callbackarg * carg,httrackp * opt,const char * question)5686 static const char *__cdecl htsdefault_query2(t_hts_callbackarg * carg,
5687                                              httrackp * opt,
5688                                              const char *question) {
5689   return "";
5690 }
htsdefault_query3(t_hts_callbackarg * carg,httrackp * opt,const char * question)5691 static const char *__cdecl htsdefault_query3(t_hts_callbackarg * carg,
5692                                              httrackp * opt,
5693                                              const char *question) {
5694   return "";
5695 }
htsdefault_check(t_hts_callbackarg * carg,httrackp * opt,const char * adr,const char * fil,int status)5696 static int __cdecl htsdefault_check(t_hts_callbackarg * carg, httrackp * opt,
5697                                     const char *adr, const char *fil,
5698                                     int status) {
5699   return -1;
5700 }
htsdefault_check_mime(t_hts_callbackarg * carg,httrackp * opt,const char * adr,const char * fil,const char * mime,int status)5701 static int __cdecl htsdefault_check_mime(t_hts_callbackarg * carg,
5702                                          httrackp * opt, const char *adr,
5703                                          const char *fil, const char *mime,
5704                                          int status) {
5705   return -1;
5706 }
htsdefault_pause(t_hts_callbackarg * carg,httrackp * opt,const char * lockfile)5707 static void __cdecl htsdefault_pause(t_hts_callbackarg * carg, httrackp * opt,
5708                                      const char *lockfile) {
5709   while(fexist(lockfile)) {
5710     Sleep(1000);
5711   }
5712 }
htsdefault_filesave(t_hts_callbackarg * carg,httrackp * opt,const char * file)5713 static void __cdecl htsdefault_filesave(t_hts_callbackarg * carg,
5714                                         httrackp * opt, const char *file) {
5715 }
htsdefault_filesave2(t_hts_callbackarg * carg,httrackp * opt,const char * adr,const char * file,const char * sav,int is_new,int is_modified,int not_updated)5716 static void __cdecl htsdefault_filesave2(t_hts_callbackarg * carg,
5717                                          httrackp * opt, const char *adr,
5718                                          const char *file, const char *sav,
5719                                          int is_new, int is_modified,
5720                                          int not_updated) {
5721 }
htsdefault_linkdetected(t_hts_callbackarg * carg,httrackp * opt,char * link)5722 static int __cdecl htsdefault_linkdetected(t_hts_callbackarg * carg,
5723                                            httrackp * opt, char *link) {
5724   return 1;
5725 }
htsdefault_linkdetected2(t_hts_callbackarg * carg,httrackp * opt,char * link,const char * start_tag)5726 static int __cdecl htsdefault_linkdetected2(t_hts_callbackarg * carg,
5727                                             httrackp * opt, char *link,
5728                                             const char *start_tag) {
5729   return 1;
5730 }
htsdefault_xfrstatus(t_hts_callbackarg * carg,httrackp * opt,lien_back * back)5731 static int __cdecl htsdefault_xfrstatus(t_hts_callbackarg * carg,
5732                                         httrackp * opt, lien_back * back) {
5733   return 1;
5734 }
htsdefault_savename(t_hts_callbackarg * carg,httrackp * opt,const char * adr_complete,const char * fil_complete,const char * referer_adr,const char * referer_fil,char * save)5735 static int __cdecl htsdefault_savename(t_hts_callbackarg * carg, httrackp * opt,
5736                                        const char *adr_complete,
5737                                        const char *fil_complete,
5738                                        const char *referer_adr,
5739                                        const char *referer_fil, char *save) {
5740   return 1;
5741 }
htsdefault_sendhead(t_hts_callbackarg * carg,httrackp * opt,char * buff,const char * adr,const char * fil,const char * referer_adr,const char * referer_fil,htsblk * outgoing)5742 static int __cdecl htsdefault_sendhead(t_hts_callbackarg * carg, httrackp * opt,
5743                                        char *buff, const char *adr,
5744                                        const char *fil, const char *referer_adr,
5745                                        const char *referer_fil,
5746                                        htsblk * outgoing) {
5747   return 1;
5748 }
htsdefault_receivehead(t_hts_callbackarg * carg,httrackp * opt,char * buff,const char * adr,const char * fil,const char * referer_adr,const char * referer_fil,htsblk * incoming)5749 static int __cdecl htsdefault_receivehead(t_hts_callbackarg * carg,
5750                                           httrackp * opt, char *buff,
5751                                           const char *adr, const char *fil,
5752                                           const char *referer_adr,
5753                                           const char *referer_fil,
5754                                           htsblk * incoming) {
5755   return 1;
5756 }
htsdefault_detect(t_hts_callbackarg * carg,httrackp * opt,htsmoduleStruct * str)5757 static int __cdecl htsdefault_detect(t_hts_callbackarg * carg, httrackp * opt,
5758                                      htsmoduleStruct * str) {
5759   return 0;
5760 }
htsdefault_parse(t_hts_callbackarg * carg,httrackp * opt,htsmoduleStruct * str)5761 static int __cdecl htsdefault_parse(t_hts_callbackarg * carg, httrackp * opt,
5762                                     htsmoduleStruct * str) {
5763   return 0;
5764 }
5765 
5766 /* Default internal dummy callbacks */
5767 const t_hts_htmlcheck_callbacks default_callbacks = {
5768   {htsdefault_init, NULL},
5769   {htsdefault_uninit, NULL},
5770   {htsdefault_start, NULL},
5771   {htsdefault_end, NULL},
5772   {htsdefault_chopt, NULL},
5773   {htsdefault_preprocesshtml, NULL},
5774   {htsdefault_postprocesshtml, NULL},
5775   {htsdefault_checkhtml, NULL},
5776   {htsdefault_query, NULL},
5777   {htsdefault_query2, NULL},
5778   {htsdefault_query3, NULL},
5779   {htsdefault_loop, NULL},
5780   {htsdefault_check, NULL},
5781   {htsdefault_check_mime, NULL},
5782   {htsdefault_pause, NULL},
5783   {htsdefault_filesave, NULL},
5784   {htsdefault_filesave2, NULL},
5785   {htsdefault_linkdetected, NULL},
5786   {htsdefault_linkdetected2, NULL},
5787   {htsdefault_xfrstatus, NULL},
5788   {htsdefault_savename, NULL},
5789   {htsdefault_sendhead, NULL},
5790   {htsdefault_receivehead, NULL},
5791   {htsdefault_detect, NULL},
5792   {htsdefault_parse, NULL}
5793 };
5794 
/*
 * Run OPERATION on one callback slot when NAME equals the string S.
 *
 * CB        pointer to the callbacks structure
 * NAME      callback name being looked up
 * OPERATION two-argument macro invoked as OPERATION(TYPE, LVALUE), where TYPE
 *           is the t_hts_htmlcheck_<FUN> function-pointer type and LVALUE is
 *           the slot's 'fun' member
 * S         external (string) name of the slot
 * FUN       member name of the slot inside the callbacks structure
 */
#define CALLBACK_OP(CB, NAME, OPERATION, S, FUN)          \
  do {                                                    \
    if (strcmp((NAME), (S)) == 0) {                       \
      OPERATION(t_hts_htmlcheck_ ##FUN, (CB)->FUN.fun);   \
    }                                                     \
  } while(0)
5800 
/*
 * Map an external callback NAME to its slot in CB and apply OPERATION to it
 * (see CALLBACK_OP). Every known name is compared in turn; there is no early
 * exit after a match unless OPERATION itself leaves the enclosing function.
 *
 * Note that no name is mapped to the 'detect' and 'parse' entries that appear
 * at the end of the default_callbacks table.
 */
#define DISPATCH_CALLBACK(CB, NAME, OPERATION)                          \
  do {                                                                  \
    CALLBACK_OP(CB, NAME, OPERATION, "init", init);                     \
    CALLBACK_OP(CB, NAME, OPERATION, "free", uninit);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "start", start);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "end", end);                       \
    CALLBACK_OP(CB, NAME, OPERATION, "change-options", chopt);          \
    CALLBACK_OP(CB, NAME, OPERATION, "preprocess-html", preprocess);    \
    CALLBACK_OP(CB, NAME, OPERATION, "postprocess-html", postprocess);  \
    CALLBACK_OP(CB, NAME, OPERATION, "check-html", check_html);         \
    CALLBACK_OP(CB, NAME, OPERATION, "query", query);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "query2", query2);                 \
    CALLBACK_OP(CB, NAME, OPERATION, "query3", query3);                 \
    CALLBACK_OP(CB, NAME, OPERATION, "loop", loop);                     \
    CALLBACK_OP(CB, NAME, OPERATION, "check-link", check_link);         \
    CALLBACK_OP(CB, NAME, OPERATION, "check-mime", check_mime);         \
    CALLBACK_OP(CB, NAME, OPERATION, "pause", pause);                   \
    CALLBACK_OP(CB, NAME, OPERATION, "save-file", filesave);            \
    CALLBACK_OP(CB, NAME, OPERATION, "save-file2", filesave2);          \
    CALLBACK_OP(CB, NAME, OPERATION, "link-detected", linkdetected);    \
    CALLBACK_OP(CB, NAME, OPERATION, "link-detected2", linkdetected2);  \
    CALLBACK_OP(CB, NAME, OPERATION, "transfer-status", xfrstatus);     \
    CALLBACK_OP(CB, NAME, OPERATION, "save-name", savename);            \
    CALLBACK_OP(CB, NAME, OPERATION, "send-header", sendhead);          \
    CALLBACK_OP(CB, NAME, OPERATION, "receive-header", receivehead);    \
  } while(0)
5826 
/*
 * Install 'function' in the callback slot designated by 'name'.
 *
 * callbacks: structure whose slot is overwritten
 * name:      external slot name, one of the strings listed in
 *            DISPATCH_CALLBACK ("init", "free", "start", ...)
 * function:  new callback, cast to the slot's function-pointer type; it is
 *            stored as-is (a NULL value is stored too)
 *
 * Returns 0 when a slot matched 'name' and was updated, 1 otherwise
 * (unknown name, or NULL 'callbacks' / 'name').
 */
int hts_set_callback(t_hts_htmlcheck_callbacks * callbacks, const char *name,
                     void *function) {
  int error = 1;

  /* strcmp() on a NULL name, or writing through a NULL structure, would be
     undefined behavior: report the request as failed instead. */
  if (callbacks == NULL || name == NULL) {
    return error;
  }
#define CALLBACK_OPERATION(TYPE, FUNCTION) do { \
    FUNCTION = (TYPE) function;                 \
    error = 0;                                  \
  } while(0)
  DISPATCH_CALLBACK(callbacks, name, CALLBACK_OPERATION);
#undef CALLBACK_OPERATION
  return error;
}
5838 
/*
 * Fetch the function currently stored in the callback slot designated by
 * 'name'.
 *
 * callbacks: structure to read from
 * name:      external slot name, one of the strings listed in
 *            DISPATCH_CALLBACK ("init", "free", "start", ...)
 *
 * Returns the slot's function pointer cast to void*, or NULL when 'name'
 * matches no slot or when 'callbacks' / 'name' is NULL. A slot that itself
 * holds NULL is also reported as NULL.
 */
void *hts_get_callback(t_hts_htmlcheck_callbacks * callbacks, const char *name) {
  /* strcmp() on a NULL name, or reading through a NULL structure, would be
     undefined behavior. */
  if (callbacks == NULL || name == NULL) {
    return NULL;
  }
  /* The operation returns straight out of this function on the first match. */
#define CALLBACK_OPERATION(TYPE, FUNCTION) do { \
    return (void*) FUNCTION;                    \
  } while(0)
  DISPATCH_CALLBACK(callbacks, name, CALLBACK_OPERATION);
#undef CALLBACK_OPERATION
  return NULL;
}
5847 
// end default wrappers
5849 
5850 /* libc stubs */
5851 
hts_strdup(const char * str)5852 HTSEXT_API char *hts_strdup(const char *str) {
5853   return strdup(str);
5854 }
5855 
hts_malloc(size_t size)5856 HTSEXT_API void *hts_malloc(size_t size) {
5857   return malloc(size);
5858 }
5859 
hts_realloc(void * const data,const size_t size)5860 HTSEXT_API void *hts_realloc(void *const data, const size_t size) {
5861   return realloc(data, size);
5862 }
5863 
hts_free(void * data)5864 HTSEXT_API void hts_free(void *data) {
5865   free(data);
5866 }
5867 
5868 /* Dummy functions */
hts_resetvar(void)5869 HTSEXT_API int hts_resetvar(void) {
5870   return 0;
5871 }
5872 
5873 #ifdef _WIN32
5874 
5875 typedef struct dirent dirent;
opendir(const char * name)5876 DIR *opendir(const char *name) {
5877   WIN32_FILE_ATTRIBUTE_DATA st;
5878   DIR *dir;
5879   size_t len;
5880   int i;
5881 
5882   if (name == NULL || *name == '\0') {
5883     errno = ENOENT;
5884     return NULL;
5885   }
5886   if (!GetFileAttributesEx(name, GetFileExInfoStandard, &st)
5887       || (st.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) {
5888     errno = ENOENT;
5889     return NULL;
5890   }
5891   dir = calloc(sizeof(DIR), 1);
5892   if (dir == NULL) {
5893     errno = ENOMEM;
5894     return NULL;
5895   }
5896   len = strlen(name);
5897   dir->h = INVALID_HANDLE_VALUE;
5898   dir->name = malloc(len + 2 + 1);
5899   strcpy(dir->name, name);
5900   for(i = 0; dir->name[i] != '\0'; i++) {
5901     if (dir->name[i] == '/') {
5902       dir->name[i] = '\\';
5903     }
5904   }
5905   strcat(dir->name, "\\*");
5906   return dir;
5907 }
5908 
/*
 * Win32 replacement for readdir().
 *
 * dir: object returned by opendir().
 *
 * Returns a pointer to dir->entry, overwritten by each call, with d_name set
 * to the next file name (truncated to HTS_DIRENT_SIZE - 1 characters).
 * Returns NULL with errno = ENOENT when the enumeration cannot be started or
 * has no more entries, and NULL with errno = EBADF when 'dir' is NULL.
 *
 * NOTE(review): once the end has been reached the search handle is closed and
 * reset, so a further call starts a new enumeration from the first entry.
 */
struct dirent *readdir(DIR * dir) {
  WIN32_FIND_DATAA find;

  if (dir == NULL) {
    errno = EBADF;
    return NULL;
  }
  if (dir->h == INVALID_HANDLE_VALUE) {
    /* First call: open the search on the "<name>\*" pattern. */
    dir->h = FindFirstFileA(dir->name, &find);
  } else if (!FindNextFileA(dir->h, &find)) {
    /* ANSI variant on purpose: 'find' is a WIN32_FIND_DATAA, which the
       generic FindNextFile would not accept in a UNICODE build. */
    FindClose(dir->h);
    dir->h = INVALID_HANDLE_VALUE;
  }
  if (dir->h != INVALID_HANDLE_VALUE) {
    dir->entry.d_name[0] = 0;
    strncat(dir->entry.d_name, find.cFileName, HTS_DIRENT_SIZE - 1);
    return &dir->entry;
  }
  errno = ENOENT;
  return NULL;
}
5928 
/*
 * Win32 replacement for closedir().
 *
 * dir: object returned by opendir(); it must not be used after this call.
 *
 * Returns 0 on success, or -1 with errno = EBADF when 'dir' is NULL.
 */
int closedir(DIR * dir) {
  if (dir == NULL) {
    errno = EBADF;
    return -1;
  }
  if (dir->h != INVALID_HANDLE_VALUE) {
    /* dir->h comes from FindFirstFileA(): search handles must be released
       with FindClose(), not CloseHandle(). */
    FindClose(dir->h);
  }
  free(dir->name);              /* free(NULL) is a no-op */
  free(dir);
  return 0;
}
5943 
5944 // UTF-8 aware FILE API
5945 
/*
 * Widen a NUL-terminated char string into a wide-character buffer.
 *
 * Each char is converted by plain assignment (no charset decoding), and a
 * terminating NUL is appended.
 *
 * dest: destination buffer; must hold at least strlen(src) + 1 elements
 *       (no bound is checked here).
 * src:  source string.
 */
static void copyWchar(LPWSTR dest, const char *src) {
  const char *in = src;
  LPWSTR out = dest;

  while(*in) {
    *out++ = *in++;
  }
  *out = '\0';
}
5954 
hts_fopen_utf8(const char * path,const char * mode)5955 FILE *hts_fopen_utf8(const char *path, const char *mode) {
5956   WCHAR wmode[32];
5957   LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
5958 
5959   assertf(strlen(mode) < sizeof(wmode) / sizeof(WCHAR));
5960   copyWchar(wmode, mode);
5961   if (wpath != NULL) {
5962     FILE *const fp = _wfopen(wpath, wmode);
5963 
5964     free(wpath);
5965     return fp;
5966   } else {
5967     // Fallback on conversion error.
5968     return fopen(path, mode);
5969   }
5970 }
5971 
/*
 * stat() variant taking a UTF-8 encoded path (Win32 only).
 *
 * path: UTF-8 file name; converted with hts_convertUTF8StringToUCS2() and
 *       queried through _wstat().
 * buf:  receives the file information.
 *
 * Returns the result of _wstat(), or of _stat() on the unconverted path when
 * the conversion yields NULL.
 */
int hts_stat_utf8(const char *path, STRUCT_STAT * buf) {
  LPWSTR wide_path =
    hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
  int status;

  // Fallback on conversion error.
  if (wide_path == NULL) {
    return _stat(path, buf);
  }

  status = _wstat(wide_path, buf);
  free(wide_path);
  return status;
}
5985 
hts_unlink_utf8(const char * path)5986 int hts_unlink_utf8(const char *path) {
5987   LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
5988 
5989   if (wpath != NULL) {
5990     const int result = _wunlink(wpath);
5991 
5992     free(wpath);
5993     return result;
5994   } else {
5995     // Fallback on conversion error.
5996     return _unlink(path);
5997   }
5998 }
5999 
hts_rename_utf8(const char * oldpath,const char * newpath)6000 int hts_rename_utf8(const char *oldpath, const char *newpath) {
6001   LPWSTR woldpath =
6002     hts_convertUTF8StringToUCS2(oldpath, (int) strlen(oldpath), NULL);
6003   LPWSTR wnewpath =
6004     hts_convertUTF8StringToUCS2(newpath, (int) strlen(newpath), NULL);
6005   if (woldpath != NULL && wnewpath != NULL) {
6006     const int result = _wrename(woldpath, wnewpath);
6007 
6008     free(woldpath);
6009     free(wnewpath);
6010     return result;
6011   } else {
6012     if (woldpath != NULL)
6013       free(woldpath);
6014     if (wnewpath != NULL)
6015       free(wnewpath);
6016     // Fallback on conversion error.
6017     return rename(oldpath, newpath);
6018   }
6019 }
6020 
hts_mkdir_utf8(const char * path)6021 int hts_mkdir_utf8(const char *path) {
6022   LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
6023 
6024   if (wpath != NULL) {
6025     const int result = _wmkdir(wpath);
6026 
6027     free(wpath);
6028     return result;
6029   } else {
6030     // Fallback on conversion error.
6031     return _mkdir(path);
6032   }
6033 }
6034 
hts_utime_utf8(const char * path,const STRUCT_UTIMBUF * times)6035 HTSEXT_API int hts_utime_utf8(const char *path, const STRUCT_UTIMBUF * times) {
6036   STRUCT_UTIMBUF mtimes = *times;
6037   LPWSTR wpath = hts_convertUTF8StringToUCS2(path, (int) strlen(path), NULL);
6038 
6039   if (wpath != NULL) {
6040     const int result = _wutime(wpath, &mtimes);
6041 
6042     free(wpath);
6043     return result;
6044   } else {
6045     // Fallback on conversion error.
6046     return _utime(path, &mtimes);
6047   }
6048 }
6049 
6050 #endif
6051 
// End
6053