1 /* ------------------------------------------------------------ */
2 /*
3 HTTrack Website Copier, Offline Browser for Windows and Unix
4 Copyright (C) 1998-2017 Xavier Roche and other contributors
5 
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 
19 Important notes:
20 
21 - We hereby ask people using this source NOT to use it in purpose of grabbing
22 emails addresses, or collecting any other private information on persons.
23 This would disgrace our work, and spoil the many hours we spent on it.
24 
25 Please visit our Website: http://www.httrack.com
26 */
27 
28 /* ------------------------------------------------------------ */
29 /* File: httrack.c subroutines:                                 */
30 /*       savename routine (compute output filename)             */
31 /* Author: Xavier Roche                                         */
32 /* ------------------------------------------------------------ */
33 
34 /* Internal engine bytecode */
35 #define HTS_INTERNAL_BYTECODE
36 
37 #include "htscore.h"
38 #include "htsname.h"
39 #include "md5.h"
40 #include "htsmd5.h"
41 #include "htstools.h"
42 #include "htscharset.h"
43 #include "htsencoding.h"
44 #include <ctype.h>
45 
/* Copy the bytes located between start_pos (inclusive) and nom_pos
   (exclusive) into a temporary buffer and hand that buffer to
   url_savename_addstr() together with afs->save.
   Expands to a plain braced block (not a do/while), and relies on
   `start_pos`, `nom_pos` and `afs` being visible at the expansion site.
   NOTE(review): url_savename_addstr() is defined elsewhere; presumably it
   appends the text to afs->save -- confirm. */
#define ADD_STANDARD_PATH \
    {  /* add path */\
      char BIGSTK buff[HTS_URLMAXSIZE*2];\
      buff[0]='\0';\
      strncatbuff(buff,start_pos,nom_pos - start_pos);\
      url_savename_addstr(afs->save, buff);\
    }
53 
/* Build a name into a temporary buffer with standard_name(), using
   dot_pos, nom_pos, fil_complete and the `shortname` argument, then hand
   that buffer to url_savename_addstr() together with afs->save.
   Expands to a plain braced block (not a do/while), and relies on
   `dot_pos`, `nom_pos`, `fil_complete` and `afs` being visible at the
   expansion site.
   NOTE(review): standard_name() and url_savename_addstr() are defined
   elsewhere; the exact meaning of `shortname` should be checked there. */
#define ADD_STANDARD_NAME(shortname) \
    {  /* add name */\
      char BIGSTK buff[HTS_URLMAXSIZE*2];\
      standard_name(buff,dot_pos,nom_pos,fil_complete,(shortname));\
      url_savename_addstr(afs->save, buff);\
    }
60 
/* DOS device names (such as 'nul' or 'con') that must be avoided as
   folder/file names. Every entry carries a leading '/', and the list is
   terminated by an empty string.
   Based on linux/fs/umsdos/mangle.c */
static const char *const hts_tbdev[] = {
  "/prn", "/con", "/aux", "/nul",
  "/lpt1", "/lpt2", "/lpt3", "/lpt4",
  "/com1", "/com2", "/com3", "/com4",
  "/clock$",
  "/emmxxxx0", "/xmsxxxx0", "/setverxx",
  ""                            /* end-of-list marker */
};
71 
/* Block until back_pluggable_sockets_strict(sback, opt) reports a value
   greater than zero.
   While waiting, opt->state._hts_in_html_parsing is set to 6, back_wait()
   is called, the HTS_STAT counters are refreshed and the `loop` callback is
   run. The previous value of _hts_in_html_parsing is restored once the
   wait is over.
   WARNING: this macro contains a hidden `return -1;` taken when the `loop`
   callback returns zero; on that path _hts_in_html_parsing is NOT restored.
   It can therefore only be expanded inside a function returning int, with
   `opt`, `sback`, `cache` and `ptr` in scope. */
#define URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET() do { \
  int prev = opt->state._hts_in_html_parsing; \
  while(back_pluggable_sockets_strict(sback, opt) <= 0) { \
    opt->state._hts_in_html_parsing = 6; \
    /* Wait .. */ \
    back_wait(sback,opt,cache,0); \
    /* Transfer rate */ \
    engine_stats(); \
    /* Refresh various stats */ \
    HTS_STAT.stat_nsocket=back_nsoc(sback); \
    HTS_STAT.stat_errors=fspc(opt,NULL,"error"); \
    HTS_STAT.stat_warnings=fspc(opt,NULL,"warning"); \
    HTS_STAT.stat_infos=fspc(opt,NULL,"info"); \
    HTS_STAT.nbk=backlinks_done(sback,opt->liens,opt->lien_tot,ptr); \
    HTS_STAT.nb=back_transferred(HTS_STAT.stat_bytes,sback); \
    /* Check: a zero result from the loop callback aborts the caller */ \
    { \
      if (!RUN_CALLBACK7(opt, loop, sback->lnk, sback->count,-1,ptr,opt->lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) { \
        return -1; \
      } \
    } \
  } \
  opt->state._hts_in_html_parsing = prev; \
} while(0)
96 
/* Collapse every run of consecutive '/' characters of s into a single '/',
   in place.
   The string can only shrink, so no extra storage is needed. Bytes are
   written only once something has actually been dropped, so a string that
   holds no "//" is left completely untouched.
   s: NUL-terminated, writable string; NULL is accepted and ignored. */
static void cleanDoubleSlash(char *s) {
  const char *rd;               /* read cursor */
  char *wr;                     /* write cursor, never ahead of rd */
  char prev = '\0';             /* previous character of the ORIGINAL text */

  if (s == NULL) {
    return;
  }
  for(rd = s, wr = s; *rd != '\0'; rd++) {
    const char c = *rd;
    const int duplicate = (c == '/' && prev == '/');

    prev = c;
    if (duplicate) {
      continue;                 /* second (or later) slash of a run: drop */
    }
    if (wr != rd) {
      *wr = c;
    }
    wr++;
  }
  /* terminating \0, only needed when the string was shortened */
  if (wr != rd) {
    *wr = '\0';
  }
}
115 
/* Strip, in place, every trailing '.' or ' ' of each '/'-separated segment
   of s (such endings are forbidden on Windows).
   Leading or inner dots/spaces of a segment are kept. A segment made only
   of dots/spaces becomes empty, which may leave two adjacent '/'.
   s: NUL-terminated, writable string; NULL is accepted and ignored. */
static void cleanEndingSpaceOrDot(char *s) {
  const char *rd;               /* read cursor */
  char *wr;                     /* write cursor, never ahead of rd */
  char *keep;                   /* end of the last output known to be good */
  char c;

  if (s == NULL) {
    return;
  }
  rd = wr = keep = s;
  do {
    c = *rd;
    if (c == '/' || c == '\0') {
      /* End of a segment: forget the pending run of ' ' and '.' */
      wr = keep;
    }
    if (wr != rd) {
      *wr = c;
    }
    wr++;
    /* Anything but ' ' or '.' is a valid last character: commit */
    if (c != ' ' && c != '.') {
      keep = wr;
    }
    rd++;
  } while(c != '\0');           /* the terminator itself is processed too */
}
139 
140 // forme le nom du fichier à sauver (save) à partir de fil et adr
141 // système intelligent, qui renomme en cas de besoin (exemple: deux INDEX.HTML et index.html)
url_savename(lien_adrfilsave * const afs,lien_adrfil * const former,const char * referer_adr,const char * referer_fil,httrackp * opt,struct_back * sback,cache_back * cache,hash_struct * hash,int ptr,int numero_passe,const lien_back * headers)142 int url_savename(lien_adrfilsave *const afs,
143                  lien_adrfil *const former,
144                  const char *referer_adr, const char *referer_fil,
145                  httrackp * opt, struct_back * sback, cache_back * cache,
146                  hash_struct * hash, int ptr, int numero_passe,
147                  const lien_back * headers) {
148   char catbuff[CATBUFF_SIZE];
149   const int is_redirect = headers != NULL && HTTP_IS_REDIRECT(headers->r.statuscode);
150   const char *mime_type = headers != NULL && !is_redirect ? headers->r.contenttype : NULL;
151   /*const char* mime_type = ( headers && HTTP_IS_OK(headers->r.statuscode) ) ? headers->r.contenttype : NULL; */
152   lien_back *const back = sback->lnk;
153 
154   /* */
155   char BIGSTK fil[HTS_URLMAXSIZE * 2];       /* ="" */
156 
157   const char *const adr_complete = afs->af.adr;
158   const char *const fil_complete = afs->af.fil;
159 
160   /*char BIGSTK normadr_[HTS_URLMAXSIZE*2]; */
161   char BIGSTK normadr_[HTS_URLMAXSIZE * 2], normfil_[HTS_URLMAXSIZE * 2];
162   enum { PROTOCOL_HTTP, PROTOCOL_HTTPS, PROTOCOL_FTP, PROTOCOL_FILE,
163       PROTOCOL_UNKNOWN };
164   static const char *protocol_str[] =
165     { "http", "https", "ftp", "file", "unknown" };
166   int protocol = PROTOCOL_HTTP;
167   const char *const adr = jump_identification_const(adr_complete);
168   // copy of fil, used for lookups (see urlhack)
169   const char *normadr = adr;
170   const char *normfil = fil_complete;
171   const char *const print_adr = jump_protocol_const(adr);
172   const char *start_pos = NULL, *nom_pos = NULL, *dot_pos = NULL;     // Position nom et point
173 
174   // pour changement d'extension ou de nom (content-disposition)
175   int ext_chg = 0, ext_chg_delayed = 0;
176   int is_html = 0;
177   char ext[256];
178   int max_char = 0;
179 
180   //CLEAR
181   fil[0] = ext[0] = '\0';
182   afs->save[0] = '\0';
183 
184   /* 8-3 ? */
185   switch (opt->savename_83) {
186   case 1:                      // 8-3
187     max_char = 8;
188     break;
189   case 2:                      // Level 2 File names may be up to 31 characters.
190     max_char = 31;
191     break;
192   default:
193     max_char = 8;
194     break;
195   }
196 
197   // normalize the URL:
198   // www.foo.com -> foo.com
199   // www-42.foo.com -> foo.com
200   // foo.com/bar//foobar -> foo.com/bar/foobar
201   if (opt->urlhack) {
202     // copy of adr (without protocol), used for lookups (see urlhack)
203     normadr = adr_normalized(adr, normadr_);
204     normfil = fil_normalized(fil_complete, normfil_);
205   } else {
206     if (link_has_authority(adr_complete)) {     // https or other protocols : in "http/" subfolder
207       char *pos = strchr(adr_complete, ':');
208 
209       if (pos != NULL) {
210         normadr_[0] = '\0';
211         strncatbuff(normadr_, adr_complete, (int) (pos - adr_complete));
212         strcatbuff(normadr_, "://");
213         strcatbuff(normadr_, normadr);
214         normadr = normadr_;
215       }
216     }
217   }
218 
219   // à afficher sans ftp://
220   if (strfield(adr_complete, "https:")) {
221     protocol = PROTOCOL_HTTPS;
222   } else if (strfield(adr_complete, "ftp:")) {
223     protocol = PROTOCOL_FTP;
224   } else if (strfield(adr_complete, "file:")) {
225     protocol = PROTOCOL_FILE;
226   } else {
227     protocol = PROTOCOL_HTTP;
228   }
229 
230   // court-circuit pour lien primaire
231   if (strnotempty(adr) == 0) {
232     if (strcmp(fil_complete, "primary") == 0) {
233       strcatbuff(afs->save, "primary.html");
234       return 0;
235     }
236   }
237 
238   /* Declare adr (IDNA-decoded if necessary) */
239 #define DECLARE_ADR(FINAL_ADR) \
240   char *idna_adr =\
241     /* http or https */\
242     (\
243     protocol == PROTOCOL_HTTP\
244     || protocol == PROTOCOL_HTTPS \
245     )\
246     /* and contains IDNA */\
247     && hts_isStringIDNA(adr_complete, strlen(print_adr))\
248     ? hts_convertStringIDNAToUTF8(print_adr, strlen(print_adr))\
249     : NULL;\
250   const char *const FINAL_ADR = idna_adr != NULL \
251     ? idna_adr : ( protocol == PROTOCOL_FILE ? "file" : print_adr )
252 
253   /* Release adr */
254 #define RELEASE_ADR() do {\
255   if (idna_adr != NULL) {\
256     free(idna_adr);\
257     idna_adr = NULL;\
258   }\
259 } while(0)
260 
261   // vérifier que le nom n'a pas déja été calculé (si oui le renvoyer tel que)
262   // vérifier que le nom n'est pas déja pris...
263   // NOTE: si on cherche /toto/ et que /toto est trouvé on le prend (et réciproquqment) ** // **
264   if (opt->liens != NULL) {
265     int i;
266 
267     i = hash_read(hash, normadr, normfil, HASH_STRUCT_ADR_PATH);     // recherche table 1 (adr+fil)
268     if (i >= 0) {               // ok, trouvé
269       strcpybuff(afs->save, heap(i)->sav);
270       return 0;
271     }
272     i = hash_read(hash, normadr, normfil, HASH_STRUCT_ORIGINAL_ADR_PATH);     // recherche table 2 (former->adr+former->fil)
273     if (i >= 0) {               // ok, trouvé
274       // copier location moved!
275       strcpybuff(afs->af.adr, heap(i)->adr);
276       strcpybuff(afs->af.fil, heap(i)->fil);
277       // et save
278       strcpybuff(afs->save, heap(i)->sav);  // copier (formé à partir du nouveau lien!)
279       return 0;
280     }
281     // chercher sans / ou avec / dans former
282     {
283       char BIGSTK fil_complete_patche[HTS_URLMAXSIZE * 2];
284 
285       strcpybuff(fil_complete_patche, normfil);
286       // Version avec ou sans /
287       if (fil_complete_patche[strlen(fil_complete_patche) - 1] == '/')
288         fil_complete_patche[strlen(fil_complete_patche) - 1] = '\0';
289       else
290         strcatbuff(fil_complete_patche, "/");
291       i = hash_read(hash, normadr, fil_complete_patche, HASH_STRUCT_ORIGINAL_ADR_PATH);       // recherche table 2 (former->adr+former->fil)
292       if (i >= 0) {
293         // écraser fil et adr (pas former->fil?????)
294         strcpybuff(afs->af.adr, heap(i)->adr);
295         strcpybuff(afs->af.fil, heap(i)->fil);
296         // écrire save
297         strcpybuff(afs->save, heap(i)->sav);
298         return 0;
299       }
300     }
301   }
302   // vérifier la non présence de paramètres dans le nom de fichier
303   // si il y en a, les supprimer (ex: truc.cgi?subj=aspirateur)
304   // néanmoins, gardé pour vérifier la non duplication (voir après)
305   {
306     char *a;
307 
308     a = strchr(fil_complete, '?');
309     if (a != NULL) {
310       strncatbuff(fil, fil_complete, a - fil_complete);
311     } else {
312       strcpybuff(fil, fil_complete);
313     }
314   }
315 
316   // decode remaining % (normally not necessary; already done in htsparse.c)
317   // this will NOT decode buggy %xx (ie. not UTF-8) ones
318   if (hts_unescapeUrl(fil, catbuff, sizeof(catbuff)) == 0) {
319     strcpybuff(fil, catbuff);
320   } else {
321     hts_log_print(opt, LOG_WARNING,
322       "could not URL-decode string '%s'", fil);
323   }
324 
325   /* replace shtml to html.. */
326   if (opt->savename_delayed == 2)
327     is_html = -1;               /* ALWAYS delay type */
328   else
329     is_html = ishtml(opt, fil);
330   switch (is_html) {            /* .html,.shtml,.. */
331   case 1:
332     if ((strfield2(get_ext(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil), "html") == 0)
333         && (strfield2(get_ext(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), fil), "htm") == 0)
334       ) {
335       strcpybuff(ext, "html");
336       ext_chg = 1;
337     }
338     break;
339   case 0:
340     if (!strnotempty(ext)) {
341       if (is_userknowntype(opt, fil)) { // mime known by user
342         char BIGSTK mime[1024];
343 
344         mime[0] = ext[0] = '\0';
345         get_userhttptype(opt, mime, fil);
346         if (strnotempty(mime)) {
347           give_mimext(ext, mime);
348           if (strnotempty(ext)) {
349             ext_chg = 1;
350           }
351         }
352       }
353     }
354     break;
355   }
356 
357   // si option check_type activée
358   if (is_html < 0 && opt->check_type && !ext_chg) {
359     int ishtest = 0;
360 
361     if (protocol != PROTOCOL_FILE
362         && protocol != PROTOCOL_FTP
363       ) {
364       // tester type avec requète HEAD si on ne connait pas le type du fichier
365       if (!((opt->check_type == 1) && (fil[strlen(fil) - 1] == '/')))   // slash doit être html?
366         if (opt->savename_delayed == 2 || (ishtest = ishtml(opt, fil)) < 0) {   // on ne sait pas si c'est un html ou un fichier..
367           // lire dans le cache
368           htsblk r = cache_read_including_broken(opt, cache, adr, fil); // test uniquement
369 
370           if (r.statuscode != -1) {     // pas d'erreur de lecture cache
371             char s[32];
372 
373             s[0] = '\0';
374             hts_log_print(opt, LOG_DEBUG, "Testing link type (from cache) %s%s",
375                           adr_complete, fil_complete);
376             if (!HTTP_IS_REDIRECT(r.statuscode)) {
377               if (strnotempty(r.cdispo)) {        /* filename given */
378                 ext_chg = 2;      /* change filename */
379                 strcpybuff(ext, r.cdispo);
380               } else if (!may_unknown2(opt, r.contenttype, fil)) {        // on peut patcher à priori?
381                 give_mimext(s, r.contenttype);    // obtenir extension
382                 if (strnotempty(s) > 0) { // on a reconnu l'extension
383                   ext_chg = 1;
384                   strcpybuff(ext, s);
385                 }
386               }
387             }
388 #ifdef DEFAULT_BIN_EXT
389             // no extension and potentially bogus
390             else if (ishtest == -2) {
391               ext_chg = 1;
392               strcpybuff(ext, DEFAULT_BIN_EXT + 1);
393             }
394 #endif
395             //
396           } else if (opt->savename_delayed != 2 && is_userknowntype(opt, fil)) {        /* PATCH BY BRIAN SCHRÖDER.
397                                                                                            Lookup mimetype not only by extension,
398                                                                                            but also by filename */
399             /* Note: "foo.cgi => text/html" means that foo.cgi shall have the text/html MIME file type,
400                that is, ".html" */
401             char BIGSTK mime[1024];
402 
403             mime[0] = ext[0] = '\0';
404             get_userhttptype(opt, mime, fil);
405             if (strnotempty(mime)) {
406               give_mimext(ext, mime);
407               if (strnotempty(ext)) {
408                 ext_chg = 1;
409               }
410             }
411           }
412           // note: if savename_delayed is enabled, the naming will be temporary (and slightly invalid!)
413           // note: if we are about to stop (opt->state.stop), back_add() will fail later
414           else if (opt->savename_delayed != 0 && !opt->state.stop) {
415             // Check if the file is ready in backing. We basically take the same logic as later.
416             // FIXME: we should cleanup and factorize this unholy mess
417             if (headers != NULL && headers->status >= 0 && !is_redirect) {
418               if (strnotempty(headers->r.cdispo)) {        /* filename given */
419                 ext_chg = 2;      /* change filename */
420                 strcpybuff(ext, headers->r.cdispo);
421               } else if (!may_unknown2(opt, headers->r.contenttype, headers->url_fil)) {    // on peut patcher à priori? (pas interdit ou pas de type)
422                 char s[16];
423                 s[0] = '\0';
424                 give_mimext(s, headers->r.contenttype);    // obtenir extension
425                 if (strnotempty(s) > 0) { // on a reconnu l'extension
426                   ext_chg = 1;
427                   strcpybuff(ext, s);
428                 }
429               }
430             }
431             else if (mime_type != NULL) {
432               ext[0] = '\0';
433               if (*mime_type) {
434                 give_mimext(ext, mime_type);
435               }
436               if (strnotempty(ext)) {
437                 char mime_from_file[128];
438 
439                 mime_from_file[0] = 0;
440                 get_httptype(opt, mime_from_file, fil, 1);
441                 if (!strnotempty(mime_from_file) || strcasecmp(mime_type, mime_from_file) != 0) {       /* different mime for this type */
442                   /* type change not forbidden (or no extension at all) */
443                   if (!may_unknown2(opt, mime_type, fil)) {
444                     ext_chg = 1;
445                   }
446 #ifdef DEFAULT_BIN_EXT
447                   // no extension and potentially bogus
448                   else if (ishtml(opt, fil) == -2) {
449                     ext_chg = 1;
450                     strcpybuff(ext, DEFAULT_BIN_EXT + 1);
451                   }
452 #endif
453                 } else {
454                   ext_chg = 0;
455                 }
456               }
457             } else {
458               /* Avoid collisions (no collisionning detection) */
459               sprintf(ext, "%x.%s", opt->state.delayedId++, DELAYED_EXT);
460               ext_chg = 1;
461               ext_chg_delayed = 1;      /* due to naming system */
462             }
463           }
464           // test imposible dans le cache, faire une requête
465           else {
466             //
467             int hihp = opt->state._hts_in_html_parsing;
468             int has_been_moved = 0;
469             lien_adrfil current;
470 
471             /* Ensure we don't use too many sockets by using a "testing" one
472                If we have only 1 simultaneous connection authorized, wait for pending download
473                Wait for an available slot
474              */
475             URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET();
476 
477             /* Rock'in */
478             current.adr[0] = current.fil[0] = '\0';
479             opt->state._hts_in_html_parsing = 2;        // test
480             hts_log_print(opt, LOG_DEBUG, "Testing link type %s%s",
481                           adr_complete, fil_complete);
482             strcpybuff(current.adr, adr_complete);
483             strcpybuff(current.fil, fil_complete);
484             // ajouter dans le backing le fichier en mode test
485             // savename: rien car en mode test
486             if (back_add
487                 (sback, opt, cache, current.adr, current.fil, BACK_ADD_TEST,
488                  referer_adr, referer_fil, 1) != -1) {
489               int b;
490 
491               b = back_index(opt, sback, current.adr, current.fil, BACK_ADD_TEST);
492               if (b >= 0) {
493                 int stop_looping = 0;
494                 int petits_tours = 0;
495                 int get_test_request = 0;       // en cas de bouclage sur soi même avec HEAD, tester avec GET.. parfois c'est la cause des problèmes
496 
497                 do {
498                   // temps à attendre, et remplir autant que l'on peut le cache (backing)
499                   if (back[b].status > 0) {
500                     back_wait(sback, opt, cache, 0);
501                   }
502                   if (ptr >= 0) {
503                     back_fillmax(sback, opt, cache, ptr, numero_passe);
504                   }
505                   // on est obligé d'appeler le shell pour le refresh..
506                   // Transfer rate
507                   engine_stats();
508 
509                   // Refresh various stats
510                   HTS_STAT.stat_nsocket = back_nsoc(sback);
511                   HTS_STAT.stat_errors = fspc(opt, NULL, "error");
512                   HTS_STAT.stat_warnings = fspc(opt, NULL, "warning");
513                   HTS_STAT.stat_infos = fspc(opt, NULL, "info");
514                   HTS_STAT.nbk = backlinks_done(sback, opt->liens, opt->lien_tot, ptr);
515                   HTS_STAT.nb = back_transferred(HTS_STAT.stat_bytes, sback);
516 
517                   if (!RUN_CALLBACK7
518                       (opt, loop, sback->lnk, sback->count, b, ptr, opt->lien_tot,
519                        (int) (time_local() - HTS_STAT.stat_timestart),
520                        &HTS_STAT)) {
521                     return -1;
522                   } else if (opt->state._hts_cancel || !back_checkmirror(opt)) {        // cancel 2 ou 1 (cancel parsing)
523                     back_delete(opt, cache, sback, b);  // cancel test
524                     stop_looping = 1;
525                   }
526                   // traitement des 304,303..
527                   if (back[b].status <= 0) {
528                     if (HTTP_IS_REDIRECT(back[b].r.statuscode)) {       // agh moved.. un tit tour de plus
529                       if ((petits_tours < 5) && former != NULL) { // on va pas tourner en rond non plus!
530                         if (strnotempty(back[b].r.location)) {    // location existe!
531                           char BIGSTK mov_url[HTS_URLMAXSIZE * 2];
532                           lien_adrfil moved;
533                           mov_url[0] = moved.adr[0] = moved.fil[0] = '\0';
534                           //
535                           strcpybuff(mov_url, back[b].r.location);      // copier URL
536                           if (ident_url_relatif
537                               (mov_url, current.adr, current.fil, &moved) >= 0) {
538                             // si non bouclage sur soi même, ou si test avec GET non testé
539                             if ((strcmp(moved.adr, current.adr))
540                                 || (strcmp(moved.fil, current.fil))
541                                 || (get_test_request == 0)) {
542                               // bouclage?
543                               if ((!strcmp(moved.adr, current.adr))
544                                   && (!strcmp(moved.fil, current.fil)))
545                                 get_test_request = 1;   // faire requète avec GET
546 
547                               // recopier former->adr/fil?
548                               if (former != NULL) {
549                                 if (strnotempty(former->adr) == 0) {     // Pas déja noté
550                                   strcpybuff(former->adr, current.adr);
551                                   strcpybuff(former->fil, current.fil);
552                                 }
553                               }
554                               // check explicit forbidden - don't follow 3xx in this case
555                               {
556                                 int set_prio_to = 0;
557 
558                                 if (hts_acceptlink(opt, ptr, moved.adr, moved.fil, NULL, NULL, &set_prio_to, NULL) == 1) { /* forbidden */
559                                   has_been_moved = 1;
560                                   back_maydelete(opt, cache, sback, b); // ok
561                                   strcpybuff(current.adr, moved.adr);
562                                   strcpybuff(current.fil, moved.fil);
563                                   mov_url[0] = '\0';
564                                   stop_looping = 1;
565                                 }
566                               }
567 
568                               // ftp: stop!
569                               if (strfield(mov_url, "ftp://")
570                                 ) {     // ftp, ok on arrête
571                                 has_been_moved = 1;
572                                 back_maydelete(opt, cache, sback, b);   // ok
573                                 strcpybuff(current.adr, moved.adr);
574                                 strcpybuff(current.fil, moved.fil);
575                                 stop_looping = 1;
576                               } else if (*mov_url) {
577                                 const char *methode;
578 
579                                 if (!get_test_request)
580                                   methode = BACK_ADD_TEST;      // tester avec HEAD
581                                 else {
582                                   methode = BACK_ADD_TEST2;     // tester avec GET
583                                   hts_log_print(opt, LOG_WARNING,
584                                                 "Loop with HEAD request (during prefetch) at %s%s",
585                                                 current.adr, current.fil);
586                                 }
587                                 // Ajouter
588                                 URLSAVENAME_WAIT_FOR_AVAILABLE_SOCKET();
589                                 if (back_add(sback, opt, cache, moved.adr, moved.fil, methode, referer_adr, referer_fil, 1) != -1) {        // OK
590                                   hts_log_print(opt, LOG_DEBUG,
591                                                 "(during prefetch) %s (%d) to link %s at %s%s",
592                                                 back[b].r.msg,
593                                                 back[b].r.statuscode,
594                                                 back[b].r.location, current.adr,
595                                                 current.fil);
596 
597                                   // libérer emplacement backing actuel et attendre le prochain
598                                   back_maydelete(opt, cache, sback, b);
599                                   strcpybuff(current.adr, moved.adr);
600                                   strcpybuff(current.fil, moved.fil);
601                                   b =
602                                     back_index(opt, sback, current.adr, current.fil,
603                                                methode);
604                                   if (!get_test_request)
605                                     has_been_moved = 1; // sinon ne pas forcer has_been_moved car non déplacé
606                                   petits_tours++;
607                                   //
608                                 } else {        // sinon on fait rien et on s'en va.. (ftp etc)
609                                   hts_log_print(opt, LOG_DEBUG,
610                                                 "Warning: Savename redirect backing error at %s%s",
611                                                 moved.adr, moved.fil);
612                                 }
613                               }
614                             } else {
615                               hts_log_print(opt, LOG_WARNING,
616                                             "Unable to test %s%s (loop to same filename)",
617                                             adr_complete, fil_complete);
618                             }
619 
620                           }
621                         }
622                       } else {  // arrêter les frais
623                         hts_log_print(opt, LOG_WARNING,
624                                       "Unable to test %s%s (loop)",
625                                       adr_complete, fil_complete);
626                       }
627                     }           // ok, leaving
628                   }
629                 } while(!stop_looping && back[b].status > 0
630                         && back[b].status < 1000);
631 
632                 // Si non déplacé, forcer type?
633                 if (!has_been_moved) {
634                   if (back[b].r.statuscode != -10) {    // erreur
635                     if (strnotempty(back[b].r.contenttype) == 0)
636                       strcpybuff(back[b].r.contenttype, "text/html");   // message d'erreur en html
637                     // Finalement on, renvoie un erreur, pour ne toucher à rien dans le code
638                     // libérer emplacement backing
639                   }
640 
641                   {             // pas d'erreur, changer type?
642                     char s[16];
643 
644                     s[0] = '\0';
645                     if (strnotempty(back[b].r.cdispo)) {        /* filename given */
646                       ext_chg = 2;      /* change filename */
647                       strcpybuff(ext, back[b].r.cdispo);
648                     } else if (!may_unknown2(opt, back[b].r.contenttype, back[b].url_fil)) {    // on peut patcher à priori? (pas interdit ou pas de type)
649                       give_mimext(s, back[b].r.contenttype);    // obtenir extension
650                       if (strnotempty(s) > 0) { // on a reconnu l'extension
651                         ext_chg = 1;
652                         strcpybuff(ext, s);
653                       }
654                     }
655 #ifdef DEFAULT_BIN_EXT
656                     // no extension and potentially bogus
657                     else if (ishtest == -2) {
658                       ext_chg = 1;
659                       strcpybuff(ext, DEFAULT_BIN_EXT + 1);
660                     }
661 #endif
662                   }
663                 }
664                 // FIN Si non déplacé, forcer type?
665 
666                 // libérer emplacement backing
667                 back_maydelete(opt, cache, sback, b);
668 
669                 // --- --- ---
670                 // oops, a été déplacé.. on recalcule en récursif (osons!)
671                 if (has_been_moved) {
672                   // copier adr, fil (optionnel, mais sinon marche pas pour le rip)
673                   strcpybuff(afs->af.adr, current.adr);
674                   strcpybuff(afs->af.fil, current.fil);
675                   // copier adr, fil
676 
677                   return url_savename(afs, NULL,
678                                       referer_adr, referer_fil, opt,
679                                       sback, cache, hash, ptr,
680                                       numero_passe, NULL);
681                 }
682                 // --- --- ---
683 
684               }
685 
686             } else {
687               printf
688                 ("PANIC! : Savename Crash adding error, unexpected error found.. [%d]\n",
689                  __LINE__);
690 #if BDEBUG==1
691               printf("error while savename crash adding\n");
692 #endif
693               hts_log_print(opt, LOG_ERROR,
694                             "Unexpected savename backing error at %s%s", adr,
695                             fil_complete);
696 
697             }
698             // restaurer
699             opt->state._hts_in_html_parsing = hihp;
700           }                     // caché?
701         }
702     }
703   }
704 
705   // - - - DEBUT NOMMAGE - - -
706 
707   // Donner nom par défaut?
708   if (fil[strlen(fil) - 1] == '/') {
709     if (!strfield(adr_complete, "ftp://")
710       ) {
711       strcatbuff(fil, DEFAULT_HTML);    // nommer page par défaut!!
712     } else {
713       if (!opt->proxy.active)
714         strcatbuff(fil, DEFAULT_FTP);   // nommer page par défaut (texte)
715       else
716         strcatbuff(fil, DEFAULT_HTML);  // nommer page par défaut (à priori ici html depuis un proxy http)
717     }
718   }
719   // Changer extension?
720   // par exemple, php3 sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
721   if (ext_chg && !opt->no_type_change) {                // changer ext
722     char *a = fil + strlen(fil) - 1;
723 
724     if ((opt->debug > 1) && (opt->log != NULL)) {
725       if (ext_chg == 1)
726         hts_log_print(opt, LOG_DEBUG, "Changing link extension %s%s to .%s",
727                       adr_complete, fil_complete, ext);
728       else
729         hts_log_print(opt, LOG_DEBUG, "Changing link name %s%s to %s",
730                       adr_complete, fil_complete, ext);
731     }
732     if (ext_chg == 1) {
733       while((a > fil) && (*a != '.') && (*a != '/'))
734         a--;
735       if (*a == '.')
736         *a = '\0';              // couper
737       strcatbuff(fil, ".");     // recopier point
738     } else {
739       while((a > fil) && (*a != '/'))
740         a--;
741       if (*a == '/')
742         a++;
743       *a = '\0';
744     }
745     strcatbuff(fil, ext);       // copier ext/nom
746   }
747   // Rechercher premier / et dernier .
748   {
749     const char *a = fil + strlen(fil) - 1;
750 
751     // passer structures
752     start_pos = fil;
753     while((a > fil) && (*a != '/') && (*a != '\\')) {
754       if (*a == '.')            // point? noter position
755         if (!dot_pos)
756           dot_pos = a;
757       a--;
758     }
759     if ((*a == '/') || (*a == '\\'))
760       a++;
761     nom_pos = a;
762   }
763 
764   // un nom de fichier est généré
765   // s'il existe déja, alors on le mofifie légèrement
766 
767   // ajouter nom du site éventuellement en premier
768   if (opt->savename_type == -1) {       // utiliser savename_userdef! (%h%p/%n%q.%t)
769     const char *a = StringBuff(opt->savename_userdef);
770     char *b = afs->save;
771 
772     /*char *nom_pos=NULL,*dot_pos=NULL;  // Position nom et point */
773     char tok;
774 
775     /*
776        {  // Rechercher premier /
777        char* a=fil+strlen(fil)-1;
778        // passer structures
779        while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) {
780        if (*a == '.')    // point? noter position
781        if (!dot_pos)
782        dot_pos=a;
783        a--;
784        }
785        if ((*a=='/') || (*a=='\\')) a++;
786        nom_pos = a;
787        }
788      */
789 
790     // Construire nom
791     while((*a) && (((int) (b - afs->save)) < HTS_URLMAXSIZE)) {      // parser, et pas trop long..
792       if (*a == '%') {
793         int short_ver = 0;
794 
795         a++;
796         if (*a == 's') {
797           short_ver = 1;
798           a++;
799         }
800         *b = '\0';
801         switch (tok = *a++) {
802         case '[':              // %[param:prefix_if_not_empty:suffix_if_not_empty:empty_replacement:notfound_replacement]
803           if (strchr(a, ']')) {
804             int pos = 0;
805             char name[5][256];
806             char *c = name[0];
807 
808             for(pos = 0; pos < 5; pos++) {
809               name[pos][0] = '\0';
810             }
811             pos = 0;
812             while(*a != '\0' && *a != ']') {
813               if (pos < 5) {
814                 if (*a == ':') {        // next token
815                   c = name[++pos];
816                   a++;
817                 } else {
818                   *c++ = *a++;
819                   *c = '\0';
820                 }
821               }
822             }
823             if (*a == ']') {
824               a++;
825             }
826             strcatbuff(name[0], "=");   /* param=.. */
827             c = strchr(fil_complete, '?');
828             /* parameters exists */
829             if (c) {
830               char *cp;
831 
832               while((cp = strstr(c + 1, name[0])) && *(cp - 1) != '?' && *(cp - 1) != '&') {    /* finds [?&]param= */
833                 c = cp;
834               }
835               if (cp) {
836                 c = cp + strlen(name[0]);       /* jumps "param=" */
837                 strcpybuff(b, name[1]); /* prefix */
838                 b += strlen(b);
839                 if (*c != '\0' && *c != '&') {
840                   char *d = name[0];
841 
842                   /* */
843                   while(*c != '\0' && *c != '&') {
844                     *d++ = *c++;
845                   }
846                   *d = '\0';
847                   d = unescape_http(catbuff, sizeof(catbuff), name[0]);
848                   if (d && *d) {
849                     strcpybuff(b, d);   /* value */
850                     b += strlen(b);
851                   } else {
852                     strcpybuff(b, name[3]);     /* empty replacement if any */
853                     b += strlen(b);
854                   }
855                 } else {
856                   strcpybuff(b, name[3]);       /* empty replacement if any */
857                   b += strlen(b);
858                 }
859                 strcpybuff(b, name[2]); /* suffix */
860                 b += strlen(b);
861               } else {
862                 strcpybuff(b, name[4]); /* not found replacement if any */
863                 b += strlen(b);
864               }
865             } else {
866               strcpybuff(b, name[4]);   /* not found replacement if any */
867               b += strlen(b);
868             }
869           }
870           break;
871         case '%':
872           *b++ = '%';
873           break;
874         case 'n':              // nom sans ext
875           *b = '\0';
876           if (dot_pos) {
877             if (!short_ver)     // Noms longs
878               strncatbuff(b, nom_pos, (int) (dot_pos - nom_pos));
879             else
880               strncatbuff(b, nom_pos, min((int) (dot_pos - nom_pos), 8));
881           } else {
882             if (!short_ver)     // Noms longs
883               strcpybuff(b, nom_pos);
884             else
885               strncatbuff(b, nom_pos, 8);
886           }
887           b += strlen(b);       // pointer à la fin
888           break;
889         case 'N':              // nom avec ext
890           // RECOPIE NOM + EXT
891           *b = '\0';
892           if (dot_pos) {
893             if (!short_ver)     // Noms longs
894               strncatbuff(b, nom_pos, (int) (dot_pos - nom_pos));
895             else
896               strncatbuff(b, nom_pos, min((int) (dot_pos - nom_pos), 8));
897           } else {
898             if (!short_ver)     // Noms longs
899               strcpybuff(b, nom_pos);
900             else
901               strncatbuff(b, nom_pos, 8);
902           }
903           b += strlen(b);       // pointer à la fin
904           // RECOPIE NOM + EXT
905           *b = '\0';
906           if (dot_pos) {
907             if (!short_ver)     // Noms longs
908               strcpybuff(b, dot_pos + 1);
909             else
910               strncatbuff(b, dot_pos + 1, 3);
911           } else {
912             if (!short_ver)     // Noms longs
913               strcpybuff(b, DEFAULT_EXT + 1);   // pas de..
914             else
915               strcpybuff(b, DEFAULT_EXT_SHORT + 1);     // pas de..
916           }
917           b += strlen(b);       // pointer à la fin
918           //
919           break;
920         case 't':              // ext
921           *b = '\0';
922           if (dot_pos) {
923             if (!short_ver)     // Noms longs
924               strcpybuff(b, dot_pos + 1);
925             else
926               strncatbuff(b, dot_pos + 1, 3);
927           } else {
928             if (!short_ver)     // Noms longs
929               strcpybuff(b, DEFAULT_EXT + 1);   // pas de..
930             else
931               strcpybuff(b, DEFAULT_EXT_SHORT + 1);     // pas de..
932           }
933           b += strlen(b);       // pointer à la fin
934           break;
935         case 'p':              // path sans dernier /
936           *b = '\0';
937           if (nom_pos != fil + 1) {     // pas: /index.html (chemin nul)
938             if (!short_ver) {   // Noms longs
939               strncatbuff(b, fil, (int) (nom_pos - fil) - 1);
940             } else {
941               char BIGSTK pth[HTS_URLMAXSIZE * 2], n83[HTS_URLMAXSIZE * 2];
942 
943               pth[0] = n83[0] = '\0';
944               //
945               strncatbuff(pth, fil, (int) (nom_pos - fil) - 1);
946               long_to_83(opt->savename_83, n83, pth);
947               strcpybuff(b, n83);
948             }
949           }
950           b += strlen(b);       // pointer à la fin
951           break;
952         case 'h':              // host (IDNA decoded if suitable)
953           // IDNA / RFC 3492 (Punycode) handling for HTTP(s)
954           {
955             DECLARE_ADR(final_adr);
956 
957             /* Copy address */
958             *b = '\0';
959             if (!short_ver)
960               strcpybuff(b, final_adr);
961             else
962               strcpybuff(b, final_adr);
963 
964             /* release */
965             RELEASE_ADR();
966           }
967           b += strlen(b);       // pointer à la fin
968           break;
969         case 'H':              // host, raw (old mode)
970           *b = '\0';
971           if (protocol == PROTOCOL_FILE) {
972             if (!short_ver)     // Noms longs
973               strcpybuff(b, "localhost");
974             else
975               strcpybuff(b, "local");
976           } else {
977             if (!short_ver)     // Noms longs
978               strcpybuff(b, print_adr);
979             else
980               strncatbuff(b, print_adr, 8);
981           }
982           b += strlen(b);       // pointer à la fin
983           break;
984         case 'M':              /* host/address?query MD5 (128-bits) */
985           *b = '\0';
986           {
987             char digest[32 + 2];
988             char BIGSTK buff[HTS_URLMAXSIZE * 2];
989 
990             digest[0] = buff[0] = '\0';
991             strcpybuff(buff, adr);
992             strcatbuff(buff, fil_complete);
993             domd5mem(buff, strlen(buff), digest, 1);
994             strcpybuff(b, digest);
995           }
996           b += strlen(b);       // pointer à la fin
997           break;
998         case 'Q':
999         case 'q':              /* query MD5 (128-bits/16-bits)
1000                                    GENERATED ONLY IF query string exists! */
1001           {
1002             char md5[32 + 2];
1003 
1004             *b = '\0';
1005             strncatbuff(b, url_md5(md5, fil_complete), (tok == 'Q') ? 32 : 4);
1006             b += strlen(b);     // pointer à la fin
1007           }
1008           break;
1009         case 'r':
1010         case 'R':              // protocol
1011           *b = '\0';
1012           strcatbuff(b, protocol_str[protocol]);
1013           b += strlen(b);       // pointer à la fin
1014           break;
1015 
1016           /* Patch by Juan Fco Rodriguez to get the full query string */
1017         case 'k':
1018           {
1019             char *d = strchr(fil_complete, '?');
1020 
1021             if (d != NULL) {
1022               strcatbuff(b, d);
1023               b += strlen(b);
1024             }
1025           }
1026           break;
1027 
1028         }
1029       } else
1030         *b++ = *a++;
1031     }
1032     *b++ = '\0';
1033     //
1034     // Types prédéfinis
1035     //
1036 
1037   }
1038   //
1039   // Structure originale
1040   else if (opt->savename_type % 100 == 0) {
1041     /* recopier www.. */
1042     if (opt->savename_type != 100) {
1043       if (((opt->savename_type / 1000) % 2) == 0) {     // >1000 signifie "pas de www/"
1044         DECLARE_ADR(final_adr);
1045 
1046         // adresse url
1047         if (!opt->savename_83) {      // noms longs (et pas de .)
1048           strcatbuff(afs->save, final_adr);
1049         } else {              // noms 8-3
1050           if (strlen(final_adr) > 4) {
1051             if (strfield(final_adr, "www."))
1052               hts_appendStringUTF8(afs->save, final_adr + 4, max_char);
1053             else
1054               hts_appendStringUTF8(afs->save, final_adr, max_char);
1055           } else
1056             hts_appendStringUTF8(afs->save, final_adr, max_char);
1057         }
1058 
1059         /* release */
1060         RELEASE_ADR();
1061 
1062         if (*fil != '/')
1063           strcatbuff(afs->save, "/");
1064       }
1065     }
1066 
1067     hts_lowcase(afs->save);
1068 
1069     /*
1070        // ne sert à rien car a déja été filtré normalement
1071        if ((*fil=='.') && (*(fil+1)=='/'))          // ./index.html ** //
1072        url_savename_addstr(save,fil+2);
1073        else                                               // index.html ou /index.html
1074        url_savename_addstr(save,fil);
1075        if (save[strlen(save)-1]=='/')
1076        strcatbuff(save,DEFAULT_HTML);     // nommer page par défaut!!
1077      */
1078 
1079     /* add name */
1080     ADD_STANDARD_PATH;
1081     ADD_STANDARD_NAME(0);
1082 
1083   }
1084   //
1085   // Structure html/image
1086   else {
1087     // dossier "web" ou "www.xxx" ?
1088     if (((opt->savename_type / 1000) % 2) == 0) {       // >1000 signifie "pas de www/"
1089       if ((opt->savename_type / 100) % 2) {
1090         DECLARE_ADR(final_adr);
1091 
1092         if (!opt->savename_83) {      // noms longs
1093           strcatbuff(afs->save, final_adr);
1094           strcatbuff(afs->save, "/");
1095         } else {              // noms 8-3
1096           if (strlen(final_adr) > 4) {
1097             if (strfield(final_adr, "www."))
1098               hts_appendStringUTF8(afs->save, final_adr + 4, max_char);
1099             else
1100               hts_appendStringUTF8(afs->save, final_adr, max_char);
1101             strcatbuff(afs->save, "/");
1102           } else {
1103             hts_appendStringUTF8(afs->save, final_adr, max_char);
1104             strcatbuff(afs->save, "/");
1105           }
1106         }
1107 
1108         /* release */
1109         RELEASE_ADR();
1110       } else {
1111         strcatbuff(afs->save, "web/");       // répertoire général
1112       }
1113     }
1114     // si un html à coup sûr
1115     if ((ext_chg != 0) ? (ishtml_ext(ext) == 1) : (ishtml(opt, fil) == 1)) {
1116       if (opt->savename_type % 100 == 2) {      // html/
1117         strcatbuff(afs->save, "html/");
1118       }
1119     } else {
1120       if ((opt->savename_type % 100 == 1) || (opt->savename_type % 100 == 2)) { // html & images
1121         strcatbuff(afs->save, "images/");
1122       }
1123     }
1124 
1125     switch (opt->savename_type % 100) {
1126     case 4:
1127     case 5:{                   // séparer par types
1128         const char *a = fil + strlen(fil) - 1;
1129 
1130         // passer structures
1131         while((a > fil) && (*a != '/') && (*a != '\\'))
1132           a--;
1133         if ((*a == '/') || (*a == '\\'))
1134           a++;
1135 
1136         // html?
1137         if ((ext_chg != 0) ? (ishtml_ext(ext) == 1) : (ishtml(opt, fil) == 1)) {
1138           if (opt->savename_type % 100 == 5)
1139             strcatbuff(afs->save, "html/");
1140         } else {
1141           const char *a = fil + strlen(fil) - 1;
1142 
1143           while((a > fil) && (*a != '/') && (*a != '.'))
1144             a--;
1145           if (*a != '.')
1146             strcatbuff(afs->save, "other");
1147           else
1148             strcatbuff(afs->save, a + 1);
1149           strcatbuff(afs->save, "/");
1150         }
1151         /*strcatbuff(save,a); */
1152         /* add name */
1153         ADD_STANDARD_NAME(0);
1154       }
1155       break;
1156     case 99:{                  // 'codé' .. c'est un gadget
1157         size_t i;
1158         size_t j;
1159         const char *a;
1160         char C[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-";
1161         int L;
1162 
1163         // pseudo-CRC sur fil et adr pour initialiser générateur aléatoire..
1164         unsigned int s = 0;
1165 
1166         L = (int) strlen(C);
1167         for(i = 0; fil_complete[i] != '\0'; i++) {
1168           s += (unsigned int) fil_complete[i];
1169         }
1170         for(i = 0; adr_complete[i] != '\0'; i++) {
1171           s += (unsigned int) adr_complete[i];
1172         }
1173         srand(s);
1174 
1175         j = strlen(afs->save);
1176         for(i = 0; i < 8; i++) {
1177           char c = C[(rand() % L)];
1178 
1179           afs->save[i + j] = c;
1180         }
1181         afs->save[i + j] = '\0';
1182         // ajouter extension
1183         a = fil + strlen(fil) - 1;
1184         while((a > fil) && (*a != '/') && (*a != '.'))
1185           a--;
1186         if (*a == '.') {
1187           strcatbuff(afs->save, a);  // ajouter
1188         }
1189       }
1190       break;
1191     default:{                  // noms sans les noms des répertoires
1192         // ne garder que le nom, pas la structure
1193         /*
1194            char* a=fil+strlen(fil)-1;
1195            while(((int) a>(int) fil) && (*a != '/') && (*a != '\\')) a--;
1196            if ((*a=='/') || (*a=='\\')) a++;
1197            strcatbuff(save,a);
1198          */
1199 
1200         /* add name */
1201         ADD_STANDARD_NAME(0);
1202       }
1203       break;
1204     }
1205 
1206     hts_lowcase(afs->save);
1207 
1208     if (afs->save[strlen(afs->save) - 1] == '/')
1209       strcatbuff(afs->save, DEFAULT_HTML);   // nommer page par défaut!!
1210   }
1211 
1212   // vérifier qu'on ne doit pas forcer l'extension
1213   // par exemple, asp sera sauvé en html, cgi en html ou gif, xbm etc.. selon les cas
1214   /*if (ext_chg) {
1215      char* a=save+strlen(save)-1;
1216      while(((int) a>(int) save) && (*a!='.') && (*a!='/')) a--;
1217      if (*a=='.') *a='\0';  // couper
1218      // recopier extension
1219      strcatbuff(save,".");
1220      strcatbuff(save,ext);    // copier ext
1221      } */
1222 
1223   // Not used anymore unless non-delayed types.
1224   // de même en cas de manque d'extension on en place une de manière forcée..
1225   // cela évite les /chez/toto et les /chez/toto/index.html incompatibles
1226   if (opt->savename_type != -1 && opt->savename_delayed != 2) {
1227     char *a = afs->save + strlen(afs->save) - 1;
1228 
1229     while((a > afs->save) && (*a != '.') && (*a != '/'))
1230       a--;
1231     if (*a != '.') {            // agh pas de point
1232       //strcatbuff(save,".none");                 // a éviter
1233       strcatbuff(afs->save, ".html");        // préférable!
1234       hts_log_print(opt, LOG_DEBUG, "Default HTML type set for %s%s => %s",
1235                     adr_complete, fil_complete, afs->save);
1236     }
1237   }
1238   // effacer pass au besoin pour les autentifications
1239   // (plus la peine : masqué au début)
1240 /*
1241   {
1242     char* a = jump_identification(afs->save);
1243     if (a!=afs->save) {
1244       char BIGSTK tempo[HTS_URLMAXSIZE*2];
1245       char *b;
1246       tempo[0]='\0';
1247       strcpybuff(tempo,"[");
1248       b=strchr(save,':');
1249       if (!b) b=strchr(save,'@');
1250       if (b)
1251         strncatbuff(tempo,save,(int) b-(int) a);
1252       strcatbuff(tempo,"]");
1253       strcatbuff(tempo,a);
1254       strcpybuff(save,a);
1255     }
1256   }
1257 */
1258 
1259   // éviter les / au début (cause: N100)
1260   if (afs->save[0] == '/') {
1261     char BIGSTK tempo[HTS_URLMAXSIZE * 2];
1262 
1263     strcpybuff(tempo, afs->save + 1);
1264     strcpybuff(afs->save, tempo);
1265   }
1266 
1267   /* Cleanup reserved or forbidden characters. */
1268   {
1269     size_t i;
1270     for(i = 0 ; afs->save[i] != '\0' ; i++) {
1271       unsigned char c = (unsigned char) afs->save[i];
1272       if (c < 32      // control
1273         || c == 127   // unwise
1274         || c == '~'   // unix unwise
1275         || c == '\\'  // windows separator
1276         || c == ':'   // windows forbidden
1277         || c == '*'   // windows forbidden
1278         || c == '?'   // windows forbidden
1279         || c == '\"'  // windows forbidden
1280         || c == '<'   // windows forbidden
1281         || c == '>'   // windows forbidden
1282         || c == '|'   // windows forbidden
1283         //|| c == '@' // ?
1284         ||
1285           (
1286             opt->savename_83 == 2 // CDROM
1287             &&
1288             (
1289               c == '-'
1290               || c == '='
1291               || c == '+'
1292             )
1293           )
1294         )
1295       {
1296          afs->save[i] = '_';
1297       }
1298     }
1299   }
1300 
1301   // éliminer les // (comme ftp://)
1302   cleanDoubleSlash(afs->save);
1303 
1304 #if HTS_OVERRIDE_DOS_FOLDERS
1305   /* Replace /foo/nul/bar by /foo/nul_/bar */
1306   {
1307     int i = 0;
1308 
1309     while(hts_tbdev[i][0]) {
1310       const char *a = afs->save;
1311 
1312       while((a = strstrcase(a, hts_tbdev[i]))) {
1313         switch ((int) a[strlen(hts_tbdev[i])]) {
1314         case '\0':
1315         case '/':
1316         case '.':
1317           {
1318             char BIGSTK tempo[HTS_URLMAXSIZE * 2];
1319 
1320             tempo[0] = '\0';
1321             strncatbuff(tempo, afs->save, (int) (a - afs->save) + strlen(hts_tbdev[i]));
1322             strcatbuff(tempo, "_");
1323             strcatbuff(tempo, a + strlen(hts_tbdev[i]));
1324             strcpybuff(afs->save, tempo);
1325           }
1326           break;
1327         }
1328         a += strlen(hts_tbdev[i]);
1329       }
1330       i++;
1331     }
1332   }
1333 
1334   /* Strip ending . or ' ' forbidden on windoz */
1335   cleanEndingSpaceOrDot(afs->save);
1336 
1337 #endif
1338 
1339   // conversion 8-3 .. y compris pour les répertoires
1340   if (opt->savename_83) {
1341     char BIGSTK n83[HTS_URLMAXSIZE * 2];
1342 
1343     long_to_83(opt->savename_83, n83, afs->save);
1344     strcpybuff(afs->save, n83);
1345   }
1346   // enforce stricter ISO9660 compliance (bug reported by Steffo Carlsson)
1347   // Level 1 File names are restricted to 8 characters with a 3 character extension,
1348   // upper case letters, numbers and underscore; maximum depth of directories is 8.
1349   // This will be our "DOS mode"
1350   // L2: 31 characters
1351   // A-Z,0-9,_
1352   if (opt->savename_83 > 0) {
1353     char *a, *last;
1354 
1355     for(last = afs->save + strlen(afs->save) - 1;
1356         last != afs->save && *last != '/' && *last != '\\' && *last != '.'; last--) ;
1357     if (*last != '.') {
1358       last = NULL;
1359     }
1360     for(a = afs->save; *a != '\0'; a++) {
1361       if (*a >= 'a' && *a <= 'z') {
1362         *a -= 'a' - 'A';
1363       } else if (*a == '.') {
1364         if (a != last) {
1365           *a = '_';
1366         }
1367       } else
1368         if (!
1369             ((*a >= 'A' && *a <= 'Z') || (*a >= '0' && *a <= '9') || *a == '_'
1370              || *a == '/' || *a == '\\')) {
1371         *a = '_';
1372       }
1373     }
1374   }
1375 
1376   /* ensure that there is no ../ (potential vulnerability) */
1377   fil_simplifie(afs->save);
1378 
1379   /* convert name to UTF-8 ? Note: already done while parsing. */
1380   //if (charset != NULL && charset[0] != '\0') {
1381   //  char *const s = hts_convertStringToUTF8(save, (int) strlen(save), charset);
1382 
1383   //  if (s != NULL) {
1384   //    hts_log_print(opt, LOG_DEBUG,
1385   //                  "engine: save-name: charset conversion from '%s' to '%s' using charset '%s'",
1386   //                  save, s, charset);
1387   //    strcpybuff(save, s);
1388   //    free(s);
1389   //  }
1390   //}
1391 
1392   /* callback */
1393   RUN_CALLBACK5(opt, savename, adr_complete, fil_complete, referer_adr,
1394                 referer_fil, afs->save);
1395 
1396   hts_log_print(opt, LOG_DEBUG, "engine: save-name: local name: %s%s -> %s",
1397                 adr, fil, afs->save);
1398 
1399   /* Ensure that the MANDATORY "temporary" extension is set */
1400   if (ext_chg_delayed) {
1401     char *ptr;
1402     char *lastDot = NULL;
1403 
1404     for(ptr = afs->save; *ptr != 0; ptr++) {
1405       if (*ptr == '.') {
1406         lastDot = ptr;
1407       } else if (*ptr == '/' || *ptr == '\\') {
1408         lastDot = NULL;
1409       }
1410     }
1411     if (lastDot == NULL) {
1412       strcatbuff(afs->save, "." DELAYED_EXT);
1413     } else if (!IS_DELAYED_EXT(afs->save)) {
1414       strcatbuff(lastDot, "." DELAYED_EXT);
1415     }
1416   }
1417   // enforce 260-character path limit before inserting destination path
1418   // note: 12 characters at least for WIN32, and 12 for ".99.delayed"
1419   // (MSDN) "When using an API to create a directory, the specified path
1420   // cannot be so long that you cannot append an 8.3 file name
1421   // (that is, the directory name cannot exceed MAX_PATH minus 12)."
1422 #define HTS_MAX_PATH_LEN ( 260 - 12 - 12 )
1423 #define MIN_LAST_SEG_RESERVE 12
1424 #define MAX_LAST_SEG_RESERVE 24
1425 #define MAX_SEG_LEN 48
1426   if (hts_stringLengthUTF8(afs->save) +
1427       hts_stringLengthUTF8(StringBuff(opt->path_html_utf8)) >=
1428       HTS_MAX_PATH_LEN) {
1429     // convert to Unicode (much simpler)
1430     size_t wsaveLen;
1431     hts_UCS4 *const wsave = hts_convertUTF8StringToUCS4(afs->save, strlen(afs->save), &wsaveLen);
1432     if (wsave != NULL) {
1433       const size_t parentLen =
1434         hts_stringLengthUTF8(StringBuff(opt->path_html_utf8));
1435       // parent path length is not insane (otherwise, ignore and pick 200 as
1436       // suffix length)
1437       const size_t maxLen =
1438         parentLen <
1439         HTS_MAX_PATH_LEN - HTS_MAX_PATH_LEN / 4
1440         ? HTS_MAX_PATH_LEN - parentLen : HTS_MAX_PATH_LEN;
1441       size_t i, j, lastSeg, lastSegSize, dirSize;
1442       char *saveFinal;
1443 
1444       // pick up last segment
1445       for(i = 0, lastSeg = 0; wsave[i] != '\0'; i++) {
1446         if (wsave[i] == '/') {
1447           lastSeg = i + 1;
1448         }
1449       }
1450       lastSegSize = wsaveLen - lastSeg;
1451       if (lastSegSize > MAX_LAST_SEG_RESERVE) {
1452         lastSegSize = MAX_LAST_SEG_RESERVE;
1453       }
1454       else if (lastSegSize < MIN_LAST_SEG_RESERVE) {
1455         lastSegSize = MIN_LAST_SEG_RESERVE;
1456       }
1457 
1458       // add as much pathes as we can.
1459       // note: i is in bytes, iUtf in characters
1460       for(i = 0, j = 0, dirSize = 0
1461         ; i + 1 < lastSeg && j + lastSegSize < maxLen; i++) {
1462           // reset segment counting
1463           if (wsave[i] == '/') {
1464             dirSize = 0;
1465           }
1466 
1467           // copy if not too long
1468           if (dirSize < MAX_SEG_LEN) {
1469             wsave[j++] = wsave[i];
1470             dirSize++;
1471           }
1472       }
1473 
1474       // last segment
1475       wsave[j++] = '/';
1476 #define MAX_UTF8_SEQ_CHARS 4
1477       for(i = lastSeg; wsave[i] != '\0' && j < maxLen; i++) {
1478         wsave[j++] = wsave[i];
1479       }
1480       // terminating \0
1481       wsave[j++] = '\0';
1482 
1483       // copy final name and cleanup
1484       saveFinal = hts_convertUCS4StringToUTF8(wsave, j);
1485       if (saveFinal != NULL) {
1486         strcpybuff(afs->save, saveFinal);
1487         free(saveFinal);
1488       } else {
1489         hts_log_print(opt, LOG_ERROR, "Could not revert to UTF-8: %s%s",
1490           adr_complete, fil_complete);
1491       }
1492       free(wsave);
1493 
1494       // log in debug
1495       hts_log_print(opt, LOG_DEBUG, "Too long filename shortened: %s%s => %s",
1496         adr_complete, fil_complete, afs->save);
1497     } else {
1498       hts_log_print(opt, LOG_ERROR, "Could not read UTF-8: %s", afs->save);
1499     }
1500 
1501     // Re-check again ending space or dot after cut (see bug #5)
1502     cleanEndingSpaceOrDot(afs->save);
1503   }
1504 #undef MAX_UTF8_SEQ_CHARS
1505 #undef MIN_LAST_SEG_RESERVE
1506 #undef HTS_MAX_PATH_LEN
1507 
1508   // chemin primaire éventuel A METTRE AVANT
1509   if (strnotempty(StringBuff(opt->path_html_utf8))) {
1510     char BIGSTK tempo[HTS_URLMAXSIZE * 2];
1511 
1512     strcpybuff(tempo, StringBuff(opt->path_html_utf8));
1513     strcatbuff(tempo, afs->save);
1514     strcpybuff(afs->save, tempo);
1515   }
1516   // vérifier que le nom n'est pas déja pris...
1517   if (opt->liens != NULL) {
1518     int nom_ok;
1519 
1520     do {
1521       int i;
1522 
1523       //
1524       nom_ok = 1;               // à priori bon
1525       // on part de la fin pour optimiser, plus les opti de taille pour
1526       // aller encore plus vite..
1527 #if DEBUG_SAVENAME
1528       printf("\nStart search\n");
1529 #endif
1530 
1531       i = hash_read(hash, afs->save, NULL, HASH_STRUCT_FILENAME);      // lecture type 0 (sav)
1532       if (i >= 0) {
1533         int sameAdr = (strfield2(heap(i)->adr, normadr) != 0);
1534         int sameFil;
1535 
1536         // NO - URL hack is only for stripping // and www.
1537         //if (opt->urlhack != 0)
1538         //  sameFil = ( strfield2(heap(i)->fil, normfil) != 0);
1539         //else
1540         sameFil = (strcmp(heap(i)->fil, normfil) == 0);
1541         if (sameAdr && sameFil) {       // ok c'est le même lien, adresse déja définie
1542           /* Take the existing name not to screw up with cAsE sEnSiTiViTy of Linux/Unix */
1543           if (strcmp(heap(i)->sav, afs->save) != 0) {
1544             strcpybuff(afs->save, heap(i)->sav);
1545           }
1546           i = 0;
1547 #if DEBUG_SAVENAME
1548           printf("\nOK ALREADY DEFINED\n", 13, i);
1549 #endif
1550         } else {                // utilisé par un AUTRE, changer de nom
1551           char BIGSTK tempo[HTS_URLMAXSIZE * 2];
1552           char *a = afs->save + strlen(afs->save) - 1;
1553           char *b;
1554           int n = 2;
1555           char collisionSeparator = ((opt->savename_83 != 2) ? '-' : '_');
1556 
1557           tempo[0] = '\0';
1558 
1559 #if DEBUG_SAVENAME
1560           printf("\nWRONG CASE UNMATCH : \n%s\n%s, REDEFINE\n", heap(i)->fil,
1561                  fil_complete);
1562 #endif
1563           nom_ok = 0;
1564           i = 0;
1565 
1566           while((a > afs->save) && (*a != '.') && (*a != '\\') && (*a != '/'))
1567             a--;
1568           if (*a == '.')
1569             strncatbuff(tempo, afs->save, a - afs->save);
1570           else
1571             strcatbuff(tempo, afs->save);
1572 
1573           // tester la présence d'un -xx (ex: index-2.html -> index-3.html)
1574           b = tempo + strlen(tempo) - 1;
1575           while(isdigit((unsigned char) *b))
1576             b--;
1577           if (*b == collisionSeparator) {
1578             sscanf(b + 1, "%d", &n);
1579             *b = '\0';          // couper
1580             n++;                // plus un
1581           }
1582           // en plus il faut gérer le 8-3 .. pas facile le client
1583           if (opt->savename_83) {
1584             int max;
1585             char *a = tempo + strlen(tempo) - 1;
1586 
1587             while((a > tempo) && (*a != '/'))
1588               a--;
1589             if (*a == '/')
1590               a++;
1591             max = max_char - 1 - nombre_digit(n);
1592             if ((int) strlen(a) > max)
1593               *(a + max) = '\0';        // couper sinon il n'y aura pas la place!
1594           }
1595           // ajouter -xx (ex: index.html -> index-2.html)
1596           sprintf(tempo + strlen(tempo), "%c%d", collisionSeparator, n);
1597 
1598           // ajouter extension
1599           if (*a == '.')
1600             strcatbuff(tempo, a);
1601 
1602           strcpybuff(afs->save, tempo);
1603 
1604           //printf("switched: %s\n",save);
1605 
1606         }                       // if
1607       }
1608 #if DEBUG_SAVENAME
1609       printf("\nEnd search, %s\n", fil_complete);
1610 #endif
1611     } while(!nom_ok);
1612 
1613   }
1614   //printf("'%s' %s %s\n",save,adr,fil);
1615 
1616   return 0;
1617 }
1618 
/* Name with md5, used everywhere */
/*
 * Build a file name into "b" out of three consecutive parts: the name found
 * at nom_pos, the first 4 characters of url_md5(fil), and the extension that
 * follows dot_pos.
 *
 * b         : output buffer; reset to an empty string before anything is
 *             appended. No size is passed in, so the caller has to provide
 *             enough room.
 * dot_pos   : position of the extension dot, or NULL when there is none.
 *             When set, the name part is the span [nom_pos, dot_pos).
 * nom_pos   : start of the name part.
 * fil       : string handed to url_md5() to obtain the digest characters.
 * short_ver : non-zero caps the name at 8 characters and the extension at
 *             3 characters; zero copies both in full.
 */
void standard_name(char *b, const char *dot_pos, const char *nom_pos, const char *fil,
                   int short_ver) {
  char digest[32 + 2];

  b[0] = '\0';

  /* Name part */
  if (short_ver) {
    /* short names: never more than 8 characters */
    if (dot_pos)
      strncatbuff(b, nom_pos, min(dot_pos - nom_pos, 8));
    else
      strncatbuff(b, nom_pos, 8);
  } else {
    /* long names: everything before the dot, or the whole string */
    if (dot_pos)
      strncatbuff(b, nom_pos, (dot_pos - nom_pos));
    else
      strcatbuff(b, nom_pos);
  }

  /* Digest part: only the first 4 characters are kept
     (noted as "16 bits" by the original author) */
  strncatbuff(b, url_md5(digest, fil), 4);

  /* Extension part */
  if (dot_pos) {
    strcatbuff(b, ".");
    if (short_ver)
      strncatbuff(b, dot_pos + 1, 3);
    else
      strcatbuff(b, dot_pos + 1);
  }
  // A name without extension is left as is, unless
  // DO_NOT_ALLOW_EXTENSIONLESS is defined
#ifdef DO_NOT_ALLOW_EXTENSIONLESS
  else {
    if (short_ver)
      strcatbuff(b, DEFAULT_EXT_SHORT);
    else
      strcatbuff(b, DEFAULT_EXT);
  }
#endif
}
1657 
1658 /* Petit md5 */
url_md5(char * digest,const char * fil)1659 char *url_md5(char *digest, const char *fil) {
1660   char *a;
1661 
1662   digest[0] = '\0';
1663   a = strchr(fil, '?');
1664   if (a) {
1665     if (strlen(a)) {
1666       char BIGSTK buff[HTS_URLMAXSIZE * 2];
1667 
1668       a++;
1669       digest[0] = buff[0] = '\0';
1670       strcatbuff(buff, a);      /* query string MD5 */
1671       domd5mem(buff, strlen(buff), digest, 1);
1672     }
1673   }
1674   return digest;
1675 }
1676 
1677 // interne à url_savename: ajoute une chaîne à une autre avec \ -> /
/*
 * Append "s" to the string already held in "d", writing a '/' for every
 * backslash found in "s". All other characters are copied unchanged.
 *
 * d : NUL-terminated destination; no size is passed in, so the caller must
 *     guarantee room for strlen(d) + strlen(s) + 1 bytes.
 * s : NUL-terminated string to append.
 */
void url_savename_addstr(char *d, const char *s) {
  char *out = d + strlen(d);    /* current end of the destination */

  for (; *s != '\0'; s++)
    *out++ = (*s == '\\') ? '/' : *s;

  *out = '\0';
}
1690 
1691 /* "filename" should be at least 64 bytes. */
url_savename_refname(const char * adr,const char * fil,char * filename)1692 void url_savename_refname(const char *adr, const char *fil, char *filename) {
1693   unsigned char bindigest[16];
1694   struct MD5Context ctx;
1695 
1696   MD5Init(&ctx, 0);
1697   MD5Update(&ctx, (const unsigned char *) adr, (int) strlen(adr));
1698   MD5Update(&ctx, (const unsigned char *) ",", 1);
1699   MD5Update(&ctx, (const unsigned char *) fil, (int) strlen(fil));
1700   MD5Final(bindigest, &ctx);
1701   sprintf(filename,
1702           CACHE_REFNAME "/" "%02x%02x%02x%02x%02x%02x%02x%02x"
1703           "%02x%02x%02x%02x%02x%02x%02x%02x" ".ref", bindigest[0], bindigest[1],
1704           bindigest[2], bindigest[3], bindigest[4], bindigest[5], bindigest[6],
1705           bindigest[7], bindigest[8], bindigest[9], bindigest[10],
1706           bindigest[11], bindigest[12], bindigest[13], bindigest[14],
1707           bindigest[15]);
1708 }
1709 
1710 /* note: return a local filename */
/*
 * Full path of the reference file associated with (adr, fil): the name
 * computed by url_savename_refname() joined to opt->path_log by fconcat().
 *
 * Returns the value of fconcat(), which is given the buffer obtained
 * through OPT_GET_BUFF(opt). NOTE(review): the result presumably lives in
 * storage attached to "opt" and must not be freed by the caller - confirm
 * against fconcat()/OPT_GET_BUFF.
 */
char *url_savename_refname_fullpath(httrackp * opt, const char *adr,
                                    const char *fil) {
  char refname[64];             /* size requested by url_savename_refname() */

  url_savename_refname(adr, fil, refname);

  return fconcat(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt),
                 StringBuff(opt->path_log), refname);
}
1719 
1720 /* remove refname if any */
/*
 * Delete the reference file associated with (adr, fil), if any.
 * The path comes from url_savename_refname_fullpath(); the result of
 * UNLINK() is deliberately ignored, so a missing file is not reported.
 */
void url_savename_refname_remove(httrackp * opt, const char *adr,
                                 const char *fil) {
  char *const ref_path = url_savename_refname_fullpath(opt, adr, fil);

  (void) UNLINK(ref_path);
}
1727