1 /* ------------------------------------------------------------ */
2 /*
3 HTTrack Website Copier, Offline Browser for Windows and Unix
4 Copyright (C) 1998-2017 Xavier Roche and other contributors
5 
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 
19 Important notes:
20 
21 - We hereby ask people using this source NOT to use it in purpose of grabbing
22 emails addresses, or collecting any other private information on persons.
23 This would disgrace our work, and spoil the many hours we spent on it.
24 
25 Please visit our Website: http://www.httrack.com
26 */
27 
28 /* ------------------------------------------------------------ */
29 /* File: httrack.c subroutines:                                 */
30 /*       robots.txt (website robot file)                        */
31 /* Author: Xavier Roche                                         */
32 /* ------------------------------------------------------------ */
33 
34 /* Internal engine bytecode */
35 #define HTS_INTERNAL_BYTECODE
36 
37 /* specific definitions */
38 #include "htscore.h"
39 #include "htsbase.h"
40 #include "htslib.h"
41 /* END specific definitions */
42 
43 #include "htsrobots.h"
44 
45 // -- robots --
46 
// fil="" : check whether a rule is already registered
/*
 * Check whether the path `fil` on host `adr` is forbidden by the rules
 * stored in the `robots` list.
 *
 * robots: head of the linked list of per-host rule entries (may be NULL).
 * adr:    host address, compared to each entry's `adr` with strfield2().
 * fil:    path to test; an empty string only asks whether an entry exists
 *         for `adr`.
 *
 * Returns -1 when the path is forbidden (or, for an empty `fil`, when an
 * entry exists for `adr`), and 0 otherwise.
 */
int checkrobots(robots_wizard * robots, const char *adr, const char *fil) {
  for (; robots != NULL; robots = robots->next) {
    if (strfield2(robots->adr, adr)) {
      char rule[250];
      int offset;

      // Empty path: the caller is only probing for an entry for this host
      if (fil[0] == '\0')
        return -1;

      if (strnotempty(robots->token)) {
        offset = 0;
        do {
          // NOTE(review): binput() presumably extracts the next line of the
          // token list into `rule` (at most 200 chars) and returns the number
          // of bytes consumed -- confirm against its definition.
          offset += binput(robots->token + offset, rule, 200);
          if (rule[0] != '/') {
            // relative rule: tested with strstrcase() against the whole path
            if (strstrcase(fil, rule))
              return -1;
          } else if (strfield(fil, rule)) {
            // absolute rule: the path begins with the rule => forbidden
            return -1;
          }
        } while((strnotempty(rule)) && (offset < (int) strlen(robots->token)));
      }
    }
  }
  return 0;
}
/*
 * Store the rule list `data` for host `adr` in the `robots` list.
 *
 * If an entry whose `adr` matches (strfield2) already exists, its token is
 * overwritten; otherwise a new entry is appended at the end of the list.
 *
 * robots: head of the linked list of per-host rule entries (may be NULL).
 * adr:    host address; rejected if it does not fit in the `adr` field.
 * data:   rule list; rejected if it does not fit in the `token` field.
 *
 * Returns -1 when the rule was stored (entry updated or appended), and 0
 * when nothing was stored (value too long, NULL list, or allocation
 * failure).
 */
int checkrobots_set(robots_wizard * robots, const char *adr, const char *data) {
  // Refuse values that would not fit in the fixed-size fields (2 bytes spare)
  if (strlen(adr) >= sizeof(robots->adr) - 2)
    return 0;
  if (strlen(data) >= sizeof(robots->token) - 2)
    return 0;
  while(robots) {
    if (strfield2(robots->adr, adr)) {  // entry already exists: update it
      strcpybuff(robots->token, data);
#if DEBUG_ROBOTS
      printf("robots.txt: set %s to %s\n", adr, data);
#endif
      return -1;
    } else if (!robots->next) {         // end of list reached: append
      robots_wizard *created =
        (robots_wizard *) calloct(1, sizeof(robots_wizard));

      if (!created) {
#if DEBUG_ROBOTS
        printf("malloc error!!\n");
#endif
        return 0;
      }
      created->next = NULL;
      strcpybuff(created->adr, adr);
      strcpybuff(created->token, data);
      // Link the node only once it is fully initialised
      robots->next = created;
#if DEBUG_ROBOTS
      printf("robots.txt: new set %s to %s\n", adr, data);
#endif
      // Return right away instead of relying on the next iteration
      // re-matching the node that was just appended: no second copy of the
      // token, and termination no longer depends on strfield2().
      return -1;
    }
    robots = robots->next;
  }
  return 0;
}
/*
 * Release every entry chained after `robots`.
 *
 * The head node itself is not freed; on return its `next` pointer is NULL.
 * A NULL `robots` is accepted and does nothing.
 *
 * The list is walked iteratively so that stack usage does not grow with the
 * number of entries.
 */
void checkrobots_free(robots_wizard * robots) {
  robots_wizard *node;

  if (robots == NULL)
    return;

  // Detach the chain from the head first, then release it node by node
  node = robots->next;
  robots->next = NULL;
  while (node != NULL) {
    // Save the successor before the node is released
    robots_wizard *const following = node->next;

    freet(node);
    node = following;
  }
}
115 
116 // -- robots --
117