1 /* ------------------------------------------------------------ */
2 /*
3 HTTrack Website Copier, Offline Browser for Windows and Unix
4 Copyright (C) 1998-2017 Xavier Roche and other contributors
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19 Important notes:
20
21 - We hereby ask people using this source NOT to use it in purpose of grabbing
22 emails addresses, or collecting any other private information on persons.
23 This would disgrace our work, and spoil the many hours we spent on it.
24
25 Please visit our Website: http://www.httrack.com
26 */
27
28 /* ------------------------------------------------------------ */
29 /* File: httrack.c subroutines: */
30 /* robots.txt (website robot file) */
31 /* Author: Xavier Roche */
32 /* ------------------------------------------------------------ */
33
34 /* Internal engine bytecode */
35 #define HTS_INTERNAL_BYTECODE
36
37 /* specific definitions */
38 #include "htscore.h"
39 #include "htsbase.h"
40 #include "htslib.h"
41 /* END specific definitions */
42
43 #include "htsrobots.h"
44
45 // -- robots --
46
47 // fil="" : vérifier si règle déja enregistrée
checkrobots(robots_wizard * robots,const char * adr,const char * fil)48 int checkrobots(robots_wizard * robots, const char *adr, const char *fil) {
49 while(robots) {
50 if (strfield2(robots->adr, adr)) {
51 if (fil[0]) {
52 int ptr = 0;
53 char line[250];
54
55 if (strnotempty(robots->token)) {
56 do {
57 ptr += binput(robots->token + ptr, line, 200);
58 if (line[0] == '/') { // absolu
59 if (strfield(fil, line)) { // commence avec ligne
60 return -1; // interdit
61 }
62 } else { // relatif
63 if (strstrcase(fil, line)) {
64 return -1;
65 }
66 }
67 } while((strnotempty(line)) && (ptr < (int) strlen(robots->token)));
68 }
69 } else {
70 return -1;
71 }
72 }
73 robots = robots->next;
74 }
75 return 0;
76 }
/*
 * Store `data` as the rule text associated with host `adr`.
 *
 * Inputs whose length does not leave at least two spare bytes in the
 * fixed-size `adr` / `token` fields are rejected. The list is then walked:
 * an entry whose address matches `adr` (per strfield2) has its token
 * overwritten; if the tail is reached without a match, a new entry is
 * allocated with calloct, filled in and appended.
 *
 * Note that after appending, the walk steps onto the new entry, so when
 * strfield2 matches it the token is copied a second time and -1 is returned
 * from there.
 *
 * Returns -1 once a matching entry has been written; 0 when the input is
 * too long, the list is empty, or no matching entry could be reached
 * (e.g. allocation failure).
 */
int checkrobots_set(robots_wizard * robots, const char *adr, const char *data) {
  robots_wizard *node;

  // sizeof is not evaluated, so `robots` may be NULL here
  if (((int) strlen(adr)) >= sizeof(robots->adr) - 2
      || ((int) strlen(data)) >= sizeof(robots->token) - 2)
    return 0;

  for (node = robots; node != NULL; node = node->next) {
    robots_wizard *fresh;

    if (strfield2(node->adr, adr)) {    // entry exists: replace its rules
      strcpybuff(node->token, data);
#if DEBUG_ROBOTS
      printf("robots.txt: set %s to %s\n", adr, data);
#endif
      return -1;
    }
    if (node->next != NULL)
      continue;                 // not at the tail yet

    // tail reached without a match: append a new entry
    fresh = (robots_wizard *) calloct(1, sizeof(robots_wizard));
    node->next = fresh;
    if (fresh != NULL) {
      fresh->next = NULL;
      strcpybuff(fresh->adr, adr);
      strcpybuff(fresh->token, data);
#if DEBUG_ROBOTS
      printf("robots.txt: new set %s to %s\n", adr, data);
#endif
    }
#if DEBUG_ROBOTS
    else
      printf("malloc error!!\n");
#endif
    // the loop step now moves onto the new entry (or ends if allocation failed)
  }
  return 0;
}
/*
 * Release every entry chained after `robots`.
 *
 * The head entry itself is not freed (it is owned by the caller); on return
 * its `next` link is NULL. A NULL `robots` is accepted and ignored.
 *
 * The chain is released iteratively so that stack usage does not grow with
 * the number of recorded hosts.
 */
void checkrobots_free(robots_wizard * robots) {
  robots_wizard *node;

  if (robots == NULL)
    return;

  // detach the chain from the head first, then release it entry by entry
  node = robots->next;
  robots->next = NULL;
  while (node != NULL) {
    robots_wizard *following = node->next;      // read before the entry is freed

    freet(node);
    node = following;
  }
}
115
116 // -- robots --
117