1 /*
2    sitecopy, for managing remote web sites. Stored state handling routines.
3    Copyright (C) 1999-2006, Joe Orton <joe@manyfish.co.uk>
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 
19 */
20 
21 #include "config.h"
22 
23 #include <sys/stat.h>
24 
25 #ifdef HAVE_UNISTD_H
26 #include <unistd.h>
27 #endif
28 #ifdef HAVE_STDLIB_H
29 #include <stdlib.h>
30 #endif
31 #ifdef HAVE_STRING_H
32 #include <string.h>
33 #endif
34 
35 #ifdef HAVE_LIMITS_H
36 #include <limits.h>
37 #endif
38 
39 #include <ctype.h>
40 #include <errno.h>
41 #include <stdio.h>
42 
43 #include <ne_xml.h>
44 #include <ne_dates.h>
45 #include <ne_alloc.h>
46 #include <ne_string.h>
47 
48 #include "i18n.h"
49 #include "common.h"
50 #include "sitesi.h"
51 
/* Version string written into the 'version' attribute of the root
 * <sitestate> element.  Bump the major number whenever a
 * backwardly-incompatible change is made to the file format. */
#define SITE_STATE_FILE_VERSION "1.0"

/* Sentinel stored in file_state.mode meaning "no permission bits known". */
#define INVALID_MODE ((mode_t)-1)
59 
60 /* Opens the storage file for writing */
site_open_storage_file(struct site * site)61 FILE *site_open_storage_file(struct site *site)
62 {
63     if (site->storage_file == NULL) {
64 	site->storage_file = fopen(site->infofile, "w" FOPEN_BINARY_FLAGS);
65     }
66     return site->storage_file;
67 }
68 
site_close_storage_file(struct site * site)69 int site_close_storage_file(struct site *site)
70 {
71     int ret = fclose(site->storage_file);
72     site->storage_file = NULL;
73     return ret;
74 }
75 
/* Return a newly allocated, escaped copy of 'filename'.
 *
 * Bytes below 0x80 that are alphanumeric (per isalnum), or one of '/',
 * '.' and '-', are copied through unchanged.  Every other byte is
 * written as '%' followed by two lowercase hex digits, so the result
 * contains no XML-unsafe characters.  An empty filename yields an empty
 * string.  The result is allocated with ne_malloc(). */
static char *fn_escape(const char *filename)
{
    static const char hexdigits[] = "0123456789abcdef";
    const unsigned char *pnt = (const unsigned char *)filename;
    /* Worst case: every input byte expands to three output bytes. */
    char *ret = ne_malloc(strlen(filename) * 3 + 1), *p = ret;

    /* Test the terminator BEFORE processing a byte: a do/while here would
     * escape the NUL of an empty string and run off the end of both the
     * input and the (one byte) output buffer. */
    for (; *pnt != '\0'; pnt++) {
        if (*pnt <= 0x7f
            && (isalnum(*pnt) || *pnt == '/' || *pnt == '.' || *pnt == '-')) {
            *p++ = (char)*pnt;
        } else {
            *p++ = '%';
            *p++ = hexdigits[*pnt >> 4];
            *p++ = hexdigits[*pnt & 0x0f];
        }
    }

    *p = '\0';

    return ret;
}
97 
/* Return a newly allocated, unescaped copy of 'filename'; the reverse of
 * fn_escape.
 *
 * Each '%' that is followed by two hex digits is replaced by the byte
 * those digits encode.  A '%' that is NOT followed by two hex digits
 * (malformed or truncated input) is copied through literally rather
 * than decoded.  An empty input yields an empty string.  The result is
 * allocated with ne_malloc() and is never longer than the input. */
static char *fn_unescape(const char *filename)
{
    const unsigned char *pnt = (const unsigned char *)filename;
    char *ret = ne_malloc(strlen(filename) + 1), *p = ret;

    /* Check for the terminator before consuming anything, so that an
     * empty string, or a '%' near the end of the string, can never move
     * 'pnt' past the NUL. */
    while (*pnt != '\0') {
        /* isxdigit(pnt[1]) is false for NUL, so pnt[2] is only read
         * when pnt[1] is a real character. */
        if (pnt[0] == '%' && isxdigit(pnt[1]) && isxdigit(pnt[2])) {
            *p++ = (char)(((NE_ASC2HEX(pnt[1]) << 4) & 0xf0)
                          | (NE_ASC2HEX(pnt[2]) & 0x0f));
            pnt += 3;
        } else {
            *p++ = (char)*pnt++;
        }
    }

    *p = '\0';

    return ret;
}
118 
119 /* Write out the stored state for the site.
120  * Returns 0 on success, non-zero on error. */
site_write_stored_state(struct site * site)121 int site_write_stored_state(struct site *site)
122 {
123     struct site_file *current;
124     FILE *fp = site_open_storage_file(site);
125 
126     if (fp == NULL) {
127 	return -1;
128     }
129 
130     fprintf(fp, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n");
131     fprintf(fp, "<sitestate version='" SITE_STATE_FILE_VERSION "'>\n");
132     fprintf(fp, "<options>\n");
133     fprintf(fp, " <saved-by package='" PACKAGE_NAME "'"
134 	    " version='" PACKAGE_VERSION "'/>\n");
135     if (site->state_method == state_checksum) {
136 	/* For forwards-compatibility */
137 	fprintf(fp, " <checksum-algorithm><checksum-MD5/></checksum-algorithm>\n");
138     }
139     fprintf(fp, " <state-method><state-%s/></state-method>\n",
140 	     (site->state_method==state_checksum)?"checksum":"timesize");
141     if (site->safemode) {
142 	fprintf(fp, " <safemode/>\n");
143     }
144     fprintf(fp, " <escaped-filenames/>\n");
145     fprintf(fp, "</options>\n");
146     fprintf(fp, "<items>\n");
147     /* Now write out the items */
148     for (current = site->files; current!=NULL; current = current->next) {
149 	char *fname;
150 	if (!current->stored.exists) continue;
151 	fprintf(fp, "<item>");
152 	fprintf(fp, "<type><type-%s/></type>",
153 		 (current->type==file_file)?"file":(
154 		     (current->type==file_dir)?"directory":"link"));
155         /* escape filenames correctly for XML. */
156         fname = fn_escape(current->stored.filename);
157 	fprintf(fp, "<filename>%s</filename>\n", fname);
158         ne_free(fname);
159         if (current->stored.mode != INVALID_MODE) {
160             fprintf(fp, "<protection>%03o</protection>",
161                     current->stored.mode); /* three-digit octal */
162         }
163 	switch (current->type) {
164 	case file_link:
165 	    fprintf(fp, "<linktarget>%s</linktarget>",
166 		     current->stored.linktarget);
167 	    break;
168 	case file_file:
169 	    fprintf(fp, "<size>%" NE_FMT_OFF_T "</size>",
170 		    current->stored.size);
171 	    switch (site->state_method) {
172 	    case state_checksum: {
173 		char csum[33];
174 		ne_md5_to_ascii(current->stored.checksum, csum);
175 		fprintf(fp, "<checksum>%s</checksum>", csum);
176 	    } break;
177 	    case state_timesize:
178 		fprintf(fp, "<modtime>%ld</modtime>", current->stored.time);
179 		break;
180 	    }
181 	    fprintf(fp, "<ascii>%s</ascii>",
182 		     current->stored.ascii?"<true/>":"<false/>");
183 	    if (current->server.exists) {
184 		fprintf(fp, "<server-modtime>%ld</server-modtime>",
185 			 current->server.time);
186 	    }
187 	    break;
188 	case file_dir:
189 	    /* nothing to do */
190 	    break;
191 	}
192 	fprintf(fp, "</item>\n");
193     }
194     fprintf(fp, "</items>\n");
195     fprintf(fp, "</sitestate>\n");
196     site->stored_state_method = site->state_method;
197     return site_close_storage_file(site);
198 }
199 
/* neon ne_xml-based XML parsing */

/* Parser state ids, one per element name recognised in the state file.
 * The ids are consecutive, starting at ELM_BASE + 1. */
enum {
    ELM_BASE = 500,
    SITE_ELM_sitestate,                 /* ELM_BASE + 1 */
    SITE_ELM_options,                   /* ELM_BASE + 2 */
    SITE_ELM_opt_saved_by,              /* ELM_BASE + 3 */
    SITE_ELM_opt_checksum,              /* ELM_BASE + 4 */
    SITE_ELM_opt_checksum_md5,          /* ELM_BASE + 5 */
    SITE_ELM_opt_state_method,          /* ELM_BASE + 6 */
    SITE_ELM_opt_state_method_timesize, /* ELM_BASE + 7 */
    SITE_ELM_opt_state_method_checksum, /* ELM_BASE + 8 */
    SITE_ELM_items,                     /* ELM_BASE + 9 */
    SITE_ELM_item,                      /* ELM_BASE + 10 */
    SITE_ELM_type,                      /* ELM_BASE + 11 */
    SITE_ELM_type_file,                 /* ELM_BASE + 12 */
    SITE_ELM_type_directory,            /* ELM_BASE + 13 */
    SITE_ELM_type_link,                 /* ELM_BASE + 14 */
    SITE_ELM_filename,                  /* ELM_BASE + 15 */
    SITE_ELM_size,                      /* ELM_BASE + 16 */
    SITE_ELM_modtime,                   /* ELM_BASE + 17 */
    SITE_ELM_ascii,                     /* ELM_BASE + 18 */
    SITE_ELM_linktarget,                /* ELM_BASE + 19 */
    SITE_ELM_checksum,                  /* ELM_BASE + 20 */
    SITE_ELM_protection,                /* ELM_BASE + 21 */
    SITE_ELM_server_modtime,            /* ELM_BASE + 22 */
    SITE_ELM_true,                      /* ELM_BASE + 23 */
    SITE_ELM_false                      /* ELM_BASE + 24 */
};
227 
228 static const struct ne_xml_idmap elmmap[] = {
229     { "", "sitestate", SITE_ELM_sitestate },
230     { "", "options", SITE_ELM_options },
231     { "", "saved-by", SITE_ELM_opt_saved_by },
232     { "", "checksum-algorithm", SITE_ELM_opt_checksum },
233     { "", "checksum-MD5", SITE_ELM_opt_checksum_md5 },
234     { "", "state-method", SITE_ELM_opt_state_method },
235     { "", "state-timesize", SITE_ELM_opt_state_method_timesize },
236     { "", "state-checksum", SITE_ELM_opt_state_method_checksum },
237     { "", "items", SITE_ELM_items },
238     { "", "item", SITE_ELM_item },
239     { "", "type", SITE_ELM_type },
240     { "", "type-file", SITE_ELM_type_file },
241     { "", "type-directory", SITE_ELM_type_directory },
242     { "", "type-link", SITE_ELM_type_link },
243     { "", "filename", SITE_ELM_filename },
244     { "", "size", SITE_ELM_size },
245     { "", "modtime", SITE_ELM_modtime },
246     { "", "ascii", SITE_ELM_ascii },
247     { "", "linktarget", SITE_ELM_linktarget },
248     { "", "checksum", SITE_ELM_checksum },
249     { "", "protection", SITE_ELM_protection },
250     { "", "server-modtime", SITE_ELM_server_modtime },
251     { "", "true", SITE_ELM_true },
252     { "", "false", SITE_ELM_false }
253 };
254 
255 struct site_xmldoc {
256     ne_xml_parser *parser;
257     struct site *site;
258     /* What we've collected so far */
259     enum file_type type;
260     struct file_state stored;
261     struct file_state server;
262     ne_buffer *cdata;
263     unsigned int truth:2; /* 0: invalid, 1: true, 2: false */
264 };
265 
start_element(void * userdata,int parent,const char * nspace,const char * name,const char ** atts)266 static int start_element(void *userdata, int parent,
267                          const char *nspace, const char *name,
268                          const char **atts)
269 {
270     int state = ne_xml_mapid(elmmap, NE_XML_MAPLEN(elmmap), nspace, name);
271     struct site_xmldoc *doc = userdata;
272 
273     if (state)
274         ne_buffer_clear(doc->cdata);
275 
276     if (state == SITE_ELM_item) {
277         /* Clear current stored state */
278         memset(&doc->stored, 0, sizeof doc->stored);
279         /* Initialize perms bits to invalid state */
280         doc->stored.mode = INVALID_MODE;
281     }
282 
283     if (state == SITE_ELM_ascii) {
284         doc->truth = 0;
285     }
286 
287     return state;
288 }
289 
char_data(void * userdata,int state,const char * cdata,size_t len)290 static int char_data(void *userdata, int state, const char *cdata, size_t len)
291 {
292     struct site_xmldoc *doc = userdata;
293     ne_buffer_append(doc->cdata, cdata, len);
294     return 0;
295 }
296 
end_element(void * userdata,int state,const char * nspace,const char * name)297 static int end_element(void *userdata, int state,
298                        const char *nspace, const char *name)
299 {
300     struct site_xmldoc *doc = userdata;
301     const char *cdata = doc->cdata->data;
302     char err[512];
303 
304     /* Dispatch Ajax */
305     switch (state) {
306     case SITE_ELM_opt_state_method_timesize:
307 	doc->site->stored_state_method = state_timesize;
308 	break;
309     case SITE_ELM_opt_state_method_checksum:
310 	doc->site->stored_state_method = state_checksum;
311 	break;
312     case SITE_ELM_type_file:
313 	doc->type = file_file;
314 	break;
315     case SITE_ELM_type_directory:
316 	doc->type = file_dir;
317 	break;
318     case SITE_ELM_type_link:
319 	doc->type = file_link;
320 	break;
321     case SITE_ELM_filename:
322 	doc->stored.filename = fn_unescape(cdata);
323 	break;
324     case SITE_ELM_checksum:
325 	if (strlen(cdata) > 32) {
326             ne_snprintf(err, sizeof err, _("Invalid checksum at line %d"),
327                         ne_xml_currentline(doc->parser));
328             ne_xml_set_error(doc->parser, err);
329 	    return -1;
330 	} else {
331 	    /* FIXME: validate */
332 	    ne_ascii_to_md5(cdata, doc->stored.checksum);
333 #ifdef DEBUGGING
334 	    {
335 		char tmp[33];
336 		ne_md5_to_ascii(doc->stored.checksum, tmp);
337 		NE_DEBUG(DEBUG_FILES, "Checksum recoded: [%32s]\n", tmp);
338 	    }
339 #endif /* DEBUGGING */
340 	}
341 	break;
342     case SITE_ELM_size:
343 	doc->stored.size = strtoll(cdata, NULL, 10);
344 	if (doc->stored.size == LLONG_MAX) {
345         }
346 	break;
347     case SITE_ELM_protection:
348 	doc->stored.mode = strtoul(cdata, NULL, 8);
349 	break;
350     case SITE_ELM_server_modtime:
351 	doc->server.time = strtol(cdata, NULL, 10);
352 	if (doc->server.time == LONG_MIN || doc->server.time == LONG_MAX)
353             goto overflow_err;
354 	doc->server.exists = true;
355 	break;
356     case SITE_ELM_modtime:
357 	doc->stored.time = strtol(cdata, NULL, 10);
358 	if (doc->stored.time == LONG_MIN || doc->stored.time == LONG_MAX)
359             goto overflow_err;
360 	break;
361     case SITE_ELM_true:
362 	doc->truth = 1;
363 	break;
364     case SITE_ELM_false:
365 	doc->truth = 2;
366 	break;
367     case SITE_ELM_ascii:
368 	if (doc->truth) {
369 	    doc->stored.ascii = doc->truth == 1;
370 	} else {
371             ne_snprintf(err, sizeof err, _("Boolean missing in 'ascii' "
372                                            "at line %d"),
373                         ne_xml_currentline(doc->parser));
374             ne_xml_set_error(doc->parser, err);
375 	    return -1;
376 	}
377 	break;
378     case SITE_ELM_linktarget:
379 	doc->stored.linktarget = ne_strdup(cdata);
380 	break;
381     case SITE_ELM_item: {
382 	struct site_file *file;
383 	doc->stored.exists = true;
384 	file = file_set_stored(doc->type, &doc->stored, doc->site);
385 	if (doc->server.exists) {
386 	    file_state_copy(&file->server, &doc->server, doc->site);
387 	}
388 	DEBUG_DUMP_FILE_PROPS(DEBUG_FILES, file, doc->site);
389     }	break;
390     default:
391 	break;
392     }
393 
394     return 0;
395 overflow_err:
396     ne_snprintf(err, sizeof err, _("Size overflow (%s) in '%s' at line %d"),
397                 cdata, name, ne_xml_currentline(doc->parser));
398     ne_xml_set_error(doc->parser, err);
399     return -1;
400 }
401 
402 /* Read a new XML-format state storage file */
parse_storage_file(struct site * site,FILE * fp)403 static int parse_storage_file(struct site *site, FILE *fp)
404 {
405     ne_xml_parser *p;
406     struct site_xmldoc doc = {0};
407     int ret;
408 
409     doc.site = site;
410     doc.cdata = ne_buffer_create();
411 
412     doc.parser = p = ne_xml_create();
413     ne_xml_push_handler(p, start_element, char_data, end_element, &doc);
414 
415     ret = 0;
416     do {
417 	char buffer[BUFSIZ];
418 	int len;
419 	len = fread(buffer, 1, BUFSIZ, fp);
420 	if (len < BUFSIZ) {
421 	    if (feof(fp)) {
422 		ret = 1;
423 	    } else if (ferror(fp)) {
424 		ret = -1;
425 		/* And don't parse anything else... */
426 		break;
427 	    }
428 	}
429 	ne_xml_parse(p, buffer, len);
430     } while (ret == 0 && !ne_xml_failed(p));
431 
432     if (!ne_xml_failed(p)) ne_xml_parse(p, "", 0);
433 
434     if (ne_xml_failed(p)) {
435 	site->last_error = ne_strdup(ne_xml_get_error(p));
436 	ret = SITE_ERRORS;
437     } else if (ret < 0) {
438 	site->last_error = ne_strdup(strerror(errno));
439 	ret = SITE_ERRORS;
440     }
441 
442     ne_xml_destroy(p);
443 
444     return ret;
445 }
446 
site_read_stored_state(struct site * site)447 int site_read_stored_state(struct site *site)
448 {
449     FILE *fp;
450     int ret;
451 
452     NE_DEBUG(DEBUG_FILES, "Reading info file: %s\n", site->infofile);
453     fp = fopen(site->infofile, "r");
454     if (fp == NULL) {
455 	struct stat st;
456         site->last_error = ne_strdup(strerror(errno));
457 	ret = stat(site->infofile, &st);
458 	if ((ret == 0) || (errno != ENOENT)) {
459 	    /* The file exists but could not be opened for reading...
460 	     * this is an error condition. */
461 	    NE_DEBUG(DEBUG_FILES, "Stat failed %s\n", strerror(errno));
462 	    return SITE_ERRORS;
463 	} else {
464 	    NE_DEBUG(DEBUG_FILES, "Info file doesn't exist.\n");
465 	    return SITE_FAILED;
466 	}
467     }
468     ret = parse_storage_file(site, fp);
469     fclose(fp);
470     return ret;
471 }
472 
473