1 /*
2    sitecopy, for managing remote web sites.
3    Copyright (C) 1999-2006, Joe Orton <joe@manyfish.co.uk>
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 
20 #ifndef SITES_H
21 #define SITES_H
22 
23 #include "config.h"
24 
25 /* Need this for off_t, mode_t etc */
26 #include <sys/types.h>
27 
28 #include <time.h>
29 
30 #include <stdio.h> /* for FILE * for the storage_file... unfortuntely */
31 
32 #include "ne_ssl.h" /* for ne_ssl_certificate. */
33 
34 #include "common.h"
35 #include "protocol.h"
36 
37 
38 /*
39    Description of the sitecopy "Data Model"
40    ----------------------------------------
41 
42    SITES are made up of lists of FILES.  A file can be a directory, a
43    link, or an actual file, but, we call it a FILE, whichever of these
44    it actually is.  The "type" member of site_file indicates the file
45    type.
46 
47    Several STATES are associated with each file.  A state records all
48    the relevant properties of a file *at a given point in time*: its
49    filename, size, last modification time, checksum, file permissions,
50    etc etc.  Note that some properties are redundant for some files;
51    e.g., a directory has no checksum, or link target.
52 
   It is important to realize that a "filename" is a part of a file
   STATE, not merely another property of the file.  This is because
   of the "at a given point in time" note... when a file is moved, its
   filename will change, but it is conceptually the same "file".
57 
   The first state is the LOCAL STATE.  This state is a direct mapping
   from the file on disk, to the file in memory.  The second is the
   STORED STATE.  This state is a copy of the local state of the file
   - as at the last update.  By comparing the stored and local states
   of a file, we tell whether it needs to be updated or not.  The
   third state is the SERVER STATE, which is only used for sites in
   'safe mode'.  This is a copy of the state of the file *on the
   server*, as at the last update.
66 
67    It might help to think of the stored state as a snapshot of the
68    file taken at the time of the last update.
69 
70    The server state IS different from the stored state, since, e.g.
71    the last-modification time of an uploaded file on the server is
72    different to that locally.  Try it with an FTP client.
73 
74    pre-0.9.0, we used to call the 'stored state' remotetime and
75    remotesize.  But they were misnomers, because they were nothing to
76    do with the real remote modtime and the real remote size.
77 
   A slightly confusing flag is "file->ignore". EXCLUDED files (which
   match a pattern in site->excludes) are never added to the files list.
   IGNORED files *are* added to the list, hence this flag. In update
   mode, a changed file which is "ignored" is NOT uploaded to the
   remote site.  This is the ONLY effect of the ignore flag.
83 
84 */
85 
86 /*
87    We don't currently use the 'server state' to its full potential...
88   only storing the server modification time. It would be possible to
89   do more clever things with this, such as use HTTP Etags, or
90   the HTTP Content-MD5 etc.
91 
92    There is a fourth state:
93 
94    The 'live state', or 'remote state', which is the actual state
95   of the file as held on the server (the complement of the local
96   state, as the server state is the complement of the stored state).
97   This can be used to do a 'verify' mode for sites with safety
98   turned on:
99     -> run across the entire remote site, grabbing the file state
100     into 'live state', like fetch mode except fetch mode writes it
101     into 'stored state'.
102     -> if serverstate and livestate differ, scream blue murder.
103  */
104 
105 /*
106   Within a given site, the site roots are the same for all files locally
107   and remotely. The site roots may be 0-length, eg. for FTP sites where
108   the home (login) directory is the site root directory.
109 
  The root directories are stored as three members of site:
      ->foo_root, ->foo_root_user, ->foo_isrel.
   (where foo is remote or local)

  ->foo_root_user is what the user enters as the root in the
  rcfile. This may have a ~/ prefix to indicate the root is to be
  taken relative to the login directory. This is translated into a
  usable version, in ->foo_root. ->foo_isrel is true if this is a
  relative directory (i.e., ->foo_root_user has a ~/ prefix).

  Example:
	->local_root_user = "~/html/mysite/"
	->local_root = "/home/ego/html/mysite/"
	->local_isrel = true;

	->remote_root_user = "/mydir/"
	->remote_root = "/mydir/"
	->remote_isrel = false;
128 */
129 
130 /* The different methods of defining the state of a file at a given
131    moment in time are:
132 
133     - modification time and size
134     - checksum of contents
135     - link target
136 
137   The method chosen dictates when we need to update the remote copy of
138   the site. For a given file, exactly ONE method is used to define
139   state.  The same method is used for all files of the same type in
140   any given site.  For link files, the linktarget is always used. For
141   normal files, the user chooses between using modification time and
142   file size, or checksumming - on a per-site basis.
143 
144   For 'link' files, the 'link target' determines the state - only when
145   the link target changes, does the remote site need updating.
146 
  Checksumming allows you to do random things to the modification
  time, which is what RCS users want. But, it is much slower than
  time/size. Also, moved files can be spotted more accurately using
  checksums.
151 
152 */
153 
154 /*  Filename handling
155     -----------------
156 
157   The filename of a state is relative to the site root. It has no
158   leading slash, and directories do not have a trailing slash.  If a
159   state "does not exist" (i.e. state.exists == false), then the
160   filename is undefined.  If it does exist (i.e. exists == true), then
161   the filename is guaranteed to be defined.
162 
  This makes filename handling in the frontend slightly awkward, since
  for any given file, determining its filename entails checking its
  diff.  Consequently, the "file_name" function is provided, which,
  given a file, returns the stored filename of a deleted file (since
  file->local.filename is undefined), and otherwise the local
  filename.
169 
170   To operate on the local filesystem and on the remote site via the
171   protocol driver, the file_full_remote and file_full_local functions
172   are used. Given a file state, these functions return the filename
173   that should be used to manipulate that file remotely and locally.
174 
175   These functions must only be used for states which exist (i.e., have
176   a filename); otherwise they will dereference NULL pointers. For this
177   reason, the use of these functions in the frontend is not
178   encouraged.
179 
180  */
181 
/* Return codes for site_update/fetch/synch.
 * SITE_OK is zero; every failure code is negative.  Note that the
 * value -8 is not assigned to any code in this list. */
#define SITE_OK 0           /* updated okay */
#define SITE_LOOKUP -1      /* could not resolve hostname */
#define SITE_PROXYLOOKUP -2 /* could not resolve hostname of proxy server */
#define SITE_CONNECT -3     /* could not connect to remote host */
#define SITE_ERRORS -4      /* there were some errors when updating */
#define SITE_AUTH -5        /* could not authenticate user on server */
#define SITE_PROXYAUTH -6   /* could not authenticate user on proxy server */
#define SITE_FAILED -7      /* operation failed */
#define SITE_UNSUPPORTED -9 /* unsupported operation / protocol */

/* For use by the frontend ONLY - never returned by site_* */
#define SITE_ABORTED -101
204 
/* Forward declarations: both structures are defined in full further
 * down this header, and refer to each other only through pointers. */
struct site_file;
struct site;
207 
/* Which state method is in use over the site, i.e. which properties
 * of a normal file define its state (link files always use the link
 * target instead). */
enum state_method {
    state_timesize, /* modification time and size */
    state_checksum  /* checksum of contents */
};
213 
/* The result of comparing two states of one file. */
enum file_diff {
    /* Remote file is same as local file */
    file_unchanged,
    /* File has changed locally, and should be uploaded */
    file_changed,
    /* File is new locally, and should be uploaded */
    file_new,
    /* File deleted locally, and should be deleted remotely */
    file_deleted,
    /* File has been moved locally, should be moved remotely */
    file_moved
};
221 
/* What kind of object a site_file is.  Whatever the type, it is
 * still referred to as a "file" throughout. */
enum file_type {
    file_file, /* an actual file */
    file_dir,  /* a directory */
    file_link  /* a link */
};
227 
/* The properties of a file at a given point in time.  Some members
 * are redundant for some file types (e.g. a directory has no checksum
 * or link target). */
struct file_state {
    /* the file name */
    char *filename;
    /* the last-modification time of the file */
    time_t time;
    /* the size of the file */
    off_t size;
    /* the MD5 checksum of the file */
    unsigned char checksum[16];
    /* the target of the link */
    char *linktarget;
    /* whether the file exists in this state or not */
    unsigned int exists;
    /* whether the file is 'ASCII' or not */
    unsigned int ascii;
    /* the protection modes & 0777 of the file */
    mode_t mode;
};
238 
239 /* To Consider:
240  *
241  * - The directory is identical among many files - make a site_dir
242  * structure, sharing the char *. This could include a depth, which
243  * could enable 'forcecd' mode for relative remote directories more
244  * easily. This could also pave the way for checking whether a whole
245  * directory has moved.
246  * */
247 
248 /* File representation */
249 struct site_file {
250     /* The diff between the local and stored states. */
251     enum file_diff diff;
252 
253     /* The diff between the server and live states. */
254     enum file_diff live_diff;
255 
256     enum file_type type;
257 
258     unsigned int ignore; /* whether to ignore any changes to this file */
259 
260     /* Probably want to make the states into an array, so they can be
261      * indexed and used more generically than this. e.g.:
262      *    struct file_state states[4];
263      *    struct file_state *local, *stored, *server, *live;
264      * In file_create, set ->local = ->states[0],
265      *                     ->stored = ->states[1] etc etc.
266      * This allows file_set_local and file_set_stored to be
267      * abstracted out. Should also allow the abstract file_set to be
268      * used for site_verify.
269      */
270     struct file_state local, stored, server, live;
271 
272     /* Linked list nodes */
273     struct site_file *next;
274     struct site_file *prev;
275 };
276 
/* Valid file permissions mirroring values */
enum site_perm_modes {
    /* Ignore file permissions */
    sitep_ignore,
    /* Maintain execute permissions */
    sitep_exec,
    /* Maintain all permissions */
    sitep_all
};
283 
/* Valid symlink handling modes.
 * NOTE(review): the per-value meanings below are inferred from the
 * enumerator names only - confirm against the code that reads
 * site->symlinks. */
enum site_symlink_modes {
    sitesym_ignore,  /* presumably: symlinks are ignored */
    sitesym_follow,  /* presumably: symlinks are followed */
    sitesym_maintain /* presumably: symlinks are kept as links */
};
290 
/* Protocol modes: which protocol a site uses to reach the server.
 * siteproto_unknown is the last enumerator. */
enum site_protocol_modes {
    siteproto_ftp,
    siteproto_dav,
    siteproto_rsh,
    siteproto_sftp,
    siteproto_unknown
};
299 
300 /*
301 
302  fnlist - lists of fnmatch() patterns
303  ------------------------------------
304 
 There are two types of pattern - patterns with paths, and patterns
 without paths. The rcfile entry
     exclude "/backup/back*"
 excludes files matching back* in the backup/ directory of the site. Whereas,
 the entry
     exclude *~
 excludes ALL files matching *~ throughout the site.
312 
 Internally, the leading slash of with-path patterns must be stripped,
 since they are used to match against filenames, which don't have a
 leading slash.  If the pattern *did* have a leading slash, then the
 'haspath' field must be set to 'true'.
317 
318  e.g.
319     exclude *.txt
320     exclude /asda/back*
321 
322  ->  fnlist list:
323 	{ "*.txt", false, ... } ,
324 	{ "asda/back*", true, ... }
325 
326 */
327 
/* One node in a doubly-linked list of fnmatch() patterns. */
struct fnlist {
    /* the pattern, with any leading slash already stripped */
    char *pattern;
    /* true if the pattern was given with a path (leading slash) */
    unsigned int haspath;
    /* neighbouring nodes in the list */
    struct fnlist *next;
    struct fnlist *prev;
};
334 
335 
/* Connection details for one host; struct site holds one of these
 * for the server and one for the proxy. */
struct site_host {
    char *hostname; /* host name */
    int port;       /* port number */
    char *username; /* user name to log in with */
    char *password; /* password for that user */
};
342 
343 /* This represents a site */
344 struct site {
345 
346     char *name; /* symbolic name for site */
347     char *url; /* URL for site - used by flatlist mode */
348 
349     struct site_host server;
350     struct site_host proxy;
351 
352     enum site_protocol_modes protocol;
353     char *proto_string; /* protocol name used in rcfile. */
354     const struct proto_driver *driver; /* the protocol driver routines */
355 
356     char *remote_root; /* root directory of site on server */
357     char *remote_root_user; /* what the user gave/sees as the remote root */
358     unsigned int remote_isrel; /* is the remote root dir relative to login dir? (~/) */
359     char *local_root; /* root directory of site locally */
360     char *local_root_user; /* what the user gave/sees as the remote root */
361     unsigned int local_isrel; /* is the local root directory relative to home dir */
362 
363     char *infofile;  /* local storage file in ~/.sitecopy/  */
364     char *certfile;  /* file in which cached SSL certificate is stored. */
365     FILE *storage_file;  /* The file opened for the storage file */
366 
367     char *client_cert; /* client certificate */
368     ne_ssl_certificate *server_cert; /* pre-cached server cert */
369 
370     /* Options for the site */
371     enum site_perm_modes perms; /* permissions maintenance mode */
372     int dirperms; /* directory permissions maintenance mode */
373     enum site_symlink_modes symlinks; /* symlink handline mode */
374 
375     /* Protocol-driver specific options here */
376     unsigned int ftp_pasv_mode;
377     unsigned int ftp_echo_quit;
378     unsigned int ftp_forcecd;
379     unsigned int ftp_use_cwd;
380     unsigned int http_use_expect;
381     unsigned int http_limit;
382     unsigned int http_secure;
383     unsigned int http_tolerant;
384     char *rsh_cmd;
385     char *rcp_cmd;
386 
387     unsigned int nodelete; /* whether to delete any files remotely */
388     unsigned int checkmoved; /* whether to check for moved files */
389     unsigned int checkrenames; /* whether to check for renamed files */
390     unsigned int nooverwrite; /* whether to delete changed files before overwriting */
391     unsigned int safemode;  /* whether we are in safe mode or not */
392     unsigned int lowercase; /* whether to use all-lowercase filenames remotely */
393     unsigned int tempupload; /* whether to use temporary files when uploading */
394 
395     /* These are parameters to site_update really. */
396     unsigned int keep_going; /* if true, keep going past errors in updates */
397 
398     unsigned int use_this; /* whether the site is being operated on - handy
399 			      * for the console FE */
400 
401     /* We have two 'is_different' fields. This is unintuitive, since
402      * if the local site is different from the remote site, the
403      * reverse must also be true, right? Wrong, because of 'ignores'
404      * and 'nodelete': using these, a change can be made to the local
405      * site which will NOT be mirrored by update mode, but WILL be
406      * affected by synch mode. */
407     unsigned int local_is_different; /* use this if you want to know whether
408 					* site_synch will do anything */
409     unsigned int remote_is_different; /* use this if you want to know whether
410 					 * site_update will do anything */
411 
412     enum state_method state_method; /* as dictated by rcfile */
413     enum state_method stored_state_method; /* as used in info file */
414 
415     /* Files which are excluded */
416     struct fnlist *excludes;
417     /* Files which are ignored */
418     struct fnlist *ignores;
419     /* Files which are ASCII */
420     struct fnlist *asciis;
421 
422     struct site_file *files; /* list of files */
423     struct site_file *files_tail; /* end of the list */
424 
425     /* Some useful counts for the files */
426     int numnew; /* number of new files */
427     int numchanged; /* number of changed files */
428     int numignored; /* number of changed files which are being ignored */
429     int numdeleted; /* number of deleted files */
430     int nummoved; /* number of moved files */
431     int numunchanged; /* number of unchanged files */
432 
433     off_t totalnew; /* total file size of new files */
434     off_t totalchanged; /* total file size of changed files */
435 
436     char *last_error;
437 
438     /* "Critical section" handling: do NOT modify */
439     int critical;
440 
441     struct site *next;
442     struct site *prev;
443 };
444 
/* The list of all sites as read from the rcfile.  Sites are chained
 * through their ->next / ->prev members.  Declared here only; the
 * definition lives in another translation unit. */
extern struct site *all_sites;
447 
/* Open the storage file for writing, pre-update.
 * Returns site->storage_file or NULL on error. */
FILE *site_open_storage_file(struct site *site);

/* Close the storage file of the given site.
 * NOTE(review): the return convention is not documented in this
 * header - presumably 0 on success and non-zero on failure; confirm
 * against the implementation. */
int site_close_storage_file(struct site *site);
452 
/* Initialization hook; takes no arguments and returns nothing.
 * NOTE(review): judging by the fe_ prefix this is presumably provided
 * by the frontend rather than by the sites code - confirm. */
void fe_initialize(void);
454 
/* Reads the files information for the given site - both the local
 * and remote ones.
 *
 * Returns:
 *   SITE_OK      on success
 *   SITE_ERRORS  on corrupt info file
 *   SITE_FAILED  on non-existent info file
 */
int site_readfiles(struct site *site);
462 
/* This makes out like we've just done a successful site_update. */
/* NOTE(review): the comment above has no declaration attached to it
 * in this header; it looks like a leftover from a prototype that has
 * since gone or moved.  It does not describe the function below. */

/* Writes the stored files list of the site back to disk.
 * Returns 0 on success or -1 on failure. */
int site_write_stored_state(struct site *site);
468 
/* Merges the stored files list in the storage file with the
 * in-memory files list of the site.
 *
 * Returns:
 *   SITE_OK      on success
 *   SITE_ERRORS  on corrupt info file
 *   SITE_FAILED  on non-existent info file
 */
int site_read_stored_state(struct site *site);
476 
/* Merges the local files on disk with the in-memory files list of
 * the site.  No status is returned. */
void site_read_local_state(struct site *site);
480 
/* Initialize the site - pretend there are NO files held remotely */
void site_initialize(struct site *site);

/* Catch up the site - mark all files as updated remotely */
void site_catchup(struct site *site);
486 
/* Verify that the stored state of the remote site matches the
 * actual make up of the remote site.
 *
 * Returns:
 *    SITE_OK       if states match up
 *    SITE_ERRORS   if states do not match
 *    SITE_FAILED   if the comparison could not begin (e.g. auth failure).
 *
 * If SITE_ERRORS is returned, then *numremoved is set to the number
 * of files missing from the server, and fe_verified() will have been
 * called for any files changed on or added to the remote site.  */
int site_verify(struct site *site, int *numremoved);
497 
/* Update the remote site.
 * fe_updating, fe_updated, fe_setting_perms, fe_set_perms may be
 * called during the update. fe_can_update may be called during the
 * update if site->prompting is set.
 * NOTE(review): struct site as declared in this header has no
 * 'prompting' member, so the sentence above may be out of date -
 * confirm against the implementation.
 *
 * Returns:
 *   SITE_ERRORS if an error occurred which was reported using
 *     the fe_update_* functions. site->last_error is undefined.
 *   SITE_FAILED if the update never began, and you should
 *     look at site->last_error for the error message.
 *   SITE_* for other errors. site->last_error is undefined.
 */
int site_update(struct site *site);
511 
/* Finds a site with the given name, and returns a pointer to it.
 * If no site of the given name is found, returns NULL.
 */
struct site *site_find(const char *sitename);
516 
/* Synchronizes the local site with the remote copy.
 * fe_synch_* will be called during the synchronize.
 *
 * Returns:
 *   SITE_ERRORS if an error occurred which was reported using
 *     the fe_* functions.
 *   SITE_FAILED if the update never began, and you should
 *     look at site->last_error for the error message.
 *   SITE_* for other errors.
 *
 */
int site_synch(struct site *site);
529 
/* Updates the files listing from the remote site.
 *
 * fe_fetch_found() will be called for each file that is found in the
 * fetch.  If the site is using checksumming, then after the
 * fe_fetch_found calls are made, fe_checksumming/fe_checksummed call
 * pairs will be made for each file on the remote site.
 *
 * Returns:
 *   SITE_ERRORS if an error occurred which was reported using
 *     the fe_* functions.
 *   SITE_FAILED if the update never began, and you should
 *     look at site->last_error for the error message.
 *   SITE_* for other errors.
 */
int site_fetch(struct site *site);
544 
/* Destroys all the files... use before doing a second
 * site_readfiles on a site. */
void site_destroy(struct site *the_site);

/* Destroys the stored state of the site. Use before calling
 * site_fetch, or site_read_stored_state. */
void site_destroy_stored(struct site *site);
552 
/* Outputs the flat listing style output for the given site to the
 * given stream.
 */
void site_flatlist(FILE *f, struct site *the_site);
557 
/* Returns a pseudo-URL for the given site.  The result lives in a
 * statically allocated memory location which will be overwritten by
 * subsequent calls to the function (-> NOT thread-safe), so copy it
 * if it is needed beyond the next call. */
const char *site_pseudourl(struct site *the_site);
562 
/* Given a file state, return the filename that should be used to
 * manipulate that file remotely (file_full_remote) or locally
 * (file_full_local).  Must only be used for states which exist (i.e.
 * have a filename); otherwise NULL pointers will be dereferenced.
 * NOTE(review): ownership of the returned string is not documented
 * here - presumably it is allocated and the caller frees it; confirm. */
char *file_full_remote(struct file_state *state, struct site *site);
char *file_full_local(struct file_state *state, struct site *site);

/* Returns the stored filename of a deleted file (since its local
 * filename is undefined), and otherwise the local filename. */
char *file_name(const struct site_file *file);
566 
/* Pattern list helpers.
 * NOTE(review): neither function is documented in this header; the
 * descriptions below are inferred from the names and signatures only
 * and should be confirmed against the implementation.
 *
 * fnlist_prepend:   presumably adds a new node at the head of *list
 *                   and returns that node.
 * fnlist_deep_copy: presumably returns a newly allocated copy of the
 *                   whole list starting at src.
 */
struct fnlist *fnlist_prepend(struct fnlist **list);
struct fnlist *fnlist_deep_copy(const struct fnlist *src);
569 
/* Returns a protocol name for the given site as a constant string.
 * NOTE(review): undocumented in this header - whether this is
 * site->proto_string or a name supplied by the protocol driver cannot
 * be told from here; confirm against the implementation. */
const char *site_get_protoname(struct site *site);
571 
572 #endif /* SITES_H */
573