1 /* 2 sitecopy, for managing remote web sites. 3 Copyright (C) 1999-2006, Joe Orton <joe@manyfish.co.uk> 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 */ 19 20 #ifndef SITES_H 21 #define SITES_H 22 23 #include "config.h" 24 25 /* Need this for off_t, mode_t etc */ 26 #include <sys/types.h> 27 28 #include <time.h> 29 30 #include <stdio.h> /* for FILE * for the storage_file... unfortuntely */ 31 32 #include "ne_ssl.h" /* for ne_ssl_certificate. */ 33 34 #include "common.h" 35 #include "protocol.h" 36 37 38 /* 39 Description of the sitecopy "Data Model" 40 ---------------------------------------- 41 42 SITES are made up of lists of FILES. A file can be a directory, a 43 link, or an actual file, but, we call it a FILE, whichever of these 44 it actually is. The "type" member of site_file indicates the file 45 type. 46 47 Several STATES are associated with each file. A state records all 48 the relevant properties of a file *at a given point in time*: its 49 filename, size, last modification time, checksum, file permissions, 50 etc etc. Note that some properties are redundant for some files; 51 e.g., a directory has no checksum, or link target. 52 53 It is important to realize that a "filename" is a part of a file 54 STATE, note merely another property of the file. This is because 55 of the "at a given point in time" note... when a file is moved, its 56 filename will change, but it is conceptually the same "file". 57 58 The first state is the LOCAL STATE. This state is a direct mapping 59 from the file on disk, to the file in memory. The second is the 60 STORED STATE. This state is a copy of the local state of the file 61 - as at the last update. By comparing the stored and local states 62 of a file, we tell whether it needs to be updated or not. The 63 third state is the SERVER STATE, which is only used for sites in 64 'safe mode'. This is a copy of the the state of the file *on the 65 server*, as at the last update. 66 67 It might help to think of the stored state as a snapshot of the 68 file taken at the time of the last update. 69 70 The server state IS different from the stored state, since, e.g. 71 the last-modification time of an uploaded file on the server is 72 different to that locally. Try it with an FTP client. 73 74 pre-0.9.0, we used to call the 'stored state' remotetime and 75 remotesize. But they were misnomers, because they were nothing to 76 do with the real remote modtime and the real remote size. 77 78 A slightly confusing flag is "file->ignored". EXCLUDED files (which 79 match a regex in site->excludes) are never added to the files list. 80 IGNORED files *are* added to the list, hence this flag. In update 81 mode, a changed file which is "ignored" is NOT uploaded to the 82 remote site. This is the ONLY effect of the ignored flag. 83 84 */ 85 86 /* 87 We don't currently use the 'server state' to its full potential... 88 only storing the server modification time. It would be possible to 89 do more clever things with this, such as use HTTP Etags, or 90 the HTTP Content-MD5 etc. 91 92 There is a fourth state: 93 94 The 'live state', or 'remote state', which is the actual state 95 of the file as held on the server (the complement of the local 96 state, as the server state is the complement of the stored state). 97 This can be used to do a 'verify' mode for sites with safety 98 turned on: 99 -> run across the entire remote site, grabbing the file state 100 into 'live state', like fetch mode except fetch mode writes it 101 into 'stored state'. 102 -> if serverstate and livestate differ, scream blue murder. 103 */ 104 105 /* 106 Within a given site, the site roots are the same for all files locally 107 and remotely. The site roots may be 0-length, eg. for FTP sites where 108 the home (login) directory is the site root directory. 109 110 The root directories are stored as three members of site. 111 ->foo_root, ->foo_root_user, ->foo_root_isrel. 112 (where foo is remote or local) 113 114 ->foo_root_user is what the user enters as the root in the 115 rcfile. This may have a ~/ prefix to indicate the root is to be 116 taken relative to the login directory. This is translated into a 117 usable version, in ->foo_root. foo_roo_isrel is true if this is a 118 relative directory (i.e., ->foo_root_user has a ~/ prefix). 119 120 Example: 121 ->local_root_user = "~/html/mysite/" 122 ->local_root = "/home/ego/html/mysite/" 123 ->local_root_isrel = true; 124 125 ->remote_root_user = "/mydir/" 126 ->remote_root = "/mydir/" 127 ->remote_root_isrel = false; 128 */ 129 130 /* The different methods of defining the state of a file at a given 131 moment in time are: 132 133 - modification time and size 134 - checksum of contents 135 - link target 136 137 The method chosen dictates when we need to update the remote copy of 138 the site. For a given file, exactly ONE method is used to define 139 state. The same method is used for all files of the same type in 140 any given site. For link files, the linktarget is always used. For 141 normal files, the user chooses between using modification time and 142 file size, or checksumming - on a per-site basis. 143 144 For 'link' files, the 'link target' determines the state - only when 145 the link target changes, does the remote site need updating. 146 147 Checksumming allows you to do random things to the modification 148 time, which is what RCS users want. But, it's a muuuch slower than 149 time/size. Also, moved files can be spotted more accurately using 150 checksums. 151 152 */ 153 154 /* Filename handling 155 ----------------- 156 157 The filename of a state is relative to the site root. It has no 158 leading slash, and directories do not have a trailing slash. If a 159 state "does not exist" (i.e. state.exists == false), then the 160 filename is undefined. If it does exist (i.e. exists == true), then 161 the filename is guaranteed to be defined. 162 163 This makes filename handling in the frontend slightly awkward, since 164 for any given file, determining its filename entails checking it's 165 diff. Consequently, the "file_name" function is provided, which, 166 given a file, returns the stored filename of a deleted file (since 167 file->local.filename is undefined), and otherwise the local 168 filename. 169 170 To operate on the local filesystem and on the remote site via the 171 protocol driver, the file_full_remote and file_full_local functions 172 are used. Given a file state, these functions return the filename 173 that should be used to manipulate that file remotely and locally. 174 175 These functions must only be used for states which exist (i.e., have 176 a filename); otherwise they will dereference NULL pointers. For this 177 reason, the use of these functions in the frontend is not 178 encouraged. 179 180 */ 181 182 /* Return codes for site_update/fetch/synch */ 183 /* updated okay */ 184 #define SITE_OK 0 185 /* could not resolve hostname */ 186 #define SITE_LOOKUP -1 187 /* Could not resolve hostname of proxy server */ 188 #define SITE_PROXYLOOKUP -2 189 /* could not connect to remote host */ 190 #define SITE_CONNECT -3 191 /* there were some errors when updating */ 192 #define SITE_ERRORS -4 193 /* Could not authenticate user on server */ 194 #define SITE_AUTH -5 195 /* Could not authenticate user on proxy server */ 196 #define SITE_PROXYAUTH -6 197 /* Operation failed */ 198 #define SITE_FAILED -7 199 /* Unsupported operation / protocol */ 200 #define SITE_UNSUPPORTED -9 201 202 /* For use by the frontend ONLY - never returned by site_* */ 203 #define SITE_ABORTED -101 204 205 struct site_file; 206 struct site; 207 208 /* Which state method is in use over the site */ 209 enum state_method { 210 state_timesize, 211 state_checksum 212 }; 213 214 enum file_diff { 215 file_unchanged, /* Remote file is same as local file */ 216 file_changed, /* File has changed locally, and should be uploaded */ 217 file_new, /* File is new locally, and should be uploaded */ 218 file_deleted, /* File deleted locally, and should be deleted remotely */ 219 file_moved /* File has been moved locally, should be moved remotely */ 220 }; 221 222 enum file_type { 223 file_file, 224 file_dir, 225 file_link 226 }; 227 228 struct file_state { 229 char *filename; /* the file name */ 230 time_t time; /* the last-modification time of the file */ 231 off_t size; /* the size of the file */ 232 unsigned char checksum[16]; /* the MD5 checksum of the file */ 233 char *linktarget; /* the target of the link */ 234 unsigned int exists; /* whether the file exists in this state or not */ 235 unsigned int ascii; /* whether the file is 'ASCII' or not */ 236 mode_t mode; /* the protection modes & 0777 of the file */ 237 }; 238 239 /* To Consider: 240 * 241 * - The directory is identical among many files - make a site_dir 242 * structure, sharing the char *. This could include a depth, which 243 * could enable 'forcecd' mode for relative remote directories more 244 * easily. This could also pave the way for checking whether a whole 245 * directory has moved. 246 * */ 247 248 /* File representation */ 249 struct site_file { 250 /* The diff between the local and stored states. */ 251 enum file_diff diff; 252 253 /* The diff between the server and live states. */ 254 enum file_diff live_diff; 255 256 enum file_type type; 257 258 unsigned int ignore; /* whether to ignore any changes to this file */ 259 260 /* Probably want to make the states into an array, so they can be 261 * indexed and used more generically than this. e.g.: 262 * struct file_state states[4]; 263 * struct file_state *local, *stored, *server, *live; 264 * In file_create, set ->local = ->states[0], 265 * ->stored = ->states[1] etc etc. 266 * This allows file_set_local and file_set_stored to be 267 * abstracted out. Should also allow the abstract file_set to be 268 * used for site_verify. 269 */ 270 struct file_state local, stored, server, live; 271 272 /* Linked list nodes */ 273 struct site_file *next; 274 struct site_file *prev; 275 }; 276 277 /* Valid file permissions mirroring values */ 278 enum site_perm_modes { 279 sitep_ignore, /* Ignore file permissions */ 280 sitep_exec, /* Maintain execute permissions */ 281 sitep_all /* Maintain all permissions */ 282 }; 283 284 /* Valid symlink handling modes */ 285 enum site_symlink_modes { 286 sitesym_ignore, 287 sitesym_follow, 288 sitesym_maintain 289 }; 290 291 /* Protocol modes */ 292 enum site_protocol_modes { 293 siteproto_ftp, 294 siteproto_dav, 295 siteproto_rsh, 296 siteproto_sftp, 297 siteproto_unknown 298 }; 299 300 /* 301 302 fnlist - lists of fnmatch() patterns 303 ------------------------------------ 304 305 There are two types of pattern - patterns with paths, and patterns 306 without paths. The rcfile entry 307 exclude "/backup/back*" 308 excludes files matching back* in the asda/ directory of the site. Whereas, 309 the entry 310 exclude *~ 311 excludes ALL files matching *~ throughout the site. 312 313 Internally, the leading slash of with-path patterns must be stripped, 314 since they are used match against filenames, which don't have a 315 leading slash. If the pattern *did* have a leading slash, then the 316 'haspath' field must be set to 'true'. 317 318 e.g. 319 exclude *.txt 320 exclude /asda/back* 321 322 -> fnlist list: 323 { "*.txt", false, ... } , 324 { "asda/back*", true, ... } 325 326 */ 327 328 struct fnlist { 329 char *pattern; 330 unsigned int haspath; 331 struct fnlist *next; 332 struct fnlist *prev; 333 }; 334 335 336 struct site_host { 337 char *hostname; 338 int port; 339 char *username; 340 char *password; 341 }; 342 343 /* This represents a site */ 344 struct site { 345 346 char *name; /* symbolic name for site */ 347 char *url; /* URL for site - used by flatlist mode */ 348 349 struct site_host server; 350 struct site_host proxy; 351 352 enum site_protocol_modes protocol; 353 char *proto_string; /* protocol name used in rcfile. */ 354 const struct proto_driver *driver; /* the protocol driver routines */ 355 356 char *remote_root; /* root directory of site on server */ 357 char *remote_root_user; /* what the user gave/sees as the remote root */ 358 unsigned int remote_isrel; /* is the remote root dir relative to login dir? (~/) */ 359 char *local_root; /* root directory of site locally */ 360 char *local_root_user; /* what the user gave/sees as the remote root */ 361 unsigned int local_isrel; /* is the local root directory relative to home dir */ 362 363 char *infofile; /* local storage file in ~/.sitecopy/ */ 364 char *certfile; /* file in which cached SSL certificate is stored. */ 365 FILE *storage_file; /* The file opened for the storage file */ 366 367 char *client_cert; /* client certificate */ 368 ne_ssl_certificate *server_cert; /* pre-cached server cert */ 369 370 /* Options for the site */ 371 enum site_perm_modes perms; /* permissions maintenance mode */ 372 int dirperms; /* directory permissions maintenance mode */ 373 enum site_symlink_modes symlinks; /* symlink handline mode */ 374 375 /* Protocol-driver specific options here */ 376 unsigned int ftp_pasv_mode; 377 unsigned int ftp_echo_quit; 378 unsigned int ftp_forcecd; 379 unsigned int ftp_use_cwd; 380 unsigned int http_use_expect; 381 unsigned int http_limit; 382 unsigned int http_secure; 383 unsigned int http_tolerant; 384 char *rsh_cmd; 385 char *rcp_cmd; 386 387 unsigned int nodelete; /* whether to delete any files remotely */ 388 unsigned int checkmoved; /* whether to check for moved files */ 389 unsigned int checkrenames; /* whether to check for renamed files */ 390 unsigned int nooverwrite; /* whether to delete changed files before overwriting */ 391 unsigned int safemode; /* whether we are in safe mode or not */ 392 unsigned int lowercase; /* whether to use all-lowercase filenames remotely */ 393 unsigned int tempupload; /* whether to use temporary files when uploading */ 394 395 /* These are parameters to site_update really. */ 396 unsigned int keep_going; /* if true, keep going past errors in updates */ 397 398 unsigned int use_this; /* whether the site is being operated on - handy 399 * for the console FE */ 400 401 /* We have two 'is_different' fields. This is unintuitive, since 402 * if the local site is different from the remote site, the 403 * reverse must also be true, right? Wrong, because of 'ignores' 404 * and 'nodelete': using these, a change can be made to the local 405 * site which will NOT be mirrored by update mode, but WILL be 406 * affected by synch mode. */ 407 unsigned int local_is_different; /* use this if you want to know whether 408 * site_synch will do anything */ 409 unsigned int remote_is_different; /* use this if you want to know whether 410 * site_update will do anything */ 411 412 enum state_method state_method; /* as dictated by rcfile */ 413 enum state_method stored_state_method; /* as used in info file */ 414 415 /* Files which are excluded */ 416 struct fnlist *excludes; 417 /* Files which are ignored */ 418 struct fnlist *ignores; 419 /* Files which are ASCII */ 420 struct fnlist *asciis; 421 422 struct site_file *files; /* list of files */ 423 struct site_file *files_tail; /* end of the list */ 424 425 /* Some useful counts for the files */ 426 int numnew; /* number of new files */ 427 int numchanged; /* number of changed files */ 428 int numignored; /* number of changed files which are being ignored */ 429 int numdeleted; /* number of deleted files */ 430 int nummoved; /* number of moved files */ 431 int numunchanged; /* number of unchanged files */ 432 433 off_t totalnew; /* total file size of new files */ 434 off_t totalchanged; /* total file size of changed files */ 435 436 char *last_error; 437 438 /* "Critical section" handling: do NOT modify */ 439 int critical; 440 441 struct site *next; 442 struct site *prev; 443 }; 444 445 /* The list of all sites as read from the rcfile */ 446 extern struct site *all_sites; 447 448 /* Open the storage file for writing, pre-update. 449 * Returns site->storage_file or NULL on error. */ 450 FILE *site_open_storage_file(struct site *site); 451 int site_close_storage_file(struct site *site); 452 453 void fe_initialize(void); 454 455 /* This reads the files information for the given site - both the 456 * local and remote ones. Returns: 457 * SITE_OK on success 458 * SITE_ERRORS on corrupt info file 459 * SITE_FAILED on non-existent info file 460 */ 461 int site_readfiles(struct site *); 462 463 /* This makes out like we've just done a successful site_update. */ 464 465 /* This writes the stored files list back to disk. 466 * Returns 0 on success or -1 on failure. */ 467 int site_write_stored_state(struct site *); 468 469 /* This merges the stored files list in the storage file with the 470 * in-memory files list of the site. Returns: 471 * SITE_OK on success 472 * SITE_ERRORS on corrupt info file 473 * SITE_FAILED on non-existent info file 474 */ 475 int site_read_stored_state(struct site *site); 476 477 /* This merges the local files on disk with the in-memory files list 478 * of the site. */ 479 void site_read_local_state(struct site *site); 480 481 /* Initialize the site - pretend there are NO files held remotely */ 482 void site_initialize(struct site *); 483 484 /* Catch up the site - mark all files as updated remotely */ 485 void site_catchup(struct site *site); 486 487 /* Verify that that the stored state of the remote site matches the 488 * actual make up of the remote site. Returns: 489 * SITE_OK if states match up 490 * SITE_ERRORS if states do not match 491 * SITE_FAILED if the comparison could not begin (e.g. auth failure). 492 * 493 * If SITE_ERRORS is returned, then *numremoved is set to the number 494 * of files missing from the server, and fe_verified() will have been 495 * called for any changed or added to the remote site. */ 496 int site_verify(struct site *site, int *numremoved); 497 498 /* Update the remote site. 499 * fe_updating, fe_updated, fe_setting_perms, fe_set_perms may be 500 * called during the update. fe_can_update may be called during the 501 * update if site->prompting is set. 502 * 503 * Returns: 504 * SITE_ERRORS if an error occurred which was reported using 505 * the fe_update_* functions. site->last_error is undefined. 506 * SITE_FAILED if the update never began, and you should 507 * look at site->last_error for the error message. 508 * SITE_* for other errors. site->last_error is undefined. 509 */ 510 int site_update(struct site *site); 511 512 /* Finds a site with the given name, and returns a pointer to it. 513 * If no site of given name is found, returns NULL 514 */ 515 struct site *site_find(const char *sitename); 516 517 /* Syncronizes the local site with the remote copy. 518 * fe_synch_* will be called during the synchronize. 519 * 520 * Returns: 521 * SITE_ERRORS if an error occurred which was reported using 522 * the fe_* functions. 523 * SITE_FAILED if the update never began, and you should 524 * look at site->last_error for the error message. 525 * SITE_* for other errors. 526 * 527 */ 528 int site_synch(struct site *site); 529 530 /* Updates the files listing from the remote site. 531 * 532 * fe_fetch_found() will be called for each file that is found 533 * in the fetch. If the site is using checksumming, after the fe_fetch_found 534 * calls are made, fe_checksumming/fe_checksummed call pairs will be made 535 * for each file on the remote site. 536 * Returns: 537 * SITE_ERRORS if an error occurred which was reported using 538 * the fe_* functions. 539 * SITE_FAILED if the update never began, and you should 540 * look at site->last_error for the error message. 541 * SITE_* for other errors. 542 */ 543 int site_fetch(struct site *site); 544 545 /* Destroys all the files... use before doing a second 546 * site_readfiles on a site. */ 547 void site_destroy(struct site *the_site); 548 549 /* Destroys the stored state of the site. Use before calling 550 * site_fetch, or site_read_stored_state. */ 551 void site_destroy_stored(struct site *site); 552 553 /* Outputs the flat listing style output for the given site 554 * to the given stream 555 */ 556 void site_flatlist(FILE *f, struct site *the_site); 557 558 /* Returns a pseudo-URL for the given site, in a statically allocated 559 * memory location which will be overwritten by subsequent calls to 560 * the function. (-> NOT thread-safe) */ 561 const char *site_pseudourl(struct site *the_site); 562 563 char *file_full_remote(struct file_state *state, struct site *site); 564 char *file_full_local(struct file_state *state, struct site *site); 565 char *file_name(const struct site_file *file); 566 567 struct fnlist *fnlist_prepend(struct fnlist **list); 568 struct fnlist *fnlist_deep_copy(const struct fnlist *src); 569 570 const char *site_get_protoname(struct site *site); 571 572 #endif /* SITES_H */ 573