1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- 2 * 3 * librsync -- library for network deltas 4 * 5 * Copyright 2000, 2001, 2014, 2015 by Martin Pool <mbp@sourcefrog.net> 6 * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU Lesser General Public License as published by 10 * the Free Software Foundation; either version 2.1 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 21 */ 22 23 /*= 24 | You should never wear your best 25 | trousers when you go out to fight for 26 | freedom and liberty. 27 | -- Henrik Ibsen 28 */ 29 30 /** \file librsync.h 31 * Public header for librsync. */ 32 #ifndef _RSYNC_H 33 # define _RSYNC_H 34 35 # include <stdio.h> 36 # include <stdint.h> 37 # include <time.h> 38 # include "librsync_export.h" 39 40 # ifdef __cplusplus 41 extern "C" { 42 # endif 43 44 /** Library version string. 45 * 46 * \sa \ref versioning */ 47 LIBRSYNC_EXPORT extern char const rs_librsync_version[]; 48 49 typedef uint8_t rs_byte_t; 50 typedef intmax_t rs_long_t; 51 52 /*= 53 | "The IETF already has more than enough 54 | RFCs that codify the obvious, make 55 | stupidity illegal, support truth, 56 | justice, and the IETF way, and generally 57 | demonstrate the author is a brilliant and 58 | valuable Contributor to The Standards 59 | Process." 60 | -- Vernon Schryver 61 */ 62 63 /** A uint32 magic number, emitted in bigendian/network order at the start of 64 * librsync files. */ 65 typedef enum { 66 /** A delta file. 67 * 68 * At present, there's only one delta format. 69 * 70 * The four-byte literal \c "rs\x026". */ 71 RS_DELTA_MAGIC = 0x72730236, 72 73 /** A signature file with MD4 signatures. 74 * 75 * Backward compatible with librsync < 1.0, but strongly deprecated because 76 * it creates a security vulnerability on files containing partly untrusted 77 * data. See <https://github.com/librsync/librsync/issues/5>. 78 * 79 * The four-byte literal \c "rs\x016". 80 * 81 * \sa rs_sig_begin() */ 82 RS_MD4_SIG_MAGIC = 0x72730136, 83 84 /** A signature file using the BLAKE2 hash. Supported from librsync 1.0. 85 * 86 * The four-byte literal \c "rs\x017". 87 * 88 * \sa rs_sig_begin() */ 89 RS_BLAKE2_SIG_MAGIC = 0x72730137, 90 91 /** A signature file with RabinKarp rollsum and MD4 hash. 92 * 93 * Uses a faster/safer rollsum, but still strongly discouraged because of 94 * MD4's security vulnerability. Supported since librsync 2.2.0. 95 * 96 * The four-byte literal \c "rs\x01F". 97 * 98 * \sa rs_sig_begin() */ 99 RS_RK_MD4_SIG_MAGIC = 0x72730146, 100 101 /** A signature file with RabinKarp rollsum and BLAKE2 hash. 102 * 103 * Uses a faster/safer rollsum together with the safer BLAKE2 hash. This is 104 * the recommended default supported since librsync 2.2.0. 105 * 106 * The four-byte literal \c "rs\x01G". 107 * 108 * \sa rs_sig_begin() */ 109 RS_RK_BLAKE2_SIG_MAGIC = 0x72730147, 110 111 } rs_magic_number; 112 113 /** Log severity levels. 114 * 115 * These are the same as syslog, at least in glibc. 116 * 117 * \sa rs_trace_set_level() \sa \ref api_trace */ 118 typedef enum { 119 RS_LOG_EMERG = 0, /**< System is unusable */ 120 RS_LOG_ALERT = 1, /**< Action must be taken immediately */ 121 RS_LOG_CRIT = 2, /**< Critical conditions */ 122 RS_LOG_ERR = 3, /**< Error conditions */ 123 RS_LOG_WARNING = 4, /**< Warning conditions */ 124 RS_LOG_NOTICE = 5, /**< Normal but significant condition */ 125 RS_LOG_INFO = 6, /**< Informational */ 126 RS_LOG_DEBUG = 7 /**< Debug-level messages */ 127 } rs_loglevel; 128 129 /** Callback to write out log messages. 130 * 131 * \param level a syslog level. 132 * 133 * \param msg message to be logged. 134 * 135 * \sa \ref api_trace */ 136 typedef void rs_trace_fn_t(rs_loglevel level, char const *msg); 137 138 /** Set the least important message severity that will be output. 139 * 140 * \sa \ref api_trace */ 141 LIBRSYNC_EXPORT void rs_trace_set_level(rs_loglevel level); 142 143 /** Set trace callback. 144 * 145 * \sa \ref api_trace */ 146 LIBRSYNC_EXPORT void rs_trace_to(rs_trace_fn_t *); 147 148 /** Default trace callback that writes to stderr. 149 * 150 * Implements ::rs_trace_fn_t, and may be passed to rs_trace_to(). 151 * 152 * \sa \ref api_trace */ 153 LIBRSYNC_EXPORT void rs_trace_stderr(rs_loglevel level, char const *msg); 154 155 /** Check whether the library was compiled with debugging trace. 156 * 157 * \returns True if the library contains trace code; otherwise false. 158 * 159 * If this returns false, then trying to turn trace on will achieve nothing. 160 * 161 * \sa \ref api_trace */ 162 LIBRSYNC_EXPORT int rs_supports_trace(void); 163 164 /** Convert \p from_len bytes at \p from_buf into a hex representation in \p 165 * to_buf, which must be twice as long plus one byte for the null terminator. */ 166 LIBRSYNC_EXPORT void rs_hexify(char *to_buf, void const *from_buf, 167 int from_len); 168 169 /** Decode a base64 buffer in place. 170 * 171 * \returns The number of binary bytes. */ 172 LIBRSYNC_EXPORT size_t rs_unbase64(char *s); 173 174 /** Encode a buffer as base64. */ 175 LIBRSYNC_EXPORT void rs_base64(unsigned char const *buf, int n, char *out); 176 177 /** Return codes from nonblocking rsync operations. 178 * 179 * \sa rs_strerror() \sa api_callbacks */ 180 typedef enum rs_result { 181 RS_DONE = 0, /**< Completed successfully. */ 182 RS_BLOCKED = 1, /**< Blocked waiting for more data. */ 183 RS_RUNNING = 2, /**< The job is still running, and not yet 184 * finished or blocked. (This value should 185 * never be seen by the application.) */ 186 RS_TEST_SKIPPED = 77, /**< Test neither passed or failed. */ 187 RS_IO_ERROR = 100, /**< Error in file or network IO. */ 188 RS_SYNTAX_ERROR = 101, /**< Command line syntax error. */ 189 RS_MEM_ERROR = 102, /**< Out of memory. */ 190 RS_INPUT_ENDED = 103, /**< Unexpected end of input file, perhaps due 191 * to a truncated file or dropped network 192 * connection. */ 193 RS_BAD_MAGIC = 104, /**< Bad magic number at start of stream. 194 * Probably not a librsync file, or possibly 195 * the wrong kind of file or from an 196 * incompatible library version. */ 197 RS_UNIMPLEMENTED = 105, /**< Author is lazy. */ 198 RS_CORRUPT = 106, /**< Unbelievable value in stream. */ 199 RS_INTERNAL_ERROR = 107, /**< Probably a library bug. */ 200 RS_PARAM_ERROR = 108 /**< Bad value passed in to library, probably 201 * an application bug. */ 202 } rs_result; 203 204 /** Return an English description of a ::rs_result value. */ 205 LIBRSYNC_EXPORT char const *rs_strerror(rs_result r); 206 207 /** Performance statistics from a librsync encoding or decoding operation. 208 * 209 * \sa api_stats \sa rs_format_stats() \sa rs_log_stats() */ 210 typedef struct rs_stats { 211 char const *op; /**< Human-readable name of current operation. 212 * For example, "delta". */ 213 int lit_cmds; /**< Number of literal commands. */ 214 rs_long_t lit_bytes; /**< Number of literal bytes. */ 215 rs_long_t lit_cmdbytes; /**< Number of bytes used in literal command 216 * headers. */ 217 218 rs_long_t copy_cmds, copy_bytes, copy_cmdbytes; 219 rs_long_t sig_cmds, sig_bytes; 220 int false_matches; 221 222 rs_long_t sig_blocks; /**< Number of blocks described by the 223 * signature. */ 224 225 size_t block_len; 226 227 rs_long_t in_bytes; /**< Total bytes read from input. */ 228 rs_long_t out_bytes; /**< Total bytes written to output. */ 229 230 time_t start, end; 231 } rs_stats_t; 232 233 /** MD4 message-digest accumulator. 234 * 235 * \sa rs_mdfour(), rs_mdfour_begin(), rs_mdfour_update(), rs_mdfour_result() */ 236 typedef struct rs_mdfour rs_mdfour_t; 237 238 LIBRSYNC_EXPORT extern const int RS_MD4_SUM_LENGTH, RS_BLAKE2_SUM_LENGTH; 239 240 # define RS_MAX_STRONG_SUM_LENGTH 32 241 242 typedef uint32_t rs_weak_sum_t; 243 typedef unsigned char rs_strong_sum_t[RS_MAX_STRONG_SUM_LENGTH]; 244 245 LIBRSYNC_EXPORT void rs_mdfour(unsigned char *out, void const *in, size_t); 246 LIBRSYNC_EXPORT void rs_mdfour_begin( /* @out@ */ rs_mdfour_t *md); 247 248 /** Feed some data into the MD4 accumulator. 249 * 250 * \param md MD4 accumulator. 251 * 252 * \param in_void Data to add. 253 * 254 * \param n Number of bytes fed in. */ 255 LIBRSYNC_EXPORT void rs_mdfour_update(rs_mdfour_t *md, void const *in_void, 256 size_t n); 257 LIBRSYNC_EXPORT void rs_mdfour_result(rs_mdfour_t *md, unsigned char *out); 258 259 /** Return a human-readable representation of statistics. 260 * 261 * The string is truncated if it does not fit. 100 characters should be 262 * sufficient space. 263 * 264 * \param stats Statistics from an encoding or decoding operation. 265 * 266 * \param buf Buffer to receive result. 267 * 268 * \param size Size of buffer. 269 * 270 * \return \p buf. 271 * 272 * \sa \ref api_stats */ 273 LIBRSYNC_EXPORT char *rs_format_stats(rs_stats_t const *stats, char *buf, 274 size_t size); 275 276 /** Write statistics into the current log as text. 277 * 278 * \sa \ref api_stats \sa \ref api_trace */ 279 LIBRSYNC_EXPORT int rs_log_stats(rs_stats_t const *stats); 280 281 /** The signature datastructure type. */ 282 typedef struct rs_signature rs_signature_t; 283 284 /** Log the rs_signature_delta match stats. */ 285 LIBRSYNC_EXPORT void rs_signature_log_stats(rs_signature_t const *sig); 286 287 /** Deep deallocation of checksums. */ 288 LIBRSYNC_EXPORT void rs_free_sumset(rs_signature_t *); 289 290 /** Dump signatures to the log. */ 291 LIBRSYNC_EXPORT void rs_sumset_dump(rs_signature_t const *); 292 293 /** Description of input and output buffers. 294 * 295 * On each call to ::rs_job_iter(), the caller can make available 296 * 297 * - #avail_in bytes of input data at #next_in 298 * 299 * - #avail_out bytes of output space at #next_out 300 * 301 * - or some of both 302 * 303 * Buffers must be allocated and passed in by the caller. 304 * 305 * On input, the buffers structure must contain the address and length of the 306 * input and output buffers. The library updates these values to indicate the 307 * amount of \b remaining buffer. So, on return, #avail_out is not the amount 308 * of output data produced, but rather the amount of output buffer space still 309 * available. 310 * 311 * This means that the values on return are consistent with the values on 312 * entry, and suitable to be passed in on a second call, but they don't 313 * directly tell you how much output data was produced. 314 * 315 * Note also that if *#avail_in is nonzero on return, then not all of the input 316 * data has been consumed. The caller should either provide more output buffer 317 * space and call ::rs_job_iter() again passing the same #next_in and 318 * #avail_in, or put the remaining input data into some persistent buffer and 319 * call rs_job_iter() with it again when there is more output space. 320 * 321 * \sa rs_job_iter() */ 322 struct rs_buffers_s { 323 /** Next input byte. 324 * 325 * References a pointer which on entry should point to the start of the 326 * data to be encoded. Updated to point to the byte after the last one 327 * consumed. */ 328 char *next_in; 329 330 /** Number of bytes available at next_in. 331 * 332 * References the length of available input. Updated to be the number of 333 * unused data bytes, which will be zero if all the input was consumed. May 334 * be zero if there is no new input, but the caller just wants to drain 335 * output. */ 336 size_t avail_in; 337 338 /** True if there is no more data after this. */ 339 int eof_in; 340 341 /** Next output byte should be put there. 342 * 343 * References a pointer which on entry points to the start of the output 344 * buffer. Updated to point to the byte after the last one filled. */ 345 char *next_out; 346 347 /** Remaining free space at next_out. 348 * 349 * References the size of available output buffer. Updated to the size of 350 * unused output buffer. */ 351 size_t avail_out; 352 }; 353 354 /** \sa ::rs_buffers_s */ 355 typedef struct rs_buffers_s rs_buffers_t; 356 357 /** Default block length, if not determined by any other factors. 358 * 359 * The 2K default assumes a typical file is about 4MB and should be OK for 360 * files up to 32G with more than 1GB ram. */ 361 # define RS_DEFAULT_BLOCK_LEN 2048 362 363 /** Default minimum strong sum length, if the filesize is unknown. 364 * 365 * This is conservative, and should be safe for files less than 45TB with a 2KB 366 * block_len, assuming no collision attack with crafted data. */ 367 # define RS_DEFAULT_MIN_STRONG_LEN 12 368 369 /** Job of work to be done. 370 * 371 * Created by functions such as rs_sig_begin(), and then iterated over by 372 * rs_job_iter(). 373 * 374 * The contents are opaque to the application, and instances are always 375 * allocated by the library. 376 * 377 * \sa \ref api_streaming \sa rs_job */ 378 typedef struct rs_job rs_job_t; 379 380 /** Run a ::rs_job state machine until it blocks (::RS_BLOCKED), returns an 381 * error, or completes (::RS_DONE). 382 * 383 * \param job Description of job state. 384 * 385 * \param buffers Pointer to structure describing input and output buffers. 386 * 387 * \return The ::rs_result that caused iteration to stop. 388 * 389 * \c buffers->eof_in should be true if there is no more data after what's in 390 * the input buffer. The final block checksum will run across whatever's in 391 * there, without trying to accumulate anything else. 392 * 393 * \sa \ref api_streaming */ 394 LIBRSYNC_EXPORT rs_result rs_job_iter(rs_job_t *job, rs_buffers_t *buffers); 395 396 /** Type of application-supplied function for rs_job_drive(). 397 * 398 * \sa \ref api_pull */ 399 typedef rs_result rs_driven_cb(rs_job_t *job, rs_buffers_t *buf, 400 void *opaque); 401 402 /** Actively process a job, by making callbacks to fill and empty the buffers 403 * until the job is done. */ 404 LIBRSYNC_EXPORT rs_result rs_job_drive(rs_job_t *job, rs_buffers_t *buf, 405 rs_driven_cb in_cb, void *in_opaque, 406 rs_driven_cb out_cb, void *out_opaque); 407 408 /** Return a pointer to the statistics in a job. */ 409 LIBRSYNC_EXPORT const rs_stats_t *rs_job_statistics(rs_job_t *job); 410 411 /** Deallocate job state. */ 412 LIBRSYNC_EXPORT rs_result rs_job_free(rs_job_t *); 413 414 /** Get or check signature arguments for a given file size. 415 * 416 * This can be used to get the recommended arguments for generating a 417 * signature. On calling, old_fsize should be set to the old file size or -1 418 * for "unknown". The magic and block_len arguments should be set to a valid 419 * value or 0 for "recommended". The strong_len input should be set to a valid 420 * value, 0 for "maximum", or -1 for "miniumum". Use strong_len=0 for the best 421 * protection against active hash collision attacks for the given magic type. 422 * Use strong_len=-1 for the smallest signature size that is safe against 423 * random hash collisions for the block_len and old_fsize. Use strong_len=20 424 * for something probably good enough against attacks with smaller signatures. 425 * On return the 0 or -1 input args will be set to recommended values and the 426 * returned result will indicate if any inputs were invalid. 427 * 428 * \param old_fsize - the original file size (-1 for "unknown"). 429 * 430 * \param *magic - the magic type to use (0 for "recommended"). 431 * 432 * \param *block_len - the block length to use (0 for "recommended"). 433 * 434 * \param *strong_len - the strongsum length to use (0 for "maximum", -1 for 435 * "minimum"). 436 * 437 * \return RS_DONE if all arguments are valid, otherwise an error code. */ 438 LIBRSYNC_EXPORT rs_result rs_sig_args(rs_long_t old_fsize, 439 rs_magic_number * magic, 440 size_t *block_len, size_t *strong_len); 441 442 /** Start generating a signature. 443 * 444 * It's recommended you use rs_sig_args() to get the recommended arguments for 445 * this based on the original file size. 446 * 447 * \return A new rs_job_t into which the old file data can be passed. 448 * 449 * \param sig_magic Signature file format to generate (0 for "recommended"). 450 * See ::rs_magic_number. 451 * 452 * \param block_len Checksum block size to use (0 for "recommended"). Larger 453 * values make the signature shorter, and the delta longer. 454 * 455 * \param strong_len Strongsum length in bytes to use (0 for "maximum", -1 for 456 * "minimum"). Smaller values make the signature shorter but increase the risk 457 * of corruption from hash collisions. 458 * 459 * \sa rs_sig_file() */ 460 LIBRSYNC_EXPORT rs_job_t *rs_sig_begin(size_t block_len, size_t strong_len, 461 rs_magic_number sig_magic); 462 463 /** Prepare to compute a streaming delta. 464 * 465 * \todo Add a version of this that takes a ::rs_magic_number controlling the 466 * delta format. */ 467 LIBRSYNC_EXPORT rs_job_t *rs_delta_begin(rs_signature_t *); 468 469 /** Read a signature from a file into an ::rs_signature structure in memory. 470 * 471 * Once there, it can be used to generate a delta to a newer version of the 472 * file. 473 * 474 * \note After loading the signatures, you must call \ref rs_build_hash_table() 475 * before you can use them. */ 476 LIBRSYNC_EXPORT rs_job_t *rs_loadsig_begin(rs_signature_t **); 477 478 /** Call this after loading a signature to index it. 479 * 480 * Use rs_free_sumset() to release it after use. */ 481 LIBRSYNC_EXPORT rs_result rs_build_hash_table(rs_signature_t *sums); 482 483 /** Callback used to retrieve parts of the basis file. 484 * 485 * \param pos Position where copying should begin. 486 * 487 * \param len On input, the amount of data that should be retrieved. Updated to 488 * show how much is actually available, but should not be greater than the 489 * input value. 490 * 491 * \param buf On input, a buffer of at least \p *len bytes. May be updated to 492 * point to a buffer allocated by the callback if it prefers. */ 493 typedef rs_result rs_copy_cb(void *opaque, rs_long_t pos, size_t *len, 494 void **buf); 495 496 /** Apply a \a delta to a \a basis file to recreate the \a new file. 497 * 498 * This gives you back a ::rs_job_t object, which can be cranked by calling 499 * rs_job_iter() and updating the stream pointers. When finished, call 500 * rs_job_free() to dispose of it. 501 * 502 * \param copy_cb Callback used to retrieve content from the basis file. 503 * 504 * \param copy_arg Opaque environment pointer passed through to the callback. 505 * 506 * \todo As output is produced, accumulate the MD4 checksum of the output. Then 507 * if we find a CHECKSUM command we can check it's contents against the output. 508 * 509 * \todo Implement COPY commands. 510 * 511 * \sa rs_patch_file() \sa \ref api_streaming */ 512 LIBRSYNC_EXPORT rs_job_t *rs_patch_begin(rs_copy_cb * copy_cb, void *copy_arg); 513 514 # ifndef RSYNC_NO_STDIO_INTERFACE 515 # include <stdio.h> 516 517 /** Open a file with special handling for stdin or stdout. 518 * 519 * This provides a platform independent way to open large binary files. A 520 * filename "" or "-" means use stdin for reading, or stdout for writing. 521 * 522 * \param filename - The filename to open. 523 * 524 * \param mode - fopen style mode string. 525 * 526 * \param force - bool to force overwriting of existing files. */ 527 LIBRSYNC_EXPORT FILE *rs_file_open(char const *filename, char const *mode, 528 int force); 529 530 /** Close a file with special handling for stdin or stdout. 531 * 532 * This will not actually close the file if it is stdin or stdout. 533 * 534 * \param file - the stdio file to close. */ 535 LIBRSYNC_EXPORT int rs_file_close(FILE *file); 536 537 /** Get the size of a file. 538 * 539 * This provides a platform independent way to get the size of large files. It 540 * will return -1 if the size cannot be determined because it is not a regular 541 * file. 542 * 543 * \param file - the stdio file to get the size of. */ 544 LIBRSYNC_EXPORT rs_long_t rs_file_size(FILE *file); 545 546 /** ::rs_copy_cb that reads from a stdio file. */ 547 LIBRSYNC_EXPORT rs_result rs_file_copy_cb(void *arg, rs_long_t pos, size_t *len, 548 void **buf); 549 550 /** Buffer sizes for file IO. 551 * 552 * The default 0 means use the recommended buffer size for the operation being 553 * performed, any other value will override the recommended sizes. You probably 554 * only need to change these in testing. */ 555 LIBRSYNC_EXPORT extern int rs_inbuflen, rs_outbuflen; 556 557 /** Generate the signature of a basis file, and write it out to another. 558 * 559 * It's recommended you use rs_sig_args() to get the recommended arguments for 560 * this based on the original file size. 561 * 562 * \param old_file Stdio readable file whose signature will be generated. 563 * 564 * \param sig_file Writable stdio file to which the signature will be written./ 565 * 566 * \param block_len Checksum block size to use (0 for "recommended"). Larger 567 * values make the signature shorter, and the delta longer. 568 * 569 * \param strong_len Strongsum length in bytes to use (0 for "maximum", -1 for 570 * "minimum"). Smaller values make the signature shorter but increase the risk 571 * of corruption from hash collisions. 572 * 573 * \param sig_magic Signature file format to generate (0 for "recommended"). 574 * See ::rs_magic_number. 575 * 576 * \param stats Optional pointer to receive statistics. 577 * 578 * \sa \ref api_whole */ 579 LIBRSYNC_EXPORT rs_result rs_sig_file(FILE *old_file, FILE *sig_file, 580 size_t block_len, size_t strong_len, 581 rs_magic_number sig_magic, 582 rs_stats_t *stats); 583 584 /** Load signatures from a signature file into memory. 585 * 586 * \param sig_file Readable stdio file from which the signature will be read. 587 * 588 * \param sumset on return points to the newly allocated structure. 589 * 590 * \param stats Optional pointer to receive statistics. 591 * 592 * \sa \ref api_whole */ 593 LIBRSYNC_EXPORT rs_result rs_loadsig_file(FILE *sig_file, 594 rs_signature_t **sumset, 595 rs_stats_t *stats); 596 597 /** Generate a delta between a signature and a new file into a delta file. 598 * 599 * \sa \ref api_whole */ 600 LIBRSYNC_EXPORT rs_result rs_delta_file(rs_signature_t *, FILE *new_file, 601 FILE *delta_file, rs_stats_t *); 602 603 /** Apply a patch, relative to a basis, into a new file. 604 * 605 * \sa \ref api_whole */ 606 LIBRSYNC_EXPORT rs_result rs_patch_file(FILE *basis_file, FILE *delta_file, 607 FILE *new_file, rs_stats_t *); 608 # endif /* !RSYNC_NO_STDIO_INTERFACE */ 609 610 # ifdef __cplusplus 611 } /* extern "C" */ 612 # endif 613 614 #endif /* !_RSYNC_H */ 615