1 /*
2  * Routines to support checksumming of bytes.
3  *
4  * Copyright (C) 1996 Andrew Tridgell
5  * Copyright (C) 1996 Paul Mackerras
6  * Copyright (C) 2004-2020 Wayne Davison
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * In addition, as a special exception, the copyright holders give
14  * permission to dynamically link rsync with the OpenSSL and xxhash
15  * libraries when those libraries are being distributed in compliance
16  * with their license terms, and to distribute a dynamically linked
17  * combination of rsync and these libraries.  This is also considered
18  * to be covered under the GPL's System Libraries exception.
19  *
20  * This program is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  * GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License along
26  * with this program; if not, visit the http://fsf.org website.
27  */
28 
29 #include "rsync.h"
30 
31 #ifdef SUPPORT_XXHASH
32 #include <xxhash.h>
33 # if XXH_VERSION_NUMBER >= 800
34 #  define SUPPORT_XXH3 1
35 # endif
36 #endif
37 
38 extern int am_server;
39 extern int whole_file;
40 extern int checksum_seed;
41 extern int protocol_version;
42 extern int proper_seed_order;
43 extern const char *checksum_choice;
44 
/* The table of checksum names that parse_csum_name() and checksum_name()
 * look things up in.  Entries for the xxhash family are only compiled in
 * when the matching SUPPORT_* macro is defined.  Note that "xxhash" is a
 * second name that maps to the same number as "xxh64".
 * NOTE(review): the entry order presumably expresses preference during
 * negotiation -- confirm against the code that consumes name_num_obj. */
struct name_num_obj valid_checksums = {
	"checksum", NULL, NULL, 0, 0, {
#ifdef SUPPORT_XXH3
		{ CSUM_XXH3_128, "xxh128", NULL },
		{ CSUM_XXH3_64, "xxh3", NULL },
#endif
#ifdef SUPPORT_XXHASH
		{ CSUM_XXH64, "xxh64", NULL },
		{ CSUM_XXH64, "xxhash", NULL },
#endif
		{ CSUM_MD5, "md5", NULL },
		{ CSUM_MD4, "md4", NULL },
		{ CSUM_NONE, "none", NULL },
		{ 0, NULL, NULL } /* NULL name: presumably the end-of-list marker */
	}
};
61 
/* The active checksum numbers (CSUM_* values); parse_checksum_choice() sets both. */
int xfersum_type = 0; /* used for the file transfer checksums */
int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */
64 
parse_csum_name(const char * name,int len)65 int parse_csum_name(const char *name, int len)
66 {
67 	struct name_num_item *nni;
68 
69 	if (len < 0 && name)
70 		len = strlen(name);
71 
72 	if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
73 		if (protocol_version >= 30)
74 			return CSUM_MD5;
75 		if (protocol_version >= 27)
76 			return CSUM_MD4_OLD;
77 		if (protocol_version >= 21)
78 			return CSUM_MD4_BUSTED;
79 		return CSUM_MD4_ARCHAIC;
80 	}
81 
82 	nni = get_nni_by_name(&valid_checksums, name, len);
83 
84 	if (!nni) {
85 		rprintf(FERROR, "unknown checksum name: %s\n", name);
86 		exit_cleanup(RERR_UNSUPPORTED);
87 	}
88 
89 	return nni->num;
90 }
91 
checksum_name(int num)92 static const char *checksum_name(int num)
93 {
94 	struct name_num_item *nni = get_nni_by_num(&valid_checksums, num);
95 
96 	return nni ? nni->name : num < CSUM_MD4 ? "md4" : "UNKNOWN";
97 }
98 
parse_checksum_choice(int final_call)99 void parse_checksum_choice(int final_call)
100 {
101 	if (valid_checksums.negotiated_name)
102 		xfersum_type = checksum_type = valid_checksums.negotiated_num;
103 	else {
104 		char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
105 		if (cp) {
106 			xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice);
107 			checksum_type = parse_csum_name(cp+1, -1);
108 		} else
109 			xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1);
110 		if (am_server && checksum_choice)
111 			validate_choice_vs_env(NSTR_CHECKSUM, xfersum_type, checksum_type);
112 	}
113 
114 	if (xfersum_type == CSUM_NONE)
115 		whole_file = 1;
116 
117 	/* Snag the checksum name for both write_batch's option output & the following debug output. */
118 	if (valid_checksums.negotiated_name)
119 		checksum_choice = valid_checksums.negotiated_name;
120 	else if (checksum_choice == NULL)
121 		checksum_choice = checksum_name(xfersum_type);
122 
123 	if (final_call && DEBUG_GTE(NSTR, am_server ? 3 : 1)) {
124 		rprintf(FINFO, "%s%s checksum: %s\n",
125 			am_server ? "Server" : "Client",
126 			valid_checksums.negotiated_name ? " negotiated" : "",
127 			checksum_choice);
128 	}
129 }
130 
/* Return the number of digest bytes used for checksum type "cst".
 * flist_csum only matters for CSUM_MD4_ARCHAIC (see below).  An
 * unrecognized type calls exit_cleanup(RERR_UNSUPPORTED). */
int csum_len_for_type(int cst, BOOL flist_csum)
{
	int digest_len = 0;

	switch (cst) {
	  case CSUM_NONE:
		digest_len = 1;
		break;
	  case CSUM_MD4_ARCHAIC:
		/* The oldest code had an oddity: checksums in the file list were
		 * only 2 bytes long, while every other checksum was a full MD4
		 * digest. */
		if (flist_csum)
			digest_len = 2;
		else
			digest_len = MD4_DIGEST_LEN;
		break;
	  case CSUM_MD4:
	  case CSUM_MD4_OLD:
	  case CSUM_MD4_BUSTED:
		digest_len = MD4_DIGEST_LEN;
		break;
	  case CSUM_MD5:
		digest_len = MD5_DIGEST_LEN;
		break;
	  case CSUM_XXH64:
	  case CSUM_XXH3_64:
		digest_len = 64/8;
		break;
	  case CSUM_XXH3_128:
		digest_len = 128/8;
		break;
	  default: /* paranoia to prevent missing case values */
		exit_cleanup(RERR_UNSUPPORTED);
		break;
	}

	return digest_len;
}
156 
157 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
158  * Returns 1 if the public sum order matches our internal sum order.
159  * Returns -1 if the public sum order is the reverse of our internal sum order.
160  */
canonical_checksum(int csum_type)161 int canonical_checksum(int csum_type)
162 {
163 	switch (csum_type) {
164 	  case CSUM_NONE:
165 	  case CSUM_MD4_ARCHAIC:
166 	  case CSUM_MD4_OLD:
167 	  case CSUM_MD4_BUSTED:
168 		break;
169 	  case CSUM_MD4:
170 	  case CSUM_MD5:
171 		return -1;
172 	  case CSUM_XXH64:
173 	  case CSUM_XXH3_64:
174 	  case CSUM_XXH3_128:
175 		return 1;
176 	  default: /* paranoia to prevent missing case values */
177 		exit_cleanup(RERR_UNSUPPORTED);
178 	}
179 	return 0;
180 }
181 
182 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
183 /*
184   a simple 32 bit checksum that can be updated from either end
185   (inspired by Mark Adler's Adler-32 checksum)
186   */
get_checksum1(char * buf1,int32 len)187 uint32 get_checksum1(char *buf1, int32 len)
188 {
189 	int32 i;
190 	uint32 s1, s2;
191 	schar *buf = (schar *)buf1;
192 
193 	s1 = s2 = 0;
194 	for (i = 0; i < (len-4); i+=4) {
195 		s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
196 		s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
197 	}
198 	for (; i < len; i++) {
199 		s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
200 	}
201 	return (s1 & 0xffff) + (s2 << 16);
202 }
203 #endif
204 
/* Compute the strong checksum of the len bytes at buf and store the digest
 * in sum.  The algorithm is chosen by xfersum_type and the result
 * incorporates checksum_seed:
 *  - The xxhash variants pass the seed to the hash function.
 *  - MD5 hashes a non-zero seed (4 bytes) either before the data (when
 *    proper_seed_order is set) or after it.
 *  - MD4 hashes the data followed by a non-zero seed.
 * An unsupported xfersum_type calls exit_cleanup(RERR_UNSUPPORTED).
 *
 * The caller must supply a sum buffer that is large enough for the digest
 * of the active checksum type. */
void get_checksum2(char *buf, int32 len, char *sum)
{
	switch (xfersum_type) {
#ifdef SUPPORT_XXHASH
	  case CSUM_XXH64:
		SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
		break;
#endif
#ifdef SUPPORT_XXH3
	  case CSUM_XXH3_64:
		SIVAL64(sum, 0, XXH3_64bits_withSeed(buf, len, checksum_seed));
		break;
	  case CSUM_XXH3_128: {
		XXH128_hash_t digest = XXH3_128bits_withSeed(buf, len, checksum_seed);
		SIVAL64(sum, 0, digest.low64);
		SIVAL64(sum, 8, digest.high64);
		break;
	  }
#endif
	  case CSUM_MD5: {
		MD5_CTX m5;
		uchar seedbuf[4];
		MD5_Init(&m5);
		if (proper_seed_order) {
			if (checksum_seed) {
				SIVALu(seedbuf, 0, checksum_seed);
				MD5_Update(&m5, seedbuf, 4);
			}
			MD5_Update(&m5, (uchar *)buf, len);
		} else {
			MD5_Update(&m5, (uchar *)buf, len);
			if (checksum_seed) {
				SIVALu(seedbuf, 0, checksum_seed);
				MD5_Update(&m5, seedbuf, 4);
			}
		}
		MD5_Final((uchar *)sum, &m5);
		break;
	  }
	  case CSUM_MD4:
#ifdef USE_OPENSSL
	  {
		MD4_CTX m4;
		MD4_Init(&m4);
		MD4_Update(&m4, (uchar *)buf, len);
		if (checksum_seed) {
			uchar seedbuf[4];
			SIVALu(seedbuf, 0, checksum_seed);
			MD4_Update(&m4, seedbuf, 4);
		}
		MD4_Final((uchar *)sum, &m4);
		break;
	  }
#endif
	  /* Without USE_OPENSSL, CSUM_MD4 falls through to the mdfour code. */
	  case CSUM_MD4_OLD:
	  case CSUM_MD4_BUSTED:
	  case CSUM_MD4_ARCHAIC: {
		md_context m;
		int32 i;
		/* A scratch buffer that is kept between calls and only grows. */
		static char *buf1;
		static int32 len1;

		mdfour_begin(&m);

		/* Allocate when there is no buffer yet (not just when the data
		 * outgrows it): a first call with len == 0 would otherwise leave
		 * buf1 NULL, and the copy and seed store below would then write
		 * through a NULL pointer.  The extra 4 bytes hold the seed. */
		if (!buf1 || len > len1) {
			free(buf1);
			buf1 = new_array(char, len+4);
			len1 = len;
		}

		memcpy(buf1, buf, len);
		if (checksum_seed) {
			SIVAL(buf1,len,checksum_seed);
			len += 4;
		}

		for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
			mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);

		/*
		 * Prior to version 27 an incorrect MD4 checksum was computed
		 * by failing to call mdfour_tail() for block sizes that
		 * are multiples of 64.  This is fixed by calling mdfour_update()
		 * even when there are no more bytes.
		 */
		if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
			mdfour_update(&m, (uchar *)(buf1+i), len-i);

		mdfour_result(&m, (uchar *)sum);
		break;
	  }
	  default: /* paranoia to prevent missing case values */
		exit_cleanup(RERR_UNSUPPORTED);
	}
}
301 
/* Compute the checksum of the whole file "fname" using checksum_type and
 * store the digest in sum.
 *
 * fname: the file to open and read.
 * st_p:  stat info for the file; its st_size is the number of bytes hashed.
 * sum:   output buffer; its first MAX_DIGEST_LEN bytes are zeroed before
 *        anything else happens, so it must be at least that large.
 *
 * If the file cannot be opened the function returns with sum left all-zero.
 * No seed is mixed into these checksums (checksum_seed is not used here).
 * An unsupported checksum_type outputs an error and calls
 * exit_cleanup(RERR_UNSUPPORTED). */
void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
{
	struct map_struct *buf;
	OFF_T i, len = st_p->st_size;
	int32 remainder;
	int fd;

	memset(sum, 0, MAX_DIGEST_LEN);

	fd = do_open(fname, O_RDONLY, 0);
	if (fd == -1)
		return;

	buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);

	/* Every case below uses the same pattern: hash all the full
	 * CHUNK_SIZE pieces, then hash whatever is left over. */
	switch (checksum_type) {
#ifdef SUPPORT_XXHASH
	  case CSUM_XXH64: {
		/* The hash state is created on first use and kept for later calls. */
		static XXH64_state_t* state = NULL;
		if (!state && !(state = XXH64_createState()))
			out_of_memory("file_checksum");

		XXH64_reset(state, 0);

		for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
			XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);

		remainder = (int32)(len - i);
		if (remainder > 0)
			XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);

		SIVAL64(sum, 0, XXH64_digest(state));
		break;
	  }
#endif
#ifdef SUPPORT_XXH3
	  case CSUM_XXH3_64: {
		/* The hash state is created on first use and kept for later calls. */
		static XXH3_state_t* state = NULL;
		if (!state && !(state = XXH3_createState()))
			out_of_memory("file_checksum");

		XXH3_64bits_reset(state);

		for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
			XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);

		remainder = (int32)(len - i);
		if (remainder > 0)
			XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);

		SIVAL64(sum, 0, XXH3_64bits_digest(state));
		break;
	  }
	  case CSUM_XXH3_128: {
		XXH128_hash_t digest;
		/* The hash state is created on first use and kept for later calls. */
		static XXH3_state_t* state = NULL;
		if (!state && !(state = XXH3_createState()))
			out_of_memory("file_checksum");

		XXH3_128bits_reset(state);

		for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
			XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);

		remainder = (int32)(len - i);
		if (remainder > 0)
			XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);

		/* The low 64 bits are stored first, then the high 64 bits. */
		digest = XXH3_128bits_digest(state);
		SIVAL64(sum, 0, digest.low64);
		SIVAL64(sum, 8, digest.high64);
		break;
	  }
#endif
	  case CSUM_MD5: {
		MD5_CTX m5;

		MD5_Init(&m5);

		for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
			MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);

		remainder = (int32)(len - i);
		if (remainder > 0)
			MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);

		MD5_Final((uchar *)sum, &m5);
		break;
	  }
	  case CSUM_MD4:
#ifdef USE_OPENSSL
	  {
		MD4_CTX m4;

		MD4_Init(&m4);

		for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
			MD4_Update(&m4, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);

		remainder = (int32)(len - i);
		if (remainder > 0)
			MD4_Update(&m4, (uchar *)map_ptr(buf, i, remainder), remainder);

		MD4_Final((uchar *)sum, &m4);
		break;
	  }
#endif
	  /* Without USE_OPENSSL, CSUM_MD4 falls through to the mdfour code. */
	  case CSUM_MD4_OLD:
	  case CSUM_MD4_BUSTED:
	  case CSUM_MD4_ARCHAIC: {
		md_context m;

		mdfour_begin(&m);

		for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
			mdfour_update(&m, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);

		/* Prior to version 27 an incorrect MD4 checksum was computed
		 * by failing to call mdfour_tail() for block sizes that
		 * are multiples of 64.  This is fixed by calling mdfour_update()
		 * even when there are no more bytes. */
		remainder = (int32)(len - i);
		if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
			mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);

		mdfour_result(&m, (uchar *)sum);
		break;
	  }
	  default:
		rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
			checksum_name(checksum_type), checksum_type);
		exit_cleanup(RERR_UNSUPPORTED);
	}

	/* NOTE(review): the value returned by unmap_file() is ignored here;
	 * confirm whether it reports read errors that should be acted on. */
	close(fd);
	unmap_file(buf);
}
439 
/* The state of the running checksum that is managed by sum_init(),
 * sum_update(), and sum_end().  There is just one such checksum at a time. */
static int32 sumresidue; /* count of pending bytes held in ctx.md.buffer */
static union {
	md_context md; /* used with the mdfour_*() functions */
#ifdef USE_OPENSSL
	MD4_CTX m4;
#endif
	MD5_CTX m5;
} ctx;
#ifdef SUPPORT_XXHASH
static XXH64_state_t* xxh64_state; /* created on first use by sum_init() */
#endif
#ifdef SUPPORT_XXH3
static XXH3_state_t* xxh3_state; /* shared by the 64-bit & 128-bit XXH3 sums */
#endif
static int cursum_type; /* the checksum type given to the last sum_init() */
455 
sum_init(int csum_type,int seed)456 void sum_init(int csum_type, int seed)
457 {
458 	char s[4];
459 
460 	if (csum_type < 0)
461 		csum_type = parse_csum_name(NULL, 0);
462 	cursum_type = csum_type;
463 
464 	switch (csum_type) {
465 #ifdef SUPPORT_XXHASH
466 	  case CSUM_XXH64:
467 		if (!xxh64_state && !(xxh64_state = XXH64_createState()))
468 			out_of_memory("sum_init");
469 		XXH64_reset(xxh64_state, 0);
470 		break;
471 #endif
472 #ifdef SUPPORT_XXH3
473 	  case CSUM_XXH3_64:
474 		if (!xxh3_state && !(xxh3_state = XXH3_createState()))
475 			out_of_memory("sum_init");
476 		XXH3_64bits_reset(xxh3_state);
477 		break;
478 	  case CSUM_XXH3_128:
479 		if (!xxh3_state && !(xxh3_state = XXH3_createState()))
480 			out_of_memory("sum_init");
481 		XXH3_128bits_reset(xxh3_state);
482 		break;
483 #endif
484 	  case CSUM_MD5:
485 		MD5_Init(&ctx.m5);
486 		break;
487 	  case CSUM_MD4:
488 #ifdef USE_OPENSSL
489 		MD4_Init(&ctx.m4);
490 #else
491 		mdfour_begin(&ctx.md);
492 		sumresidue = 0;
493 #endif
494 		break;
495 	  case CSUM_MD4_OLD:
496 	  case CSUM_MD4_BUSTED:
497 	  case CSUM_MD4_ARCHAIC:
498 		mdfour_begin(&ctx.md);
499 		sumresidue = 0;
500 		SIVAL(s, 0, seed);
501 		sum_update(s, 4);
502 		break;
503 	  case CSUM_NONE:
504 		break;
505 	  default: /* paranoia to prevent missing case values */
506 		exit_cleanup(RERR_UNSUPPORTED);
507 	}
508 }
509 
/**
 * Feed len bytes at p into the running checksum that was started by
 * sum_init().  The results may be retrieved using sum_end().
 *
 * The checksum type is the one that sum_init() saved in cursum_type.  For
 * the mdfour-based sums the data is only hashed in CSUM_CHUNK-sized pieces:
 * any leftover bytes are kept in ctx.md.buffer (with sumresidue holding
 * their count) until a later call completes the chunk or sum_end() is
 * called.  CSUM_NONE ignores the data.
 **/
void sum_update(const char *p, int32 len)
{
	switch (cursum_type) {
#ifdef SUPPORT_XXHASH
	  case CSUM_XXH64:
		XXH64_update(xxh64_state, p, len);
		break;
#endif
#ifdef SUPPORT_XXH3
	  case CSUM_XXH3_64:
		XXH3_64bits_update(xxh3_state, p, len);
		break;
	  case CSUM_XXH3_128:
		XXH3_128bits_update(xxh3_state, p, len);
		break;
#endif
	  case CSUM_MD5:
		MD5_Update(&ctx.m5, (uchar *)p, len);
		break;
	  case CSUM_MD4:
#ifdef USE_OPENSSL
		MD4_Update(&ctx.m4, (uchar *)p, len);
		break;
#endif
	  /* Without USE_OPENSSL, CSUM_MD4 falls through to the mdfour code. */
	  case CSUM_MD4_OLD:
	  case CSUM_MD4_BUSTED:
	  case CSUM_MD4_ARCHAIC:
		/* Not enough for a full chunk yet: just save the new bytes. */
		if (len + sumresidue < CSUM_CHUNK) {
			memcpy(ctx.md.buffer + sumresidue, p, len);
			sumresidue += len;
			break;
		}

		/* Top off the saved partial chunk and hash it. */
		if (sumresidue) {
			int32 i = CSUM_CHUNK - sumresidue;
			memcpy(ctx.md.buffer + sumresidue, p, i);
			mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, CSUM_CHUNK);
			len -= i;
			p += i;
		}

		/* Hash the full chunks directly from the caller's data. */
		while (len >= CSUM_CHUNK) {
			mdfour_update(&ctx.md, (uchar *)p, CSUM_CHUNK);
			len -= CSUM_CHUNK;
			p += CSUM_CHUNK;
		}

		/* Save whatever is left over for the next call. */
		sumresidue = len;
		if (sumresidue)
			memcpy(ctx.md.buffer, p, sumresidue);
		break;
	  case CSUM_NONE:
		break;
	  default: /* paranoia to prevent missing case values */
		exit_cleanup(RERR_UNSUPPORTED);
	}
}
575 
/* Finish the running checksum that was started by sum_init() and store its
 * digest in sum.  Returns the value of csum_len_for_type(cursum_type, 0).
 *
 * NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
 * into the "sum" buffer. */
int sum_end(char *sum)
{
	switch (cursum_type) {
#ifdef SUPPORT_XXHASH
	  case CSUM_XXH64:
		SIVAL64(sum, 0, XXH64_digest(xxh64_state));
		break;
#endif
#ifdef SUPPORT_XXH3
	  case CSUM_XXH3_64:
		SIVAL64(sum, 0, XXH3_64bits_digest(xxh3_state));
		break;
	  case CSUM_XXH3_128: {
		/* The low 64 bits are stored first, then the high 64 bits. */
		XXH128_hash_t digest = XXH3_128bits_digest(xxh3_state);
		SIVAL64(sum, 0, digest.low64);
		SIVAL64(sum, 8, digest.high64);
		break;
	  }
#endif
	  case CSUM_MD5:
		MD5_Final((uchar *)sum, &ctx.m5);
		break;
	  case CSUM_MD4:
#ifdef USE_OPENSSL
		MD4_Final((uchar *)sum, &ctx.m4);
		break;
#endif
	  /* Without USE_OPENSSL, CSUM_MD4 falls through to the CSUM_MD4_OLD code. */
	  case CSUM_MD4_OLD:
		/* The final update is done even when no bytes are pending. */
		mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
		mdfour_result(&ctx.md, (uchar *)sum);
		break;
	  case CSUM_MD4_BUSTED:
	  case CSUM_MD4_ARCHAIC:
		/* These 2 variants skip the final update when nothing is pending. */
		if (sumresidue)
			mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
		mdfour_result(&ctx.md, (uchar *)sum);
		break;
	  case CSUM_NONE:
		*sum = '\0';
		break;
	  default: /* paranoia to prevent missing case values */
		exit_cleanup(RERR_UNSUPPORTED);
	}

	return csum_len_for_type(cursum_type, 0);
}
626