1 /* vifm
2  * Copyright (C) 2016 xaizek.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
17  */
18 
19 #include "compare.h"
20 
21 #include <assert.h> /* assert() */
22 #include <stddef.h> /* size_t */
23 #include <stdint.h> /* INTPTR_MAX INT64_MAX */
24 #include <stdio.h> /* FILE fclose() feof() fopen() fread() */
25 #include <stdlib.h> /* free() malloc() */
26 #include <string.h> /* memcmp() */
27 
28 #include "compat/fs_limits.h"
29 #include "compat/os.h"
30 #include "compat/reallocarray.h"
31 #include "modes/dialogs/msg_dialog.h"
32 #include "ui/cancellation.h"
33 #include "ui/statusbar.h"
34 #include "ui/ui.h"
35 #include "utils/dynarray.h"
36 #include "utils/fs.h"
37 #include "utils/fsdata.h"
38 #include "utils/macros.h"
39 #include "utils/path.h"
40 #include "utils/str.h"
41 #include "utils/string_array.h"
42 #include "utils/trie.h"
43 #include "utils/utils.h"
44 #include "filelist.h"
45 #include "fops_cpmv.h"
46 #include "fops_misc.h"
47 #include "running.h"
48 
49 /* This is the only unit that uses xxhash, so import it directly here. */
50 #define XXH_PRIVATE_API
51 #include "utils/xxhash.h"
52 
53 /* Amount of data to read at once. */
54 #define BLOCK_SIZE (32*1024)
55 
56 /* Amount of data to hash for coarse comparison. */
57 #define PREFIX_SIZE (4*1024)
58 
59 /* Entry in singly-bounded list of files that have matched fingerprints. */
60 typedef struct compare_record_t
61 {
62 	char *path;                    /* Full path to file with sample content. */
63 	int id;                        /* Chosen id. */
64 	struct compare_record_t *next; /* Next entry in the list. */
65 }
66 compare_record_t;
67 
68 static void make_unique_lists(entries_t curr, entries_t other);
69 static void leave_only_dups(entries_t *curr, entries_t *other);
70 static int is_not_duplicate(view_t *view, const dir_entry_t *entry, void *arg);
71 static void fill_side_by_side(entries_t curr, entries_t other, int group_paths);
72 static int id_sorter(const void *first, const void *second);
73 static void put_or_free(view_t *view, dir_entry_t *entry, int id, int take);
74 static entries_t make_diff_list(trie_t *trie, view_t *view, int *next_id,
75 		CompareType ct, int skip_empty, int dups_only);
76 static void list_view_entries(const view_t *view, strlist_t *list);
77 static int append_valid_nodes(const char name[], int valid,
78 		const void *parent_data, void *data, void *arg);
79 static void list_files_recursively(const char path[], int skip_dot_files,
80 		strlist_t *list);
81 static char * get_file_fingerprint(const char path[], const dir_entry_t *entry,
82 		CompareType ct);
83 static char * get_contents_fingerprint(const char path[],
84 		const dir_entry_t *entry);
85 static int get_file_id(trie_t *trie, const char path[],
86 		const char fingerprint[], int *id, CompareType ct);
87 static int files_are_identical(const char a[], const char b[]);
88 static void put_file_id(trie_t *trie, const char path[],
89 		const char fingerprint[], int id, CompareType ct);
90 static void free_compare_records(void *ptr);
91 
92 int
compare_two_panes(CompareType ct,ListType lt,int group_paths,int skip_empty)93 compare_two_panes(CompareType ct, ListType lt, int group_paths, int skip_empty)
94 {
95 	/* We don't compare lists of files, so skip the check if at least one of the
96 	 * views is a custom one. */
97 	if(!flist_custom_active(&lwin) && !flist_custom_active(&rwin) &&
98 			paths_are_same(flist_get_dir(&lwin), flist_get_dir(&rwin)))
99 	{
100 		ui_sb_err("Both views are at the same location");
101 		return 1;
102 	}
103 
104 	int next_id = 1;
105 	entries_t curr, other;
106 
107 	trie_t *const trie = trie_create();
108 	ui_cancellation_push_on();
109 
110 	curr = make_diff_list(trie, curr_view, &next_id, ct, skip_empty, 0);
111 	other = make_diff_list(trie, other_view, &next_id, ct, skip_empty,
112 			lt == LT_DUPS);
113 
114 	ui_cancellation_pop();
115 	trie_free_with_data(trie, &free_compare_records);
116 
117 	/* Clear progress message displayed by make_diff_list(). */
118 	ui_sb_quick_msg_clear();
119 
120 	if(ui_cancellation_requested())
121 	{
122 		free_dir_entries(curr_view, &curr.entries, &curr.nentries);
123 		free_dir_entries(other_view, &other.entries, &other.nentries);
124 		ui_sb_msg("Comparison has been cancelled");
125 		return 1;
126 	}
127 
128 	if(!group_paths || lt != LT_ALL)
129 	{
130 		/* Sort both lists according to unique file numbers to group identical files
131 		 * (sorting is stable, tags are set in make_diff_list()). */
132 		safe_qsort(curr.entries, curr.nentries, sizeof(*curr.entries), &id_sorter);
133 		safe_qsort(other.entries, other.nentries, sizeof(*other.entries),
134 				&id_sorter);
135 	}
136 
137 	if(lt == LT_UNIQUE)
138 	{
139 		make_unique_lists(curr, other);
140 		return 0;
141 	}
142 
143 	if(lt == LT_DUPS)
144 	{
145 		leave_only_dups(&curr, &other);
146 	}
147 
148 	flist_custom_start(curr_view, lt == LT_ALL ? "diff" : "dups diff");
149 	flist_custom_start(other_view, lt == LT_ALL ? "diff" : "dups diff");
150 
151 	fill_side_by_side(curr, other, group_paths);
152 
153 	if(flist_custom_finish(curr_view, CV_DIFF, 0) != 0)
154 	{
155 		show_error_msg("Comparison", "No results to display");
156 		return 0;
157 	}
158 	if(flist_custom_finish(other_view, CV_DIFF, 0) != 0)
159 	{
160 		assert(0 && "The error shouldn't be happening here.");
161 	}
162 
163 	curr_view->list_pos = 0;
164 	other_view->list_pos = 0;
165 	curr_view->custom.diff_cmp_type = ct;
166 	other_view->custom.diff_cmp_type = ct;
167 	curr_view->custom.diff_path_group = group_paths;
168 	other_view->custom.diff_path_group = group_paths;
169 
170 	assert(curr_view->list_rows == other_view->list_rows &&
171 			"Diff views must be in sync!");
172 
173 	ui_view_schedule_redraw(curr_view);
174 	ui_view_schedule_redraw(other_view);
175 	return 0;
176 }
177 
178 /* Composes two views containing only files that are unique to each of them.
179  * Assumes that both lists are sorted by id. */
180 static void
make_unique_lists(entries_t curr,entries_t other)181 make_unique_lists(entries_t curr, entries_t other)
182 {
183 	int i, j = 0;
184 
185 	flist_custom_start(curr_view, "unique");
186 	flist_custom_start(other_view, "unique");
187 
188 	for(i = 0; i < other.nentries; ++i)
189 	{
190 		const int id = other.entries[i].id;
191 
192 		while(j < curr.nentries && curr.entries[j].id < id)
193 		{
194 			flist_custom_put(curr_view, &curr.entries[j]);
195 			++j;
196 		}
197 
198 		if(j >= curr.nentries || curr.entries[j].id != id)
199 		{
200 			flist_custom_put(other_view, &other.entries[i]);
201 			continue;
202 		}
203 
204 		while(j < curr.nentries && curr.entries[j].id == id)
205 		{
206 			fentry_free(curr_view, &curr.entries[j++]);
207 		}
208 		while(i < other.nentries && other.entries[i].id == id)
209 		{
210 			fentry_free(other_view, &other.entries[i++]);
211 		}
212 		--i;
213 	}
214 
215 	/* Entries' data has been moved out of them or freed, so need to free only the
216 	 * lists. */
217 	dynarray_free(curr.entries);
218 	dynarray_free(other.entries);
219 
220 	(void)flist_custom_finish(curr_view, CV_REGULAR, 1);
221 	(void)flist_custom_finish(other_view, CV_REGULAR, 1);
222 
223 	curr_view->list_pos = 0;
224 	other_view->list_pos = 0;
225 
226 	ui_view_schedule_redraw(curr_view);
227 	ui_view_schedule_redraw(other_view);
228 }
229 
230 /* Synchronizes two lists of entries so that they contain only items that
231  * present in both of the lists.  Assumes that both lists are sorted by id. */
232 static void
leave_only_dups(entries_t * curr,entries_t * other)233 leave_only_dups(entries_t *curr, entries_t *other)
234 {
235 	int new_id = 0;
236 	int i = 0, j = 0;
237 
238 	/* Skip leading unmatched files. */
239 	while(i < other->nentries && other->entries[i].id == -1)
240 	{
241 		++i;
242 	}
243 
244 	/* Process sequences of files in two lists. */
245 	while(i < other->nentries)
246 	{
247 		const int id = other->entries[i].id;
248 
249 		/* Skip sequence of unmatched files. */
250 		while(j < curr->nentries && curr->entries[j].id < id)
251 		{
252 			curr->entries[j++].id = -1;
253 		}
254 
255 		if(j < curr->nentries && curr->entries[j].id == id)
256 		{
257 			/* Allocate next id when we find a matching pair of files in both
258 			 * lists. */
259 			++new_id;
260 			/* Update sequence of identical ids for other list. */
261 			do
262 			{
263 				other->entries[i++].id = new_id;
264 			}
265 			while(i < other->nentries && other->entries[i].id == id);
266 
267 			/* Update sequence of identical ids for current list. */
268 			do
269 			{
270 				curr->entries[j++].id = new_id;
271 			}
272 			while(j < curr->nentries && curr->entries[j].id == id);
273 		}
274 	}
275 
276 	/* Exclude unprocessed files in the tail. */
277 	while(j < curr->nentries)
278 	{
279 		curr->entries[j++].id = -1;
280 	}
281 
282 	(void)zap_entries(other_view, other->entries, &other->nentries,
283 			&is_not_duplicate, NULL, 1, 0);
284 	(void)zap_entries(curr_view, curr->entries, &curr->nentries,
285 			&is_not_duplicate, NULL, 1, 0);
286 }
287 
288 /* zap_entries() filter to filter-out files marked for removal.  Returns
289  * non-zero if entry is to be kept and zero otherwise. */
290 static int
is_not_duplicate(view_t * view,const dir_entry_t * entry,void * arg)291 is_not_duplicate(view_t *view, const dir_entry_t *entry, void *arg)
292 {
293 	return entry->id != -1;
294 }
295 
296 /* Composes side-by-side comparison of files in two views. */
297 static void
fill_side_by_side(entries_t curr,entries_t other,int group_paths)298 fill_side_by_side(entries_t curr, entries_t other, int group_paths)
299 {
300 	enum { UP, LEFT, DIAG };
301 
302 	int i, j;
303 	/* Describes results of solving sub-problems. */
304 	int (*d)[other.nentries + 1] =
305 		reallocarray(NULL, curr.nentries + 1, sizeof(*d));
306 	/* Describes paths (backtracking handles ambiguity badly). */
307 	char (*p)[other.nentries + 1] =
308 		reallocarray(NULL, curr.nentries + 1, sizeof(*p));
309 
310 	for(i = 0; i <= curr.nentries; ++i)
311 	{
312 		for(j = 0; j <= other.nentries; ++j)
313 		{
314 			if(i == 0)
315 			{
316 				d[i][j] = j;
317 				p[i][j] = LEFT;
318 			}
319 			else if(j == 0)
320 			{
321 				d[i][j] = i;
322 				p[i][j] = UP;
323 			}
324 			else
325 			{
326 				const dir_entry_t *centry = &curr.entries[curr.nentries - i];
327 				const dir_entry_t *oentry = &other.entries[other.nentries - j];
328 
329 				d[i][j] = MIN(d[i - 1][j] + 1, d[i][j - 1] + 1);
330 				p[i][j] = d[i][j] == d[i - 1][j] + 1 ? UP : LEFT;
331 
332 				if((centry->id == oentry->id ||
333 							(group_paths && stroscmp(centry->name, oentry->name) == 0)) &&
334 						d[i - 1][j - 1] <= d[i][j])
335 				{
336 					d[i][j] = d[i - 1][j - 1];
337 					p[i][j] = DIAG;
338 				}
339 			}
340 		}
341 	}
342 
343 	i = curr.nentries;
344 	j = other.nentries;
345 	while(i != 0 || j != 0)
346 	{
347 		switch(p[i][j])
348 		{
349 			dir_entry_t *e;
350 
351 			case UP:
352 				e = &curr.entries[curr.nentries - 1 - --i];
353 				flist_custom_put(curr_view, e);
354 				flist_custom_add_separator(other_view, e->id);
355 				break;
356 			case LEFT:
357 				e = &other.entries[other.nentries - 1 - --j];
358 				flist_custom_put(other_view, e);
359 				flist_custom_add_separator(curr_view, e->id);
360 				break;
361 			case DIAG:
362 				flist_custom_put(curr_view, &curr.entries[curr.nentries - 1 - --i]);
363 				flist_custom_put(other_view, &other.entries[other.nentries - 1 - --j]);
364 				break;
365 		}
366 	}
367 
368 	free(d);
369 	free(p);
370 
371 	/* Entries' data has been moved out of them, so need to free only the
372 	 * lists. */
373 	dynarray_free(curr.entries);
374 	dynarray_free(other.entries);
375 }
376 
377 int
compare_one_pane(view_t * view,CompareType ct,ListType lt,int skip_empty)378 compare_one_pane(view_t *view, CompareType ct, ListType lt, int skip_empty)
379 {
380 	int i, dup_id;
381 	view_t *other = (view == curr_view) ? other_view : curr_view;
382 	const char *const title = (lt == LT_ALL)  ? "compare"
383 	                        : (lt == LT_DUPS) ? "dups" : "nondups";
384 
385 	int next_id = 1;
386 	entries_t curr;
387 
388 	trie_t *trie = trie_create();
389 	ui_cancellation_push_on();
390 
391 	curr = make_diff_list(trie, view, &next_id, ct, skip_empty, 0);
392 
393 	ui_cancellation_pop();
394 	trie_free_with_data(trie, &free_compare_records);
395 
396 	/* Clear progress message displayed by make_diff_list(). */
397 	ui_sb_quick_msg_clear();
398 
399 	if(ui_cancellation_requested())
400 	{
401 		free_dir_entries(view, &curr.entries, &curr.nentries);
402 		ui_sb_msg("Comparison has been cancelled");
403 		return 1;
404 	}
405 
406 	safe_qsort(curr.entries, curr.nentries, sizeof(*curr.entries), &id_sorter);
407 
408 	flist_custom_start(view, title);
409 
410 	dup_id = -1;
411 	next_id = 0;
412 	for(i = 0; i < curr.nentries; ++i)
413 	{
414 		dir_entry_t *entry = &curr.entries[i];
415 
416 		if(lt == LT_ALL)
417 		{
418 			flist_custom_put(view, entry);
419 			continue;
420 		}
421 
422 		if(entry->id == dup_id)
423 		{
424 			put_or_free(view, entry, next_id, lt == LT_DUPS);
425 			continue;
426 		}
427 
428 		dup_id = (i < curr.nentries - 1 && entry[0].id == entry[1].id)
429 		       ? entry->id
430 		       : -1;
431 
432 		if(entry->id == dup_id)
433 		{
434 			put_or_free(view, entry, ++next_id, lt == LT_DUPS);
435 			continue;
436 		}
437 
438 		put_or_free(view, entry, next_id, lt == LT_UNIQUE);
439 	}
440 
441 	/* Entries' data has been moved out of them or freed, so need to free only the
442 	 * list. */
443 	dynarray_free(curr.entries);
444 
445 	if(flist_custom_finish(view, lt == LT_UNIQUE ? CV_REGULAR : CV_COMPARE,
446 				0) != 0)
447 	{
448 		show_error_msg("Comparison", "No results to display");
449 		return 0;
450 	}
451 
452 	/* Leave the other pane, if it's in the CV_DIFF mode, two panes are needed for
453 	 * this. */
454 	if(other->custom.type == CV_DIFF)
455 	{
456 		rn_leave(other, 1);
457 	}
458 
459 	view->list_pos = 0;
460 	ui_view_schedule_redraw(view);
461 	return 0;
462 }
463 
464 /* qsort() comparer that stable sorts entries in ascending order.  Returns
465  * standard -1, 0, 1 for comparisons. */
466 static int
id_sorter(const void * first,const void * second)467 id_sorter(const void *first, const void *second)
468 {
469 	const dir_entry_t *a = first;
470 	const dir_entry_t *b = second;
471 	return a->id == b->id ? a->tag - b->tag : a->id - b->id;
472 }
473 
474 /* Either puts the entry into the view or frees it (depends on the take
475  * argument). */
476 static void
put_or_free(view_t * view,dir_entry_t * entry,int id,int take)477 put_or_free(view_t *view, dir_entry_t *entry, int id, int take)
478 {
479 	if(take)
480 	{
481 		entry->id = id;
482 		flist_custom_put(view, entry);
483 	}
484 	else
485 	{
486 		fentry_free(view, entry);
487 	}
488 }
489 
490 /* Makes sorted by path list of entries that.  The trie is used to keep track of
491  * identical files.  With non-zero dups_only, new files aren't added to the
492  * trie. */
493 static entries_t
make_diff_list(trie_t * trie,view_t * view,int * next_id,CompareType ct,int skip_empty,int dups_only)494 make_diff_list(trie_t *trie, view_t *view, int *next_id, CompareType ct,
495 		int skip_empty, int dups_only)
496 {
497 	int i;
498 	strlist_t files = {};
499 	entries_t r = {};
500 	int last_progress = 0;
501 
502 	show_progress("Listing...", 0);
503 	if(flist_custom_active(view) &&
504 			ONE_OF(view->custom.type, CV_REGULAR, CV_VERY))
505 	{
506 		list_view_entries(view, &files);
507 	}
508 	else
509 	{
510 		list_files_recursively(flist_get_dir(view), view->hide_dot, &files);
511 	}
512 
513 	show_progress("Querying...", 0);
514 	for(i = 0; i < files.nitems && !ui_cancellation_requested(); ++i)
515 	{
516 		int progress;
517 		int existing_id;
518 		char *fingerprint;
519 		const char *const path = files.items[i];
520 		dir_entry_t *const entry = entry_list_add(view, &r.entries, &r.nentries,
521 				path);
522 
523 		if(skip_empty && entry->size == 0)
524 		{
525 			fentry_free(view, entry);
526 			--r.nentries;
527 			continue;
528 		}
529 
530 		fingerprint = get_file_fingerprint(path, entry, ct);
531 		/* In case we couldn't obtain fingerprint (e.g., comparing by contents and
532 		 * files isn't readable), ignore the file and keep going. */
533 		if(is_null_or_empty(fingerprint))
534 		{
535 			free(fingerprint);
536 			fentry_free(view, entry);
537 			--r.nentries;
538 			continue;
539 		}
540 
541 		entry->tag = i;
542 		if(get_file_id(trie, path, fingerprint, &existing_id, ct))
543 		{
544 			entry->id = existing_id;
545 		}
546 		else if(dups_only)
547 		{
548 			entry->id = -1;
549 		}
550 		else
551 		{
552 			entry->id = *next_id;
553 			++*next_id;
554 			put_file_id(trie, path, fingerprint, entry->id, ct);
555 		}
556 
557 		free(fingerprint);
558 
559 		progress = (i*100)/files.nitems;
560 		if(progress != last_progress)
561 		{
562 			char progress_msg[128];
563 
564 			last_progress = progress;
565 			snprintf(progress_msg, sizeof(progress_msg), "Querying... %d (% 2d%%)", i,
566 					progress);
567 			show_progress(progress_msg, -1);
568 		}
569 	}
570 
571 	free_string_array(files.items, files.nitems);
572 	return r;
573 }
574 
575 /* Fills the list with entries of the view in hierarchical order (pre-order tree
576  * traversal). */
577 static void
list_view_entries(const view_t * view,strlist_t * list)578 list_view_entries(const view_t *view, strlist_t *list)
579 {
580 	int i;
581 
582 	fsdata_t *const tree = fsdata_create(0, 0);
583 
584 	for(i = 0; i < view->list_rows; ++i)
585 	{
586 		if(!fentry_is_dir(&view->dir_entry[i]))
587 		{
588 			char full_path[PATH_MAX + 1];
589 			void *data = &view->dir_entry[i];
590 			get_full_path_of(&view->dir_entry[i], sizeof(full_path), full_path);
591 			fsdata_set(tree, full_path, &data, sizeof(data));
592 		}
593 	}
594 
595 	fsdata_traverse(tree, &append_valid_nodes, list);
596 
597 	fsdata_free(tree);
598 }
599 
600 /* fsdata_traverse() callback that collects names of existing files into a
601  * list.  Should return non-zero to stop traverser. */
602 static int
append_valid_nodes(const char name[],int valid,const void * parent_data,void * data,void * arg)603 append_valid_nodes(const char name[], int valid, const void *parent_data,
604 		void *data, void *arg)
605 {
606 	strlist_t *const list = arg;
607 	dir_entry_t *const entry = *(dir_entry_t **)data;
608 
609 	if(valid)
610 	{
611 		char full_path[PATH_MAX + 1];
612 		get_full_path_of(entry, sizeof(full_path), full_path);
613 		list->nitems = add_to_string_array(&list->items, list->nitems, full_path);
614 	}
615 	return 0;
616 }
617 
618 /* Collects files under specified file system tree. */
619 static void
list_files_recursively(const char path[],int skip_dot_files,strlist_t * list)620 list_files_recursively(const char path[], int skip_dot_files, strlist_t *list)
621 {
622 	int i;
623 
624 	/* Obtain sorted list of files. */
625 	int len;
626 	char **lst = list_sorted_files(path, &len);
627 	if(len < 0)
628 	{
629 		return;
630 	}
631 
632 	/* Visit all subdirectories ignoring symbolic links to directories. */
633 	for(i = 0; i < len && !ui_cancellation_requested(); ++i)
634 	{
635 		char *full_path;
636 		if(skip_dot_files && lst[i][0] == '.')
637 		{
638 			update_string(&lst[i], NULL);
639 			continue;
640 		}
641 
642 		full_path = join_paths(path, lst[i]);
643 		if(is_dir(full_path))
644 		{
645 			if(!is_symlink(full_path))
646 			{
647 				list_files_recursively(full_path, skip_dot_files, list);
648 			}
649 			free(full_path);
650 			update_string(&lst[i], NULL);
651 		}
652 		else
653 		{
654 			free(lst[i]);
655 			lst[i] = full_path;
656 		}
657 
658 		show_progress("Listing...", 1000);
659 	}
660 
661 	/* Append files. */
662 	for(i = 0; i < len; ++i)
663 	{
664 		if(lst[i] != NULL)
665 		{
666 			list->nitems = put_into_string_array(&list->items, list->nitems, lst[i]);
667 		}
668 	}
669 
670 	free(lst);
671 }
672 
673 /* Computes fingerprint of the file specified by path and entry.  Type of the
674  * fingerprint is determined by ct parameter.  Returns newly allocated string
675  * with the fingerprint, which is empty or NULL on error. */
676 static char *
get_file_fingerprint(const char path[],const dir_entry_t * entry,CompareType ct)677 get_file_fingerprint(const char path[], const dir_entry_t *entry,
678 		CompareType ct)
679 {
680 	switch(ct)
681 	{
682 		char name[NAME_MAX + 1];
683 
684 		case CT_NAME:
685 			if(case_sensitive_paths(path))
686 			{
687 				return strdup(entry->name);
688 			}
689 			str_to_lower(entry->name, name, sizeof(name));
690 			return strdup(name);
691 		case CT_SIZE:
692 			return format_str("%" PRINTF_ULL, (unsigned long long)entry->size);
693 		case CT_CONTENTS:
694 			return get_contents_fingerprint(path, entry);
695 	}
696 	assert(0 && "Unexpected diffing type.");
697 	return strdup("");
698 }
699 
700 /* Makes fingerprint of file contents (all or part of it of fixed size).
701  * Returns the fingerprint as a string, which is empty or NULL on error. */
702 static char *
get_contents_fingerprint(const char path[],const dir_entry_t * entry)703 get_contents_fingerprint(const char path[], const dir_entry_t *entry)
704 {
705 #if INTPTR_MAX == INT64_MAX
706 #define XX_BITS 64
707 #else
708 #define XX_BITS 32
709 #endif
710 #define XX__(name, bits) XXH ## bits ## _ ## name
711 #define XX_(name, bits) XX__(name, bits)
712 #define XX(name) XX_(name, XX_BITS)
713 
714 	XX(state_t) st;
715 	char block[BLOCK_SIZE];
716 	size_t to_read = PREFIX_SIZE;
717 	FILE *in = os_fopen(path, "rb");
718 	if(in == NULL)
719 	{
720 		return strdup("");
721 	}
722 
723 	XX(reset)(&st, 0U);
724 	while(to_read != 0U)
725 	{
726 		const size_t portion = MIN(sizeof(block), to_read);
727 		const size_t nread = fread(&block, 1, portion, in);
728 		if(nread == 0U)
729 		{
730 			break;
731 		}
732 
733 		XX(update)(&st, block, nread);
734 		to_read -= nread;
735 	}
736 	fclose(in);
737 
738 	return format_str("%" PRINTF_ULL "|%" PRINTF_ULL,
739 			(unsigned long long)entry->size, (unsigned long long)XX(digest)(&st));
740 
741 #undef XX_BITS
742 #undef XX__
743 #undef XX_
744 #undef XX
745 }
746 
747 /* Retrieves file from the trie by its fingerprint.  Returns non-zero if it was
748  * in the trie and sets *id, otherwise zero is returned. */
749 static int
get_file_id(trie_t * trie,const char path[],const char fingerprint[],int * id,CompareType ct)750 get_file_id(trie_t *trie, const char path[], const char fingerprint[], int *id,
751 		CompareType ct)
752 {
753 	void *data;
754 	compare_record_t *record;
755 	if(trie_get(trie, fingerprint, &data) != 0)
756 	{
757 		return 0;
758 	}
759 	record = data;
760 
761 	/* Comparison by contents is the only one when we need to resolve fingerprint
762 	 * conflicts. */
763 	if(ct != CT_CONTENTS)
764 	{
765 		*id = record->id;
766 		return 1;
767 	}
768 
769 	/* Fingerprint does not guarantee a match, go through files and find file with
770 	 * identical content. */
771 	do
772 	{
773 		if(files_are_identical(path, record->path))
774 		{
775 			*id = record->id;
776 			return 1;
777 		}
778 		record = record->next;
779 	}
780 	while(record != NULL);
781 
782 	return 0;
783 }
784 
785 /* Checks whether two files specified by their names hold identical content.
786  * Returns non-zero if so, otherwise zero is returned. */
787 static int
files_are_identical(const char a[],const char b[])788 files_are_identical(const char a[], const char b[])
789 {
790 	char a_block[BLOCK_SIZE], b_block[BLOCK_SIZE];
791 	FILE *const a_file = fopen(a, "rb");
792 	FILE *const b_file = fopen(b, "rb");
793 
794 	if(a_file == NULL || b_file == NULL)
795 	{
796 		if(a_file != NULL)
797 		{
798 			fclose(a_file);
799 		}
800 		if(b_file != NULL)
801 		{
802 			fclose(b_file);
803 		}
804 		return 0;
805 	}
806 
807 	while(1)
808 	{
809 		const size_t a_read = fread(&a_block, 1, sizeof(a_block), a_file);
810 		const size_t b_read = fread(&b_block, 1, sizeof(b_block), b_file);
811 		if(a_read == 0U && b_read == 0U && feof(a_file) && feof(b_file))
812 		{
813 			/* Ends of both files are reached. */
814 			break;
815 		}
816 
817 		if(a_read == 0 || b_read == 0U || a_read != b_read ||
818 				memcmp(a_block, b_block, a_read) != 0)
819 		{
820 			fclose(a_file);
821 			fclose(b_file);
822 			return 0;
823 		}
824 	}
825 
826 	fclose(a_file);
827 	fclose(b_file);
828 	return 1;
829 }
830 
831 /* Stores id of a file with given fingerprint in the trie. */
832 static void
put_file_id(trie_t * trie,const char path[],const char fingerprint[],int id,CompareType ct)833 put_file_id(trie_t *trie, const char path[], const char fingerprint[], int id,
834 		CompareType ct)
835 {
836 	compare_record_t *const record = malloc(sizeof(*record));
837 	void *data = NULL;
838 	(void)trie_get(trie, fingerprint, &data);
839 
840 	record->id = id;
841 	record->next = data;
842 
843 	/* Comparison by contents is the only one when we need to resolve fingerprint
844 	 * conflicts. */
845 	record->path = (ct == CT_CONTENTS ? strdup(path) : NULL);
846 
847 	if(trie_set(trie, fingerprint, record) < 0)
848 	{
849 		free(record->path);
850 		free(record);
851 	}
852 }
853 
854 /* Frees list of compare entries.  Implements data free function for
855  * trie_free_with_data(). */
856 static void
free_compare_records(void * ptr)857 free_compare_records(void *ptr)
858 {
859 	compare_record_t *record = ptr;
860 	while(record != NULL)
861 	{
862 		compare_record_t *const current = record;
863 		record = record->next;
864 		free(current->path);
865 		free(current);
866 	}
867 }
868 
869 int
compare_move(view_t * from,view_t * to)870 compare_move(view_t *from, view_t *to)
871 {
872 	char from_path[PATH_MAX + 1], to_path[PATH_MAX + 1];
873 	char *from_fingerprint, *to_fingerprint;
874 
875 	const CompareType ct = from->custom.diff_cmp_type;
876 
877 	dir_entry_t *const curr = &from->dir_entry[from->list_pos];
878 	dir_entry_t *const other = &to->dir_entry[from->list_pos];
879 
880 	if(from->custom.type != CV_DIFF || !from->custom.diff_path_group)
881 	{
882 		ui_sb_err("Not in diff mode with path grouping");
883 		return 1;
884 	}
885 
886 	if(curr->id == other->id && !fentry_is_fake(curr) && !fentry_is_fake(other))
887 	{
888 		/* Nothing to do if files are already equal. */
889 		return 0;
890 	}
891 
892 	/* We're going at least to try to update one of views (which might refer to
893 	 * the same directory), so schedule a reload. */
894 	ui_view_schedule_reload(from);
895 	ui_view_schedule_reload(to);
896 
897 	if(fentry_is_fake(curr))
898 	{
899 		/* Just remove the other file (it can't be fake entry too). */
900 		return fops_delete_current(to, 1, 0);
901 	}
902 
903 	get_full_path_of(curr, sizeof(from_path), from_path);
904 	get_full_path_of(other, sizeof(to_path), to_path);
905 
906 	if(fentry_is_fake(other))
907 	{
908 		char to_path[PATH_MAX + 1];
909 		char canonical[PATH_MAX + 1];
910 		snprintf(to_path, sizeof(to_path), "%s/%s/%s", flist_get_dir(to),
911 				curr->origin + strlen(flist_get_dir(from)), curr->name);
912 		canonicalize_path(to_path, canonical, sizeof(canonical));
913 
914 		/* Copy current file to position of the other one using relative path with
915 		 * different base. */
916 		fops_replace(from, canonical, 0);
917 
918 		/* Update the other entry to not be fake. */
919 		remove_last_path_component(canonical);
920 		replace_string(&other->name, curr->name);
921 		replace_string(&other->origin, canonical);
922 	}
923 	else
924 	{
925 		/* Overwrite file in the other pane with corresponding file from current
926 		 * pane. */
927 		fops_replace(from, to_path, 1);
928 	}
929 
930 	/* Obtaining file fingerprint relies on size field of entries, so try to load
931 	 * it and ignore if it fails. */
932 	other->size = get_file_size(to_path);
933 
934 	/* Try to update id of the other entry by computing fingerprint of both files
935 	 * and checking if they match. */
936 
937 	from_fingerprint = get_file_fingerprint(from_path, curr, ct);
938 	to_fingerprint = get_file_fingerprint(to_path, other, ct);
939 
940 	if(!is_null_or_empty(from_fingerprint) && !is_null_or_empty(to_fingerprint))
941 	{
942 		int match = (strcmp(from_fingerprint, to_fingerprint) == 0);
943 		if(match && ct == CT_CONTENTS)
944 		{
945 			match = files_are_identical(from_path, to_path);
946 		}
947 		if(match)
948 		{
949 			other->id = curr->id;
950 		}
951 	}
952 
953 	free(from_fingerprint);
954 	free(to_fingerprint);
955 
956 	return 0;
957 }
958 
959 /* vim: set tabstop=2 softtabstop=2 shiftwidth=2 noexpandtab cinoptions-=(0 : */
960 /* vim: set cinoptions+=t0 : */
961