1 /*
2 Copyright 2016-2018 Jyri J. Virkki <jyri@virkki.com>
3
4 This file is part of dupd.
5
6 dupd is free software: you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 dupd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with dupd. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <unistd.h>
26
27 #include "dirtree.h"
28 #include "main.h"
29 #include "readlist.h"
30 #include "stats.h"
31 #include "utils.h"
32
33 #define INITIAL_READ_LIST_SIZE 100000
34
35 struct read_list_entry * read_list = NULL;
36 long read_list_end;
37 static long read_list_size;
38
39 struct read_list_entry * inode_read_list = NULL;
40 static long inode_read_list_end;
41 static long inode_read_list_size;
42
43
44 /** ***************************************************************************
45 * Dump read list.
46 *
47 */
48 // LCOV_EXCL_START
dump_read_list(int with_path_list)49 static void dump_read_list(int with_path_list)
50 {
51 printf("--- dumping read_list ---\n");
52 for (int i = 0; i < read_list_end; i++) {
53 printf("[%d] inode: %8ld %" PRIu64 "\n",
54 i, (long)read_list[i].inode, read_list[i].block);
55 if (with_path_list) {
56 dump_path_list("---", 0, read_list[i].pathlist_head, 1);
57 }
58 }
59 }
60 // LCOV_EXCL_STOP
61
62
63 /** ***************************************************************************
64 * Sort function used by sort_read_list() when sorting by inode.
65 *
66 */
rl_compare_i(const void * a,const void * b)67 static int rl_compare_i(const void * a, const void * b)
68 {
69 struct read_list_entry * f = (struct read_list_entry *)a;
70 struct read_list_entry * s = (struct read_list_entry *)b;
71
72 return f->inode - s->inode;
73 }
74
75
76 /** ***************************************************************************
77 * Sort function used by sort_read_list() when sorting by block.
78 *
79 */
rl_compare_b(const void * a,const void * b)80 static int rl_compare_b(const void * a, const void * b)
81 {
82 struct read_list_entry * f = (struct read_list_entry *)a;
83 struct read_list_entry * s = (struct read_list_entry *)b;
84
85 if (f->block > s->block) { return 1; }
86 if (f->block < s->block) { return -1; }
87 return 0;
88 }
89
90
91 /** ***************************************************************************
92 * Public function, see readlist.h
93 *
94 */
init_read_list()95 void init_read_list()
96 {
97 read_list_size = INITIAL_READ_LIST_SIZE;
98
99 if (x_small_buffers) { read_list_size = 8; }
100
101 read_list_end = 0;
102
103 read_list = (struct read_list_entry *)calloc(read_list_size,
104 sizeof(struct read_list_entry));
105
106 if (hardlink_is_unique) {
107 inode_read_list_size = INITIAL_READ_LIST_SIZE;
108 if (x_small_buffers) { inode_read_list_size = 8; }
109 inode_read_list_end = 0;
110 inode_read_list =
111 (struct read_list_entry *)calloc(inode_read_list_size,
112 sizeof(struct read_list_entry));
113 }
114 }
115
116
117 /** ***************************************************************************
118 * Public function, see readlist.h
119 *
120 */
free_read_list()121 void free_read_list()
122 {
123 if (read_list != NULL) {
124 free(read_list);
125 read_list = NULL;
126 read_list_size = 0;
127 read_list_end = 0;
128 }
129 }
130
131
132 /** ***************************************************************************
133 * Free the inode_read_list.
134 *
135 */
free_inode_read_list()136 static void free_inode_read_list()
137 {
138 if (inode_read_list != NULL) {
139 free(inode_read_list);
140 inode_read_list = NULL;
141 inode_read_list_size = 0;
142 inode_read_list_end = 0;
143 }
144 }
145
146
147 /** ***************************************************************************
148 * Add a single entry to the read list.
149 *
150 */
add_one_to_read_list(struct path_list_head * head,struct path_list_entry * entry,ino_t inode,uint64_t block)151 static void add_one_to_read_list(struct path_list_head * head,
152 struct path_list_entry * entry,
153 ino_t inode, uint64_t block)
154 {
155 read_list[read_list_end].pathlist_head = head;
156 read_list[read_list_end].pathlist_self = entry;
157 read_list[read_list_end].block = block;
158 read_list[read_list_end].inode = inode;
159 read_list[read_list_end].done = 0;
160
161 read_list_end++;
162 if (read_list_end == read_list_size) {
163 read_list_size *= 2;
164 read_list = (struct read_list_entry *)
165 realloc(read_list, sizeof(struct read_list_entry) * read_list_size);
166 }
167 }
168
169
170 /** ***************************************************************************
171 * Add a single entry to the inode read list.
172 *
173 */
add_one_to_inode_read_list(struct path_list_head * head,struct path_list_entry * entry,ino_t inode)174 static void add_one_to_inode_read_list(struct path_list_head * head,
175 struct path_list_entry * entry,
176 ino_t inode)
177 {
178 if (hardlink_is_unique) {
179 inode_read_list[inode_read_list_end].pathlist_head = head;
180 inode_read_list[inode_read_list_end].pathlist_self = entry;
181 inode_read_list[inode_read_list_end].block = inode;
182 inode_read_list[inode_read_list_end].inode = inode;
183 inode_read_list[inode_read_list_end].done = 0;
184
185 inode_read_list_end++;
186 if (inode_read_list_end == inode_read_list_size) {
187 inode_read_list_size *= 2;
188 inode_read_list = (struct read_list_entry *)
189 realloc(inode_read_list, sizeof(struct read_list_entry) *
190 inode_read_list_size);
191 }
192 }
193 }
194
195
196 /** ***************************************************************************
197 * Public function, see readlist.h
198 *
199 */
add_to_read_list(struct path_list_head * head,struct path_list_entry * entry,ino_t inode)200 void add_to_read_list(struct path_list_head * head,
201 struct path_list_entry * entry, ino_t inode)
202 {
203 if (entry->blocks == NULL) { // LCOV_EXCL_START
204 printf("error: add_to_read_list but no block(s)\n");
205 dump_path_list("block missing", 0, head, 1);
206 exit(1);
207 } // LCOV_EXCL_STOP
208
209 // If using_fiemap, may add multiple entries for the same file, one for
210 // each block. If not, there is only one "block", the inode.
211
212 for (int b = 0; b < entry->blocks->count; b++) {
213 add_one_to_read_list(head, entry, inode, entry->blocks->entry[b].block);
214 }
215
216 // When hardlink_is_unique, keep a separate inode_read_list which can
217 // only have one entry per file. It might seem we can filter out
218 // hardlinks by looking at blocks and that is usually true. However
219 // (see get_block_info_from_path()) sometimes block can be zero even
220 // if it isn't, so this may fail. Easiest reliable way is to keep this
221 // separate inode list.
222 if (hardlink_is_unique) {
223 add_one_to_inode_read_list(head, entry, inode);
224 }
225 }
226
227
228 /** ***************************************************************************
229 * Public function, see readlist.h
230 *
231 */
sort_read_list()232 void sort_read_list()
233 {
234 switch (sort_bypass) {
235 case SORT_BY_NONE:
236 return;
237 case SORT_BY_INODE:
238 qsort(read_list, read_list_end,
239 sizeof(struct read_list_entry), rl_compare_i);
240 return;
241 case SORT_BY_BLOCK:
242 qsort(read_list, read_list_end,
243 sizeof(struct read_list_entry), rl_compare_b);
244 return;
245 }
246
247 if (hardlink_is_unique) {
248
249 qsort(inode_read_list, inode_read_list_end,
250 sizeof(struct read_list_entry), rl_compare_i);
251
252 // Now that the inode_read_list is ordered, remove any paths which
253 // are duplicate inode given that we don't care about them.
254
255 char path[DUPD_PATH_MAX];
256 long i;
257 ino_t p_inode = 0;
258
259 for (i = 0; i < inode_read_list_end; i++) {
260
261 if (inode_read_list[i].inode == p_inode) {
262
263 build_path(inode_read_list[i].pathlist_self, path);
264 LOG(L_SKIPPED, "Skipping [%s] due to duplicate inode.\n", path);
265
266 int before = inode_read_list[i].pathlist_head->list_size;
267 int after = mark_path_entry_invalid(inode_read_list[i].pathlist_head,
268 inode_read_list[i].pathlist_self);
269 s_files_hl_skip += (before - after);
270 }
271 p_inode = inode_read_list[i].inode;
272 }
273
274 free_inode_read_list();
275 }
276
277 // If we ran into a substantial number of files where the physical block(s)
278 // were reported as zero, give up on using fiemap ordering.
279
280 if (using_fiemap) {
281 int zeropct = (100 * stats_fiemap_zero_blocks) / stats_fiemap_total_blocks;
282 if (zeropct > 5 && s_total_files_seen > 100) {
283 using_fiemap = 0;
284 LOG(L_PROGRESS, "Turning off using_fiemap, %d%% zero blocks\n", zeropct);
285 }
286 }
287
288 if (using_fiemap) {
289 qsort(read_list, read_list_end,
290 sizeof(struct read_list_entry), rl_compare_b);
291 } else {
292 qsort(read_list, read_list_end,
293 sizeof(struct read_list_entry), rl_compare_i);
294 }
295
296 LOG_MORE_TRACE {
297 printf("read_list after final sort\n");
298 dump_read_list(0);
299 }
300 }
301