1 /*
2   Copyright 2016-2018 Jyri J. Virkki <jyri@virkki.com>
3 
4   This file is part of dupd.
5 
6   dupd is free software: you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation, either version 3 of the License, or
9   (at your option) any later version.
10 
11   dupd is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with dupd.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <unistd.h>
26 
27 #include "dirtree.h"
28 #include "main.h"
29 #include "readlist.h"
30 #include "stats.h"
31 #include "utils.h"
32 
33 #define INITIAL_READ_LIST_SIZE 100000
34 
35 struct read_list_entry * read_list = NULL;
36 long read_list_end;
37 static long read_list_size;
38 
39 struct read_list_entry * inode_read_list = NULL;
40 static long inode_read_list_end;
41 static long inode_read_list_size;
42 
43 
44 /** ***************************************************************************
45  * Dump read list.
46  *
47  */
48                                                              // LCOV_EXCL_START
dump_read_list(int with_path_list)49 static void dump_read_list(int with_path_list)
50 {
51   printf("--- dumping read_list ---\n");
52   for (int i = 0; i < read_list_end; i++) {
53     printf("[%d] inode: %8ld  %" PRIu64 "\n",
54            i, (long)read_list[i].inode, read_list[i].block);
55     if (with_path_list) {
56       dump_path_list("---", 0, read_list[i].pathlist_head, 1);
57     }
58   }
59 }
60                                                              // LCOV_EXCL_STOP
61 
62 
63 /** ***************************************************************************
64  * Sort function used by sort_read_list() when sorting by inode.
65  *
66  */
rl_compare_i(const void * a,const void * b)67 static int rl_compare_i(const void * a, const void * b)
68 {
69   struct read_list_entry * f = (struct read_list_entry *)a;
70   struct read_list_entry * s = (struct read_list_entry *)b;
71 
72   return f->inode - s->inode;
73 }
74 
75 
76 /** ***************************************************************************
77  * Sort function used by sort_read_list() when sorting by block.
78  *
79  */
rl_compare_b(const void * a,const void * b)80 static int rl_compare_b(const void * a, const void * b)
81 {
82   struct read_list_entry * f = (struct read_list_entry *)a;
83   struct read_list_entry * s = (struct read_list_entry *)b;
84 
85   if (f->block > s->block) { return 1; }
86   if (f->block < s->block) { return -1; }
87   return 0;
88 }
89 
90 
91 /** ***************************************************************************
92  * Public function, see readlist.h
93  *
94  */
init_read_list()95 void init_read_list()
96 {
97   read_list_size = INITIAL_READ_LIST_SIZE;
98 
99   if (x_small_buffers) { read_list_size = 8; }
100 
101   read_list_end = 0;
102 
103   read_list = (struct read_list_entry *)calloc(read_list_size,
104                                                sizeof(struct read_list_entry));
105 
106   if (hardlink_is_unique) {
107     inode_read_list_size = INITIAL_READ_LIST_SIZE;
108     if (x_small_buffers) { inode_read_list_size = 8; }
109     inode_read_list_end = 0;
110     inode_read_list =
111       (struct read_list_entry *)calloc(inode_read_list_size,
112                                        sizeof(struct read_list_entry));
113   }
114 }
115 
116 
117 /** ***************************************************************************
118  * Public function, see readlist.h
119  *
120  */
free_read_list()121 void free_read_list()
122 {
123   if (read_list != NULL) {
124     free(read_list);
125     read_list = NULL;
126     read_list_size = 0;
127     read_list_end = 0;
128   }
129 }
130 
131 
132 /** ***************************************************************************
133  * Free the inode_read_list.
134  *
135  */
free_inode_read_list()136 static void free_inode_read_list()
137 {
138   if (inode_read_list != NULL) {
139     free(inode_read_list);
140     inode_read_list = NULL;
141     inode_read_list_size = 0;
142     inode_read_list_end = 0;
143   }
144 }
145 
146 
147 /** ***************************************************************************
148  * Add a single entry to the read list.
149  *
150  */
add_one_to_read_list(struct path_list_head * head,struct path_list_entry * entry,ino_t inode,uint64_t block)151 static void add_one_to_read_list(struct path_list_head * head,
152                                  struct path_list_entry * entry,
153                                  ino_t inode, uint64_t block)
154 {
155   read_list[read_list_end].pathlist_head = head;
156   read_list[read_list_end].pathlist_self = entry;
157   read_list[read_list_end].block = block;
158   read_list[read_list_end].inode = inode;
159   read_list[read_list_end].done = 0;
160 
161   read_list_end++;
162   if (read_list_end == read_list_size) {
163     read_list_size *= 2;
164     read_list = (struct read_list_entry *)
165       realloc(read_list, sizeof(struct read_list_entry) * read_list_size);
166   }
167 }
168 
169 
170 /** ***************************************************************************
171  * Add a single entry to the inode read list.
172  *
173  */
add_one_to_inode_read_list(struct path_list_head * head,struct path_list_entry * entry,ino_t inode)174 static void add_one_to_inode_read_list(struct path_list_head * head,
175                                        struct path_list_entry * entry,
176                                        ino_t inode)
177 {
178   if (hardlink_is_unique) {
179     inode_read_list[inode_read_list_end].pathlist_head = head;
180     inode_read_list[inode_read_list_end].pathlist_self = entry;
181     inode_read_list[inode_read_list_end].block = inode;
182     inode_read_list[inode_read_list_end].inode = inode;
183     inode_read_list[inode_read_list_end].done = 0;
184 
185     inode_read_list_end++;
186     if (inode_read_list_end == inode_read_list_size) {
187       inode_read_list_size *= 2;
188       inode_read_list = (struct read_list_entry *)
189         realloc(inode_read_list, sizeof(struct read_list_entry) *
190                 inode_read_list_size);
191     }
192   }
193 }
194 
195 
196 /** ***************************************************************************
197  * Public function, see readlist.h
198  *
199  */
add_to_read_list(struct path_list_head * head,struct path_list_entry * entry,ino_t inode)200 void add_to_read_list(struct path_list_head * head,
201                       struct path_list_entry * entry, ino_t inode)
202 {
203   if (entry->blocks == NULL) {                               // LCOV_EXCL_START
204     printf("error: add_to_read_list but no block(s)\n");
205     dump_path_list("block missing", 0, head, 1);
206     exit(1);
207   }                                                          // LCOV_EXCL_STOP
208 
209   // If using_fiemap, may add multiple entries for the same file, one for
210   // each block. If not, there is only one "block", the inode.
211 
212   for (int b = 0; b < entry->blocks->count; b++) {
213     add_one_to_read_list(head, entry, inode, entry->blocks->entry[b].block);
214   }
215 
216   // When hardlink_is_unique, keep a separate inode_read_list which can
217   // only have one entry per file. It might seem we can filter out
218   // hardlinks by looking at blocks and that is usually true. However
219   // (see get_block_info_from_path()) sometimes block can be zero even
220   // if it isn't, so this may fail. Easiest reliable way is to keep this
221   // separate inode list.
222   if (hardlink_is_unique) {
223     add_one_to_inode_read_list(head, entry, inode);
224   }
225 }
226 
227 
228 /** ***************************************************************************
229  * Public function, see readlist.h
230  *
231  */
sort_read_list()232 void sort_read_list()
233 {
234   switch (sort_bypass) {
235   case SORT_BY_NONE:
236     return;
237   case SORT_BY_INODE:
238     qsort(read_list, read_list_end,
239           sizeof(struct read_list_entry), rl_compare_i);
240     return;
241   case SORT_BY_BLOCK:
242     qsort(read_list, read_list_end,
243           sizeof(struct read_list_entry), rl_compare_b);
244     return;
245   }
246 
247   if (hardlink_is_unique) {
248 
249     qsort(inode_read_list, inode_read_list_end,
250           sizeof(struct read_list_entry), rl_compare_i);
251 
252     // Now that the inode_read_list is ordered, remove any paths which
253     // are duplicate inode given that we don't care about them.
254 
255     char path[DUPD_PATH_MAX];
256     long i;
257     ino_t p_inode = 0;
258 
259     for (i = 0; i < inode_read_list_end; i++) {
260 
261       if (inode_read_list[i].inode == p_inode) {
262 
263         build_path(inode_read_list[i].pathlist_self, path);
264         LOG(L_SKIPPED, "Skipping [%s] due to duplicate inode.\n", path);
265 
266         int before = inode_read_list[i].pathlist_head->list_size;
267         int after = mark_path_entry_invalid(inode_read_list[i].pathlist_head,
268                                             inode_read_list[i].pathlist_self);
269         s_files_hl_skip += (before - after);
270       }
271       p_inode = inode_read_list[i].inode;
272     }
273 
274     free_inode_read_list();
275   }
276 
277   // If we ran into a substantial number of files where the physical block(s)
278   // were reported as zero, give up on using fiemap ordering.
279 
280   if (using_fiemap) {
281     int zeropct = (100 * stats_fiemap_zero_blocks) / stats_fiemap_total_blocks;
282     if (zeropct > 5 && s_total_files_seen > 100) {
283       using_fiemap = 0;
284       LOG(L_PROGRESS, "Turning off using_fiemap, %d%% zero blocks\n", zeropct);
285     }
286   }
287 
288   if (using_fiemap) {
289     qsort(read_list, read_list_end,
290           sizeof(struct read_list_entry), rl_compare_b);
291   } else {
292     qsort(read_list, read_list_end,
293           sizeof(struct read_list_entry), rl_compare_i);
294   }
295 
296   LOG_MORE_TRACE {
297     printf("read_list after final sort\n");
298     dump_read_list(0);
299   }
300 }
301