1 /*
2 Copyright 2012-2018 Jyri J. Virkki <jyri@virkki.com>
3
4 This file is part of dupd.
5
6 dupd is free software: you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 dupd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with dupd. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <assert.h>
21 #include <fcntl.h>
22 #include <pthread.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <strings.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <unistd.h>
31
32 #include "main.h"
33 #include "stats.h"
34 #include "utils.h"
35
36 pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
37 pthread_mutex_t counters_lock = PTHREAD_MUTEX_INITIALIZER;
38
39 int stats_sets_processed[ROUNDS] = { 0,0 };
40 int stats_sets_dup_done[ROUNDS] = { 0,0 };
41 int stats_sets_dup_not[ROUNDS] = { 0,0 };
42 int stats_sets_full_read[ROUNDS] = { 0,0 };
43 int stats_sets_part_read[ROUNDS] = { 0,0 };
44 long stats_round_start[ROUNDS] = { -1,-1 };
45 int stats_round_duration[ROUNDS] = { -1,-1 };
46 int stats_duplicate_groups = 0;
47 int stats_reader_loops[ROUNDS] = { 0,0 };
48 int stats_hasher_loops[ROUNDS][MAX_HASHER_THREADS] = { {0,0}, {0,0} };
49 int stats_hasher_queue_len[MAX_HASHER_THREADS] = { 0,0 };
50
// Process timing, both initialized to -1.
long     stats_process_start    = -1;
long     stats_process_duration = -1;

// Byte totals and path list sizing.
uint64_t stats_total_bytes           = 0;
uint64_t stats_total_bytes_read      = 0;
uint64_t stats_total_bytes_hashed    = 0;
uint64_t stats_comparison_bytes_read = 0;
uint32_t stats_max_pathlist          = 0;
uint64_t stats_max_pathlist_size     = 0;
uint32_t stats_path_list_entries     = 0;
int      stats_most_dups             = 0;

// report_stats() prints the "Run 'dupd report'" hint only when this is > 0.
int      stats_duplicate_files       = 0;

// Hashing strategy counters.
int      stats_full_hash_first      = 0;
int      stats_full_hash_second     = 0;
int      stats_partial_hash_second  = 0;
int      stats_one_block_hash_first = 0;

// Size list and direct comparison counters.
int      stats_size_list_done     = 0;
int      stats_three_file_compare = 0;
int      stats_two_file_compare   = 0;
int      stats_uniques_saved      = 0;
long     stats_size_list_avg      = 0;

// Scan results and timings. stats_time_scan starts at -1, the others at 0.
int      stats_files_ignored = 0;
int      stats_files_error   = 0;
long     stats_time_scan     = -1;
long     stats_time_process  = 0;
long     stats_time_total    = 0;

// report_stats() subtracts this from the current time to get the elapsed time.
long     stats_main_start    = 0;

// Buffer and list growth counters.
int      path_buffer_realloc         = 0;
int      stats_hashlist_path_realloc = 0;
int      stats_hash_list_len_inc     = 0;
int      scan_list_usage_max         = 0;
int      scan_list_resizes           = 0;

// Adjusted only through inc_/dec_stats_read_buffers_allocated(), under
// stats_lock.
uint64_t stats_read_buffers_allocated = 0;

int      stats_flusher_active = 0;

// Written to the stats file by save_stats().
uint32_t stats_fiemap_total_blocks = 0;
uint32_t stats_fiemap_zero_blocks  = 0;

// count_sets_first_read and stats_sets_first_read_completed are incremented
// under counters_lock by increase_sets_first_read() and
// increase_sets_first_read_completed() respectively.
uint32_t count_sets_first_read           = 0;
uint32_t count_files_completed           = 0;
uint32_t stats_sets_first_read_completed = 0;
95
96
97
// Keep from here after revamp
uint32_t s_stats_size_list_count  = 0; // Total size sets processed

// Scan-time file accounting. report_stats() cross-checks these against each
// other and exits if they do not add up.
uint32_t s_total_files_seen       = 0; // All file entries seen during scan
uint32_t s_files_skip_error       = 0; // Files skipped due to error
uint32_t s_files_skip_notfile     = 0; // Files skipped, not a file
uint32_t s_files_skip_badsep      = 0; // Files skipped, separator conflict
uint32_t s_files_cant_read        = 0; // Files skipped, can't read
uint32_t s_files_hl_skip          = 0; // Files skipped, hardlink-is-unique
uint32_t s_files_too_small        = 0; // Files skipped, too small
uint32_t s_files_in_sizetree      = 0; // Files added to size tree
uint32_t s_files_processed        = 0; // Files entered to path list

// Updated under counters_lock by increase_dup_counter() and
// increase_unique_counter().
uint32_t s_files_completed_dups   = 0; // Files processed, found to be dups
uint32_t s_files_completed_unique = 0; // Files processed, found to be unique

// Adjusted under counters_lock by update_open_files().
int current_open_files = 0;
114
115
116 /** ***************************************************************************
117 * Public function, see header file.
118 *
119 */
report_stats()120 void report_stats()
121 {
122 LOG_BASE {
123 printf("\n");
124 char timebuf[20];
125 time_string(timebuf, 20, get_current_time_millis() - stats_main_start);
126 printf("Total duplicates: %d files in %d groups in %s\n",
127 s_files_completed_dups, stats_duplicate_groups, timebuf);
128 if (write_db && stats_duplicate_files > 0) {
129 printf("Run 'dupd report' to list duplicates.\n");
130 }
131 }
132
133 uint32_t files_accepted = s_total_files_seen - s_files_too_small -
134 s_files_skip_notfile - s_files_skip_error - s_files_skip_badsep -
135 s_files_hl_skip;
136 uint32_t unique_files = s_files_in_sizetree - s_files_processed;
137
138 LOG_MORE {
139 printf("\n");
140 printf("Total files seen: %" PRIu32 "\n", s_total_files_seen);
141 printf(" (too small: %" PRIu32 ", not file: %"
142 PRIu32 ", errors: %" PRIu32 ", skip: %" PRIu32 ", hl_skip: %"
143 PRIu32 ")\n",
144 s_files_too_small, s_files_skip_notfile,
145 s_files_skip_error, s_files_skip_badsep, s_files_hl_skip);
146
147 printf("Files queued for processing: %" PRIu32 " in %" PRIu32 " sets\n",
148 files_accepted, s_stats_size_list_count);
149
150
151 printf(" (files with unique size: %" PRIu32 ")\n", unique_files);
152 printf("Total files to process: %" PRIu32 "\n", s_files_processed);
153 printf(" Duplicate files: %" PRIu32 "\n", s_files_completed_dups);
154 printf(" Unique files: %" PRIu32 "\n", s_files_completed_unique);
155 printf(" Unable to read: %" PRIu32 "\n", s_files_cant_read);
156 if (hardlink_is_unique) {
157 printf(" Skipped hardlinks: %" PRIu32 "\n", s_files_hl_skip);
158 }
159 }
160
161 if (files_accepted != s_files_in_sizetree - s_files_hl_skip) {
162 printf("error: mismatch files_accepted: %" PRIu32
163 " != files in sizetree: %" PRIu32 "\n",
164 files_accepted, s_files_in_sizetree - s_files_hl_skip);
165 exit(1);
166 }
167 }
168
169
170 /** ***************************************************************************
171 * Public function, see header file.
172 *
173 */
save_stats()174 void save_stats()
175 {
176 FILE * fp = fopen(stats_file, "a");
177 // TODO needs cleaning up
178 fprintf(fp, "using_fiemap %d\n", using_fiemap);
179 fprintf(fp, "fiemap_total_blocks %" PRIu32 "\n", stats_fiemap_total_blocks);
180 fprintf(fp, "fiemap_zero_blocks %" PRIu32 "\n", stats_fiemap_zero_blocks);
181 fprintf(fp, "duplicate_files %" PRIu32 "\n", s_files_completed_dups);
182 fprintf(fp, "duplicate_groups %" PRIu32 "\n", stats_duplicate_groups);
183
184 fprintf(fp, "\n");
185 fclose(fp);
186 }
187
188
189 /** ***************************************************************************
190 * Public function, see header file.
191 *
192 */
inc_stats_read_buffers_allocated(int bytes)193 void inc_stats_read_buffers_allocated(int bytes)
194 {
195 d_mutex_lock(&stats_lock, "increasing buffers");
196 stats_read_buffers_allocated += bytes;
197 d_mutex_unlock(&stats_lock);
198 }
199
200
201 /** ***************************************************************************
202 * Public function, see header file.
203 *
204 */
dec_stats_read_buffers_allocated(int bytes)205 void dec_stats_read_buffers_allocated(int bytes)
206 {
207 d_mutex_lock(&stats_lock, "decreasing buffers");
208 stats_read_buffers_allocated -= bytes;
209 d_mutex_unlock(&stats_lock);
210 }
211
212
213 /** ***************************************************************************
214 * Public function, see header file.
215 *
216 */
increase_unique_counter(int n)217 void increase_unique_counter(int n)
218 {
219 d_mutex_lock(&counters_lock, "counters");
220 s_files_completed_unique += n;
221 d_mutex_unlock(&counters_lock);
222 }
223
224
225 /** ***************************************************************************
226 * Public function, see header file.
227 *
228 */
increase_dup_counter(int n)229 void increase_dup_counter(int n)
230 {
231 d_mutex_lock(&counters_lock, "counters");
232 s_files_completed_dups += n;
233 d_mutex_unlock(&counters_lock);
234 }
235
236
237 /** ***************************************************************************
238 * Public function, see header file.
239 *
240 */
increase_sets_first_read()241 void increase_sets_first_read()
242 {
243 d_mutex_lock(&counters_lock, "counters");
244 count_sets_first_read++;
245 d_mutex_unlock(&counters_lock);
246 }
247
248
249 /** ***************************************************************************
250 * Public function, see header file.
251 *
252 */
increase_sets_first_read_completed()253 void increase_sets_first_read_completed()
254 {
255 d_mutex_lock(&counters_lock, "counters");
256 stats_sets_first_read_completed++;
257 d_mutex_unlock(&counters_lock);
258 }
259
260
261 /** ***************************************************************************
262 * Public function, see header file.
263 *
264 */
update_open_files(int n)265 void update_open_files(int n)
266 {
267 d_mutex_lock(&counters_lock, "counters");
268 current_open_files += n;
269 d_mutex_unlock(&counters_lock);
270 }
271