1 /*
2   Copyright 2012-2018 Jyri J. Virkki <jyri@virkki.com>
3 
4   This file is part of dupd.
5 
6   dupd is free software: you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation, either version 3 of the License, or
9   (at your option) any later version.
10 
11   dupd is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with dupd.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include <assert.h>
21 #include <fcntl.h>
22 #include <pthread.h>
23 #include <stdint.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <strings.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <unistd.h>
31 
32 #include "main.h"
33 #include "stats.h"
34 #include "utils.h"
35 
// Locks used by the helper functions in this file:
//  - stats_lock is held while stats_read_buffers_allocated is updated.
//  - counters_lock is held while s_files_completed_unique,
//    s_files_completed_dups, count_sets_first_read,
//    stats_sets_first_read_completed and current_open_files are updated.
pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t counters_lock = PTHREAD_MUTEX_INITIALIZER;

// Arrays dimensioned by ROUNDS (and MAX_HASHER_THREADS).
// NOTE(review): each initializer list spells out exactly two values. If
// ROUNDS or MAX_HASHER_THREADS is larger than 2 the remaining elements are
// zero-initialized (also for the arrays that start at -1); if either is
// smaller than 2 the initializers do not fit. Confirm the macro values.
int stats_sets_processed[ROUNDS] = { 0,0 };
int stats_sets_dup_done[ROUNDS] = { 0,0 };
int stats_sets_dup_not[ROUNDS] = { 0,0 };
int stats_sets_full_read[ROUNDS] = { 0,0 };
int stats_sets_part_read[ROUNDS] = { 0,0 };
long stats_round_start[ROUNDS] = { -1,-1 };
int stats_round_duration[ROUNDS] = { -1,-1 };
// Printed by report_stats() and written to the stats file by save_stats().
int stats_duplicate_groups = 0;
int stats_reader_loops[ROUNDS] = { 0,0 };
int stats_hasher_loops[ROUNDS][MAX_HASHER_THREADS] = { {0,0}, {0,0} };
int stats_hasher_queue_len[MAX_HASHER_THREADS] = { 0,0 };

long stats_process_start = -1;
long stats_process_duration = -1;

uint64_t stats_total_bytes = 0;
uint64_t stats_total_bytes_read = 0;
uint64_t stats_total_bytes_hashed = 0;
uint64_t stats_comparison_bytes_read = 0;
uint32_t stats_max_pathlist = 0;
uint64_t stats_max_pathlist_size = 0;
uint32_t stats_path_list_entries = 0;
int stats_most_dups = 0;
// report_stats() prints the "Run 'dupd report'" hint only when this is > 0
// (and write_db is set).
int stats_duplicate_files = 0;

int stats_full_hash_first = 0;
int stats_full_hash_second = 0;
int stats_partial_hash_second = 0;
int stats_one_block_hash_first = 0;


int stats_size_list_done = 0;
int stats_three_file_compare = 0;
int stats_two_file_compare = 0;
int stats_uniques_saved = 0;
long stats_size_list_avg = 0;

int stats_files_ignored = 0;
int stats_files_error = 0;
long stats_time_scan = -1;
long stats_time_process = 0;
long stats_time_total = 0;
// report_stats() subtracts this from get_current_time_millis() to obtain
// the elapsed time it prints.
long stats_main_start = 0;
int path_buffer_realloc = 0;
int stats_hashlist_path_realloc = 0;
int stats_hash_list_len_inc = 0;
int scan_list_usage_max = 0;
int scan_list_resizes = 0;
// Adjusted by inc_stats_read_buffers_allocated() and
// dec_stats_read_buffers_allocated() while holding stats_lock.
uint64_t stats_read_buffers_allocated = 0;
int stats_flusher_active = 0;
// Both fiemap counters are written to the stats file by save_stats().
uint32_t stats_fiemap_total_blocks = 0;
uint32_t stats_fiemap_zero_blocks = 0;

// Incremented by increase_sets_first_read().
uint32_t count_sets_first_read = 0;
uint32_t count_files_completed = 0;
// Incremented by increase_sets_first_read_completed().
uint32_t stats_sets_first_read_completed = 0;



// Keep from here after revamp
//
// report_stats() cross-checks these counters: s_total_files_seen minus the
// skip counters (too_small, skip_notfile, skip_error, skip_badsep, hl_skip)
// must equal s_files_in_sizetree - s_files_hl_skip, otherwise it prints an
// error and exits.
uint32_t s_stats_size_list_count = 0;   // Total size sets processed

uint32_t s_total_files_seen = 0;        // All file entries seen during scan
uint32_t s_files_skip_error = 0;        // Files skipped due to error
uint32_t s_files_skip_notfile = 0;      // Files skipped, not a file
uint32_t s_files_skip_badsep = 0;       // Files skipped, separator conflict
uint32_t s_files_cant_read = 0;         // Files skipped, can't read
uint32_t s_files_hl_skip = 0;           // Files skipped, hardlink-is-unique
uint32_t s_files_too_small = 0;         // Files skipped, too small
uint32_t s_files_in_sizetree = 0;       // Files added to size tree
uint32_t s_files_processed = 0;         // Files entered to path list
uint32_t s_files_completed_dups = 0;    // Files processed, found to be dups
uint32_t s_files_completed_unique = 0;  // Files processed, found to be unique

// Adjusted by update_open_files() while holding counters_lock.
int current_open_files = 0;
114 
115 
116 /** ***************************************************************************
117  * Public function, see header file.
118  *
119  */
report_stats()120 void report_stats()
121 {
122   LOG_BASE {
123     printf("\n");
124     char timebuf[20];
125     time_string(timebuf, 20, get_current_time_millis() - stats_main_start);
126     printf("Total duplicates: %d files in %d groups in %s\n",
127            s_files_completed_dups, stats_duplicate_groups, timebuf);
128     if (write_db && stats_duplicate_files > 0) {
129       printf("Run 'dupd report' to list duplicates.\n");
130     }
131   }
132 
133   uint32_t files_accepted = s_total_files_seen - s_files_too_small -
134     s_files_skip_notfile - s_files_skip_error - s_files_skip_badsep -
135     s_files_hl_skip;
136   uint32_t unique_files = s_files_in_sizetree - s_files_processed;
137 
138   LOG_MORE {
139     printf("\n");
140     printf("Total files seen: %" PRIu32 "\n", s_total_files_seen);
141     printf(" (too small: %" PRIu32 ", not file: %"
142            PRIu32 ", errors: %" PRIu32 ", skip: %" PRIu32 ", hl_skip: %"
143            PRIu32 ")\n",
144            s_files_too_small, s_files_skip_notfile,
145            s_files_skip_error, s_files_skip_badsep, s_files_hl_skip);
146 
147     printf("Files queued for processing: %" PRIu32 " in %" PRIu32 " sets\n",
148            files_accepted, s_stats_size_list_count);
149 
150 
151     printf(" (files with unique size: %" PRIu32 ")\n", unique_files);
152     printf("Total files to process: %" PRIu32 "\n", s_files_processed);
153     printf(" Duplicate files: %" PRIu32 "\n", s_files_completed_dups);
154     printf(" Unique files: %" PRIu32 "\n", s_files_completed_unique);
155     printf(" Unable to read: %" PRIu32 "\n", s_files_cant_read);
156     if (hardlink_is_unique) {
157       printf(" Skipped hardlinks: %" PRIu32 "\n", s_files_hl_skip);
158     }
159   }
160 
161   if (files_accepted != s_files_in_sizetree - s_files_hl_skip) {
162     printf("error: mismatch files_accepted: %" PRIu32
163            " != files in sizetree: %" PRIu32 "\n",
164            files_accepted, s_files_in_sizetree - s_files_hl_skip);
165     exit(1);
166   }
167 }
168 
169 
170 /** ***************************************************************************
171  * Public function, see header file.
172  *
173  */
save_stats()174 void save_stats()
175 {
176   FILE * fp = fopen(stats_file, "a");
177   // TODO needs cleaning up
178   fprintf(fp, "using_fiemap %d\n", using_fiemap);
179   fprintf(fp, "fiemap_total_blocks %" PRIu32 "\n", stats_fiemap_total_blocks);
180   fprintf(fp, "fiemap_zero_blocks %" PRIu32 "\n", stats_fiemap_zero_blocks);
181   fprintf(fp, "duplicate_files %" PRIu32 "\n", s_files_completed_dups);
182   fprintf(fp, "duplicate_groups %" PRIu32 "\n", stats_duplicate_groups);
183 
184   fprintf(fp, "\n");
185   fclose(fp);
186 }
187 
188 
189 /** ***************************************************************************
190  * Public function, see header file.
191  *
192  */
inc_stats_read_buffers_allocated(int bytes)193 void inc_stats_read_buffers_allocated(int bytes)
194 {
195   d_mutex_lock(&stats_lock, "increasing buffers");
196   stats_read_buffers_allocated += bytes;
197   d_mutex_unlock(&stats_lock);
198 }
199 
200 
201 /** ***************************************************************************
202  * Public function, see header file.
203  *
204  */
dec_stats_read_buffers_allocated(int bytes)205 void dec_stats_read_buffers_allocated(int bytes)
206 {
207   d_mutex_lock(&stats_lock, "decreasing buffers");
208   stats_read_buffers_allocated -= bytes;
209   d_mutex_unlock(&stats_lock);
210 }
211 
212 
213 /** ***************************************************************************
214  * Public function, see header file.
215  *
216  */
increase_unique_counter(int n)217 void increase_unique_counter(int n)
218 {
219   d_mutex_lock(&counters_lock, "counters");
220   s_files_completed_unique += n;
221   d_mutex_unlock(&counters_lock);
222 }
223 
224 
225 /** ***************************************************************************
226  * Public function, see header file.
227  *
228  */
increase_dup_counter(int n)229 void increase_dup_counter(int n)
230 {
231   d_mutex_lock(&counters_lock, "counters");
232   s_files_completed_dups += n;
233   d_mutex_unlock(&counters_lock);
234 }
235 
236 
237 /** ***************************************************************************
238  * Public function, see header file.
239  *
240  */
increase_sets_first_read()241 void increase_sets_first_read()
242 {
243   d_mutex_lock(&counters_lock, "counters");
244   count_sets_first_read++;
245   d_mutex_unlock(&counters_lock);
246 }
247 
248 
249 /** ***************************************************************************
250  * Public function, see header file.
251  *
252  */
increase_sets_first_read_completed()253 void increase_sets_first_read_completed()
254 {
255   d_mutex_lock(&counters_lock, "counters");
256   stats_sets_first_read_completed++;
257   d_mutex_unlock(&counters_lock);
258 }
259 
260 
261 /** ***************************************************************************
262  * Public function, see header file.
263  *
264  */
update_open_files(int n)265 void update_open_files(int n)
266 {
267   d_mutex_lock(&counters_lock, "counters");
268   current_open_files += n;
269   d_mutex_unlock(&counters_lock);
270 }
271