1 /*
2 * This file is part of rmlint.
3 *
4 * rmlint is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * rmlint is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with rmlint. If not, see <http://www.gnu.org/licenses/>.
16 *
17 * Authors:
18 *
19 * - Christopher <sahib> Pahl 2010-2020 (https://github.com/sahib)
20 * - Daniel <SeeSpotRun> T. 2014-2020 (https://github.com/SeeSpotRun)
21 *
22 * Hosted on http://github.com/sahib/rmlint
23 *
24 */
25
26 #include "../checksums/murmur3.h"
27 #include "../formats.h"
28 #include "../preprocess.h"
29 #include "../utilities.h"
30 #include "../treemerge.h"
31
32 #include <glib.h>
33 #include <stdio.h>
34 #include <string.h>
35
36 typedef struct RmFmtHandlerJSON {
37 /* must be first */
38 RmFmtHandler parent;
39
40 /* More human readable output? */
41 bool pretty;
42
43 /* set of already existing ids */
44 GHashTable *id_set;
45 } RmFmtHandlerJSON;
46
47 //////////////////////////////////////////
48 // FILE ID GENERATOR //
49 //////////////////////////////////////////
50
rm_fmt_json_generate_id(RmFmtHandlerJSON * self,RmFile * file,const char * file_path,char * cksum)51 static guint32 rm_fmt_json_generate_id(RmFmtHandlerJSON *self, RmFile *file,
52 const char *file_path, char *cksum) {
53 guint32 hash = 0;
54 hash = file->inode ^ file->dev;
55 hash ^= file->actual_file_size;
56
57 for(int i = 0; i < 8192; ++i) {
58 hash ^= MurmurHash3_x86_32(file_path, strlen(file_path), i);
59 if(cksum != NULL) {
60 hash ^= MurmurHash3_x86_32(cksum, strlen(cksum), i);
61 }
62
63 if(!g_hash_table_contains(self->id_set, GUINT_TO_POINTER(hash))) {
64 break;
65 }
66 }
67
68 g_hash_table_add(self->id_set, GUINT_TO_POINTER(hash));
69 return hash;
70 }
71
72 //////////////////////////////////////////
73 // POOR MAN'S JSON FORMATTING TOOLBOX //
74 //////////////////////////////////////////
75
/*
 * Write `"key": "value"` to `out`.
 *
 * Neither string is escaped, so only pass values that cannot contain
 * quotes, backslashes or control characters.
 */
static void rm_fmt_json_key(FILE *out, const char *key, const char *value) {
    fprintf(out, "\"%s\": ", key);
    fprintf(out, "\"%s\"", value);
}
79
/* Write `"key": true` or `"key": false` (JSON boolean literal) to `out`. */
static void rm_fmt_json_key_bool(FILE *out, const char *key, bool value) {
    const char *literal = "false";
    if(value) {
        literal = "true";
    }

    fprintf(out, "\"%s\": %s", key, literal);
}
83
/* Write `"key": <value>` to `out`, with the value as an unquoted integer. */
static void rm_fmt_json_key_int(FILE *out, const char *key, RmOff value) {
    fprintf(out, "\"%s\": ", key);
    fprintf(out, "%" LLU, value);
}
87
/*
 * Write `"key": <value>` to `out`, with the value as an unquoted number.
 *
 * The number is rendered with g_ascii_dtostr() so that the decimal separator
 * is always a '.', independent of the current locale (a German locale, for
 * instance, would otherwise produce a comma).
 */
static void rm_fmt_json_key_float(FILE *out, const char *key, gdouble value) {
    gchar number[G_ASCII_DTOSTR_BUF_SIZE];
    g_ascii_dtostr(number, sizeof(number) - 1, value);

    fprintf(out, "\"%s\": %s", key, number);
}
94
/*
 * Escape `string` so that it can be placed between the quotes of a JSON
 * string and store the NUL-terminated result in `fixed`.
 *
 * - '"' and '\\' get a backslash prepended.
 * - \b, \f, \n, \r and \t use their short escape.
 * - Every other byte in 0x01..0x1f, and 0x7f, becomes \u00xx (lower case hex).
 * - All remaining bytes (including bytes >= 0x80) are copied unchanged.
 *
 * More information here:
 * http://stackoverflow.com/questions/4901133/json-and-escaping-characters/4908960#4908960
 *
 * @param string     NUL-terminated input.
 * @param fixed      Output buffer.
 * @param fixed_len  Size of `fixed` in bytes, including room for the NUL.
 * @return true if the whole escaped string (plus NUL) fit into `fixed`;
 *         false if an argument was NULL, `fixed_len` was 0 or the output
 *         did not fit. On a "did not fit" failure `fixed` holds a
 *         NUL-terminated prefix of the result. Nothing is ever written
 *         beyond `fixed[fixed_len - 1]`.
 */
static bool rm_fmt_json_fix(const char *string, char *fixed, size_t fixed_len) {
    static const char hex_digits[] = "0123456789abcdef";

    if(string == NULL || fixed == NULL || fixed_len == 0) {
        return false;
    }

    size_t used = 0;
    fixed[0] = '\0';

    for(const unsigned char *curr = (const unsigned char *)string; *curr != '\0'; ++curr) {
        const unsigned char c = *curr;

        /* Longest replacement is "\u00xx" (6 bytes). */
        char text[8];
        size_t text_len = 0;

        if(c == '"' || c == '\\') {
            /* Printable, but needs to be escaped */
            text[text_len++] = '\\';
            text[text_len++] = (char)c;
        } else if(c <= 0x1f || c == 0x7f) {
            /* Something unprintable; JSON forbids raw bytes up to and
             * including 0x1f inside strings. */
            text[text_len++] = '\\';
            switch(c) {
            case '\b':
                text[text_len++] = 'b';
                break;
            case '\f':
                text[text_len++] = 'f';
                break;
            case '\n':
                text[text_len++] = 'n';
                break;
            case '\r':
                text[text_len++] = 'r';
                break;
            case '\t':
                text[text_len++] = 't';
                break;
            default:
                text[text_len++] = 'u';
                text[text_len++] = '0';
                text[text_len++] = '0';
                text[text_len++] = hex_digits[c >> 4];
                text[text_len++] = hex_digits[c & 0x0f];
                break;
            }
        } else {
            /* Take it unmodified */
            text[text_len++] = (char)c;
        }

        /* Check the space *before* copying: the replacement and the
         * terminating NUL both have to fit into what is left. */
        if(text_len >= fixed_len - used) {
            return false;
        }

        memcpy(fixed + used, text, text_len);
        used += text_len;
        fixed[used] = '\0';
    }

    return true;
}
146
/*
 * Write `"key": "value"` to `out`, with `value` escaped for JSON.
 *
 * Use this for anything that is not under our control (paths, command line
 * arguments, ...). `key` is written as-is.
 *
 * The value is escaped piecewise: each chunk of CHUNK_LEN input bytes is run
 * through rm_fmt_json_fix() into a buffer sized for the worst case of six
 * output bytes per input byte. Escaping works byte by byte, so splitting the
 * input anywhere gives the same result as escaping it in one go, and values
 * of any length can be written without a large or heap allocated buffer.
 *
 * A NULL `value` is written as `<BROKEN PATH>`.
 */
static void rm_fmt_json_key_unsafe(FILE *out, const char *key, const char *value) {
    enum { CHUNK_LEN = 512 };

    char chunk[CHUNK_LEN + 1];
    char escaped[CHUNK_LEN * 6 + 1];

    fprintf(out, "\"%s\": \"", key);

    if(value == NULL) {
        fputs("<BROKEN PATH>", out);
    } else {
        size_t remaining = strlen(value);

        while(remaining > 0) {
            const size_t n = (remaining < CHUNK_LEN) ? remaining : CHUNK_LEN;

            memcpy(chunk, value, n);
            chunk[n] = '\0';

            if(!rm_fmt_json_fix(chunk, escaped, sizeof(escaped))) {
                /* This should never happen (the buffer covers the worst
                 * case) but give at least means of debugging */
                fputs("<BROKEN PATH>", out);
                break;
            }

            fputs(escaped, out);
            value += n;
            remaining -= n;
        }
    }

    fputc('"', out);
}
158
/* Start a JSON object; in pretty mode the first key goes on a new line. */
static void rm_fmt_json_open(RmFmtHandlerJSON *self, FILE *out) {
    fputc('{', out);
    if(self->pretty) {
        fputs("\n ", out);
    }
}
162
/*
 * End a JSON object that is followed by another array element: the closing
 * brace is always followed by a comma.
 */
static void rm_fmt_json_close(RmFmtHandlerJSON *self, FILE *out) {
    fputs(self->pretty ? "\n}, " : "},\n", out);
}
170
/* Separate two key/value pairs; pretty mode puts the next one on a new line. */
static void rm_fmt_json_sep(RmFmtHandlerJSON *self, FILE *out) {
    fputc(',', out);
    if(self->pretty) {
        fputs("\n ", out);
    }
}
174
175 /////////////////////////
176 // ACTUAL CALLBACKS //
177 /////////////////////////
178
/*
 * Handler callback: start the JSON document.
 *
 * Opens the top-level array, creates the id set used by
 * rm_fmt_json_generate_id() and, unless the "no_header" option is set,
 * writes the header object describing this run. The "oneline" option
 * switches the handler to compact output.
 *
 * `parent` is used despite the _UNUSED marker in the signature.
 */
static void rm_fmt_head(RmSession *session, _UNUSED RmFmtHandler *parent, FILE *out) {
    fprintf(out, "[\n");

    RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent;
    self->id_set = g_hash_table_new(NULL, NULL);

    if(rm_fmt_get_config_value(session->formats, "json", "oneline")) {
        self->pretty = false;
    }

    if(rm_fmt_get_config_value(session->formats, "json", "no_header")) {
        return;
    }

    /* The working directory and the command line come from the user and may
     * contain quotes, backslashes or control characters. They have to be
     * escaped, otherwise the whole document becomes invalid JSON. */
    const char *cwd = session->cfg->iwd ? session->cfg->iwd : "";
    const char *args = session->cfg->joined_argv ? session->cfg->joined_argv : "";

    rm_fmt_json_open(self, out);
    {
        rm_fmt_json_key(out, "description", "rmlint json-dump of lint files");
        rm_fmt_json_sep(self, out);
        rm_fmt_json_key_unsafe(out, "cwd", cwd);
        rm_fmt_json_sep(self, out);
        rm_fmt_json_key_unsafe(out, "args", args);
        rm_fmt_json_sep(self, out);
        rm_fmt_json_key(out, "version", RM_VERSION);
        rm_fmt_json_sep(self, out);
        rm_fmt_json_key(out, "rev", RM_VERSION_GIT_REVISION);
        rm_fmt_json_sep(self, out);
        rm_fmt_json_key_int(out, "progress", 0); /* Header is always first. */
        rm_fmt_json_sep(self, out);
        rm_fmt_json_key(out, "checksum_type",
                        rm_digest_type_to_string(session->cfg->checksum_type));

        /* The seed is only reported when it is non-zero. */
        if(session->hash_seed) {
            rm_fmt_json_sep(self, out);
            rm_fmt_json_key_int(out, "hash_seed", session->hash_seed);
        }

        rm_fmt_json_sep(self, out);
        rm_fmt_json_key_bool(out, "merge_directories", session->cfg->merge_directories);
    }
    rm_fmt_json_close(self, out);
}
217
/*
 * Handler callback: finish the JSON document.
 *
 * Writes the footer object with the run statistics (or an empty object when
 * the "no_footer" option is set; every earlier element ends with a comma, so
 * the array needs a last element), closes the top-level array and drops the
 * id set.
 *
 * `session` is used despite the _UNUSED marker in the signature.
 */
static void rm_fmt_foot(_UNUSED RmSession *session, RmFmtHandler *parent, FILE *out) {
    RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent;

    if(!rm_fmt_get_config_value(session->formats, "json", "no_footer")) {
        /* Integer fields of the footer, in output order. */
        const struct {
            const char *key;
            RmOff value;
        } counters[] = {
            {"progress", 100}, /* Footer is always last. */
            {"total_files", session->total_files},
            {"ignored_files", session->ignored_files},
            {"ignored_folders", session->ignored_folders},
            {"duplicates", session->dup_counter},
            {"duplicate_sets", session->dup_group_counter},
            {"total_lint_size", session->total_lint_size},
        };

        rm_fmt_json_open(self, out);
        rm_fmt_json_key_bool(out, "aborted", rm_session_was_aborted());

        for(size_t i = 0; i < sizeof(counters) / sizeof(counters[0]); ++i) {
            rm_fmt_json_sep(self, out);
            rm_fmt_json_key_int(out, counters[i].key, counters[i].value);
        }

        /* Not rm_fmt_json_close(): the last element must not end in a comma. */
        fputs(self->pretty ? "\n}" : "}\n", out);
    } else {
        fputs("{}", out);
    }

    fputs("]\n", out);
    g_hash_table_unref(self->id_set);
}
252
/*
 * Fill `checksum_str` (a buffer of `size` bytes, size >= 1) with the hex
 * representation of the digest of `file`.
 *
 * The buffer is first set to a string of '0' characters with a terminating
 * NUL in its last byte; rm_digest_hexstring() then writes into it.
 */
static void rm_fmt_json_cksum(RmFile *file, char *checksum_str, size_t size) {
    memset(checksum_str, '0', size);
    checksum_str[size - 1] = '\0';

    rm_digest_hexstring(file->digest, checksum_str);
}
258
/*
 * Handler callback: write one lint file as a JSON object.
 *
 * Nothing is written when the "no_body" option is set. Unique files are only
 * written when the "unique" option is set, or when they have a digest and
 * cfg->write_unfinished is enabled; for them `file->is_original` is
 * recomputed here before it is printed.
 *
 * `parent` is used despite the _UNUSED marker in the signature.
 */
static void rm_fmt_elem(RmSession *session, _UNUSED RmFmtHandler *parent, FILE *out, RmFile *file) {
    if(rm_fmt_get_config_value(session->formats, "json", "no_body")) {
        return;
    }

    const bool is_unique = (file->lint_type == RM_LINT_TYPE_UNIQUE_FILE);

    if(is_unique) {
        if(!rm_fmt_get_config_value(session->formats, "json", "unique") &&
           (!file->digest || !session->cfg->write_unfinished)) {
            return;
        }

        /* don't list 'untagged' files as unique */
        const bool hide_untagged = session->cfg->keep_all_tagged && !file->is_prefd;
        /* don't list 'tagged' files as unique */
        const bool hide_tagged = session->cfg->keep_all_untagged && file->is_prefd;

        file->is_original = !(hide_untagged || hide_tagged);
    }

    /* Hex checksum of the file; stays NULL when there is no digest. */
    size_t checksum_size = 0;
    char *checksum_str = NULL;

    if(file->digest != NULL) {
        checksum_size = rm_digest_get_bytes(file->digest) * 2 + 1;
        checksum_str = g_slice_alloc0(checksum_size);
        rm_fmt_json_cksum(file, checksum_str, checksum_size);
        checksum_str[checksum_size - 1] = 0;
    }

    RmFmtHandlerJSON *self = (RmFmtHandlerJSON *)parent;

    /* Make it look like a json element */
    rm_fmt_json_open(self, out);
    {
        RM_DEFINE_PATH(file);

        guint32 id = rm_fmt_json_generate_id(self, file, file_path, checksum_str);
        rm_fmt_json_key_int(out, "id", id);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key(out, "type", rm_file_lint_type_to_string(file->lint_type));
        rm_fmt_json_sep(self, out);

        /* Percentage of the bytes left after preprocessing that are done;
         * stays 0 when that total is 0. */
        gdouble progress = 0;
        if(session->shred_bytes_after_preprocess) {
            const gdouble remaining_ratio = (gdouble)session->shred_bytes_remaining /
                                            (gdouble)session->shred_bytes_after_preprocess;
            progress = CLAMP(100 - 100 * (remaining_ratio), 0, 100);
        }
        /* Printed as integer: the value is converted to RmOff by the call. */
        rm_fmt_json_key_int(out, "progress", progress);
        rm_fmt_json_sep(self, out);

        if(checksum_str != NULL) {
            rm_fmt_json_key(out, "checksum", checksum_str);
            rm_fmt_json_sep(self, out);
        }

        rm_fmt_json_key_unsafe(out, "path", file_path);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "size", file->actual_file_size);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "depth", file->depth);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "inode", file->inode);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_int(out, "disk_id", file->dev);
        rm_fmt_json_sep(self, out);

        rm_fmt_json_key_bool(out, "is_original", file->is_original);
        rm_fmt_json_sep(self, out);

        if(file->lint_type == RM_LINT_TYPE_DUPE_DIR_CANDIDATE) {
            rm_fmt_json_key_int(out, "n_children", file->n_children);
            rm_fmt_json_sep(self, out);
        }

        if(!is_unique) {
            if(file->twin_count >= 0) {
                rm_fmt_json_key_int(out, "twins", file->twin_count);
                rm_fmt_json_sep(self, out);
            }

            if(file->lint_type == RM_LINT_TYPE_PART_OF_DIRECTORY && file->parent_dir) {
                rm_fmt_json_key_unsafe(out, "parent_path",
                                       rm_directory_get_dirname(file->parent_dir));
                rm_fmt_json_sep(self, out);
            }

            if(session->cfg->find_hardlinked_dupes) {
                RmFile *hardlink_head = RM_FILE_HARDLINK_HEAD(file);

                if(hardlink_head && hardlink_head != file && file->digest) {
                    /* Sized after the digest of `file`, filled from the
                     * digest of the hardlink head. */
                    char orig_checksum_str[rm_digest_get_bytes(file->digest) * 2 + 1];
                    rm_fmt_json_cksum(hardlink_head, orig_checksum_str,
                                      sizeof(orig_checksum_str));

                    RM_DEFINE_PATH(hardlink_head);

                    /* NOTE(review): rm_fmt_json_generate_id() records every id
                     * it returns, so this value differs from any id generated
                     * for the head by another call - confirm whether
                     * consumers expect "hardlink_of" to match the head's own
                     * "id". */
                    guint32 orig_id = rm_fmt_json_generate_id(
                        self, hardlink_head, hardlink_head_path, orig_checksum_str);

                    rm_fmt_json_key_int(out, "hardlink_of", orig_id);
                    rm_fmt_json_sep(self, out);
                }
            }
        }

        /* Last key of the object: no separator afterwards. */
        rm_fmt_json_key_float(out, "mtime", file->mtime);
    }
    rm_fmt_json_close(self, out);

    if(checksum_str != NULL) {
        g_slice_free1(checksum_size, checksum_str);
    }
}
382
383 static RmFmtHandlerJSON JSON_HANDLER_IMPL = {
384 /* Initialize parent */
385 .parent =
386 {
387 .size = sizeof(JSON_HANDLER_IMPL),
388 .name = "json",
389 .head = rm_fmt_head,
390 .elem = rm_fmt_elem,
391 .prog = NULL,
392 .foot = rm_fmt_foot,
393 .valid_keys = {"no_header", "no_footer", "no_body", "oneline", "unique", NULL},
394 },
395 .pretty = true};
396
397 RmFmtHandler *JSON_HANDLER = (RmFmtHandler *)&JSON_HANDLER_IMPL;
398