1 /* vim: set sw=4 ts=4 noexpandtab : */
2 /*
3 * Copyright (C) 2003, by Keith J. Jones.
4 * Copyright (C) 2007-2019 Abel Cheung.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "config.h"
33
34 #include <errno.h>
35 #include <stdlib.h>
36
37 #include "utils.h"
38 #ifdef G_OS_WIN32
39 # include "utils-win.h"
40 #endif
41
42 #include <glib/gi18n.h>
43 #include <glib/gstdio.h>
44
45 #include "rifiuti.h"
46
47
/* Exit status accumulated over the whole run: assigned in
 * parse_record_cb() and main(), handed by address to the path
 * conversion helper, and finally returned from main(). */
static r2status     exit_status = EXIT_SUCCESS;
/* Metadata of the index file being processed. Filled in while the file
 * is validated and its records are parsed; every record points back to
 * it, and it is passed to print_header(). */
static metarecord   meta;
/* Declared here, defined elsewhere. When NULL, legacy (non-unicode)
 * paths are not converted, and files using legacy-sized records are
 * rejected during validation. */
extern char        *legacy_encoding;
51
/*
 * Lookup table from the drive number stored in a record to the
 * character used as drive designator:
 *   index  0-25  =>  'A'-'Z'
 *   index  26    =>  '\'
 *   index  27    =>  '?', placeholder for any erroneous (out of range)
 *                    drive number
 */
unsigned char driveletters[28] =
{
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
	'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
	'\\',
	'?'
};
60
61 /*!
62 * Check if index file has sufficient amount of data for reading
63 * 0 = success, all other return status = error
64 * If success, infile will be set to file pointer and other args
65 * will be filled, otherwise file pointer = NULL
66 */
67 static r2status
validate_index_file(const char * filename,FILE ** infile)68 validate_index_file (const char *filename,
69 FILE **infile)
70 {
71 void *buf;
72 FILE *fp = NULL;
73 uint32_t ver;
74 int e, ret;
75
76 g_debug ("Start file validation...");
77
78 g_return_val_if_fail ( infile != NULL, R2_ERR_INTERNAL );
79 *infile = NULL;
80
81 if ( !(fp = g_fopen (filename, "rb")) )
82 {
83 e = errno;
84 g_printerr (_("Error opening file '%s' for reading: %s"),
85 filename, g_strerror (e));
86 g_printerr ("\n");
87 return R2_ERR_OPEN_FILE;
88 }
89
90 buf = g_malloc (RECORD_START_OFFSET);
91
92 if ( 1 > fread (buf, RECORD_START_OFFSET, 1, fp) )
93 {
94 /* TRANSLATOR COMMENT: file size must be at least 20 bytes */
95 g_critical (_("File size less than minimum allowed (%d bytes)"), RECORD_START_OFFSET);
96 ret = R2_ERR_BROKEN_FILE;
97 goto validation_broken;
98 }
99
100 copy_field (&ver, VERSION, KEPT_ENTRY);
101 ver = GUINT32_FROM_LE (ver);
102
103 /* total_entry only meaningful for 95 and NT4, on other versions
104 * it's junk memory data, don't bother copying */
105 if ( ( ver == VERSION_NT4 ) || ( ver == VERSION_WIN95 ) ) {
106 copy_field (&meta.total_entry, TOTAL_ENTRY, RECORD_SIZE);
107 meta.total_entry = GUINT32_FROM_LE (meta.total_entry);
108 }
109
110 copy_field (&meta.recordsize, RECORD_SIZE, FILESIZE_SUM);
111 meta.recordsize = GUINT32_FROM_LE (meta.recordsize);
112
113 g_free (buf);
114
115 /* Turns out version is not reliable indicator. Use size instead */
116 switch (meta.recordsize)
117 {
118 case LEGACY_RECORD_SIZE:
119
120 meta.has_unicode_path = FALSE;
121
122 if ( ( ver != VERSION_ME_03 ) && /* ME still use 280 byte record */
123 ( ver != VERSION_WIN98 ) &&
124 ( ver != VERSION_WIN95 ) )
125 {
126 g_printerr (_("Unsupported file version, or probably not an INFO2 file at all."));
127 g_printerr ("\n");
128 ret = R2_ERR_BROKEN_FILE;
129 goto validation_broken;
130 }
131
132 if (!legacy_encoding)
133 {
134 g_printerr (_("This INFO2 file was produced on a legacy system "
135 "without Unicode file name (Windows ME or earlier). "
136 "Please specify codepage of concerned system with "
137 "'-l' or '--legacy-filename' option."));
138 g_printerr ("\n\n");
139 /* TRANSLATOR COMMENT: can choose example from YOUR language & code page */
140 g_printerr (_("For example, if recycle bin is expected to come from West "
141 "European versions of Windows, use '-l CP1252' option; "
142 "or in case of Japanese Windows, use '-l CP932'."));
143 g_printerr ("\n");
144
145 ret = R2_ERR_ARG;
146 goto validation_broken;
147 }
148
149 switch (ver)
150 {
151 case VERSION_WIN95: meta.os_guess = OS_GUESS_95; break;
152 case VERSION_WIN98: meta.os_guess = OS_GUESS_98; break;
153 case VERSION_ME_03: meta.os_guess = OS_GUESS_ME; break;
154 }
155
156 break;
157
158 case UNICODE_RECORD_SIZE:
159
160 meta.has_unicode_path = TRUE;
161 if ( ( ver != VERSION_ME_03 ) && ( ver != VERSION_NT4 ) )
162 {
163 g_printerr (_("Unsupported file version, or probably not an INFO2 file at all."));
164 g_printerr ("\n");
165 ret = R2_ERR_BROKEN_FILE;
166 goto validation_broken;
167 }
168 /* guess is not complete yet for latter case, see populate_record_data */
169 meta.os_guess = (ver == VERSION_NT4) ? OS_GUESS_NT4 : OS_GUESS_2K_03;
170 break;
171
172 default:
173 ret = R2_ERR_BROKEN_FILE;
174 goto validation_broken;
175 }
176
177 rewind (fp);
178 *infile = fp;
179 meta.version = (int64_t) ver;
180
181 return EXIT_SUCCESS;
182
183 validation_broken:
184
185 fclose (fp);
186 return ret;
187 }
188
189
190 static rbin_struct *
populate_record_data(void * buf)191 populate_record_data (void *buf)
192 {
193 rbin_struct *record;
194 uint64_t win_filetime;
195 uint32_t drivenum;
196 size_t read;
197 char *legacy_fname;
198
199 record = g_malloc0 (sizeof (rbin_struct));
200
201 /* Guarantees null-termination by allocating extra byte; same goes with
202 * unicode filename */
203 legacy_fname = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET + 1);
204 copy_field (legacy_fname, LEGACY_FILENAME, RECORD_INDEX);
205
206 /* Index number associated with the record */
207 copy_field (&record->index_n, RECORD_INDEX, DRIVE_LETTER);
208 record->index_n = GUINT32_FROM_LE (record->index_n);
209 g_debug ("index=%u", record->index_n);
210
211 /* Number representing drive letter */
212 copy_field (&drivenum, DRIVE_LETTER, FILETIME);
213 drivenum = GUINT32_FROM_LE (drivenum);
214 g_debug ("drive=%u", drivenum);
215 if (drivenum >= sizeof (driveletters) - 1)
216 g_warning (_("Invalid drive number (0x%X) for record %u."),
217 drivenum, record->index_n);
218 record->drive = driveletters[MIN (drivenum, sizeof (driveletters) - 1)];
219
220 record->emptied = FALSE;
221 /* first byte will be removed from filename if file is not in recycle bin */
222 if (!*legacy_fname)
223 {
224 record->emptied = TRUE;
225 *legacy_fname = record->drive;
226 }
227
228 /* File deletion time */
229 copy_field (&win_filetime, FILETIME, FILESIZE);
230 win_filetime = GUINT64_FROM_LE (win_filetime);
231 record->deltime = win_filetime_to_epoch (win_filetime);
232
233 /* File size or occupied cluster size */
234 /* BEWARE! This is 32bit data casted to 64bit struct member */
235 copy_field (&record->filesize, FILESIZE, UNICODE_FILENAME);
236 record->filesize = GUINT64_FROM_LE (record->filesize);
237 g_debug ("filesize=%" G_GUINT64_FORMAT, record->filesize);
238
239 /*
240 * 1. Only bother populating legacy path if users need it,
241 * because otherwise we don't know which encoding to use
242 * 2. Enclose with angle brackets because they are not allowed
243 * in Windows file name, therefore stands out better that
244 * the escaped hex sequences are not part of real file name
245 */
246 if (legacy_encoding)
247 {
248 record->legacy_path = conv_path_to_utf8_with_tmpl (
249 legacy_fname, legacy_encoding, "<\\%02X>", &read, &exit_status);
250
251 if (record->legacy_path == NULL) {
252 g_warning (_("(Record %u) Error converting legacy path to UTF-8."),
253 record->index_n);
254 record->legacy_path = "";
255 }
256 }
257
258 g_free (legacy_fname);
259
260 if (! meta.has_unicode_path)
261 return record;
262
263 /*******************************************
264 * Part below deals with unicode path only *
265 *******************************************/
266
267 record->uni_path = conv_path_to_utf8_with_tmpl (
268 (char *) (buf + UNICODE_FILENAME_OFFSET), NULL,
269 "<\\u%04X>", &read, &exit_status);
270
271 if (record->uni_path == NULL) {
272 g_warning (_("(Record %u) Error converting unicode path to UTF-8."),
273 record->index_n);
274 record->uni_path = "";
275 }
276
277 /*
278 * We check for junk memory filling the padding area after
279 * unicode path, using it as the indicator of OS generating this
280 * INFO2 file. (server 2000 / 2003)
281 *
282 * The padding area after legacy path is no good; experiment
283 * shows that legacy path *always* contain non-zero bytes after
284 * null terminator if path contains double-byte character,
285 * regardless of OS.
286 *
287 * Those non-zero bytes resemble partial end of full path.
288 * Looks like an ANSI codepage full path is filled in
289 * legacy path field, then overwritten in place by a 8.3
290 * version of path whenever applicable (which was always shorter).
291 */
292 if (! meta.fill_junk)
293 {
294 void *ptr;
295
296 for (ptr = buf + UNICODE_FILENAME_OFFSET + read;
297 ptr < buf + UNICODE_RECORD_SIZE; ptr++)
298 {
299 if ( *(char *) ptr != '\0' )
300 {
301 g_debug ("Junk detected at offset 0x%tx of unicode path",
302 ptr - buf - UNICODE_FILENAME_OFFSET);
303 meta.fill_junk = TRUE;
304 break;
305 }
306 }
307 }
308
309 return record;
310 }
311
312
313 static void
parse_record_cb(char * index_file,GSList ** recordlist)314 parse_record_cb (char *index_file,
315 GSList **recordlist)
316 {
317 rbin_struct *record;
318 FILE *infile;
319 size_t size;
320 void *buf = NULL;
321
322 exit_status = validate_index_file (index_file, &infile);
323 if ( exit_status != EXIT_SUCCESS )
324 {
325 g_printerr (_("File '%s' fails validation."), index_file);
326 g_printerr ("\n");
327 return;
328 }
329
330 g_debug ("Start populating record for '%s'...", index_file);
331
332 /*
333 * Add padding bytes as null-termination of unicode file name.
334 * Normally Windows should have done the null termination within
335 * WIN_PATH_MAX limit, but on 98/ME/2000 programmers were sloppy
336 * and use junk memory as padding, so just play safe.
337 */
338 buf = g_malloc0 (meta.recordsize + sizeof(gunichar2));
339
340 fseek (infile, RECORD_START_OFFSET, SEEK_SET);
341
342 meta.is_empty = TRUE;
343 while (meta.recordsize == (size = fread (buf, 1, meta.recordsize, infile)))
344 {
345 record = populate_record_data (buf);
346 record->meta = &meta;
347 /* INFO2 already sort entries by time */
348 *recordlist = g_slist_append (*recordlist, record);
349 meta.is_empty = FALSE;
350 }
351 g_free (buf);
352
353 /* do this only when all entries are scanned */
354 if ( ! meta.is_empty && ( meta.os_guess == OS_GUESS_2K_03 ) )
355 meta.os_guess = meta.fill_junk ? OS_GUESS_2K : OS_GUESS_XP_03;
356
357 if ( ferror (infile) )
358 {
359 g_critical (_("Failed to read record at position %li: %s"),
360 ftell (infile), strerror (errno));
361 exit_status = R2_ERR_OPEN_FILE;
362 }
363 if ( feof (infile) && size && ( size < meta.recordsize ) )
364 {
365 g_warning (_("Premature end of file, last record (%zu bytes) discarded"), size);
366 exit_status = R2_ERR_BROKEN_FILE;
367 }
368
369 fclose (infile);
370 }
371
372 int
main(int argc,char ** argv)373 main (int argc,
374 char **argv)
375 {
376 GSList *filelist = NULL;
377 GSList *recordlist = NULL;
378 GOptionContext *context;
379
380 extern char **fileargs;
381
382 rifiuti_init (argv[0]);
383
384 /* TRANSLATOR: appears in help text short summary */
385 context = g_option_context_new (N_("INFO2"));
386 g_option_context_set_summary (context, N_(
387 "Parse INFO2 file and dump recycle bin data."));
388 rifiuti_setup_opt_ctx (&context, RECYCLE_BIN_TYPE_FILE);
389 exit_status = rifiuti_parse_opt_ctx (&context, &argc, &argv);
390 if (exit_status != EXIT_SUCCESS)
391 goto cleanup;
392
393 exit_status = check_file_args (fileargs[0], &filelist, RECYCLE_BIN_TYPE_FILE);
394 if (exit_status != EXIT_SUCCESS)
395 goto cleanup;
396
397 /* To be overwritten in parse_record_cb() when appropriate */
398 meta.os_guess = OS_GUESS_UNKNOWN;
399
400 /*
401 * TODO May be silly for single file, but would be useful in future
402 * when reading multiple files from live system
403 */
404 g_slist_foreach (filelist, (GFunc) parse_record_cb, &recordlist);
405
406 meta.type = RECYCLE_BIN_TYPE_FILE;
407 meta.filename = fileargs[0];
408 /*
409 * Keeping deleted entry is only available since 98
410 * Note: always set this variable after parse_record_cb() because
411 * meta.version is not set beforehand
412 */
413 meta.keep_deleted_entry = ( meta.version >= VERSION_WIN98 );
414
415 if ( !meta.is_empty && (recordlist == NULL) )
416 {
417 g_printerr ("%s", _("Recycle bin file has no valid record.\n"));
418 exit_status = R2_ERR_BROKEN_FILE;
419 goto cleanup;
420 }
421
422 /* Print everything */
423 {
424 r2status s = prepare_output_handle ();
425 if (s != EXIT_SUCCESS) {
426 exit_status = s;
427 goto cleanup;
428 }
429 }
430
431 print_header (meta);
432 g_slist_foreach (recordlist, (GFunc) print_record_cb, NULL);
433 print_footer ();
434
435 close_output_handle ();
436
437 /* file descriptor should have been closed at this point */
438 {
439 r2status s = move_temp_file ();
440 if ( s != EXIT_SUCCESS )
441 exit_status = s;
442 }
443
444 cleanup:
445
446 /* Last minute error messages for accumulated non-fatal errors */
447 switch (exit_status)
448 {
449 case R2_ERR_USER_ENCODING:
450 if (legacy_encoding) {
451 g_printerr (_("Some entries could not be interpreted in %s encoding."
452 " The concerned characters are displayed in hex value instead."
453 " Very likely the (localised) Windows generating the recycle bin "
454 "artifact does not use specified codepage."), legacy_encoding);
455 } else {
456 g_printerr (_("Some entries could not be presented as correct "
457 "unicode path. The concerned characters are displayed "
458 "in escaped unicode sequences."));
459 }
460 g_printerr ("\n");
461 break;
462
463 default:
464 break;
465 }
466 g_debug ("Cleaning up...");
467
468 g_slist_free_full (recordlist, (GDestroyNotify) free_record_cb);
469 g_slist_free_full (filelist , (GDestroyNotify) g_free );
470 free_vars ();
471
472 return exit_status;
473 }
474