1 /* vim: set sw=4 ts=4 noexpandtab : */
2 /*
3  * Copyright (C) 2003, by Keith J. Jones.
4  * Copyright (C) 2007-2019 Abel Cheung.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include "config.h"
33 
34 #include <errno.h>
35 #include <stdlib.h>
36 
37 #include "utils.h"
38 #ifdef G_OS_WIN32
39 #  include "utils-win.h"
40 #endif
41 
42 #include <glib/gi18n.h>
43 #include <glib/gstdio.h>
44 
45 #include "rifiuti.h"
46 
47 
/* Status eventually returned by main(); updated by the parsing steps below */
static r2status     exit_status          = EXIT_SUCCESS;
/* Metadata about the index file being processed, shared by all functions in this file */
static metarecord   meta;
/* Defined elsewhere. NULL when not set; presumably filled from the
 * '-l' / '--legacy-filename' option mentioned in the messages below -- TODO confirm */
extern char        *legacy_encoding;
51 
/*
 * Lookup table from drive number to drive character:
 * index 0-25 gives 'A'-'Z', index 26 gives '\', and the last
 * slot (27) holds '?' as placeholder for erroneous drive numbers.
 */
unsigned char   driveletters[28] =
{
	/*  0 - 12 */
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
	/* 13 - 25 */
	'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
	/* 26 - 27 */
	'\\', '?'
};
60 
61 /*!
62  * Check if index file has sufficient amount of data for reading
63  * 0 = success, all other return status = error
64  * If success, infile will be set to file pointer and other args
65  * will be filled, otherwise file pointer = NULL
66  */
67 static r2status
validate_index_file(const char * filename,FILE ** infile)68 validate_index_file (const char  *filename,
69                      FILE       **infile)
70 {
71 	void           *buf;
72 	FILE           *fp = NULL;
73 	uint32_t        ver;
74 	int             e, ret;
75 
76 	g_debug ("Start file validation...");
77 
78 	g_return_val_if_fail ( infile != NULL, R2_ERR_INTERNAL );
79 	*infile = NULL;
80 
81 	if ( !(fp = g_fopen (filename, "rb")) )
82 	{
83 		e = errno;
84 		g_printerr (_("Error opening file '%s' for reading: %s"),
85 			filename, g_strerror (e));
86 		g_printerr ("\n");
87 		return R2_ERR_OPEN_FILE;
88 	}
89 
90 	buf = g_malloc (RECORD_START_OFFSET);
91 
92 	if ( 1 > fread (buf, RECORD_START_OFFSET, 1, fp) )
93 	{
94 		/* TRANSLATOR COMMENT: file size must be at least 20 bytes */
95 		g_critical (_("File size less than minimum allowed (%d bytes)"), RECORD_START_OFFSET);
96 		ret = R2_ERR_BROKEN_FILE;
97 		goto validation_broken;
98 	}
99 
100 	copy_field (&ver, VERSION, KEPT_ENTRY);
101 	ver = GUINT32_FROM_LE (ver);
102 
103 	/* total_entry only meaningful for 95 and NT4, on other versions
104 	 * it's junk memory data, don't bother copying */
105 	if ( ( ver == VERSION_NT4 ) || ( ver == VERSION_WIN95 ) ) {
106 		copy_field (&meta.total_entry, TOTAL_ENTRY, RECORD_SIZE);
107 		meta.total_entry = GUINT32_FROM_LE (meta.total_entry);
108 	}
109 
110 	copy_field (&meta.recordsize, RECORD_SIZE, FILESIZE_SUM);
111 	meta.recordsize = GUINT32_FROM_LE (meta.recordsize);
112 
113 	g_free (buf);
114 
115 	/* Turns out version is not reliable indicator. Use size instead */
116 	switch (meta.recordsize)
117 	{
118 	  case LEGACY_RECORD_SIZE:
119 
120 		meta.has_unicode_path = FALSE;
121 
122 		if ( ( ver != VERSION_ME_03 ) &&  /* ME still use 280 byte record */
123 		     ( ver != VERSION_WIN98 ) &&
124 		     ( ver != VERSION_WIN95 ) )
125 		{
126 			g_printerr (_("Unsupported file version, or probably not an INFO2 file at all."));
127 			g_printerr ("\n");
128 			ret = R2_ERR_BROKEN_FILE;
129 			goto validation_broken;
130 		}
131 
132 		if (!legacy_encoding)
133 		{
134 			g_printerr (_("This INFO2 file was produced on a legacy system "
135 			              "without Unicode file name (Windows ME or earlier). "
136 			              "Please specify codepage of concerned system with "
137 			              "'-l' or '--legacy-filename' option."));
138 			g_printerr ("\n\n");
139 			/* TRANSLATOR COMMENT: can choose example from YOUR language & code page */
140 			g_printerr (_("For example, if recycle bin is expected to come from West "
141 			              "European versions of Windows, use '-l CP1252' option; "
142 			              "or in case of Japanese Windows, use '-l CP932'."));
143 			g_printerr ("\n");
144 
145 			ret = R2_ERR_ARG;
146 			goto validation_broken;
147 		}
148 
149 		switch (ver)
150 		{
151 			case VERSION_WIN95: meta.os_guess = OS_GUESS_95; break;
152 			case VERSION_WIN98: meta.os_guess = OS_GUESS_98; break;
153 			case VERSION_ME_03: meta.os_guess = OS_GUESS_ME; break;
154 		}
155 
156 		break;
157 
158 	  case UNICODE_RECORD_SIZE:
159 
160 		meta.has_unicode_path = TRUE;
161 		if ( ( ver != VERSION_ME_03 ) && ( ver != VERSION_NT4 ) )
162 		{
163 			g_printerr (_("Unsupported file version, or probably not an INFO2 file at all."));
164 			g_printerr ("\n");
165 			ret = R2_ERR_BROKEN_FILE;
166 			goto validation_broken;
167 		}
168 		/* guess is not complete yet for latter case, see populate_record_data */
169 		meta.os_guess = (ver == VERSION_NT4) ? OS_GUESS_NT4 : OS_GUESS_2K_03;
170 		break;
171 
172 	  default:
173 		ret = R2_ERR_BROKEN_FILE;
174 		goto validation_broken;
175 	}
176 
177 	rewind (fp);
178 	*infile = fp;
179 	meta.version = (int64_t) ver;
180 
181 	return EXIT_SUCCESS;
182 
183   validation_broken:
184 
185 	fclose (fp);
186 	return ret;
187 }
188 
189 
190 static rbin_struct *
populate_record_data(void * buf)191 populate_record_data (void *buf)
192 {
193 	rbin_struct    *record;
194 	uint64_t        win_filetime;
195 	uint32_t        drivenum;
196 	size_t          read;
197 	char           *legacy_fname;
198 
199 	record = g_malloc0 (sizeof (rbin_struct));
200 
201 	/* Guarantees null-termination by allocating extra byte; same goes with
202 	 * unicode filename */
203 	legacy_fname = g_malloc0 (RECORD_INDEX_OFFSET - LEGACY_FILENAME_OFFSET + 1);
204 	copy_field (legacy_fname, LEGACY_FILENAME, RECORD_INDEX);
205 
206 	/* Index number associated with the record */
207 	copy_field (&record->index_n, RECORD_INDEX, DRIVE_LETTER);
208 	record->index_n = GUINT32_FROM_LE (record->index_n);
209 	g_debug ("index=%u", record->index_n);
210 
211 	/* Number representing drive letter */
212 	copy_field (&drivenum, DRIVE_LETTER, FILETIME);
213 	drivenum = GUINT32_FROM_LE (drivenum);
214 	g_debug ("drive=%u", drivenum);
215 	if (drivenum >= sizeof (driveletters) - 1)
216 		g_warning (_("Invalid drive number (0x%X) for record %u."),
217 		           drivenum, record->index_n);
218 	record->drive = driveletters[MIN (drivenum, sizeof (driveletters) - 1)];
219 
220 	record->emptied = FALSE;
221 	/* first byte will be removed from filename if file is not in recycle bin */
222 	if (!*legacy_fname)
223 	{
224 		record->emptied = TRUE;
225 		*legacy_fname = record->drive;
226 	}
227 
228 	/* File deletion time */
229 	copy_field (&win_filetime, FILETIME, FILESIZE);
230 	win_filetime = GUINT64_FROM_LE (win_filetime);
231 	record->deltime = win_filetime_to_epoch (win_filetime);
232 
233 	/* File size or occupied cluster size */
234 	/* BEWARE! This is 32bit data casted to 64bit struct member */
235 	copy_field (&record->filesize, FILESIZE, UNICODE_FILENAME);
236 	record->filesize = GUINT64_FROM_LE (record->filesize);
237 	g_debug ("filesize=%" G_GUINT64_FORMAT, record->filesize);
238 
239 	/*
240 	 * 1. Only bother populating legacy path if users need it,
241 	 *    because otherwise we don't know which encoding to use
242 	 * 2. Enclose with angle brackets because they are not allowed
243 	 *    in Windows file name, therefore stands out better that
244 	 *    the escaped hex sequences are not part of real file name
245 	 */
246 	if (legacy_encoding)
247 	{
248 		record->legacy_path = conv_path_to_utf8_with_tmpl (
249 			legacy_fname, legacy_encoding, "<\\%02X>", &read, &exit_status);
250 
251 		if (record->legacy_path == NULL) {
252 			g_warning (_("(Record %u) Error converting legacy path to UTF-8."),
253 				record->index_n);
254 			record->legacy_path = "";
255 		}
256 	}
257 
258 	g_free (legacy_fname);
259 
260 	if (! meta.has_unicode_path)
261 		return record;
262 
263 	/*******************************************
264 	 * Part below deals with unicode path only *
265 	 *******************************************/
266 
267 	record->uni_path = conv_path_to_utf8_with_tmpl (
268 		(char *) (buf + UNICODE_FILENAME_OFFSET), NULL,
269 		"<\\u%04X>", &read, &exit_status);
270 
271 	if (record->uni_path == NULL) {
272 		g_warning (_("(Record %u) Error converting unicode path to UTF-8."),
273 			record->index_n);
274 		record->uni_path = "";
275 	}
276 
277 	/*
278 	 * We check for junk memory filling the padding area after
279 	 * unicode path, using it as the indicator of OS generating this
280 	 * INFO2 file. (server 2000 / 2003)
281 	 *
282 	 * The padding area after legacy path is no good; experiment
283 	 * shows that legacy path *always* contain non-zero bytes after
284 	 * null terminator if path contains double-byte character,
285 	 * regardless of OS.
286 	 *
287 	 * Those non-zero bytes resemble partial end of full path.
288 	 * Looks like an ANSI codepage full path is filled in
289 	 * legacy path field, then overwritten in place by a 8.3
290 	 * version of path whenever applicable (which was always shorter).
291 	 */
292 	if (! meta.fill_junk)
293 	{
294 		void *ptr;
295 
296 		for (ptr = buf + UNICODE_FILENAME_OFFSET + read;
297 			ptr < buf + UNICODE_RECORD_SIZE; ptr++)
298 		{
299 			if ( *(char *) ptr != '\0' )
300 			{
301 				g_debug ("Junk detected at offset 0x%tx of unicode path",
302 					ptr - buf - UNICODE_FILENAME_OFFSET);
303 				meta.fill_junk = TRUE;
304 				break;
305 			}
306 		}
307 	}
308 
309 	return record;
310 }
311 
312 
313 static void
parse_record_cb(char * index_file,GSList ** recordlist)314 parse_record_cb (char    *index_file,
315                  GSList **recordlist)
316 {
317 	rbin_struct *record;
318 	FILE        *infile;
319 	size_t       size;
320 	void        *buf = NULL;
321 
322 	exit_status = validate_index_file (index_file, &infile);
323 	if ( exit_status != EXIT_SUCCESS )
324 	{
325 		g_printerr (_("File '%s' fails validation."), index_file);
326 		g_printerr ("\n");
327 		return;
328 	}
329 
330 	g_debug ("Start populating record for '%s'...", index_file);
331 
332 	/*
333 	 * Add padding bytes as null-termination of unicode file name.
334 	 * Normally Windows should have done the null termination within
335 	 * WIN_PATH_MAX limit, but on 98/ME/2000 programmers were sloppy
336 	 * and use junk memory as padding, so just play safe.
337 	 */
338 	buf = g_malloc0 (meta.recordsize + sizeof(gunichar2));
339 
340 	fseek (infile, RECORD_START_OFFSET, SEEK_SET);
341 
342 	meta.is_empty = TRUE;
343 	while (meta.recordsize == (size = fread (buf, 1, meta.recordsize, infile)))
344 	{
345 		record = populate_record_data (buf);
346 		record->meta = &meta;
347 		/* INFO2 already sort entries by time */
348 		*recordlist = g_slist_append (*recordlist, record);
349 		meta.is_empty = FALSE;
350 	}
351 	g_free (buf);
352 
353 	/* do this only when all entries are scanned */
354 	if ( ! meta.is_empty && ( meta.os_guess == OS_GUESS_2K_03 ) )
355 		meta.os_guess = meta.fill_junk ? OS_GUESS_2K : OS_GUESS_XP_03;
356 
357 	if ( ferror (infile) )
358 	{
359 		g_critical (_("Failed to read record at position %li: %s"),
360 				   ftell (infile), strerror (errno));
361 		exit_status = R2_ERR_OPEN_FILE;
362 	}
363 	if ( feof (infile) && size && ( size < meta.recordsize ) )
364 	{
365 		g_warning (_("Premature end of file, last record (%zu bytes) discarded"), size);
366 		exit_status = R2_ERR_BROKEN_FILE;
367 	}
368 
369 	fclose (infile);
370 }
371 
372 int
main(int argc,char ** argv)373 main (int    argc,
374       char **argv)
375 {
376 	GSList             *filelist   = NULL;
377 	GSList             *recordlist = NULL;
378 	GOptionContext     *context;
379 
380 	extern char       **fileargs;
381 
382 	rifiuti_init (argv[0]);
383 
384 	/* TRANSLATOR: appears in help text short summary */
385 	context = g_option_context_new (N_("INFO2"));
386 	g_option_context_set_summary (context, N_(
387 		"Parse INFO2 file and dump recycle bin data."));
388 	rifiuti_setup_opt_ctx (&context, RECYCLE_BIN_TYPE_FILE);
389 	exit_status = rifiuti_parse_opt_ctx (&context, &argc, &argv);
390 	if (exit_status != EXIT_SUCCESS)
391 		goto cleanup;
392 
393 	exit_status = check_file_args (fileargs[0], &filelist, RECYCLE_BIN_TYPE_FILE);
394 	if (exit_status != EXIT_SUCCESS)
395 		goto cleanup;
396 
397 	/* To be overwritten in parse_record_cb() when appropriate */
398 	meta.os_guess = OS_GUESS_UNKNOWN;
399 
400 	/*
401 	 * TODO May be silly for single file, but would be useful in future
402 	 * when reading multiple files from live system
403 	 */
404 	g_slist_foreach (filelist, (GFunc) parse_record_cb, &recordlist);
405 
406 	meta.type     = RECYCLE_BIN_TYPE_FILE;
407 	meta.filename = fileargs[0];
408 	/*
409 	 * Keeping deleted entry is only available since 98
410 	 * Note: always set this variable after parse_record_cb() because
411 	 * meta.version is not set beforehand
412 	 */
413 	meta.keep_deleted_entry = ( meta.version >= VERSION_WIN98 );
414 
415 	if ( !meta.is_empty && (recordlist == NULL) )
416 	{
417 		g_printerr ("%s", _("Recycle bin file has no valid record.\n"));
418 		exit_status = R2_ERR_BROKEN_FILE;
419 		goto cleanup;
420 	}
421 
422 	/* Print everything */
423 	{
424 		r2status s = prepare_output_handle ();
425 		if (s != EXIT_SUCCESS) {
426 			exit_status = s;
427 			goto cleanup;
428 		}
429 	}
430 
431 	print_header (meta);
432 	g_slist_foreach (recordlist, (GFunc) print_record_cb, NULL);
433 	print_footer ();
434 
435 	close_output_handle ();
436 
437 	/* file descriptor should have been closed at this point */
438 	{
439 		r2status s = move_temp_file ();
440 		if ( s != EXIT_SUCCESS )
441 			exit_status = s;
442 	}
443 
444 	cleanup:
445 
446 	/* Last minute error messages for accumulated non-fatal errors */
447 	switch (exit_status)
448 	{
449 		case R2_ERR_USER_ENCODING:
450 		if (legacy_encoding) {
451 			g_printerr (_("Some entries could not be interpreted in %s encoding."
452 				"  The concerned characters are displayed in hex value instead."
453 				"  Very likely the (localised) Windows generating the recycle bin "
454 				"artifact does not use specified codepage."), legacy_encoding);
455 		} else {
456 			g_printerr (_("Some entries could not be presented as correct "
457 				"unicode path.  The concerned characters are displayed "
458 				"in escaped unicode sequences."));
459 		}
460 			g_printerr ("\n");
461 			break;
462 
463 		default:
464 			break;
465 	}
466 	g_debug ("Cleaning up...");
467 
468 	g_slist_free_full (recordlist, (GDestroyNotify) free_record_cb);
469 	g_slist_free_full (filelist  , (GDestroyNotify) g_free        );
470 	free_vars ();
471 
472 	return exit_status;
473 }
474