1 /* file.c - file abstraction layer */
2 
3 /* use 64-bit off_t.
4  * these macros must be defined before any included file */
5 #undef _LARGEFILE64_SOURCE
6 #undef _FILE_OFFSET_BITS
7 #define _LARGEFILE64_SOURCE
8 #define _FILE_OFFSET_BITS 64
9 
10 #include "file.h"
11 #include "common_func.h"
12 #include "parse_cmdline.h"
13 #include "platform.h"
14 #include "win_utils.h"
15 #include <assert.h>
16 #include <errno.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/stat.h>
20 #include <fcntl.h>  /* _O_RDONLY, _O_BINARY, posix_fadvise */
21 
22 #if defined(_WIN32) || defined(__CYGWIN__)
23 # include <windows.h>
24 #if !defined(__CYGWIN__)
25 # include <share.h> /* for _SH_DENYWR */
26 #endif
27 # include <io.h>
28 #endif
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
34 #define IS_ANY_SLASH(c) ((c) == '/' || (c) == '\\')
35 #define IS_ANY_TSLASH(c) ((c) == RSH_T('/') || (c) == RSH_T('\\'))
36 #define IS_DOT_STR(s) ((s)[0] == '.' && (s)[1] == 0)
37 #define IS_DOT_TSTR(s) ((s)[0] == '.' && (s)[1] == 0)
38 
39 #ifdef _WIN32
/**
 * Check whether a string consists solely of 7-bit ASCII characters.
 *
 * @param str the zero-terminated string to examine
 * @return 1 if every byte of the string is below 0x80, 0 otherwise
 */
static int str_is_ascii(const char* str)
{
	const unsigned char* cursor = (const unsigned char*)str;
	while (*cursor != 0) {
		if ((*cursor & 0x80) != 0)
			return 0;
		cursor++;
	}
	return 1;
}
48 #endif
49 
50 /*=========================================================================
51  * Path functions
52  *=========================================================================*/
53 
/**
 * Find the last component of a file path.
 *
 * @param path file path, can be NULL
 * @return pointer into path to the character following the last path
 *         separator (path itself if it contains no separator),
 *         NULL if path is NULL
 */
static const char* get_basename(const char* path)
{
	const char* name;
	if (path == NULL)
		return NULL;
	/* walk back from the terminating zero until a separator precedes the name */
	name = path + strlen(path);
	while (name > path && !IS_PATH_SEPARATOR(name[-1]))
		name--;
	return name;
}
68 
69 /**
70  * Return filepath, obtained by concatinating a directory path and a sub-path.
71  *
72  * @param dir_path (nullable) directory path
73  * @param sub_path the filepath to append to the directory
74  * @param user_path_separator flag, 1 to use user-defined path separator,
75  *        0 to use system path separator
76  * @return concatinated file path
77  */
make_path(const char * dir_path,const char * sub_path,int user_path_separator)78 char* make_path(const char* dir_path, const char* sub_path, int user_path_separator)
79 {
80 	char* buf;
81 	size_t dir_len;
82 	assert(sub_path);
83 	if (sub_path[0] == '.' && IS_ANY_SLASH(sub_path[1]))
84 		sub_path += 2;
85 	if (!dir_path)
86 		return rsh_strdup(sub_path);
87 	/* remove leading path delimiters from sub_path */
88 	for (; IS_ANY_SLASH(*sub_path); sub_path++);
89 	if (dir_path[0] == 0 || IS_DOT_STR(dir_path)) {
90 		/* do not extend sub_path for dir_path="." */
91 		return rsh_strdup(sub_path);
92 	}
93 	/* remove trailing path delimiters from the directory path */
94 	for (dir_len = strlen(dir_path); dir_len > 0 && IS_ANY_SLASH(dir_path[dir_len - 1]); dir_len--);
95 	/* copy directory path */
96 	buf = (char*)rsh_malloc(dir_len + strlen(sub_path) + 2);
97 	memcpy(buf, dir_path, dir_len);
98 	/* insert path separator */
99 	buf[dir_len++] = (user_path_separator && opt.path_separator ? opt.path_separator : SYS_PATH_SEPARATOR);
100 	strcpy(buf + dir_len, sub_path); /* append sub_path */
101 	return buf;
102 }
103 
104 #ifdef _WIN32
105 /**
106  * Return wide-string filepath, obtained by concatinating a directory path and a sub-path.
107  *
108  * @param dir_path (nullable) directory path
109  * @param dir_len length of directory path in characters
110  * @param sub_path the filepath to append to the directory
111  * @return concatinated file path
112  */
make_wpath(ctpath_t dir_path,size_t dir_len,ctpath_t sub_path)113 tpath_t make_wpath(ctpath_t dir_path, size_t dir_len, ctpath_t sub_path)
114 {
115 	wchar_t* result;
116 	size_t len;
117 	if (dir_path == 0 || IS_DOT_TSTR(dir_path))
118 		dir_len = 0;
119 	else {
120 		if (IS_UNC_PREFIX(sub_path))
121 			sub_path += UNC_PREFIX_SIZE;
122 		if (sub_path[0] == L'.' && IS_PATH_SEPARATOR_W(sub_path[1]))
123 			sub_path += 2;
124 		/* remove leading path separators from sub_path */
125 		for (; IS_PATH_SEPARATOR_W(*sub_path); sub_path++);
126 		if (dir_len == (size_t)-1)
127 			dir_len = wcslen(dir_path);
128 	}
129 	len = wcslen(sub_path);
130 	result = (wchar_t*)rsh_malloc((dir_len + len + 2) * sizeof(wchar_t));
131 	if (dir_len > 0) {
132 		memcpy(result, dir_path, dir_len * sizeof(wchar_t));
133 		if (result[dir_len - 1] != L'\\' && sub_path[0]) {
134 			/* append path separator to the directory */
135 			result[dir_len++] = L'\\';
136 		}
137 	}
138 	/* append sub_path */
139 	memcpy(result + dir_len, sub_path, (len + 1) * sizeof(wchar_t));
140 	return result;
141 }
142 
143 /**
144  * Return wide-string filepath, obtained by concatinating a directory path and a sub-path.
145  * Windows UNC path is returned if the resulting path is too long.
146  *
147  * @param dir_path (nullable) directory path
148  * @param sub_path the filepath to append to the directory
149  * @return concatinated file path
150  */
make_wpath_unc(ctpath_t dir_path,wchar_t * sub_path)151 static tpath_t make_wpath_unc(ctpath_t dir_path, wchar_t* sub_path)
152 {
153 	wchar_t* path = make_wpath(dir_path, (size_t)-1, sub_path);
154 	wchar_t* long_path = get_long_path_if_needed(path);
155 	if (!long_path)
156 		return path;
157 	free(path);
158 	return long_path;
159 }
160 #endif /* _WIN32 */
161 
162 /**
163  * Compare paths.
164  *
165  * @param path the first path
166  * @param file the second path
167  * @return 1 if paths a equal, 0 otherwise
168  */
are_paths_equal(ctpath_t path,file_t * file)169 int are_paths_equal(ctpath_t path, file_t* file)
170 {
171 	ctpath_t fpath;
172 	if (!path || !file || !file->real_path) return 0;
173 	fpath = file->real_path;
174 	if (path[0] == RSH_T('.') && IS_ANY_TSLASH(path[1])) path += 2;
175 	if (fpath[0] == RSH_T('.') && IS_ANY_TSLASH(fpath[1])) fpath += 2;
176 
177 	for (; *path; ++path, ++fpath) {
178 		if (*path != *fpath && (!IS_ANY_TSLASH(*path) || !IS_ANY_TSLASH(*fpath))) {
179 			/* paths are different */
180 			return 0;
181 		}
182 	}
183 	/* check if both paths terminated */
184 	return (*path == *fpath);
185 }
186 
187 #ifndef _WIN32
/**
 * Convert a windows file path to a UNIX one, replacing '\\' by '/'.
 * The conversion is done in place.
 *
 * @param path the zero-terminated path to convert
 */
static void convert_backslashes_to_unix(char* path)
{
	char* slash = strchr(path, '\\');
	while (slash) {
		*slash = '/';
		slash = strchr(slash + 1, '\\');
	}
}
201 #endif /* _WIN32 */
202 
203 /**
204  * Check if a path points to a regular file.
205  *
206  * @param path the path to check
207  * @return 1 if file exists an is a regular file, 0 otherwise
208  */
is_regular_file(const char * path)209 int is_regular_file(const char* path)
210 {
211 	int is_regular = 0;
212 	file_t file;
213 	file_init_by_print_path(&file, NULL, path, FileInitReusePath);
214 	if (file_stat(&file, 0) >= 0) {
215 		is_regular = FILE_ISREG(&file);
216 	}
217 	file_cleanup(&file);
218 	return is_regular;
219 }
220 
221 /*=========================================================================
222  * file_t functions
223  *=========================================================================*/
224 
/**
 * Internal bits of file_t.mode, set and tested by the functions of this file.
 */
enum FileMemoryModeBits {
	/* ownership bits: file_cleanup() does not free a path, whose bit is set */
	FileDontFreeRealPath = 0x1000,
	FileDontFreePrintPath = 0x2000,
	FileDontFreeNativePath = 0x4000,
	FileMemoryModeMask = (FileDontFreeRealPath | FileDontFreePrintPath | FileDontFreeNativePath),
	/* the print path is ASCII-only (also set for std streams and data) */
	FileIsAsciiPrintPath = 0x10000,
	/* set by file_get_print_path() after a failed encoding conversion,
	 * so the conversion is not attempted again */
	FileDontUsePrintPath = 0x20000,
	FileDontUseNativePath = 0x40000,
	FileConversionMask = (FileIsAsciiPrintPath | FileDontUsePrintPath | FileDontUseNativePath)
};
235 
/**
 * Initialize file_t structure, associating it with the given file path.
 * A leading "./" of the path is skipped. Unless FileInitReusePath is set,
 * the path is duplicated, so the structure owns its real_path.
 * On Windows a long path returned by get_long_path_if_needed() is used
 * as real_path instead and is always owned by the structure.
 *
 * @param file the file_t structure to initialize
 * @param path the file path
 * @param init_flags initialization flags
 * @return 0 on success, -1 if the print path can't be obtained
 *         or if the requested file_stat() call fails
 */
int file_init(file_t* file, ctpath_t path, unsigned init_flags)
{
#ifdef _WIN32
	tpath_t long_path = get_long_path_if_needed(path);
#endif
	memset(file, 0, sizeof(*file));
	if (path[0] == RSH_T('.') && IS_ANY_TSLASH(path[1]))
		path += 2;
	/* borrow the caller's string for now, so it must not be freed by file_cleanup() */
	file->real_path = (tpath_t)path;
	file->mode = (init_flags & FileMaskModeBits) | FileDontFreeRealPath;
	if (((init_flags & FileMaskUpdatePrintPath) && opt.path_separator) IF_WINDOWS( || long_path))
	{
		/* initialize print_path using the path argument */
		if (!file_get_print_path(file, FPathUtf8 | (init_flags & FileMaskUpdatePrintPath)))
		{
			IF_WINDOWS(free(long_path));
			return -1;
		}
	}
#ifdef _WIN32
	if (long_path)
	{
		/* the allocated long path is owned by the structure: the don't-free bit is dropped */
		file->real_path = long_path;
		file->mode = init_flags & FileMaskModeBits;
	}
	else
#endif
	{
		if ((init_flags & FileInitReusePath) == 0)
		{
			/* take a private copy of the path, owned by the structure */
			file->mode = init_flags & FileMaskModeBits;
			file->real_path = rsh_tstrdup(path);
#ifndef _WIN32
			if ((init_flags & FileInitUseRealPathAsIs) == 0)
				convert_backslashes_to_unix(file->real_path);
#endif
		}
	}
	/* optionally read the file information right away */
	if ((init_flags & (FileInitRunFstat | FileInitRunLstat)) &&
			file_stat(file, (init_flags & FileInitRunLstat)) < 0)
		return -1;
	return 0;
}
286 
287 #ifdef _WIN32
288 static int file_statw(file_t* file);
289 
/**
 * Detect path encoding, by trying file_statw() the file in available encodings.
 * The order of encodings is detected by init_flags bit mask.
 * On successful detection file->real_path is allocated.
 *
 * @param file the file to store the detected real path in, its real_path must be NULL
 * @param dir_path (nullable) directory path to prepend to printable path
 * @param print_path printable path, which encoding shall be detected
 * @param init_flags bit flags, helping to detect the encoding
 * @return encoding on success (0 for UTF-8, 1 for native encoding),
 *         -1 on fail with error code stored in errno
 */
static int detect_path_encoding(file_t* file, wchar_t* dir_path, const char* print_path, unsigned init_flags)
{
	/* conversions to try: even indices convert from UTF-8, odd ones from the native encoding;
	 * the first two entries additionally carry the ConvertExact flag */
	static unsigned encoding_flags[4] = { ConvertUtf8ToWcs | ConvertExact, ConvertNativeToWcs | ConvertExact,
		ConvertUtf8ToWcs, ConvertNativeToWcs };
	/* path converted at step 0, kept as a fallback while step 1 is tried */
	wchar_t* last_path = NULL;
	/* the ConvertPath flag is passed only when no directory is prepended */
	unsigned convert_path = (dir_path ? 0 : ConvertPath);
	int ascii = str_is_ascii(print_path);
	/* index of the encoding to try first: 0 (UTF-8) or 1 (native) */
	int primary_path_index = ((opt.flags & OPT_UTF8) || (init_flags & FileInitUtf8PrintPath) || ascii ? 0 : 1);
	/* step 2 skips the secondary encoding, visiting only indices 0 and 2 */
	int step = ((init_flags & FileInitUtf8PrintPath) || ascii ? 2 : 1);
	int i;
	assert(file && !file->real_path);
	file->mode &= ~FileMaskStatBits;
	if (ascii)
		file->mode |= FileIsAsciiPrintPath;
	/* detect encoding in two or four steps */
	for (i = 0; i < 4; i += step) {
		/* xor swaps the order inside each pair, when the native encoding is primary */
		int path_index = i ^ primary_path_index;
		wchar_t* path = convert_str_to_wcs(print_path, encoding_flags[path_index] | convert_path);
		if (!path) {
			if (!last_path)
				continue;
			/* conversion failed: fall back to the path converted at step 0 */
			file->real_path = last_path;
			return primary_path_index;
		}
		if (dir_path) {
			file->real_path = make_wpath_unc(dir_path, path);
			free(path);
		} else
			file->real_path = path;
		if (i < 2) {
			/* steps 0 and 1: the converted path is verified by file_statw() */
			if (file_statw(file) == 0 || errno == EACCES) {
				free(last_path);
				return (path_index & 1);
			}
			if (i == 0) {
				/* with only one encoding to try, the unverified path is accepted */
				if (step == 2)
					return primary_path_index;
				last_path = file->real_path;
				continue;
			}
			/* step 1 failed as well: prefer the path of the primary encoding, if any */
			free(file->real_path);
			file->real_path = last_path;
			if(file->real_path)
				return primary_path_index;
		} else if (file->real_path) {
			/* steps 2 and 3: the converted path is accepted without verification */
			return (path_index & 1);
		}
		assert(last_path == NULL);
	}
	errno = EILSEQ;
	return -1;
}
353 #endif
354 
/**
 * Initialize file_t structure from a printable file path.
 * For std streams and data (FileIsStdStream/FileIsData) only the print path
 * is stored, without copying it. A leading "./" of the print path is skipped.
 *
 * @param file the file_t structure to initialize
 * @param prepend_dir the directory to prepend to the print_path, to construct the file path, can be NULL
 * @param print_path the printable representation of the file path
 * @param init_flags initialization flags, FileMaskUpdatePrintPath bits must not be set
 * @return 0 on success, -1 on fail with error code stored in errno
 */
int file_init_by_print_path(file_t* file, file_t* prepend_dir, const char* print_path, unsigned init_flags)
{
	assert(print_path);
	assert(!prepend_dir || prepend_dir->real_path);
	memset(file, 0, sizeof(file_t));
	file->mode = (init_flags & FileMaskModeBits);
	if (init_flags & (FileIsStdStream | FileIsData)) {
		/* no real path in this case, the caller's string is borrowed */
		file->print_path = print_path;
		file->mode |= FileDontFreePrintPath | FileIsAsciiPrintPath;
		return 0;
	}
	if (print_path[0] == '.' && IS_PATH_SEPARATOR(print_path[1]))
		print_path += 2;
#ifdef _WIN32
	{
		/* the field to store print_path in, chosen by the detected encoding */
		const char** primary_path;
		wchar_t* dir_path = (prepend_dir && !IS_DOT_TSTR(prepend_dir->real_path) ? prepend_dir->real_path : NULL);
		/* on success this call also allocates file->real_path */
		int encoding = detect_path_encoding(file, dir_path, print_path, init_flags);
		if (encoding < 0)
			return -1;
		if (encoding == 0) {
			primary_path = &file->print_path;
		} else {
			primary_path = &file->native_path;
		}
		if ((init_flags & (FileInitReusePath | FileMaskUpdatePrintPath)) == FileInitReusePath) {
			/* borrow the caller's string and mark it as not to be freed */
			*primary_path = print_path;
			file->mode |= (encoding == 0 ? FileDontFreePrintPath : FileDontFreeNativePath);
		} else {
			*primary_path = rsh_strdup(print_path);
		}
	}
#else
	if (!prepend_dir || IS_DOT_STR(prepend_dir->real_path)) {
		/* no directory to prepend: print_path is used as the real path */
		file_init(file, print_path, init_flags & (FileInitReusePath | FileMaskModeBits));
	} else {
		file->real_path = make_path(prepend_dir->real_path, print_path, 0);
		file->mode = init_flags & FileMaskModeBits;
	}
	assert(file->print_path == NULL);
	if ((init_flags & (FileInitReusePath | FileMaskUpdatePrintPath)) == FileInitReusePath) {
		/* borrow the caller's string and mark it as not to be freed */
		file->print_path = print_path;
		file->mode |= FileDontFreePrintPath;
	} else {
		file->print_path = rsh_strdup(print_path);
	}
#endif
	/* note: FileMaskUpdatePrintPath flags are used only with file_init() */
	assert((init_flags & FileMaskUpdatePrintPath) == 0);
	/* optionally read the file information right away */
	if ((init_flags & (FileInitRunFstat | FileInitRunLstat)) &&
			file_stat(file, (init_flags & FileInitRunLstat)) < 0)
		return -1;
	return 0;
}
418 
419 /**
420  * Transform the given file path, according to passed flags.
421  *
422  * @param path the file path to transform
423  * @param flags bitmask containing FPathBaseName, FPathNotNull and FileMaskUpdatePrintPath bit flags
424  * @return transformed path
425  */
handle_rest_of_path_flags(const char * path,unsigned flags)426 static const char* handle_rest_of_path_flags(const char* path, unsigned flags)
427 {
428 	if (path == NULL)
429 		return ((flags & FPathNotNull) ? (errno == EINVAL ? "(null)" : "(encoding error)") : NULL);
430 	if ((flags & FileMaskUpdatePrintPath) != 0 && opt.path_separator) {
431 		char* p = (char*)path - 1 + strlen(path);
432 		for (; p >= path; p--) {
433 			if (IS_ANY_SLASH(*p)) {
434 				*p = opt.path_separator;
435 				if ((flags & FileInitUpdatePrintPathLastSlash) != 0)
436 					break;
437 			}
438 		}
439 	}
440 	return (flags & FPathBaseName ? get_basename(path) : path);
441 }
442 
443 /**
444  * Get the print path of the file in utf8 or in a native encoding.
445  * Transformations specified by flags are applied.
446  * Encoding conversion on Windows can be lossy.
447  *
448  * @param file the file to get the path
449  * @param flags bitmask containing FPathUtf8, FPathNative, FPathBaseName, FPathNotNull
450  *              and FileMaskUpdatePrintPath bit flags
451  * @return transformed print path of the file. If FPathNotNull flag is not specified,
452  *         then NULL is returned on function fail with error code stored in errno.
453  *         If FPathNotNull flag is set, then error code is transformed to returned string.
454  */
file_get_print_path(file_t * file,unsigned flags)455 const char* file_get_print_path(file_t* file, unsigned flags)
456 {
457 #ifdef _WIN32
458 	unsigned convert_to;
459 	unsigned dont_use_bit;
460 	int is_utf8 = (opt.flags & OPT_UTF8 ? !(flags & FPathNative) : flags & FPathUtf8);
461 	const char* secondary_path;
462 	const char** primary_path = (is_utf8 || (file->mode & FileIsAsciiPrintPath) ? &file->print_path : &file->native_path);
463 	if (*primary_path)
464 		return handle_rest_of_path_flags(*primary_path, flags);
465 	if (is_utf8) {
466 		convert_to = ConvertToUtf8;
467 		dont_use_bit = FileDontUsePrintPath;
468 		secondary_path = file->native_path;
469 	} else {
470 		convert_to = ConvertToNative;
471 		dont_use_bit = FileDontUseNativePath;
472 		secondary_path = file->print_path;
473 	}
474 	if (secondary_path) {
475 		if ((file->mode & dont_use_bit) == 0) {
476 			*primary_path = convert_str_encoding(secondary_path, convert_to);
477 			if (!*primary_path)
478 				file->mode |= dont_use_bit;
479 		} else
480 			errno = EILSEQ;
481 		return handle_rest_of_path_flags(*primary_path, flags);
482 	}
483 	if (!file->real_path) {
484 		errno = EINVAL;
485 		return handle_rest_of_path_flags(NULL, flags);
486 	}
487 	*primary_path = convert_wcs_to_str(file->real_path, convert_to | ConvertPath);
488 	if (!*primary_path)
489 		return handle_rest_of_path_flags(NULL, flags);
490 	if (str_is_ascii(*primary_path)) {
491 		file->mode |= FileIsAsciiPrintPath;
492 		if (primary_path != &file->print_path) {
493 			file->print_path = *primary_path;
494 			file->native_path = NULL;
495 			primary_path = &file->print_path;
496 		}
497 	}
498 	return handle_rest_of_path_flags(*primary_path, flags);
499 #else
500 	if (!file->print_path && !file->real_path)
501 		errno = EINVAL;
502 	if (!file->print_path && (flags & FileMaskUpdatePrintPath))
503 		file->print_path = rsh_strdup(file->real_path);
504 	return handle_rest_of_path_flags((file->print_path ? file->print_path : file->real_path), flags);
505 #endif
506 }
507 
508 /**
509  * Free the memory allocated by the fields of the file_t structure.
510  *
511  * @param file the file_t structure to clean
512  */
file_cleanup(file_t * file)513 void file_cleanup(file_t* file)
514 {
515 	if (!(file->mode & FileDontFreeRealPath))
516 		free(file->real_path);
517 	file->real_path = NULL;
518 	if (!(file->mode & FileDontFreePrintPath))
519 		free((char*)file->print_path);
520 	file->print_path = NULL;
521 
522 #ifdef _WIN32
523 	if ((file->mode & FileDontFreeNativePath) == 0)
524 		free((char*)file->native_path);
525 	file->native_path = NULL;
526 #endif /* _WIN32 */
527 
528 	free(file->data);
529 	file->data = NULL;
530 	file->mtime = 0;
531 	file->size = 0;
532 	file->mode = 0;
533 }
534 
535 /**
536  * Clone existing file_t structure to another.
537  *
538  * @param file the file_t structure to clone to
539  * @param orig_file the file to clone
540  */
file_clone(file_t * file,const file_t * orig_file)541 void file_clone(file_t* file, const file_t* orig_file)
542 {
543 	memset(file, 0, sizeof(*file));
544 	file->mode = orig_file->mode & FileMaskModeBits;
545 	if (orig_file->real_path)
546 		file->real_path = rsh_tstrdup(orig_file->real_path);
547 	if (orig_file->print_path)
548 		file->print_path = rsh_strdup(orig_file->print_path);
549 #ifdef _WIN32
550 	if (orig_file->native_path)
551 		file->native_path = rsh_strdup(orig_file->native_path);
552 #endif
553 }
554 
555 /**
556  * Swap members of two file_t structures.
557  *
558  * @param first the first file
559  * @param second the second file
560  */
file_swap(file_t * first,file_t * second)561 void file_swap(file_t* first, file_t* second)
562 {
563 	file_t tmp;
564 	memcpy(&tmp, first, sizeof(file_t));
565 	memcpy(first, second, sizeof(file_t));
566 	memcpy(second, &tmp, sizeof(file_t));
567 }
568 
569 /**
570  * Get a modified file path.
571  *
572  * @param path the file path to modify
573  * @param str the string to insert into/append to the source file path
574  * @param operation the operation determinating how to modify the file path, can be one of the values
575  *                  FModifyAppendSuffix, FModifyInsertBeforeExtension, FModifyRemoveExtension, FModifyGetParentDir
576  * @return allocated and modified file path on success, NULL on fail
577  */
get_modified_path(const char * path,const char * str,int operation)578 static char* get_modified_path(const char* path, const char* str, int operation)
579 {
580 	size_t start_pos = (size_t)-1;
581 	size_t end_pos = (size_t)-1;
582 	if (!path)
583 		return NULL;
584 	if (operation != FModifyAppendSuffix) {
585 		if (operation == FModifyGetParentDir) {
586 			end_pos = strlen(path);
587 			start_pos = (end_pos > 0 ? end_pos - 1 : 0);
588 			for (; start_pos > 0 && !IS_ANY_SLASH(path[start_pos]); start_pos--);
589 			if (start_pos == 0 && !IS_ANY_SLASH(path[start_pos]))
590 				return rsh_strdup(".");
591 			for (; start_pos > 0 && IS_ANY_SLASH(path[start_pos]); start_pos--);
592 			start_pos++;
593 		} else {
594 			char* point = strrchr(path, '.');
595 			if (!point)
596 				return NULL;
597 			start_pos = point - path;
598 			if (operation == FModifyInsertBeforeExtension)
599 				end_pos = start_pos;
600 		}
601 	}
602 	return str_replace_n(path, start_pos, end_pos, str);
603 }
604 
605 #ifdef _WIN32
606 /**
607  * Get a modified file path.
608  *
609  * @param path the file path to modify
610  * @param str the string to insert into/append to the source file path
611  * @param operation the operation determinating how to modify the file path, can be one of the values
612  *                  FModifyAppendSuffix, FModifyInsertBeforeExtension, FModifyRemoveExtension, FModifyGetParentDir
613  * @return allocated and modified file path on success, NULL on fail
614  */
get_modified_tpath(ctpath_t path,const char * str,int operation)615 static tpath_t get_modified_tpath(ctpath_t path, const char* str, int operation)
616 {
617 	size_t start_pos = (size_t)-1;
618 	size_t end_pos = (size_t)-1;
619 	if (!path)
620 		return NULL;
621 	if (operation != FModifyAppendSuffix) {
622 		if (operation == FModifyGetParentDir) {
623 			end_pos = wcslen(path);
624 			start_pos = (end_pos > 0 ? end_pos - 1 : 0);
625 			for (; start_pos > 0 && !IS_ANY_TSLASH(path[start_pos]); start_pos--);
626 			if (start_pos == 0 && !IS_ANY_TSLASH(path[start_pos]))
627 				return rsh_wcsdup(L".");
628 			for (; start_pos > 0 && IS_ANY_TSLASH(path[start_pos]); start_pos--);
629 			start_pos++;
630 		} else {
631 			rsh_tchar* point = wcsrchr(path, L'.');
632 			if (!point)
633 				return NULL;
634 			start_pos = point - path;
635 			if (operation == FModifyInsertBeforeExtension)
636 				end_pos = start_pos;
637 		}
638 	}
639 	return wcs_replace_n(path, start_pos, end_pos, str);
640 }
641 #else
642 # define get_modified_tpath get_modified_path
643 #endif
644 
645 /**
646  * Initialize a (destination) file by modifying the path of another (source) file.
647  *
648  * @param dst destination file
649  * @param src source file
650  * @param str the string to insert into/append to the source file path
651  * @param operation the operation to do on src file, can be one of the values
652  *                  FModifyAppendSuffix, FModifyInsertBeforeExtension, FModifyRemoveExtension, FModifyGetParentDir
653  * @return 0 on success, -1 on fail
654  */
file_modify_path(file_t * dst,file_t * src,const char * str,int operation)655 int file_modify_path(file_t* dst, file_t* src, const char* str, int operation)
656 {
657 	if ((src->mode & (FileIsStdStream | FileIsData)) != 0)
658 		return -1;
659 	assert(operation == FModifyRemoveExtension || operation == FModifyGetParentDir || str);
660 	assert(operation == FModifyAppendSuffix || operation == FModifyInsertBeforeExtension || !str);
661 	memcpy(dst, src, sizeof(file_t));
662 	dst->mode &= ~FileMemoryModeMask;
663 	dst->print_path = NULL;
664 	IF_WINDOWS(dst->native_path = NULL);
665 	dst->real_path = get_modified_tpath(src->real_path, str, operation);
666 	if (!dst->real_path)
667 		return -1;
668 	dst->print_path = get_modified_path(src->print_path, str, operation);
669 	IF_WINDOWS(dst->native_path = get_modified_path(src->native_path, str, operation));
670 	return 0;
671 }
672 
673 #ifdef _WIN32
674 /**
675  * Retrieve file information (type, size, mtime) into file_t fields.
676  *
677  * @param file the file information
678  * @return 0 on success, -1 on fail with error code stored in errno
679  */
file_statw(file_t * file)680 static int file_statw(file_t* file)
681 {
682 	WIN32_FILE_ATTRIBUTE_DATA data;
683 
684 	/* read file attributes */
685 	if (GetFileAttributesExW(file->real_path, GetFileExInfoStandard, &data)) {
686 		uint64_t u;
687 		file->size  = (((uint64_t)data.nFileSizeHigh) << 32) + data.nFileSizeLow;
688 		file->mode |= (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ? FileIsDir : FileIsReg);
689 		if ((data.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0)
690 			file->mode |= FileIsLnk;
691 
692 		/* the number of 100-nanosecond intervals since January 1, 1601 */
693 		u = (((uint64_t)data.ftLastWriteTime.dwHighDateTime) << 32) + data.ftLastWriteTime.dwLowDateTime;
694 		/* convert to seconds and subtract the epoch difference */
695 		file->mtime = u / 10000000 - 11644473600LL;
696 		return 0;
697 	}
698 	file->mode |= FileIsInaccessible;
699 	set_errno_from_last_file_error();
700 	return -1;
701 }
702 #endif
703 
704 /**
705  * Retrieve file information (type, size, mtime) into file_t fields.
706  *
707  * @param file the file information
708  * @param fstat_flags bitmask consisting of FileStatModes bits
709  * @return 0 on success, -1 on fail with error code stored in errno
710  */
file_stat(file_t * file,int fstat_flags)711 int file_stat(file_t* file, int fstat_flags)
712 {
713 #ifdef _WIN32
714 	(void)fstat_flags; /* ignore on windows */
715 #else
716 	struct stat st;
717 #endif
718 	file->size  = 0;
719 	file->mtime = 0;
720 	file->mode &= ~FileMaskStatBits;
721 	if (FILE_ISDATA(file) || FILE_ISSTDSTREAM(file))
722 		return 0;
723 	else if (!file->real_path) {
724 		file->mode |= FileIsInaccessible;
725 		errno = EINVAL;
726 		return -1;
727 	}
728 #ifdef _WIN32
729 	return file_statw(file);
730 #else
731 	if (stat(file->real_path, &st)) {
732 		file->mode |= FileIsInaccessible;
733 		return -1;
734 	}
735 	file->size  = st.st_size;
736 	file->mtime = st.st_mtime;
737 
738 	if (S_ISDIR(st.st_mode)) {
739 		file->mode |= FileIsDir;
740 	} else if (S_ISREG(st.st_mode)) {
741 		/* it's a regular file or a symlink pointing to a regular file */
742 		file->mode |= FileIsReg;
743 	}
744 
745 	if ((fstat_flags & FUseLstat) && lstat(file->real_path, &st) == 0) {
746 		if (S_ISLNK(st.st_mode))
747 			file->mode |= FileIsLnk; /* it's a symlink */
748 	}
749 	return 0;
750 #endif
751 }
752 
753 /**
754  * Open the file and return its decriptor.
755  *
756  * @param file the file information, including the path
757  * @param fopen_flags bitmask consisting of FileFOpenModes bits
758  * @return file descriptor on success, NULL on fail with error code stored in errno
759  */
file_fopen(file_t * file,int fopen_flags)760 FILE* file_fopen(file_t* file, int fopen_flags)
761 {
762 	const file_tchar* possible_modes[8] = { 0, RSH_T("r"), RSH_T("w"), RSH_T("r+"),
763 		0, RSH_T("rb"), RSH_T("wb"), RSH_T("r+b") };
764 	const file_tchar* mode = possible_modes[fopen_flags & FOpenMask];
765 	FILE* fd;
766 	assert((fopen_flags & FOpenRW) != 0);
767 	if (!file->real_path) {
768 		errno = EINVAL;
769 		return NULL;
770 	}
771 #ifdef _WIN32
772 	{
773 		fd = _wfsopen(file->real_path, mode, _SH_DENYNO);
774 		if (!fd && errno == EINVAL)
775 			errno = ENOENT;
776 		return fd;
777 	}
778 #else
779 	fd = fopen(file->real_path, mode);
780 # if _POSIX_C_SOURCE >= 200112L && !defined(__STRICT_ANSI__)
781 	if(fd)
782 		posix_fadvise(fileno(fd), 0, 0, POSIX_FADV_SEQUENTIAL);
783 # endif /* _POSIX_C_SOURCE >= 200112L && !defined(__STRICT_ANSI__) */
784 	return fd;
785 #endif
786 }
787 
788 /**
789  * Rename or move the file. The source and destination paths should be on the same device.
790  *
791  * @param from the source file
792  * @param to the destination path
793  * @return 0 on success, -1 on fail with error code stored in errno
794  */
file_rename(const file_t * from,const file_t * to)795 int file_rename(const file_t* from, const file_t* to)
796 {
797 #ifdef _WIN32
798 	if (!from->real_path || !to->real_path) {
799 		errno = EINVAL;
800 		return -1;
801 	}
802 	/* Windows: file must be removed before overwriting it */
803 	_wunlink(to->real_path);
804 	return _wrename(from->real_path, to->real_path);
805 #else
806 	return rename(from->real_path, to->real_path);
807 #endif
808 }
809 
810 /**
811  * Rename a given file to *.bak, if it exists.
812  *
813  * @param file the file to move
814  * @return 0 on success, -1 on fail with error code stored in errno
815  */
file_move_to_bak(file_t * file)816 int file_move_to_bak(file_t* file)
817 {
818 	if (file_stat(file, 0) >= 0) {
819 		int res;
820 		int save_errno;
821 		file_t bak_file;
822 		file_modify_path(&bak_file, file, ".bak", FModifyAppendSuffix);
823 		res = file_rename(file, &bak_file);
824 		save_errno = errno;
825 		file_cleanup(&bak_file);
826 		if (res < 0)
827 			errno = save_errno;
828 		return res;
829 	}
830 	return -1;
831 }
832 
833 #ifdef _WIN32
834 /**
835  * Check if the specified path points to a readable file.
836  *
837  * @param real_path file path
838  * @param is_readable pointer to the result, it is set to 1, if the file is readable, to 0 otherwise
839  * @return 1 if the file with such path exists, 0 otherwise
840  */
real_path_is_readable(wchar_t * real_path,int * is_readable)841 static int real_path_is_readable(wchar_t* real_path, int* is_readable)
842 {
843 	/* note: using _wsopen, since _waccess doesn't check permissions */
844 	int fd = _wsopen(real_path, _O_RDONLY | _O_BINARY, _SH_DENYNO);
845 	*is_readable = (fd >= 0);
846 	if (fd >= 0) {
847 		_close(fd);
848 		return 1;
849 	}
850 	return (errno == EACCES);
851 }
852 #endif
853 
854 /**
855  * Check if the given file can't be opened for reading.
856  *
857  * @param file the file
858  * @return 1 if the file can be opened for reading, 0 otherwise
859  */
file_is_readable(file_t * file)860 int file_is_readable(file_t* file)
861 {
862 #ifdef _WIN32
863 	if (file->real_path) {
864 		int is_readable;
865 		(void)real_path_is_readable(file->real_path, &is_readable);
866 		return is_readable;
867 	}
868 	return 0;
869 #else
870 	return (access(file->real_path, R_OK) == 0);
871 #endif
872 }
873 
874 
875 /*=========================================================================
876  * file-list functions
877  *=========================================================================*/
878 
879 /**
880  * Open a file, containing a list of file paths, to iterate over those paths
881  * using the file_list_read() function.
882  *
883  * @param list the file_list_t structure to initialize
884  * @param file the file to open
885  * @return 0 on success, -1 on fail with error code stored in errno
886  */
file_list_open(file_list_t * list,file_t * file)887 int file_list_open(file_list_t* list, file_t* file)
888 {
889 	memset(list, 0, sizeof(file_list_t));
890 	if (FILE_ISSTDIN(file)) {
891 		list->fd = stdin;
892 		return 0;
893 	}
894 	list->fd = file_fopen(file, FOpenRead | FOpenBin);
895 	return (list->fd ? 0 : -1);
896 }
897 
898 /**
899  * Close file_list_t and free allocated memory.
900  */
file_list_close(file_list_t * list)901 void file_list_close(file_list_t* list)
902 {
903 	if (list->fd) {
904 		fclose(list->fd);
905 		list->fd = 0;
906 	}
907 	file_cleanup(&list->current_file);
908 }
909 
910 enum FileListStateBits {
911 	NotFirstLine = 1,
912 	FileListHasBom = FileInitUtf8PrintPath
913 };
914 
915 /**
916  * Iterate over file list.
917  *
918  * @param list the file list to iterate over
919  * @return 1 if the next file has been obtained, 0 on EOF or error
920  */
file_list_read(file_list_t * list)921 int file_list_read(file_list_t* list)
922 {
923 	char buf[2048];
924 	file_cleanup(&list->current_file);
925 	while(fgets(buf, 2048, list->fd)) {
926 		char* p;
927 		char* line = buf;
928 		char* buf_back = buf + sizeof(buf) - 1;
929 		/* detect and skip BOM */
930 		if (STARTS_WITH_UTF8_BOM(buf)) {
931 			line += 3;
932 			if (!(list->state & NotFirstLine))
933 				list->state |= FileListHasBom;
934 		}
935 		list->state |= NotFirstLine;
936 		for (p = line; p < buf_back && *p && *p != '\r' && *p != '\n'; p++);
937 		*p = 0;
938 		if (*line == '\0')
939 			continue; /* skip empty lines */
940 		file_init_by_print_path(&list->current_file, NULL, line,
941 			(list->state & FileInitUtf8PrintPath) | FileInitRunFstat);
942 		return 1;
943 	}
944 	return 0;
945 }
946 
947 /****************************************************************************
948  *                           Directory functions                            *
949  ****************************************************************************/
950 #ifdef _WIN32
/* Directory iterator used by win_opendir()/win_wopendir(), win_readdir() and win_closedir() */
struct WIN_DIR_t
{
	/* data of the most recently found directory entry, filled by FindFirstFileW/FindNextFileW */
	WIN32_FIND_DATAW findFileData;
	/* search handle returned by FindFirstFileW, INVALID_HANDLE_VALUE if the search failed */
	HANDLE hFind;
	/* the entry returned to the caller by win_readdir() */
	struct win_dirent dir;
	/* 0 - the first entry is found but not yet consumed, -1 - the listing has ended,
	 * >0 - the number of entries fetched so far (win_readdir increments it per entry) */
	int state;
};
958 
959 /**
960  * Open directory iterator for reading the directory content.
961  *
962  * @param dir_path directory path
963  * @return pointer to directory stream, NULL on fail with error code stored in errno
964  */
win_opendir(const char * dir_path)965 WIN_DIR* win_opendir(const char* dir_path)
966 {
967 	WIN_DIR* d;
968 	wchar_t* real_path;
969 
970 	/* append '\*' to the dir_path */
971 	size_t len = strlen(dir_path);
972 	char* path = (char*)malloc(len + 3);
973 	if (!path) return NULL; /* failed, malloc also set errno = ENOMEM */
974 	strcpy(path, dir_path);
975 	strcpy(path + len, "\\*");
976 
977 	d = (WIN_DIR*)malloc(sizeof(WIN_DIR));
978 	if (!d) {
979 		free(path);
980 		return NULL;
981 	}
982 	memset(d, 0, sizeof(WIN_DIR));
983 
984 	real_path = convert_str_to_wcs(path, (ConvertToPrimaryEncoding | ConvertExact | ConvertPath));
985 	d->hFind = (real_path != NULL ?
986 		FindFirstFileW(real_path, &d->findFileData) : INVALID_HANDLE_VALUE);
987 	free(real_path);
988 
989 	if (d->hFind == INVALID_HANDLE_VALUE && GetLastError() != ERROR_ACCESS_DENIED) {
990 		/* try the secondary codepage */
991 		real_path = convert_str_to_wcs(path, (ConvertToSecondaryEncoding | ConvertExact | ConvertPath));
992 		if (real_path) {
993 			d->hFind = FindFirstFileW(real_path, &d->findFileData);
994 			free(real_path);
995 		}
996 	}
997 	free(path);
998 
999 	if (d->hFind == INVALID_HANDLE_VALUE && GetLastError() == ERROR_ACCESS_DENIED) {
1000 		free(d);
1001 		errno = EACCES;
1002 		return NULL;
1003 	}
1004 	set_errno_from_last_file_error();
1005 
1006 	d->state = (d->hFind == INVALID_HANDLE_VALUE ? -1 : 0);
1007 	d->dir.d_name = NULL;
1008 	return d;
1009 }
1010 
1011 /**
1012  * Open a directory for reading its content.
1013  * For simplicity the function supposes that dir_path points to an
1014  * existing directory and doesn't check for this error.
1015  * The Unicode version of the function.
1016  *
1017  * @param dir_path directory path
1018  * @return pointer to directory iterator
1019  */
win_wopendir(const wchar_t * dir_path)1020 WIN_DIR* win_wopendir(const wchar_t* dir_path)
1021 {
1022 	WIN_DIR* d;
1023 
1024 	/* append '\*' to the dir_path */
1025 	wchar_t* real_path = make_wpath_unc(dir_path, L"*");
1026 	d = (WIN_DIR*)rsh_malloc(sizeof(WIN_DIR));
1027 
1028 	d->hFind = FindFirstFileW(real_path, &d->findFileData);
1029 	free(real_path);
1030 	if (d->hFind == INVALID_HANDLE_VALUE && GetLastError() == ERROR_ACCESS_DENIED) {
1031 		free(d);
1032 		errno = EACCES;
1033 		return NULL;
1034 	}
1035 
1036 	/* note: we suppose if INVALID_HANDLE_VALUE was returned, then the file listing is empty */
1037 	d->state = (d->hFind == INVALID_HANDLE_VALUE ? -1 : 0);
1038 	d->dir.d_name = NULL;
1039 	return d;
1040 }
1041 
1042 /**
1043  * Close a directory iterator.
1044  *
1045  * @param d pointer to the directory iterator
1046  */
win_closedir(WIN_DIR * d)1047 void win_closedir(WIN_DIR* d)
1048 {
1049 	if (d->hFind != INVALID_HANDLE_VALUE) {
1050 		FindClose(d->hFind);
1051 	}
1052 	free(d->dir.d_name);
1053 	free(d);
1054 }
1055 
1056 /**
1057  * Read a directory content.
1058  *
1059  * @param d pointer to the directory iterator
1060  * @return directory entry or NULL if no entries left
1061  */
win_readdir(WIN_DIR * d)1062 struct win_dirent* win_readdir(WIN_DIR* d)
1063 {
1064 	char* filename;
1065 
1066 	if (d->state == -1) return NULL;
1067 	if (d->dir.d_name != NULL) {
1068 		free(d->dir.d_name);
1069 		d->dir.d_name = NULL;
1070 	}
1071 
1072 	for (;;) {
1073 		if (d->state > 0) {
1074 			if ( !FindNextFileW(d->hFind, &d->findFileData) ) {
1075 				/* the directory listing has ended */
1076 				d->state = -1;
1077 				return NULL;
1078 			}
1079 		}
1080 		d->state++;
1081 
1082 		if (d->findFileData.cFileName[0] == L'.' && (d->findFileData.cFileName[1] == 0 ||
1083 				(d->findFileData.cFileName[1] == L'.' && d->findFileData.cFileName[2] == 0)))
1084 			continue; /* simplified implementation, skips '.' and '..' names */
1085 
1086 		d->dir.d_name = filename = convert_wcs_to_str(d->findFileData.cFileName, (ConvertToPrimaryEncoding | ConvertExact));
1087 		if (filename) {
1088 			d->dir.d_wname = d->findFileData.cFileName;
1089 			d->dir.d_isdir = (0 != (d->findFileData.dwFileAttributes &
1090 				FILE_ATTRIBUTE_DIRECTORY));
1091 			return &d->dir;
1092 		}
1093 		/* quietly skip the file and repeat the search, if filename conversion failed */
1094 	}
1095 }
1096 #endif /* _WIN32 */
1097 
1098 #ifdef __cplusplus
1099 } /* extern "C" */
1100 #endif /* __cplusplus */
1101 
1102