1 /*
2  * Copyright 2011 kubtek <kubtek@mail.com>
3  *
4  * This file is part of StarDict.
5  *
6  * StarDict is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * StarDict is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #endif
23 
24 #include <cstring>
25 #include <cstdlib>
26 #include <vector>
27 #include <sstream>
28 #include <glib.h>
29 #include <glib/gstdio.h>
30 #include <unistd.h>
31 #include "libcommon.h"
32 #ifdef _WIN32
33 #  include <Shlwapi.h>
34 #endif
35 
/* NULL-terminated table of the resource type names that
is_known_resource_type() accepts. The NULL entry marks the end of the table. */
const char* known_resource_types[] = {
	"img",
	"snd",
	"vdo",
	"att",
	NULL
};
43 
stardict_strcmp(const gchar * s1,const gchar * s2)44 gint stardict_strcmp(const gchar *s1, const gchar *s2)
45 {
46 	gint a;
47 	a = g_ascii_strcasecmp(s1, s2);
48 	if (a == 0)
49 		return strcmp(s1, s2);
50 	else
51 		return a;
52 }
53 
file_name_to_utf8(const std::string & str,std::string & out)54 bool file_name_to_utf8(const std::string& str, std::string& out)
55 {
56 	size_t len = str.length();
57 	gsize bytes_read, bytes_written;
58 	glib::CharStr gstr(g_filename_to_utf8(str.c_str(), len, &bytes_read,
59 		&bytes_written, NULL));
60 	if(!gstr || bytes_read != len) {
61 		g_error("Unable to convert string %s into utf-8 encoding", str.c_str());
62 		return false;
63 	}
64 	out = get_impl(gstr);
65 	return true;
66 }
67 
utf8_to_file_name(const std::string & str,std::string & out)68 bool utf8_to_file_name(const std::string& str, std::string& out)
69 {
70 	size_t len = str.length();
71 	gsize bytes_read, bytes_written;
72 	glib::CharStr gstr(g_filename_from_utf8(str.c_str(), len, &bytes_read,
73 		&bytes_written, NULL));
74 	if(!gstr || bytes_read != len) {
75 		g_error("Unable to convert utf8 string %s into file name encoding", str.c_str());
76 		return false;
77 	}
78 	out = get_impl(gstr);
79 	return true;
80 }
81 
82 #ifdef _WIN32
utf8_to_windows(const std::string & str_utf8,std_win_string & out)83 bool utf8_to_windows(const std::string& str_utf8, std_win_string& out)
84 {
85 #ifdef UNICODE
86 	const int buf_size = MultiByteToWideChar(
87 		CP_UTF8, //__in   UINT CodePage,
88 		0, //__in   DWORD dwFlags,
89 		str_utf8.c_str(), //__in   LPCSTR lpMultiByteStr,
90 		-1, //__in   int cbMultiByte,
91 		NULL, // __out  LPWSTR lpWideCharStr,
92 		0 //__in   int cchWideChar
93 	);
94 	if(buf_size == 0) {
95 		g_warning("Unable to convert from utf-8 to windows encoding. String: %s",
96 			str_utf8.c_str());
97 		return false;
98 	}
99 	std::vector<TCHAR> buf(buf_size);
100 	const int char_num = MultiByteToWideChar(
101 		CP_UTF8, //__in   UINT CodePage,
102 		0, //__in   DWORD dwFlags,
103 		str_utf8.c_str(), //__in   LPCSTR lpMultiByteStr,
104 		-1, //__in   int cbMultiByte,
105 		&buf[0], // __out  LPWSTR lpWideCharStr,
106 		buf_size //__in   int cchWideChar
107 	);
108 	if(char_num != buf_size) {
109 		g_warning("Unable to convert from utf-8 to windows encoding. String: %s",
110 			str_utf8.c_str());
111 		return false;
112 	}
113 	out = &buf[0];
114 	return true;
115 #else
116 	glib::Error err;
117 	gchar* tmp = g_locale_from_utf8(str_utf8.c_str(), -1, NULL, NULL, get_addr(err));
118 	if(!tmp) {
119 		g_warning("Unable to convert from utf-8 to windows encoding: %s", err->message);
120 		return false;
121 	}
122 	out = tmp;
123 	g_free(tmp);
124 	return true;
125 #endif
126 }
127 
windows_to_utf8(const std_win_string & str,std::string & out_utf8)128 bool windows_to_utf8(const std_win_string& str, std::string& out_utf8)
129 {
130 #ifdef UNICODE
131 	const int buf_size = WideCharToMultiByte(
132 		CP_UTF8, // __in   UINT CodePage,
133 		0, // __in   DWORD dwFlags,
134 		str.c_str(), // __in   LPCWSTR lpWideCharStr,
135 		-1, // __in   int cchWideChar,
136 		NULL, // __out  LPSTR lpMultiByteStr,
137 		0, // __in   int cbMultiByte,
138 		NULL, // __in   LPCSTR lpDefaultChar,
139 		NULL //__out  LPBOOL lpUsedDefaultChar
140 	);
141 	if(buf_size == 0) {
142 		g_warning("Unable to convert from windows encoding to utf-8.");
143 		return false;
144 	}
145 	std::vector<char> buf(buf_size);
146 	const int char_num = WideCharToMultiByte(
147 		CP_UTF8, // __in   UINT CodePage,
148 		0, // __in   DWORD dwFlags,
149 		str.c_str(), // __in   LPCWSTR lpWideCharStr,
150 		-1, // __in   int cchWideChar,
151 		&buf[0], // __out  LPSTR lpMultiByteStr,
152 		buf_size, // __in   int cbMultiByte,
153 		NULL, // __in   LPCSTR lpDefaultChar,
154 		NULL //__out  LPBOOL lpUsedDefaultChar
155 	);
156 	if(char_num != buf_size) {
157 		g_warning("Unable to convert from windows encoding to utf-8.");
158 		return false;
159 	}
160 	out_utf8 = &buf[0];
161 	return true;
162 #else
163 	glib::Error err;
164 	gchar* tmp = g_locale_to_utf8(str.c_str(), -1, NULL, NULL, get_addr(err));
165 	if(!tmp) {
166 		g_warning("Unable to convert from windows encoding to utf-8: %s", err->message);
167 		return false;
168 	}
169 	out_utf8 = tmp;
170 	g_free(tmp);
171 	return true;
172 #endif
173 }
174 
/* Returns a pointer to the first char after the root component of a path.
If str is like "c:\path\...", the result points after the "c:\".
If str is like "\\server\path\...", the result points after the "\\server\".
If str is like "\\server", the result points to the terminating '\0'.
If str is like "\dir\dir", the result points after the leading "\".
Otherwise str is considered to have no root component and str is returned.
The function returns NULL if str is NULL or the path is invalid
("\\" alone or an empty server name).
T is either "char" or "const char". */
template<class T>
T* path_root_end_win(T* str)
{
	if(!str)
		return NULL;
	if(g_ascii_isalpha(str[0]) && str[1] == ':' && str[2] == '\\')
		return str + 3;
	else if(str[0] == '\\' && str[1] == '\\') {
		if(str[2] == '\0') // "\\" - invalid path
			return NULL;
		/* T*, not char*: when T is "const char" strchr returns
		a const char*, which cannot initialize a char*. */
		T* p = strchr(str+2, '\\');
		if(p) {
			if(p == str+2) // "\\\..." - empty server - invalid path
				return NULL;
			return p + 1;
		} else { // str is "\\server"
			return strchr(str, '\0');
		}
	} else if(str[0] == '\\') { // single leading backslash
		return str + 1;
	}
	return str;
}
207 
208 /* The same as path_root_end_win but for wide chars */
209 template<class T>
path_root_end_winW(T * str)210 T* path_root_end_winW(T* str)
211 {
212 	if(!str)
213 		return NULL;
214 	if(is_ascii_alpha(str[0]) && str[1] == L':' && str[2] == L'\\')
215 		return str + 3;
216 	else if(str[0] == L'\\' && str[1] == L'\\') {
217 		if(str[2] == L'\0') // "\\" - invalid path
218 			return NULL;
219 		T* p = StrChr(str+2, L'\\');
220 		if(p) {
221 			if(p == str+2) // "\\\..." - empty server - invalid path
222 				return NULL;
223 			return p + 1;
224 		} else { // str is "\\server"
225 			return StrChr(str, L'\0');
226 		}
227 	} else if(str[0] == L'\\' && str[1] != L'\\') {
228 		return str + 1;
229 	}
230 	return str;
231 }
232 
233 /* normalize path - resolve relative components in a path.
234 For example, path "c:\dir1\dir2\..\file" is converted to "c:\dir1\file".
235 This function accepts the following paths:
236 - an absolute path starting with disk name: "c:\", "c:\file", "c:\dir\file", ...
237 	("c:" is not allowed)
238 - an absolute path without disk: "\dir\file", ...
239 - UNC name: "\\server", "\\server\dir", ...
240 - relative path: "dir", "dir\dir\file", ...
241 ##- a relative path starting with the current directory component ".": ".\", ".\dir", ...
242 
243 A reference to the parent of the root directory is considered an error.
244 For example, these paths are considered invalid: "c:\..\dir1\file",
245 "\\..\path\file", "\..\dir".
246 If the path is relative, this function may leave references to the parent directory
247 if they cannot be resolved in the path given.
248 For example, "dir\..\..\..\dir2\dir3" is converted to "..\..\dir2\dir3".
249 Strip "." components.
250 
251 If after all transformations we get an empty string,
252 replace it with the current directory reference, that is '.'.
253 Empty string is not a valid path.
254 For example, we get an empty path for "abcd\.." and "abcd\..\".
255 If the original path is not blank and it ends on backslash,
256 append backslash to the '.'. That is:
257 "abcd\.." -> "."
258 "abcd\..\' -> ".\"
259 
260 Return value: EXIT_FAILURE or EXIT_SUCCESS. */
norm_path_win(const std::string & path,std::string & result)261 int norm_path_win(const std::string& path, std::string& result)
262 {
263 	result.clear();
264 	/* std::vector will free the allocated memory block
265 	when this function returns.
266 	+ 3 - make sure that the buffer contains at least 3 chars,
267 	that prevents buffer overread in the some checks.
268 	+ 1 - terminating '\0' */
269 	std::vector<char> buf(path.length() + 3 + 1);
270 	char* str = &buf[0];
271 	// end of string  - terminating '\0'
272 	char* str_end = g_stpcpy(str, path.c_str());
273 	char* root_end = path_root_end_win(str);
274 	if(!root_end)
275 		return EXIT_FAILURE;
276 	/*
277 	if(root_end == str && str[0] == '.' && (str[1] == '\\' || str[1] == '\0')) {
278 		if(str[1] == '\0')
279 			str += 1;
280 		else
281 			str += 2;
282 		root_end = str;
283 	}
284 	*/
285 	// if(str == root_end) - relative path
286 	/*p1 and p2 points to the first char of a path component,
287 	the previous char is normally '\\'.
288 	In each step p2 moves to the next path component.
289 	p1 normally moves forward as well, unless a parent directory reference
290 	is encontered, then p1 moves back. */
291 	char * p1 = root_end;
292 	char * p2 = root_end;
293 	while(p2 < str_end) {
294 		char *p = strchr(p2, '\\');
295 		if(!p)
296 			p = str_end;
297 		// [p2, p) - path component
298 		if(p == p2) // empty path component - error
299 			return EXIT_FAILURE;
300 		if(p2[0] == '.' && p2[1] == '.' && p2 + 2 == p) { // parent directory
301 			if(p1 == root_end) { // no component to strip
302 				if(str == root_end) { // relative path
303 					if(p1 != p2) {
304 						p1[0] = '.';
305 						p1[1] = '.';
306 						p1[2] = *p;
307 					}
308 					size_t len = p + 1 - p2;
309 					p1 += len;
310 					p2 += len;
311 				} else { // absolute path
312 					return EXIT_FAILURE; // error
313 				}
314 			} else { // search a component to strip
315 				char *p3 = strrchr_len(root_end, p1 - 1 - root_end, '\\');
316 				if(!p3)
317 					p3 = root_end;
318 				else
319 					++p3;
320 				// p3 - beginning of the privious to p1 path component
321 				if(p3[0] == '.' && p3[1] == '.' && p3[2] == '\\') {
322 					g_assert(str == root_end);
323 					// the previous component is "..", it cannot be stripped
324 					if(p1 != p2) {
325 						p1[0] = '.';
326 						p1[1] = '.';
327 						p1[2] = *p;
328 					}
329 					size_t len = p + 1 - p2;
330 					p1 += len;
331 					p2 += len;
332 				} else {
333 					p1 = p3;
334 					p2 = p + 1;
335 				}
336 			}
337 		} else if(p2[0] == '.' && p2 + 1 == p) { // strip "." component
338 			p2 = p + 1;
339 		} else { // normal directory
340 			if(p1 == p2) {
341 				p1 = p2 = p + 1;
342 			} else {
343 				size_t len = p + 1 - p2;
344 				strncpy(p1, p2, len);
345 				p1 += len;
346 				p2 += len;
347 			}
348 		}
349 	}
350 	/* p1[-1] == '\0' if the last char of the path is not '\\' */
351 	*p1 = '\0';
352 	if(str[0] == '\0') { // blank path
353 		str[0] = '.';
354 		if(!path.empty() && path[path.length()-1] == '\\') {
355 			str[1] = '\\';
356 			str[2] = '\0';
357 		} else
358 			str[1] = '\0';
359 	}
360 	result = str;
361 	return EXIT_SUCCESS;
362 }
363 
364 /* returns true if the path is absolute and false otherwise,
365 This function does not check that the path is valid
366 The following paths are accepted:
367 - an absolute path starting with disk name: "c:\", "c:\file", "c:\dir\file", ...
368 	("c:" is not allowed)
369 - an "absolute" path without disk: "\dir\file", ... - this path is considered relative!
370 - UNC name: "\\server", "\\server\dir", ...
371 -*/
is_absolute_path_win(const std::string & path)372 bool is_absolute_path_win(const std::string& path)
373 {
374 	const char* str = path.c_str();
375 	if(g_ascii_isalpha(str[0]) && str[1] == ':' && str[2] == '\\')
376 		return true;
377 	if(str[0] == '\\' && str[1] == '\\')
378 		return true;
379 	return false;
380 }
381 
382 /* applies a number of tests to the path
383 Returns true if all tests passed and false otherwise. */
is_valid_path_win(const std::string & path)384 bool is_valid_path_win(const std::string& path)
385 {
386 	const char* str = path.c_str();
387 	/* End of the path prefix.
388 	if "c:\abcd" then after "c:\"
389 	if "\\abcd" then after "\\"
390 	if "\abd" the after "\"
391 	otherwise this the first char of the string. */
392 	const char* prefix_end = str;
393 	if(g_ascii_isalpha(str[0]) && str[1] == ':' && str[2] == '\\')
394 		prefix_end = str + 3;
395 	else if(str[0] == '\\' && str[1] == '\\')
396 		prefix_end = str + 2;
397 	else if(str[0] == '\\')
398 		prefix_end = str + 1;
399 	if(prefix_end[0] == '\\')
400 		return false;
401 	if(strstr(prefix_end, "\\\\"))
402 		return false;
403 	if(strlen(prefix_end) != strcspn(prefix_end, "<>:\"/|?*"))
404 		return false;
405 	for(const char* p = prefix_end; *p; ++p)
406 		if((unsigned char)*p < 32)
407 			return false;
408 	return true;
409 }
410 
411 /* create a relative path from directory base_dir to file or dir path
412 base_dir and path must have a common prefix, for example,
413 "c:\dir1\dir2" and "c:\dir1\dir3\dir4" -> "..\dir3\dir4"
414 Return value: EXIT_FAILURE or EXIT_SUCCESS.
415 
PathRelativePathTo function gives strange results:
417 "c:\\dir", "c:\\dir", "..\\dir",
418 "c:\\dir\\", "c:\\dir", "..\\dir"
419 "c:\\dir\\", "c:\\dir\\", "",
420 "\\", "\\a\\", - fails!
421 
422 That is why I've decided to provide a custom implementation.
423 base_dir and path must be absolute paths!
424 */
425 #if 0
/* Variant of build_relative_path based on PathRelativePathToW.
It is compiled out by the enclosing #if 0.
rel_path is cleared first and receives the result on success.
Return value: EXIT_FAILURE or EXIT_SUCCESS. */
int build_relative_path(const std::string& base_dir, const std::string& path, std::string& rel_path)
{
	rel_path.clear();
	std_win_string base_dir_win;
	std_win_string path_win;
	if(!utf8_to_windows(base_dir, base_dir_win))
		return EXIT_FAILURE;
	if(!utf8_to_windows(path, path_win))
		return EXIT_FAILURE;
	// both input paths must be shorter than MAX_PATH
	if(base_dir_win.length() >= MAX_PATH)
		return EXIT_FAILURE;
	if(path_win.length() >= MAX_PATH)
		return EXIT_FAILURE;
	/* The output buffer must be at least MAX_PATH chars.
	How much space do we actually need? */
	wchar_t buf[MAX_PATH * 10];
	// path is treated as a file unless it ends with a backslash
	bool is_file = !path.empty() && path[path.length()-1] != '\\';
	if(!PathRelativePathToW(buf, base_dir_win.c_str(), FILE_ATTRIBUTE_DIRECTORY,
		path_win.c_str(), is_file ? 0 : FILE_ATTRIBUTE_DIRECTORY))
		return EXIT_FAILURE;
	// drop a leading ".\" or a lone "." from the result
	wchar_t * buf2 = buf;
	if(buf[0] == L'.' && buf[1] == L'\\')
		buf2 = buf + 2;
	else if(buf[0] == L'.' && buf[1] == L'\0')
		buf2 = buf + 1;
	if(!windows_to_utf8(buf2, rel_path))
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}
455 #endif
456 
/* Build a relative path leading from the directory base_dir to path.
Both arguments are UTF-8 strings, they are converted with utf8_to_windows.
Both must be non-empty and must have a root component
(as found by path_root_end_winW); the two roots must have the same length
and compare equal ignoring case.
rel_path is cleared first and receives the UTF-8 result on success.
Return value: EXIT_FAILURE or EXIT_SUCCESS. */
int build_relative_path(const std::string& base_dir, const std::string& path, std::string& rel_path)
{
	rel_path.clear();
	std_win_string base_dir_win;
	std_win_string path_win;
	if(!utf8_to_windows(base_dir, base_dir_win))
		return EXIT_FAILURE;
	if(!utf8_to_windows(path, path_win))
		return EXIT_FAILURE;
	if(base_dir_win.empty())
		return EXIT_FAILURE;
	if(path_win.empty())
		return EXIT_FAILURE;
	/* Make sure that both paths end with a backslash, that simplifies further processing.
	base_dir must be a directory, so adding a backslash won't hurt.
	path may be either a file or a directory */
	if(base_dir_win[base_dir_win.length()-1] != L'\\')
		base_dir_win += L'\\';
	if(path_win[path_win.length()-1] != L'\\')
		path_win += L'\\';
	const wchar_t* c_base_dir_win = base_dir_win.c_str();
	const wchar_t* c_path_win = path_win.c_str();
	const wchar_t* base_dir_win_root_end = path_root_end_winW(c_base_dir_win);
	const wchar_t* path_win_root_end = path_root_end_winW(c_path_win);
	// reject invalid paths (NULL) and paths without a root component
	if(!base_dir_win_root_end || base_dir_win_root_end == c_base_dir_win)
		return EXIT_FAILURE;
	if(!path_win_root_end || path_win_root_end == c_path_win)
		return EXIT_FAILURE;
	if(base_dir_win_root_end - c_base_dir_win != path_win_root_end - c_path_win)
		return EXIT_FAILURE; // different roots
	if(StrCmpNI(c_base_dir_win, c_path_win, base_dir_win_root_end - c_base_dir_win))
		return EXIT_FAILURE; // different roots
	/* p and q points to the end of the common part in base_dir_win and path_win respectively. */
	const wchar_t* p = base_dir_win_root_end;
	const wchar_t* q = path_win_root_end;
	/* Advance p and q one path component at a time
	while the components (including the trailing backslash)
	have the same length and compare equal ignoring case. */
	while(true)
	{
		const wchar_t* p2 = StrChr(p, L'\\');
		const wchar_t* q2 = StrChr(q, L'\\');
		if(!p2 || !q2)
			break;
		p2++;
		q2++;
		if(p2 - p != q2 - q)
			break;
		if(StrCmpNI(p, q, p2-p))
			break;
		p = p2;
		q = q2;
	}
	// found the longest common part
	/* calculate how many directories to strip from the base_dir
	== number of backslashes after p */
	int parent_cnt = 0;
	for(const wchar_t* r = StrChr(p, L'\\'); r; r = StrChr(r+1, L'\\'))
		++parent_cnt;
	// the result: one "..\" per directory to strip, then the rest of path after q
	std_win_string rel_path_win;
	rel_path_win.reserve(3 * parent_cnt + wcslen(q));
	for(int i=0; i<parent_cnt; ++i)
		rel_path_win.append(L"..\\");
	rel_path_win.append(q);
	/* What to do with the terminating backslash?
	Let make the relative path have the same ending as the path has.
	If rel_path is empty (this may occur if base_dir and path are indentical
	or differ in the terminating backslash only), leave it as is. */
	if(!rel_path_win.empty()) {
		if(rel_path_win[rel_path_win.length()-1] == L'\\' && path[path.length() - 1] != '\\')
			rel_path_win.resize(rel_path_win.length()-1);
		else if(rel_path_win[rel_path_win.length()-1] != L'\\' && path[path.length() - 1] == '\\')
			rel_path_win += L'\\';
	}
	if(!windows_to_utf8(rel_path_win, rel_path))
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}
532 
533 /* performs case-insensitive comparision of two paths
534 returns true if paths are equal */
is_equal_paths_win(const std::string & path1,const std::string & path2)535 bool is_equal_paths_win(const std::string& path1, const std::string& path2)
536 {
537 	std_win_string path1_win;
538 	std_win_string path2_win;
539 	if(!utf8_to_windows(path1, path1_win))
540 		return false;
541 	if(!utf8_to_windows(path2, path2_win))
542 		return false;
543 	return StrCmpI(path1_win.c_str(), path2_win.c_str()) == 0;
544 }
545 
is_path_end_with_win(const std::string & path,const std::string & suff)546 bool is_path_end_with_win(const std::string& path, const std::string& suff)
547 {
548 	std_win_string path_win;
549 	std_win_string suff_win;
550 	if(!utf8_to_windows(path, path_win))
551 		return false;
552 	if(!utf8_to_windows(suff, suff_win))
553 		return false;
554 	if(path_win.length() < suff_win.length())
555 		return false;
556 	size_t path_len = path_win.length();
557 	size_t suff_len = suff_win.length();
558 	return StrCmpI(path_win.substr(path_len - suff_len).c_str(), suff_win.c_str()) == 0;
559 }
560 
561 #endif // #ifdef _WIN32
562 
563 #if DB_DIR_SEPARATOR == G_DIR_SEPARATOR
564 #else
dir_separator_fs_to_db(const std::string & path)565 std::string dir_separator_fs_to_db(const std::string& path)
566 {
567 	std::string temp(path);
568 	const std::string::size_type len = temp.length();
569 	for(size_t i=0; i<len; ++i)
570 		if(temp[i] == G_DIR_SEPARATOR)
571 			temp[i] = DB_DIR_SEPARATOR;
572 	return temp;
573 }
574 
dir_separator_db_to_fs(const std::string & path)575 std::string dir_separator_db_to_fs(const std::string& path)
576 {
577 	std::string temp(path);
578 	const std::string::size_type len = temp.length();
579 	for(size_t i=0; i<len; ++i)
580 		if(temp[i] == DB_DIR_SEPARATOR)
581 			temp[i] = G_DIR_SEPARATOR;
582 	return temp;
583 }
584 #endif
585 
586 /* concatenate path1 and path2 inserting a path separator in between if needed. */
build_path(const std::string & path1,const std::string & path2)587 std::string build_path(const std::string& path1, const std::string& path2)
588 {
589 	std::string res;
590 	res.reserve(path1.length() + 1 + path2.length());
591 	res = path1;
592 	if(!res.empty() && res[res.length()-1] != G_DIR_SEPARATOR)
593 		res += G_DIR_SEPARATOR_S;
594 	if(!path2.empty() && path2[0] == G_DIR_SEPARATOR)
595 		res.append(path2, 1, std::string::npos);
596 	else
597 		res.append(path2);
598 	return res;
599 }
600 
unpack_zlib(const char * arch_file_name,const char * out_file_name)601 int unpack_zlib(const char* arch_file_name, const char* out_file_name)
602 {
603 	zip::gzFile in(gzopen(arch_file_name, "rb"));
604 	if(!in) {
605 		g_critical("Unable to open archive file: %s.", arch_file_name);
606 		return EXIT_FAILURE;
607 	}
608 	const size_t buffer_size = 1024*1024;
609 	std::vector<char> buffer(buffer_size);
610 	char* buf = &buffer[0];
611 	gulong len;
612 	clib::File out_file(g_fopen(out_file_name, "wb"));
613 	if(!out_file) {
614 		g_critical(open_write_file_err, out_file_name);
615 		return EXIT_FAILURE;
616 	}
617 	while(true) {
618 		len = gzread(get_impl(in), buf, buffer_size);
619 		if(len < 0) {
620 			g_critical(read_file_err, arch_file_name, "");
621 			return EXIT_FAILURE;
622 		}
623 		if(len == 0)
624 			break;
625 		if(1 != fwrite(buf, len, 1, get_impl(out_file))) {
626 			g_critical(write_file_err, out_file_name);
627 			return EXIT_FAILURE;
628 		}
629 	}
630 	return EXIT_SUCCESS;
631 }
632 
create_temp_file(void)633 const std::string& TempFile::create_temp_file(void)
634 {
635 	clear();
636 	file_name = ::create_temp_file();
637 	if(file_name.empty())
638 		g_critical(create_temp_file_no_name_err);
639 	return file_name;
640 }
641 
clear(void)642 void TempFile::clear(void)
643 {
644 	if(!file_name.empty()) {
645 		if(g_remove(file_name.c_str()))
646 			g_warning(remove_temp_file_err, file_name.c_str());
647 		file_name.clear();
648 	}
649 }
650 
create_temp_file(void)651 std::string create_temp_file(void)
652 {
653 #ifdef _WIN32
654 	/* g_file_open_tmp does not work reliably on Windows
655 	Use platform specific API here. */
656 	{
657 		UINT uRetVal   = 0;
658 		DWORD dwRetVal = 0;
659 		TCHAR szTempFileName[MAX_PATH];
660 		TCHAR lpTempPathBuffer[MAX_PATH];
661 		dwRetVal = GetTempPath(MAX_PATH, lpTempPathBuffer);
662 		if (dwRetVal > MAX_PATH || (dwRetVal == 0))
663 			return "";
664 
665 		uRetVal = GetTempFileName(lpTempPathBuffer, // directory for tmp files
666 			TEXT("temp"),     // temp file name prefix
667 			0,                // create unique name
668 			szTempFileName);  // buffer for name
669 		if (uRetVal == 0)
670 			return "";
671 		std::string tmp_url_utf8;
672 		std::string tmp_url;
673 		if(!windows_to_utf8(szTempFileName, tmp_url_utf8)
674 			|| !utf8_to_file_name(tmp_url_utf8, tmp_url))
675 			return "";
676 		FILE * f = g_fopen(tmp_url.c_str(), "wb");
677 		if(!f)
678 			return "";
679 		fwrite(" ", 1, 1, f);
680 		fclose(f);
681 		return tmp_url;
682 	}
683 #else
684 	{
685 		std::string tmp_url;
686 		gchar * buf = NULL;
687 		gint fd = g_file_open_tmp(NULL, &buf, NULL);
688 		if(fd == -1)
689 			return "";
690 		tmp_url = buf;
691 		g_free(buf);
692 		ssize_t write_size;
693 		write_size = write(fd, " ", 1);
694 		if (write_size == -1) {
695 			g_print("write error!\n");
696 		}
697 		close(fd);
698 		return tmp_url;
699 	}
700 #endif
701 }
702 
is_known_resource_type(const char * str)703 bool is_known_resource_type(const char* str)
704 {
705 	for(size_t i=0; known_resource_types[i]; ++i)
706 		if(strcmp(str, known_resource_types[i]) == 0)
707 			return true;
708 	return false;
709 }
710 
711 /* trim string src
712  * new_beg is set to new beginning of the string
713  * new_len length of the new string in bytes
714  * The source string is not modified. */
trim_spaces(const char * const src,const char * & new_beg,size_t & new_len)715 void trim_spaces(const char* const src, const char*& new_beg, size_t& new_len)
716 {
717 	new_beg = src;
718 	while(*new_beg && g_unichar_isspace(g_utf8_get_char(new_beg))) {
719 		new_beg = g_utf8_next_char(new_beg);
720 	}
721 	const char* p = new_beg;
722 	const char* end = p;
723 	while(*p) {
724 		if(!g_unichar_isspace(g_utf8_get_char(p)))
725 			end = p;
726 		p = g_utf8_next_char(p);
727 	}
728 	if(*end)
729 		end = g_utf8_next_char(end);
730 	new_len = end - new_beg;
731 }
732 
733 /* truncate utf8 string on char boundary (string content is not changed,
734  * instead desired new length is returned)
735  * new string length must be <= max_len
736  * beg - first char of the string,
737  * str_len - string length in bytes
738  * return value: length of the truncated string */
truncate_utf8_string(const char * const beg,const size_t str_len,const size_t max_len)739 size_t truncate_utf8_string(const char* const beg, const size_t str_len, const size_t max_len)
740 {
741 	if(str_len <= max_len)
742 		return str_len;
743 	if(max_len == 0)
744 		return 0;
745 	const char* char_end = beg+max_len;
746 	const char* p = beg+max_len-1;
747 	while(true) {
748 		// find the first byte of a utf8 char
749 		for(; beg <= p && (*p & 0xC0) == 0x80; --p)
750 			;
751 		if(p<beg)
752 			return 0;
753 		const gunichar guch = g_utf8_get_char_validated(p, char_end-p);
754 		if(guch != (gunichar)-1 && guch != (gunichar)-2)
755 			return char_end - beg;
756 		char_end = p;
757 		--p;
758 		if(p<beg)
759 			return 0;
760 	}
761 }
762 
763 
764 /* convert str into a valid utf8 string
765 We assume that str is a utf8-encoded string possibly containing invalid chars.
766 Replace invalid chars with replacement_char, or strip them if replacement_char == 0. */
fix_utf8_str(const std::string & str,char replacement_char)767 std::string fix_utf8_str(const std::string& str, char replacement_char)
768 {
769 	std::string out;
770 	// an utf8 encoded char occupies at most 6 bytes + 1 byte for terminating '\0'
771 	char buf[7];
772 	out.reserve(str.length());
773 	const char* p = str.c_str();
774 	gunichar uch;
775 	while(p && *p) {
776 		uch = g_utf8_get_char_validated(p, -1);
777 		if(uch == (gunichar)-1 || uch == (gunichar)-2 || !g_unichar_validate(uch) || uch == 0) {
778 			if(replacement_char)
779 				out += replacement_char;
780 		} else {
781 			buf[g_unichar_to_utf8(uch, buf)] = '\0';
782 			out += buf;
783 		}
784 		p = g_utf8_find_next_char(p+1, NULL);
785 	}
786 	return out;
787 }
788 
789 /* print a comma-separated list of Unicode character codes
790  * chars - a list of Unicode utf-8 encoded characters to print */
print_char_codes(const std::list<const char * > & chars)791 std::string print_char_codes(const std::list<const char*>& chars) {
792 	std::stringstream buf;
793 	bool add_splitter = false;
794 	for(std::list<const char*>::const_iterator it = chars.begin(); it != chars.end(); ++it) {
795 		if(add_splitter)
796 			buf << ", ";
797 		buf << static_cast<unsigned long>(g_utf8_get_char(*it));
798 		add_splitter = true;
799 	}
800 	return buf.str();
801 }
802 
/* Find the last occurrence of the char c among the first size chars of str.
The search is bounded by size only, a '\0' char does not stop it.
Return value: pointer to the char found, NULL if there is none
(in particular when size == 0). */
char* strrchr_len(char* str, size_t size, char c)
{
	/* Count an index down instead of moving a pointer down from
	str + size - 1: the pointer had to reach str - 1 to end the loop
	(and started there when size == 0), and forming a pointer before
	the beginning of an array is undefined behavior. */
	for(size_t i = size; i > 0; --i)
		if(str[i - 1] == c)
			return str + (i - 1);
	return NULL;
}
810 
/* Wide char check for an ASCII letter.
Return value: true if ch is one of a-z or A-Z, false otherwise. */
bool is_ascii_alpha(wchar_t ch)
{
	const bool lower = ch >= L'a' && ch <= L'z';
	const bool upper = ch >= L'A' && ch <= L'Z';
	return lower || upper;
}
821 
get_basename_without_extension(const std::string & filepath)822 std::string get_basename_without_extension(const std::string& filepath)
823 {
824 	std::string::size_type pos = filepath.find_last_of(G_DIR_SEPARATOR);
825 	if(pos == std::string::npos)
826 		pos = 0;
827 	else
828 		pos += 1;
829 	if(pos >= filepath.length())
830 		return "";
831 	std::string::size_type pos2 = filepath.find_last_of('.');
832 	if(pos2 == std::string::npos || pos2 < pos)
833 		return filepath.substr(pos);
834 	return filepath.substr(pos, pos2-pos);
835 }
836 
837 /* remove the item at path
838  * if this is a regular file, removed the file;
839  * if this is a symbolic line, remove the link;
840  * if this is a directory, remove the directory recursively.
841  * Return value: EXIT_SUCCESS or EXIT_FAILURE
842  * */
remove_recursive(const std::string & path)843 int remove_recursive(const std::string& path)
844 {
845 	int res = EXIT_SUCCESS;
846 	if(g_file_test(path.c_str(),G_FILE_TEST_IS_DIR)) {
847 		// change file mode so we can read directory and remove items from it
848 		// If we cannot read mode or change it, go on, maybe we can remove the dir anyway.
849 		stardict_stat_t stats;
850 		if(!g_stat(path.c_str(), &stats)) {
851 			// full access for everyone
852 			g_chmod(path.c_str(), stats.st_mode | (S_IRWXU|S_IRWXG|S_IRWXO));
853 		}
854 		glib::Dir dir(g_dir_open(path.c_str(), 0, NULL));
855 		if(!dir)
856 			res = EXIT_FAILURE;
857 		else {
858 			std::string dirpath(path); // directory path ending with a dir separator
859 			if(dirpath[dirpath.length()-1] != G_DIR_SEPARATOR)
860 				dirpath += G_DIR_SEPARATOR;
861 			const gchar * filename;
862 			while((filename = g_dir_read_name(get_impl(dir)))) {
863 				if (strcmp(filename, ".") == 0 || strcmp(filename, "..") == 0)
864 					continue;
865 				const std::string itempath(dirpath + filename);
866 				if(remove_recursive(itempath.c_str()))
867 					res = EXIT_FAILURE;
868 			}
869 		}
870 		if(g_rmdir(path.c_str()))
871 			res = EXIT_FAILURE;
872 		return res;
873 	} else {
874 		if(g_remove(path.c_str()))
875 			res = EXIT_FAILURE;
876 		return res;
877 	}
878 }
879