1 /*
2  * Copyright 2011 kubtek <kubtek@mail.com>
3  *
4  * This file is part of StarDict.
5  *
6  * StarDict is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * StarDict is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #endif
23 
24 #include <cstring>
25 #include <vector>
26 #include <cstdlib>
27 #include <glib/gstdio.h>
28 #include <glib.h>
29 #include <algorithm>
30 #include <memory>
31 #include <errno.h>
32 
33 #ifdef _WIN32
34 #  include <windows.h>
35 #else
36 #  include <unistd.h>
37 #endif
38 
39 #include "lib_res_store.h"
40 #include "libcommon.h"
41 #include "ifo_file.h"
42 #include "lib_dict_verify.h"
43 
44 struct fileitem_t {
45 	std::string filename;
46 	guint32 offset;
47 	guint32 size;
48 };
49 
compare_fileitem(const fileitem_t & left,const fileitem_t & right)50 static bool compare_fileitem(const fileitem_t& left, const fileitem_t& right)
51 {
52 	return 0 > strcmp(left.filename.c_str(), right.filename.c_str());
53 }
54 
compare_fileitem_by_offset(const fileitem_t * left,const fileitem_t * right)55 static bool compare_fileitem_by_offset(const fileitem_t* left, const fileitem_t* right)
56 {
57 	return left->offset < right->offset;
58 }
59 
60 class resource_database
61 {
62 public:
resource_database()63 	resource_database()
64 	:
65 		verif_result(VERIF_RESULT_OK)
66 	{
67 	}
68 	TLoadResult load(const std::string& dirname);
69 	// filename uses database directory separator
70 	bool have_file(const std::string& filename) const;
get_verif_result(void) const71 	VerifResult get_verif_result(void) const { return verif_result; }
72 	/* true if res.ridx.gz used, res.ridx otherwise */
res_ridx_compressed(void) const73 	bool res_ridx_compressed(void) const
74 	{
75 		return ridxfilename != ridxfilename_orig;
76 	}
77 	/* true if res.rdic.dz used, res.rdic otherwise */
res_rdic_compressed(void) const78 	bool res_rdic_compressed(void) const
79 	{
80 		return rdicfilename != rdicfilename_orig;
81 	}
82 private:
83 	int prepare_ridx_file(void);
84 	int prepare_rdic_file(void);
85 	int load_rifo_file(void);
86 	int load_ridx_file(void);
87 	VerifResult load_rdic_file(void);
88 	void print_index(void);
89 	VerifResult verify_data_blocks_overlapping(void);
90 
91 	std::string rifofilename;
92 	std::string ridxfilename;
93 	std::string ridxfilename_orig;
94 	std::string rdicfilename;
95 	std::string rdicfilename_orig;
96 	std::string dirname;
97 	TempFile ridxtemp;
98 	TempFile rdictemp;
99 	DictInfo dict_info;
100 	std::vector<fileitem_t> index;
101 	guint32 rdicfilesize;
102 	VerifResult verif_result;
103 };
104 
load(const std::string & dirname)105 TLoadResult resource_database::load(const std::string& dirname)
106 {
107 	this->dirname = dirname;
108 	verif_result = VERIF_RESULT_OK;
109 
110 	rifofilename = build_path(dirname, "res.rifo");
111 	if(!g_file_test(rifofilename.c_str(), G_FILE_TEST_EXISTS))
112 		return lrNotFound;
113 
114 	if(load_rifo_file()) {
115 		verif_result = combine_result(verif_result, VERIF_RESULT_FATAL);
116 		return lrError;
117 	}
118 	if(load_ridx_file()) {
119 		verif_result = combine_result(verif_result, VERIF_RESULT_FATAL);
120 		return lrError;
121 	}
122 	verif_result = combine_result(verif_result, load_rdic_file());
123 	return VERIF_RESULT_CRITICAL <= verif_result ? lrError : lrOK;
124 }
125 
prepare_ridx_file(void)126 int resource_database::prepare_ridx_file(void)
127 {
128 	const std::string index_file_name_gz = build_path(dirname, "res.ridx.gz");
129 	const std::string index_file_name_ridx = build_path(dirname, "res.ridx");
130 	if(g_file_test(index_file_name_gz.c_str(), G_FILE_TEST_EXISTS)
131 		&& g_file_test(index_file_name_ridx.c_str(), G_FILE_TEST_EXISTS)) {
132 		g_warning(rdb_two_index_files_msg, index_file_name_gz.c_str(), index_file_name_ridx.c_str());
133 		verif_result = combine_result(verif_result, VERIF_RESULT_WARNING);
134 	}
135 	ridxfilename_orig = index_file_name_gz;
136 	if(g_file_test(ridxfilename_orig.c_str(), G_FILE_TEST_EXISTS)) {
137 		ridxfilename = ridxtemp.create_temp_file();
138 		if(ridxfilename.empty())
139 			return EXIT_FAILURE;
140 		if(EXIT_FAILURE == unpack_zlib(ridxfilename_orig.c_str(), ridxfilename.c_str()))
141 			return EXIT_FAILURE;
142 	} else {
143 		ridxfilename_orig = index_file_name_ridx;
144 		ridxfilename = ridxfilename_orig;
145 	}
146 	return EXIT_SUCCESS;
147 }
148 
prepare_rdic_file(void)149 int resource_database::prepare_rdic_file(void)
150 {
151 	const std::string dict_file_name_dz = build_path(dirname, "res.rdic.dz");
152 	const std::string dict_file_name_rdic = build_path(dirname, "res.rdic");
153 	if(g_file_test(dict_file_name_dz.c_str(), G_FILE_TEST_EXISTS)
154 		&& g_file_test(dict_file_name_rdic.c_str(), G_FILE_TEST_EXISTS)) {
155 		g_warning(rdb_two_dict_files_msg, dict_file_name_dz.c_str(), dict_file_name_rdic.c_str());
156 		verif_result = combine_result(verif_result, VERIF_RESULT_WARNING);
157 	}
158 	rdicfilename_orig = dict_file_name_dz;
159 	if(g_file_test(rdicfilename_orig.c_str(), G_FILE_TEST_EXISTS)) {
160 		rdicfilename = rdictemp.create_temp_file();
161 		if(rdicfilename.empty())
162 			return EXIT_FAILURE;
163 		if(unpack_zlib(rdicfilename_orig.c_str(), rdicfilename.c_str()))
164 			return EXIT_FAILURE;
165 	} else {
166 		rdicfilename_orig = dict_file_name_rdic;
167 		rdicfilename = rdicfilename_orig;
168 	}
169 	return EXIT_SUCCESS;
170 }
171 
load_rifo_file(void)172 int resource_database::load_rifo_file(void)
173 {
174 	if(!dict_info.load_from_ifo_file(rifofilename, DictInfoType_ResDb))
175 		return EXIT_FAILURE;
176 	bool have_errors = false;
177 	if(dict_info.get_filecount() == 0) {
178 		g_critical(rdb_filecnt_zero_err, rifofilename.c_str());
179 		have_errors = true;
180 	}
181 	if(dict_info.get_index_file_size() == 0) {
182 		g_critical(rdb_ridxfilesize_zero_err, rifofilename.c_str());
183 		have_errors = true;
184 	}
185 	return have_errors ? EXIT_FAILURE : EXIT_SUCCESS;
186 }
187 
load_ridx_file(void)188 int resource_database::load_ridx_file(void)
189 {
190 	if(prepare_ridx_file())
191 		return EXIT_FAILURE;
192 
193 	stardict_stat_t stats;
194 	if (g_stat (ridxfilename.c_str(), &stats) == -1) {
195 		std::string error(g_strerror(errno));
196 		g_critical(file_not_found_idx_err, ridxfilename.c_str(), error.c_str());
197 		return EXIT_FAILURE;
198 	}
199 	g_message(rdb_loading_ridx_file_msg, ridxfilename_orig.c_str());
200 	if (dict_info.get_index_file_size()!=(guint)stats.st_size) {
201 		g_critical(incorrect_ridx_file_size_err,
202 			dict_info.get_index_file_size(), (long) stats.st_size);
203 		return EXIT_FAILURE;
204 	}
205 
206 	index.clear();
207 	index.reserve(dict_info.get_filecount());
208 
209 	std::vector<gchar> buf(stats.st_size+1);
210 	gchar * const buffer_beg = &buf[0];
211 	gchar * const buffer_end = buffer_beg+stats.st_size;
212 	{
213 		FILE *idxfile = g_fopen(ridxfilename.c_str(),"rb");
214 		size_t fread_size;
215 		fread_size = fread(buffer_beg, 1, stats.st_size, idxfile);
216 		if (fread_size != (size_t)stats.st_size) {
217 			g_print("fread error!\n");
218 		}
219 		fclose(idxfile);
220 	}
221 
222 	gchar *p=buffer_beg;
223 	gchar *prefilename=NULL;
224 	int filenamelen;
225 	guint filecount=0;
226 	bool have_errors=false;
227 	fileitem_t fileitem;
228 	size_t size_remain; // to the end of the index file
229 
230 	while (p < buffer_end) {
231 		size_remain = buffer_end - p;
232 		const char* p2 = reinterpret_cast<const char*>(memchr(p, '\0', size_remain));
233 		if(!p2) {
234 			g_warning(index_file_truncated_err);
235 			have_errors=true;
236 			break;
237 		}
238 		filenamelen = p2 - p;
239 		if (filenamelen==0) {
240 			g_warning(empty_file_name_err);
241 			have_errors=true;
242 		}
243 		if (!g_utf8_validate(p, filenamelen, NULL)) {
244 			std::string tmp(p, filenamelen);
245 			g_warning(invalid_utf8_index_item_err, p, tmp.c_str());
246 			have_errors=true;
247 		}
248 		if(strchr(p, '\\')) {
249 			g_warning(rdb_invalid_file_name_format_back_spash_err, p);
250 			have_errors=true;
251 		}
252 		if(p[0] == '/') {
253 			g_warning(rdb_invalid_file_name_format_abs_path_err, p);
254 			have_errors=true;
255 		}
256 		if(strstr(p, "//")) {
257 			g_warning(rdb_invalid_file_name_format_empty_dir_err, p);
258 			have_errors=true;
259 		}
260 		if (prefilename) {
261 			int cmpvalue=strcmp(prefilename, p);
262 			if (cmpvalue>0) {
263 				g_warning(wrong_file_order_err, prefilename, p);
264 				have_errors=true;
265 			}
266 			if(cmpvalue==0) {
267 				g_warning(duplicate_file_name, p);
268 				have_errors=true;
269 			}
270 		}
271 		prefilename=p;
272 		fileitem.filename = p;
273 		p += filenamelen + 1;
274 		size_remain = buffer_end - p;
275 		if(size_remain < 2 * sizeof(guint32)) {
276 			g_warning(index_file_truncated_err);
277 			have_errors=true;
278 			break;
279 		}
280 		fileitem.offset = g_ntohl(*reinterpret_cast<guint32 *>(p));
281 		p += sizeof(guint32);
282 		fileitem.size = g_ntohl(*reinterpret_cast<guint32 *>(p));
283 		p += sizeof(guint32);
284 		if (fileitem.size==0) {
285 			g_warning(empty_block_err, prefilename);
286 		}
287 		filecount++;
288 		index.push_back(fileitem);
289 	} // while
290 
291 	g_assert(p <= buffer_end);
292 
293 	if (filecount!=dict_info.get_filecount()) {
294 		g_warning(rdb_incorrect_file_cnt, dict_info.get_filecount(), filecount);
295 		have_errors=true;
296 	}
297 
298 	return have_errors ? EXIT_FAILURE : EXIT_SUCCESS;
299 }
300 
load_rdic_file(void)301 VerifResult resource_database::load_rdic_file(void)
302 {
303 	VerifResult result = VERIF_RESULT_OK;
304 	if(prepare_rdic_file())
305 		return combine_result(result, VERIF_RESULT_FATAL);
306 
307 	stardict_stat_t stats;
308 	if (g_stat (rdicfilename.c_str(), &stats) == -1) {
309 		std::string error(g_strerror(errno));
310 		g_critical(rdb_dict_file_not_found_err, rdicfilename.c_str(), error.c_str());
311 		return combine_result(result, VERIF_RESULT_FATAL);
312 	}
313 	rdicfilesize = stats.st_size;
314 
315 	g_message(rdb_loading_dict_file_msg, rdicfilename_orig.c_str());
316 	clib::File rdicfile(g_fopen(rdicfilename.c_str(), "rb"));
317 	if(!rdicfile) {
318 		std::string error(g_strerror(errno));
319 		g_critical(open_read_file_err, rdicfilename.c_str(), error.c_str());
320 		return combine_result(result, VERIF_RESULT_FATAL);
321 	}
322 
323 	for(size_t i=0; i<index.size(); ++i) {
324 		if(index[i].offset + index[i].size > rdicfilesize) {
325 			g_warning(record_out_of_file_err, index[i].filename.c_str());
326 			result = combine_result(result, VERIF_RESULT_CRITICAL);
327 			continue;
328 		}
329 	}
330 	result = combine_result(result, verify_data_blocks_overlapping());
331 	return result;
332 }
333 
have_file(const std::string & filename) const334 bool resource_database::have_file(const std::string& filename) const
335 {
336 	fileitem_t fileitem;
337 	fileitem.filename = filename;
338 	return std::binary_search(index.begin(), index.end(), fileitem, compare_fileitem);
339 }
340 
print_index(void)341 void resource_database::print_index(void)
342 {
343 	for(size_t i=0; i<index.size(); ++i) {
344 		g_print("Info: index item '%s'\n", index[i].filename.c_str());
345 	}
346 }
347 
verify_data_blocks_overlapping(void)348 VerifResult resource_database::verify_data_blocks_overlapping(void)
349 {
350 	VerifResult result = VERIF_RESULT_OK;
351 	std::vector<const fileitem_t*> sort_index(index.size(), NULL);
352 	for(size_t i=0; i<index.size(); ++i)
353 		sort_index[i] = &index[i];
354 	std::sort(sort_index.begin(), sort_index.end(), compare_fileitem_by_offset);
355 	// find overlapping but not equal regions (offset, size)
356 	std::vector<std::pair<size_t, size_t> > overlapping_blocks;
357 	::verify_data_blocks_overlapping(sort_index, overlapping_blocks);
358 	for(size_t i=0; i<overlapping_blocks.size(); ++i) {
359 		const fileitem_t& first = *sort_index[overlapping_blocks[i].first];
360 		const fileitem_t& second = *sort_index[overlapping_blocks[i].second];
361 		g_warning(overlapping_data_blocks_msg,
362 			first.filename.c_str(), second.filename.c_str(),
363 			first.offset, first.size, second.offset, second.size);
364 		result = combine_result(result, VERIF_RESULT_WARNING);
365 	}
366 	// find not used regions
367 	std::vector<region_t> unused_regions;
368 	verify_unused_regions(sort_index, unused_regions, rdicfilesize);
369 	if(!unused_regions.empty()) {
370 		g_warning(rdb_unreferenced_data_blocks_msg);
371 		for(size_t i = 0; i<unused_regions.size(); ++i)
372 			g_warning("\t(%u, %u)\n", unused_regions[i].offset, unused_regions[i].size);
373 		result = combine_result(result, VERIF_RESULT_NOTE);
374 	}
375 	return result;
376 }
377 
378 
379 class resource_files
380 {
381 public:
382 	TLoadResult load(const std::string& dirname);
383 	// filename uses database directory separator
384 	bool have_file(const std::string& filename) const;
385 private:
386 	std::string dirname;
387 	std::string resdirname;
388 };
389 
load(const std::string & dirname)390 TLoadResult resource_files::load(const std::string& dirname)
391 {
392 	this->dirname = dirname;
393 	resdirname = build_path(dirname, "res");
394 	if(!g_file_test(resdirname.c_str(), G_FILE_TEST_IS_DIR))
395 		return lrNotFound;
396 	return lrOK;
397 }
398 
have_file(const std::string & filename) const399 bool resource_files::have_file(const std::string& filename) const
400 {
401 	const std::string full_fs_filename(build_path(resdirname, dir_separator_db_to_fs(filename)));
402 	return static_cast<bool>(g_file_test(full_fs_filename.c_str(), G_FILE_TEST_IS_REGULAR));
403 }
404 
405 
resource_storage(void)406 resource_storage::resource_storage(void)
407 :
408 	db(NULL),
409 	files(NULL),
410 	verif_result(VERIF_RESULT_OK)
411 {
412 
413 }
414 
~resource_storage(void)415 resource_storage::~resource_storage(void)
416 {
417 	clear();
418 }
419 
load(const std::string & dirname)420 TLoadResult resource_storage::load(const std::string& dirname)
421 {
422 	clear();
423 	std::auto_ptr<resource_database> t_db(new resource_database);
424 	TLoadResult res = t_db->load(dirname);
425 	if(res == lrOK) {
426 		g_message(rdb_loaded_db_msg);
427 		verif_result = t_db->get_verif_result();
428 		db = t_db.release();
429 		return lrOK;
430 	}
431 	if(res == lrError) {
432 		g_critical(rdb_load_db_failed_msg);
433 		verif_result = t_db->get_verif_result();
434 		return lrError;
435 	}
436 	std::auto_ptr<resource_files> t_files(new resource_files);
437 	res = t_files->load(dirname);
438 	if(res == lrOK) {
439 		g_message(rdb_loaded_files_msg);
440 		verif_result = VERIF_RESULT_OK;
441 		files = t_files.release();
442 		return lrOK;
443 	}
444 	if(res == lrError) {
445 		g_critical(rdb_load_files_failed_msg);
446 		verif_result = VERIF_RESULT_FATAL;
447 		return lrError;
448 	}
449 	verif_result = VERIF_RESULT_OK;
450 	return res;
451 }
452 
have_file(const std::string & filename) const453 bool resource_storage::have_file(const std::string& filename) const
454 {
455 	if(db)
456 		return db->have_file(filename);
457 	if(files)
458 		return files->have_file(filename);
459 	return false;
460 }
461 
get_storage_type(void) const462 StorageType resource_storage::get_storage_type(void) const
463 {
464 	if(db)
465 		return StorageType_DATABASE;
466 	if(files)
467 		return StorageType_FILE;
468 	return StorageType_UNKNOWN;
469 }
470 
res_ridx_compressed(void) const471 bool resource_storage::res_ridx_compressed(void) const
472 {
473 	if(db)
474 		return db->res_ridx_compressed();
475 	return false;
476 }
477 
res_rdic_compressed(void) const478 bool resource_storage::res_rdic_compressed(void) const
479 {
480 	if(db)
481 		return db->res_rdic_compressed();
482 	return false;
483 }
484 
clear(void)485 void resource_storage::clear(void)
486 {
487 	if(db)
488 		delete db;
489 	db = NULL;
490 	if(files)
491 		delete files;
492 	files = NULL;
493 	verif_result = VERIF_RESULT_OK;
494 }
495 
496