1 /*
2 * Copyright 2011 kubtek <kubtek@mail.com>
3 *
4 * This file is part of StarDict.
5 *
6 * StarDict is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * StarDict is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with StarDict. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 #include <cstring>
25 #include <vector>
26 #include <cstdlib>
27 #include <glib/gstdio.h>
28 #include <glib.h>
29 #include <algorithm>
30 #include <memory>
31 #include <errno.h>
32
33 #ifdef _WIN32
34 # include <windows.h>
35 #else
36 # include <unistd.h>
37 #endif
38
39 #include "lib_res_store.h"
40 #include "libcommon.h"
41 #include "ifo_file.h"
42 #include "lib_dict_verify.h"
43
44 struct fileitem_t {
45 std::string filename;
46 guint32 offset;
47 guint32 size;
48 };
49
compare_fileitem(const fileitem_t & left,const fileitem_t & right)50 static bool compare_fileitem(const fileitem_t& left, const fileitem_t& right)
51 {
52 return 0 > strcmp(left.filename.c_str(), right.filename.c_str());
53 }
54
compare_fileitem_by_offset(const fileitem_t * left,const fileitem_t * right)55 static bool compare_fileitem_by_offset(const fileitem_t* left, const fileitem_t* right)
56 {
57 return left->offset < right->offset;
58 }
59
60 class resource_database
61 {
62 public:
resource_database()63 resource_database()
64 :
65 verif_result(VERIF_RESULT_OK)
66 {
67 }
68 TLoadResult load(const std::string& dirname);
69 // filename uses database directory separator
70 bool have_file(const std::string& filename) const;
get_verif_result(void) const71 VerifResult get_verif_result(void) const { return verif_result; }
72 /* true if res.ridx.gz used, res.ridx otherwise */
res_ridx_compressed(void) const73 bool res_ridx_compressed(void) const
74 {
75 return ridxfilename != ridxfilename_orig;
76 }
77 /* true if res.rdic.dz used, res.rdic otherwise */
res_rdic_compressed(void) const78 bool res_rdic_compressed(void) const
79 {
80 return rdicfilename != rdicfilename_orig;
81 }
82 private:
83 int prepare_ridx_file(void);
84 int prepare_rdic_file(void);
85 int load_rifo_file(void);
86 int load_ridx_file(void);
87 VerifResult load_rdic_file(void);
88 void print_index(void);
89 VerifResult verify_data_blocks_overlapping(void);
90
91 std::string rifofilename;
92 std::string ridxfilename;
93 std::string ridxfilename_orig;
94 std::string rdicfilename;
95 std::string rdicfilename_orig;
96 std::string dirname;
97 TempFile ridxtemp;
98 TempFile rdictemp;
99 DictInfo dict_info;
100 std::vector<fileitem_t> index;
101 guint32 rdicfilesize;
102 VerifResult verif_result;
103 };
104
load(const std::string & dirname)105 TLoadResult resource_database::load(const std::string& dirname)
106 {
107 this->dirname = dirname;
108 verif_result = VERIF_RESULT_OK;
109
110 rifofilename = build_path(dirname, "res.rifo");
111 if(!g_file_test(rifofilename.c_str(), G_FILE_TEST_EXISTS))
112 return lrNotFound;
113
114 if(load_rifo_file()) {
115 verif_result = combine_result(verif_result, VERIF_RESULT_FATAL);
116 return lrError;
117 }
118 if(load_ridx_file()) {
119 verif_result = combine_result(verif_result, VERIF_RESULT_FATAL);
120 return lrError;
121 }
122 verif_result = combine_result(verif_result, load_rdic_file());
123 return VERIF_RESULT_CRITICAL <= verif_result ? lrError : lrOK;
124 }
125
prepare_ridx_file(void)126 int resource_database::prepare_ridx_file(void)
127 {
128 const std::string index_file_name_gz = build_path(dirname, "res.ridx.gz");
129 const std::string index_file_name_ridx = build_path(dirname, "res.ridx");
130 if(g_file_test(index_file_name_gz.c_str(), G_FILE_TEST_EXISTS)
131 && g_file_test(index_file_name_ridx.c_str(), G_FILE_TEST_EXISTS)) {
132 g_warning(rdb_two_index_files_msg, index_file_name_gz.c_str(), index_file_name_ridx.c_str());
133 verif_result = combine_result(verif_result, VERIF_RESULT_WARNING);
134 }
135 ridxfilename_orig = index_file_name_gz;
136 if(g_file_test(ridxfilename_orig.c_str(), G_FILE_TEST_EXISTS)) {
137 ridxfilename = ridxtemp.create_temp_file();
138 if(ridxfilename.empty())
139 return EXIT_FAILURE;
140 if(EXIT_FAILURE == unpack_zlib(ridxfilename_orig.c_str(), ridxfilename.c_str()))
141 return EXIT_FAILURE;
142 } else {
143 ridxfilename_orig = index_file_name_ridx;
144 ridxfilename = ridxfilename_orig;
145 }
146 return EXIT_SUCCESS;
147 }
148
prepare_rdic_file(void)149 int resource_database::prepare_rdic_file(void)
150 {
151 const std::string dict_file_name_dz = build_path(dirname, "res.rdic.dz");
152 const std::string dict_file_name_rdic = build_path(dirname, "res.rdic");
153 if(g_file_test(dict_file_name_dz.c_str(), G_FILE_TEST_EXISTS)
154 && g_file_test(dict_file_name_rdic.c_str(), G_FILE_TEST_EXISTS)) {
155 g_warning(rdb_two_dict_files_msg, dict_file_name_dz.c_str(), dict_file_name_rdic.c_str());
156 verif_result = combine_result(verif_result, VERIF_RESULT_WARNING);
157 }
158 rdicfilename_orig = dict_file_name_dz;
159 if(g_file_test(rdicfilename_orig.c_str(), G_FILE_TEST_EXISTS)) {
160 rdicfilename = rdictemp.create_temp_file();
161 if(rdicfilename.empty())
162 return EXIT_FAILURE;
163 if(unpack_zlib(rdicfilename_orig.c_str(), rdicfilename.c_str()))
164 return EXIT_FAILURE;
165 } else {
166 rdicfilename_orig = dict_file_name_rdic;
167 rdicfilename = rdicfilename_orig;
168 }
169 return EXIT_SUCCESS;
170 }
171
load_rifo_file(void)172 int resource_database::load_rifo_file(void)
173 {
174 if(!dict_info.load_from_ifo_file(rifofilename, DictInfoType_ResDb))
175 return EXIT_FAILURE;
176 bool have_errors = false;
177 if(dict_info.get_filecount() == 0) {
178 g_critical(rdb_filecnt_zero_err, rifofilename.c_str());
179 have_errors = true;
180 }
181 if(dict_info.get_index_file_size() == 0) {
182 g_critical(rdb_ridxfilesize_zero_err, rifofilename.c_str());
183 have_errors = true;
184 }
185 return have_errors ? EXIT_FAILURE : EXIT_SUCCESS;
186 }
187
load_ridx_file(void)188 int resource_database::load_ridx_file(void)
189 {
190 if(prepare_ridx_file())
191 return EXIT_FAILURE;
192
193 stardict_stat_t stats;
194 if (g_stat (ridxfilename.c_str(), &stats) == -1) {
195 std::string error(g_strerror(errno));
196 g_critical(file_not_found_idx_err, ridxfilename.c_str(), error.c_str());
197 return EXIT_FAILURE;
198 }
199 g_message(rdb_loading_ridx_file_msg, ridxfilename_orig.c_str());
200 if (dict_info.get_index_file_size()!=(guint)stats.st_size) {
201 g_critical(incorrect_ridx_file_size_err,
202 dict_info.get_index_file_size(), (long) stats.st_size);
203 return EXIT_FAILURE;
204 }
205
206 index.clear();
207 index.reserve(dict_info.get_filecount());
208
209 std::vector<gchar> buf(stats.st_size+1);
210 gchar * const buffer_beg = &buf[0];
211 gchar * const buffer_end = buffer_beg+stats.st_size;
212 {
213 FILE *idxfile = g_fopen(ridxfilename.c_str(),"rb");
214 size_t fread_size;
215 fread_size = fread(buffer_beg, 1, stats.st_size, idxfile);
216 if (fread_size != (size_t)stats.st_size) {
217 g_print("fread error!\n");
218 }
219 fclose(idxfile);
220 }
221
222 gchar *p=buffer_beg;
223 gchar *prefilename=NULL;
224 int filenamelen;
225 guint filecount=0;
226 bool have_errors=false;
227 fileitem_t fileitem;
228 size_t size_remain; // to the end of the index file
229
230 while (p < buffer_end) {
231 size_remain = buffer_end - p;
232 const char* p2 = reinterpret_cast<const char*>(memchr(p, '\0', size_remain));
233 if(!p2) {
234 g_warning(index_file_truncated_err);
235 have_errors=true;
236 break;
237 }
238 filenamelen = p2 - p;
239 if (filenamelen==0) {
240 g_warning(empty_file_name_err);
241 have_errors=true;
242 }
243 if (!g_utf8_validate(p, filenamelen, NULL)) {
244 std::string tmp(p, filenamelen);
245 g_warning(invalid_utf8_index_item_err, p, tmp.c_str());
246 have_errors=true;
247 }
248 if(strchr(p, '\\')) {
249 g_warning(rdb_invalid_file_name_format_back_spash_err, p);
250 have_errors=true;
251 }
252 if(p[0] == '/') {
253 g_warning(rdb_invalid_file_name_format_abs_path_err, p);
254 have_errors=true;
255 }
256 if(strstr(p, "//")) {
257 g_warning(rdb_invalid_file_name_format_empty_dir_err, p);
258 have_errors=true;
259 }
260 if (prefilename) {
261 int cmpvalue=strcmp(prefilename, p);
262 if (cmpvalue>0) {
263 g_warning(wrong_file_order_err, prefilename, p);
264 have_errors=true;
265 }
266 if(cmpvalue==0) {
267 g_warning(duplicate_file_name, p);
268 have_errors=true;
269 }
270 }
271 prefilename=p;
272 fileitem.filename = p;
273 p += filenamelen + 1;
274 size_remain = buffer_end - p;
275 if(size_remain < 2 * sizeof(guint32)) {
276 g_warning(index_file_truncated_err);
277 have_errors=true;
278 break;
279 }
280 fileitem.offset = g_ntohl(*reinterpret_cast<guint32 *>(p));
281 p += sizeof(guint32);
282 fileitem.size = g_ntohl(*reinterpret_cast<guint32 *>(p));
283 p += sizeof(guint32);
284 if (fileitem.size==0) {
285 g_warning(empty_block_err, prefilename);
286 }
287 filecount++;
288 index.push_back(fileitem);
289 } // while
290
291 g_assert(p <= buffer_end);
292
293 if (filecount!=dict_info.get_filecount()) {
294 g_warning(rdb_incorrect_file_cnt, dict_info.get_filecount(), filecount);
295 have_errors=true;
296 }
297
298 return have_errors ? EXIT_FAILURE : EXIT_SUCCESS;
299 }
300
load_rdic_file(void)301 VerifResult resource_database::load_rdic_file(void)
302 {
303 VerifResult result = VERIF_RESULT_OK;
304 if(prepare_rdic_file())
305 return combine_result(result, VERIF_RESULT_FATAL);
306
307 stardict_stat_t stats;
308 if (g_stat (rdicfilename.c_str(), &stats) == -1) {
309 std::string error(g_strerror(errno));
310 g_critical(rdb_dict_file_not_found_err, rdicfilename.c_str(), error.c_str());
311 return combine_result(result, VERIF_RESULT_FATAL);
312 }
313 rdicfilesize = stats.st_size;
314
315 g_message(rdb_loading_dict_file_msg, rdicfilename_orig.c_str());
316 clib::File rdicfile(g_fopen(rdicfilename.c_str(), "rb"));
317 if(!rdicfile) {
318 std::string error(g_strerror(errno));
319 g_critical(open_read_file_err, rdicfilename.c_str(), error.c_str());
320 return combine_result(result, VERIF_RESULT_FATAL);
321 }
322
323 for(size_t i=0; i<index.size(); ++i) {
324 if(index[i].offset + index[i].size > rdicfilesize) {
325 g_warning(record_out_of_file_err, index[i].filename.c_str());
326 result = combine_result(result, VERIF_RESULT_CRITICAL);
327 continue;
328 }
329 }
330 result = combine_result(result, verify_data_blocks_overlapping());
331 return result;
332 }
333
have_file(const std::string & filename) const334 bool resource_database::have_file(const std::string& filename) const
335 {
336 fileitem_t fileitem;
337 fileitem.filename = filename;
338 return std::binary_search(index.begin(), index.end(), fileitem, compare_fileitem);
339 }
340
print_index(void)341 void resource_database::print_index(void)
342 {
343 for(size_t i=0; i<index.size(); ++i) {
344 g_print("Info: index item '%s'\n", index[i].filename.c_str());
345 }
346 }
347
verify_data_blocks_overlapping(void)348 VerifResult resource_database::verify_data_blocks_overlapping(void)
349 {
350 VerifResult result = VERIF_RESULT_OK;
351 std::vector<const fileitem_t*> sort_index(index.size(), NULL);
352 for(size_t i=0; i<index.size(); ++i)
353 sort_index[i] = &index[i];
354 std::sort(sort_index.begin(), sort_index.end(), compare_fileitem_by_offset);
355 // find overlapping but not equal regions (offset, size)
356 std::vector<std::pair<size_t, size_t> > overlapping_blocks;
357 ::verify_data_blocks_overlapping(sort_index, overlapping_blocks);
358 for(size_t i=0; i<overlapping_blocks.size(); ++i) {
359 const fileitem_t& first = *sort_index[overlapping_blocks[i].first];
360 const fileitem_t& second = *sort_index[overlapping_blocks[i].second];
361 g_warning(overlapping_data_blocks_msg,
362 first.filename.c_str(), second.filename.c_str(),
363 first.offset, first.size, second.offset, second.size);
364 result = combine_result(result, VERIF_RESULT_WARNING);
365 }
366 // find not used regions
367 std::vector<region_t> unused_regions;
368 verify_unused_regions(sort_index, unused_regions, rdicfilesize);
369 if(!unused_regions.empty()) {
370 g_warning(rdb_unreferenced_data_blocks_msg);
371 for(size_t i = 0; i<unused_regions.size(); ++i)
372 g_warning("\t(%u, %u)\n", unused_regions[i].offset, unused_regions[i].size);
373 result = combine_result(result, VERIF_RESULT_NOTE);
374 }
375 return result;
376 }
377
378
379 class resource_files
380 {
381 public:
382 TLoadResult load(const std::string& dirname);
383 // filename uses database directory separator
384 bool have_file(const std::string& filename) const;
385 private:
386 std::string dirname;
387 std::string resdirname;
388 };
389
load(const std::string & dirname)390 TLoadResult resource_files::load(const std::string& dirname)
391 {
392 this->dirname = dirname;
393 resdirname = build_path(dirname, "res");
394 if(!g_file_test(resdirname.c_str(), G_FILE_TEST_IS_DIR))
395 return lrNotFound;
396 return lrOK;
397 }
398
have_file(const std::string & filename) const399 bool resource_files::have_file(const std::string& filename) const
400 {
401 const std::string full_fs_filename(build_path(resdirname, dir_separator_db_to_fs(filename)));
402 return static_cast<bool>(g_file_test(full_fs_filename.c_str(), G_FILE_TEST_IS_REGULAR));
403 }
404
405
resource_storage(void)406 resource_storage::resource_storage(void)
407 :
408 db(NULL),
409 files(NULL),
410 verif_result(VERIF_RESULT_OK)
411 {
412
413 }
414
~resource_storage(void)415 resource_storage::~resource_storage(void)
416 {
417 clear();
418 }
419
load(const std::string & dirname)420 TLoadResult resource_storage::load(const std::string& dirname)
421 {
422 clear();
423 std::auto_ptr<resource_database> t_db(new resource_database);
424 TLoadResult res = t_db->load(dirname);
425 if(res == lrOK) {
426 g_message(rdb_loaded_db_msg);
427 verif_result = t_db->get_verif_result();
428 db = t_db.release();
429 return lrOK;
430 }
431 if(res == lrError) {
432 g_critical(rdb_load_db_failed_msg);
433 verif_result = t_db->get_verif_result();
434 return lrError;
435 }
436 std::auto_ptr<resource_files> t_files(new resource_files);
437 res = t_files->load(dirname);
438 if(res == lrOK) {
439 g_message(rdb_loaded_files_msg);
440 verif_result = VERIF_RESULT_OK;
441 files = t_files.release();
442 return lrOK;
443 }
444 if(res == lrError) {
445 g_critical(rdb_load_files_failed_msg);
446 verif_result = VERIF_RESULT_FATAL;
447 return lrError;
448 }
449 verif_result = VERIF_RESULT_OK;
450 return res;
451 }
452
have_file(const std::string & filename) const453 bool resource_storage::have_file(const std::string& filename) const
454 {
455 if(db)
456 return db->have_file(filename);
457 if(files)
458 return files->have_file(filename);
459 return false;
460 }
461
get_storage_type(void) const462 StorageType resource_storage::get_storage_type(void) const
463 {
464 if(db)
465 return StorageType_DATABASE;
466 if(files)
467 return StorageType_FILE;
468 return StorageType_UNKNOWN;
469 }
470
res_ridx_compressed(void) const471 bool resource_storage::res_ridx_compressed(void) const
472 {
473 if(db)
474 return db->res_ridx_compressed();
475 return false;
476 }
477
res_rdic_compressed(void) const478 bool resource_storage::res_rdic_compressed(void) const
479 {
480 if(db)
481 return db->res_rdic_compressed();
482 return false;
483 }
484
clear(void)485 void resource_storage::clear(void)
486 {
487 if(db)
488 delete db;
489 db = NULL;
490 if(files)
491 delete files;
492 files = NULL;
493 verif_result = VERIF_RESULT_OK;
494 }
495
496