1 /*
2 * Copyright 2011 kubtek <kubtek@mail.com>
3 *
4 * This file is part of StarDict.
5 *
6 * StarDict is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * StarDict is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with StarDict. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #ifndef _LIBSTARDICTVERIFY_H_
21 #define _LIBSTARDICTVERIFY_H_
22
23 #include <vector>
24 #include <algorithm>
25 #include "libcommon.h"
26
27 #ifdef _WIN32
28 # ifdef min
29 # undef min
30 # endif
31 # ifdef max
32 # undef max
33 # endif
34 #endif
35
/* Outcome of a verification step.
 * Enumerators run from least to most severe. The numeric values are written
 * out because combine_result() selects the greater of two results. */
enum VerifResult {
	/* no error */
	VERIF_RESULT_OK = 0,
	/* minor issue, safe to ignore (for example, trailing spaces in a key word) */
	VERIF_RESULT_NOTE = 1,
	/* important issue, may be ignored (for example, duplicate keys in the index
	 * that refer to the same data) */
	VERIF_RESULT_WARNING = 2,
	/* may be fixed, but cannot be ignored (for example, index entries are out of order) */
	VERIF_RESULT_CRITICAL = 3,
	/* cannot be fixed (for example, the .idx file is missing) */
	VERIF_RESULT_FATAL = 4
};
43
/* Declaration only; the definition is not part of this header.
 * ifofilename: presumably the path of the dictionary's .ifo file (inferred
 *              from the parameter name -- confirm against the definition).
 * Returns a VerifResult severity code. */
extern VerifResult stardict_verify(const char *ifofilename);
45
/* A contiguous range given by its start and its length.
 * verify_unused_regions() appends one of these for every gap it finds;
 * the unit is that of its filesize argument (presumably bytes -- confirm). */
struct region_t {
	guint32 offset; /* start of the range */
	guint32 size;   /* length of the range */
};
50
51 /* combine two verification results = the most serious error */
52 inline
combine_result(VerifResult a,VerifResult b)53 VerifResult combine_result(VerifResult a, VerifResult b)
54 {
55 return std::max(a, b);
56 }
57
/* Collect pairs of index entries whose data blocks overlap without being identical.
 *
 * sort_index:         pointers to items exposing `offset` and `size` members.
 *                     The inner scan stops at the first later entry that starts
 *                     at or beyond the end of the current block, so the entries
 *                     are presumably ordered by ascending offset -- confirm
 *                     against the caller.
 * overlapping_blocks: output; an (i, j) pair of positions in sort_index, i < j,
 *                     is appended for every overlap found. The vector is not
 *                     cleared first.
 *
 * Pairs that reference exactly the same region (equal offset and size) are not
 * reported, and neither are pairs whose second block is empty.
 * NOTE(review): `offset + size` is evaluated in the items' own arithmetic type
 * and may wrap for 32-bit fields -- confirm the values are bounds-checked earlier. */
template <class item_t>
void verify_data_blocks_overlapping(std::vector<item_t*>& sort_index,
	std::vector<std::pair<size_t, size_t> >& overlapping_blocks)
{
	const size_t count = sort_index.size();
	for (size_t first = 0; first < count; ++first) {
		const item_t* const lhs = sort_index[first];
		for (size_t second = first + 1; second < count; ++second) {
			const item_t* const rhs = sort_index[second];
			/* the later block starts at or after the end of the current one: stop scanning */
			if (!(lhs->offset + lhs->size > rhs->offset))
				break;
			const bool same_region =
				lhs->offset == rhs->offset && lhs->size == rhs->size;
			if (same_region || rhs->size == 0)
				continue;
			overlapping_blocks.push_back(std::pair<size_t, size_t>(first, second));
		}
	}
}
74
75 template <class item_t>
verify_unused_regions(std::vector<item_t * > & sort_index,std::vector<region_t> & unused_regions,guint32 filesize)76 void verify_unused_regions(std::vector<item_t*>& sort_index,
77 std::vector<region_t>& unused_regions, guint32 filesize)
78 {
79 region_t region;
80 guint32 low_boundary=0;
81 for(size_t i=0; i<sort_index.size(); ++i) {
82 const guint32 l_left = sort_index[i]->offset;
83 const guint32 l_right = sort_index[i]->offset + sort_index[i]->size;
84 if(l_left < low_boundary) {
85 if(l_right > low_boundary)
86 low_boundary = l_right;
87 } if(l_left == low_boundary) {
88 low_boundary = l_right;
89 } else { // gap found
90 region.offset = low_boundary;
91 region.size = l_left - low_boundary;
92 unused_regions.push_back(region);
93 low_boundary = l_right;
94 }
95 }
96 if(low_boundary < filesize) {
97 region.offset = low_boundary;
98 region.size = filesize - low_boundary;
99 unused_regions.push_back(region);
100 }
101 }
102
/*
 * Message templates for problems found in index entries, keys and data fields.
 * Each macro expands to a string literal. Those containing %s / %c / %d
 * conversions are presumably used as printf-style format strings, so the
 * arguments at each use must match the conversions -- confirm at the call sites.
 */
#define index_file_truncated_err \
	"Index file is truncated, last record is truncated."
#define incorrect_data_block_size_err \
	"Index item '%s'. Fields do not fit into the data block, incorrect data block size."
#define empty_field_err \
	"Index item '%s'. Empty field in definition data block. Type ID '%c'."
#define invalid_utf8_field_err \
	"Index item '%s'. Invalid field. Type id = '%c'. Invalid utf8 string: '''\n%s\n'''"
#define invalid_utf8_index_item_err \
	"Index item '%s'. Invalid field. Invalid utf8 string: '''\n%s\n'''"
#define invalid_field_content_err \
	"Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''"
/* Shared sentence; it is appended to invalid_field_content_chars_err and
 * word_invalid_char_value_err by string-literal concatenation and contributes
 * one %s conversion to each of them. */
#define invalid_chars_in_textual_data_msg \
	"The text contains either invalid Unicode characters " \
	"or Unicode characters not suitable for textual data (mainly control characters). " \
	"The following characters are prohibited: %s."
/* Four conversions in total once the shared sentence is appended: %s, %c, %s, %s. */
#define invalid_field_content_chars_err \
	"Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''\n"\
	invalid_chars_in_textual_data_msg
#define syn_file_truncated_err \
	"Synonyms file is truncated, last record is truncated."
#define unknown_type_id_err \
	"Index item '%s'. Unknown type identifier '%c'."
#define empty_word_err \
	"Blank key in index."
#define empty_file_name_err \
	"Blank file name in index."
#define long_word_err \
	"Index item '%s'. Key is too long. Maximum allowed length: %d, key length: %d."
#define word_begin_space_err \
	"Index item '%s'. Key begins with a space character."
#define word_end_space_err \
	"Index item '%s'. Key ends with a space character."
#define word_forbidden_chars_err \
	"Index item '''%s'''\nKey contains forbidden characters."
#define word_invalid_utf8_err \
	"Index item '%s'. Invalid utf8 string."
/* Two conversions in total once the shared sentence is appended: %s, %s. */
#define word_invalid_char_value_err \
	"Index item '%s'. Invalid item name.\n" \
	invalid_chars_in_textual_data_msg
#define wrong_word_order_err \
	"Wrong key order, first key = '%s', second key = '%s'."
#define wrong_file_order_err \
	"Wrong file order, first file name = '%s', second file name = '%s'."
/* NOTE(review): the identifier is spelled "faild"; it is kept as-is because
 * code outside this header may reference it. */
#define fields_extraction_faild_err \
	"Index item '%s'. Extraction of the fields failed."
#define unsupported_file_type_err \
	"Unsupported file type. File must have 'ifo' extension. File: '%s'."
/*
 * Message templates about loading the index, synonym and dictionary files and
 * about consistency between them (sizes, counts, duplicates, block layout).
 * Each macro expands to a string literal; those with conversions are presumably
 * printf-style format strings -- confirm at the call sites.
 */
#define dictionary_no_loaded_err \
	"Dictionary is not loaded."
#define file_not_found_idx_err \
	"Unable to find index file: '%s'. Error: %s."
#define loading_idx_file_msg \
	"Loading index file: '%s'..."
#define incorrect_idx_file_size_err \
	"Incorrect size of the index file: in .ifo file, idxfilesize=%u, real file size is %u."
/* NOTE(review): this template uses %d and %ld where the .idx counterpart above
 * uses %u twice -- confirm the argument types passed at the call site. */
#define incorrect_ridx_file_size_err \
	"Incorrect size of the index file: in .rifo file, ridxfilesize=%d, real file size is %ld."
#define empty_block_err \
	"Index item '%s'. Data block size = 0."
#define incorrect_word_cnt_err \
	"Incorrect number of words: in .ifo file, wordcount=%d, while the real word count is %d."
#define incorrect_syn_word_cnt_err \
	"Incorrect number of words: in .ifo file, synwordcount=%d, while the real synwordcount is %d."
#define duplicate_index_item_err \
	"Multiple index items have the same key = '%s', offset = %d, size = %d."
#define duplicate_syn_item_err \
	"Multiple synonym items with the same key = '%s', index = %d."
#define syn_file_exist_msg \
	".syn file exists but there is no \"synwordcount=\" entry in .ifo file."
#define syn_file_no_found_msg \
	"Unable to find synonyms file '%s'. Error: %s."
#define loading_syn_file_msg \
	"Loading synonyms file: '%s'..."
#define wrong_index_err \
	"Index item '%s'. Wrong index of entry in the index file: %d."
#define load_syn_file_failed_err \
	"Loading synonyms file failed: '%s'."
#define dict_file_not_found_err \
	"Dictionary file does not exist: '%s'. Error: %s."
/* Despite the _err suffix the text reads as a progress message, like the
 * loading_*_msg templates above. */
#define loading_dict_file_err \
	"Loading dictionary file: '%s'..."
/* NOTE(review): the text reads "Unable open"; a "to" is probably missing.
 * Left unchanged here because the literal is emitted at run time. */
#define open_dict_file_failed_err \
	"Unable open dictionary file '%s'. Error: %s."
#define record_out_of_file_err \
	"Index item '%s'. Incorrect size, offset parameters. Referenced data block is outside dictionary file."
/* Two adjacent literals concatenated into one template: %s, %s, %u, %u, %u, %u. */
#define overlapping_data_blocks_msg \
	"Index item '%s' and index item '%s' refer to overlapping but not equal regions (offset, size): " \
	"(%u, %u) and (%u, %u)."
#define unreferenced_data_blocks_msg \
	"Dictionary contains unreferenced data blocks (offset, size):"
#define rdb_unreferenced_data_blocks_msg \
	"Resource database contains unreferenced data blocks (offset, size):"
#define data_block_no_fields_err \
	"Index item '%s'. No fields were extracted."
/*
 * Message templates about fields that list resources. All of them start with
 * the index item (%s) and the field type id (%c); most add the offending line
 * and/or the resource name as further %s conversions.
 * Presumably printf-style format strings -- confirm at the call sites.
 */
#define resource_not_found_msg \
	"Index item '%s'. Type id '%c'. The field refers to resource '%s', that is not found in resource storage."
#define resource_invalid_format_empty_line_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Empty resource line."
#define resource_invalid_format_colon_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. ':' is not found."
#define resource_invalid_format_type_blank_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Type is blank."
#define resource_invalid_format_key_blank_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key is blank."
#define resource_invalid_format_unknown_type_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Unknown type."
/* The '\\' in the literal is a single backslash in the resulting message.
 * NOTE(review): the identifier is spelled "spash"; kept as-is because code
 * outside this header may reference it. */
#define resource_invalid_format_back_spash_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key contains '\\' char."
/* Like resource_not_found_msg, with the offending line as an extra %s.
 * NOTE(review): the identifier is spelled "nof"; kept as-is for the same reason. */
#define resource_resource_nof_found_msg \
	"Index item '%s'. Type id '%c'. Line '%s'. The field refers to resource '%s', that is not found in resource storage."
#define resource_empty_list_msg \
	"Index item '%s'. Type id '%c'. Empty resource list."
/*
 * Message templates about duplicate (compressed and uncompressed) files and
 * about the resource database (the rdb_ prefix): its index, its dictionary
 * file, and whether resource storage loaded.
 * Those with conversions are presumably printf-style format strings --
 * confirm at the call sites.
 */
#define two_index_files_msg \
	"Two index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
#define two_dict_files_msg \
	"Two dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
#define rdb_filecnt_zero_err \
	"Resource database '%s'. No files. filecount = 0."
#define rdb_ridxfilesize_zero_err \
	"Resource database '%s'. Empty index file size. ridxfilesize = 0."
/* The '\\' in the literal is a single backslash in the resulting message. */
#define rdb_invalid_file_name_format_back_spash_err \
	"Index item '%s'. Found '\\' character. '/' must be used as directory separator."
#define rdb_invalid_file_name_format_abs_path_err \
	"Index item '%s'. File name must not start with directory separator '/'."
#define rdb_invalid_file_name_format_empty_dir_err \
	"Index item '%s'. Empty directory in file path: '//'."
#define rdb_incorrect_file_cnt \
	"Incorrect number of files: in .rifo file, filecount=%d, while the real file count is %d."
#define rdb_dict_file_not_found_err \
	"Unable to find resource dictionary file: '%s'. Error: %s."
#define rdb_loading_ridx_file_msg \
	"Loading resource index file: '%s'..."
#define rdb_loading_dict_file_msg \
	"Loading resource dictionary file: '%s'..."
/* Outcome messages for the two storage types named in the text
 * ("database" and "files"); none of them takes arguments. */
#define rdb_loaded_db_msg \
	"Resource storage loaded. Type - database."
#define rdb_load_db_failed_msg \
	"Resource storage load failed. Type - database."
#define rdb_loaded_files_msg \
	"Resource storage loaded. Type - files."
#define rdb_load_files_failed_msg \
	"Resource storage load failed. Type - files."
#define rdb_two_index_files_msg \
	"Two resource index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
#define rdb_two_dict_files_msg \
	"Two resource dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
250
/*
 * Messages describing how a detected problem was handled. All of the fixed_*
 * texts begin with "The problem was fixed." and take no arguments.
 */
#define fixed_ignore_field_msg \
	"The problem was fixed. Ignore the field."
/* Not a fixed_* message: describes a problem and takes one %s (the file name). */
#define duplicate_file_name \
	"Multiple index items with the same file name: '%s'."
#define fixed_accept_unknown_field_msg \
	"The problem was fixed. Accept unknown field type."
#define fixed_ignore_resource_line_msg \
	"The problem was fixed. Ignore the resource line."
#define fixed_ignore_file_tail_msg \
	"The problem was fixed. Ignore the tail of the file."
#define fixed_ignore_syn_file_msg \
	"The problem was fixed. Ignore the .syn file."
#define fixed_ignore_word_msg \
	"The problem was fixed. Ignore the key."
#define fixed_drop_invalid_char_msg \
	"The problem was fixed. Dropping invalid chars."
#define fixed_word_truncated_msg \
	"The problem was fixed. The key is truncated."
#define fixed_words_reordered_msg \
	"The problem was fixed. Key will be reordered."
#define fixed_process_syn_file_msg \
	"The problem was fixed. Process the .syn file."
#define fixed_data_block_size_change_msg \
	"The problem was fixed. Changed size of the data block."
#define fixed_change_field_size_msg \
	"The problem was fixed. Change field size."
#define fixed_field_take_longest_str_msg \
	"The problem was fixed. Take the longest string."
#define fixed_field_take_zero_term_str_msg \
	"The problem was fixed. Take a zero-terminated string."
#define fixed_trim_spaces \
	"The problem was fixed. Leading and trailing spaces trimmed."
#define fixed_utf8_drop_invalid_char_msg \
	"The problem was fixed. Dropping invalid UTF-8 characters."
285
286 #endif
287
288