1 /*
2  * Copyright 2011 kubtek <kubtek@mail.com>
3  *
4  * This file is part of StarDict.
5  *
6  * StarDict is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * StarDict is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef _LIBSTARDICTVERIFY_H_
21 #define _LIBSTARDICTVERIFY_H_
22 
23 #include <vector>
24 #include <algorithm>
25 #include "libcommon.h"
26 
27 #ifdef _WIN32
28 #  ifdef min
29 #    undef min
30 #  endif
31 #  ifdef max
32 #    undef max
33 #  endif
34 #endif
35 
/*
 * Severity of a verification finding. The enumerators are declared in order
 * of increasing severity; combine_result() depends on that order when it
 * keeps the greater of two values.
 */
enum VerifResult {
	VERIF_RESULT_OK, // no error
	VERIF_RESULT_NOTE, // minor issue, safe to ignore (for example, trailing spaces in a key word)
	VERIF_RESULT_WARNING, // important issue, may be ignored (for example, duplicate keys in the index referring to the same data)
	VERIF_RESULT_CRITICAL, // may be fixed, but cannot be ignored (for example, index entries are out of order)
	VERIF_RESULT_FATAL // cannot be fixed (for example, .idx file is missing)
};
43 
/*
 * Verify a dictionary and return the outcome as a VerifResult.
 * Only the declaration is visible here; the definition is not part of this
 * header.
 * NOTE(review): ifofilename is presumably the path of the dictionary's .ifo
 * file - confirm against the implementation.
 */
extern VerifResult stardict_verify(const char *ifofilename);
45 
/*
 * A contiguous region of a file, given by where it starts and how long it is.
 * verify_unused_regions() fills these in to report ranges that no index item
 * covers.
 */
struct region_t {
	guint32 offset; // start of the region
	guint32 size; // length of the region
};
50 
51 /* combine two verification results = the most serious error */
52 inline
combine_result(VerifResult a,VerifResult b)53 VerifResult combine_result(VerifResult a, VerifResult b)
54 {
55 	return std::max(a, b);
56 }
57 
/*
 * Report pairs of index items whose data blocks overlap without being the
 * very same block.
 *
 * sort_index         - items to examine; item_t must provide `offset` and
 *                      `size` members. The inner scan stops at the first item
 *                      that does not start inside item i, which presumes the
 *                      vector is ordered by ascending offset
 *                      (NOTE(review): confirm that callers sort it).
 * overlapping_blocks - output; for each offending pair the positions (i, j)
 *                      in sort_index, with i < j, are appended. Existing
 *                      contents are kept.
 *
 * A pair is not reported when both items describe exactly the same block, or
 * when the second item is empty (size == 0).
 */
template <class item_t>
void verify_data_blocks_overlapping(std::vector<item_t*>& sort_index,
	std::vector<std::pair<size_t, size_t> >& overlapping_blocks)
{
	const size_t count = sort_index.size();
	for(size_t i=0; i<count; ++i) {
		const item_t* const first = sort_index[i];
		for(size_t j=i+1; j<count; ++j) {
			const item_t* const second = sort_index[j];
			// Same test as "first->offset + first->size > second->offset",
			// rearranged so the sum is never formed: with fixed-width
			// unsigned fields (e.g. guint32) the sum could wrap around and
			// hide an overlap near the top of the value range.
			const bool starts_inside = second->offset < first->offset
				|| second->offset - first->offset < first->size;
			if(!starts_inside)
				break;
			if(first->offset == second->offset && first->size == second->size)
				continue; // the very same block referenced twice
			if(second->size == 0)
				continue; // an empty block is not reported
			overlapping_blocks.push_back(std::pair<size_t, size_t>(i, j));
		}
	}
}
74 
75 template <class item_t>
verify_unused_regions(std::vector<item_t * > & sort_index,std::vector<region_t> & unused_regions,guint32 filesize)76 void verify_unused_regions(std::vector<item_t*>& sort_index,
77 		std::vector<region_t>& unused_regions, guint32 filesize)
78 {
79 	region_t region;
80 	guint32 low_boundary=0;
81 	for(size_t i=0; i<sort_index.size(); ++i) {
82 		const guint32 l_left = sort_index[i]->offset;
83 		const guint32 l_right = sort_index[i]->offset + sort_index[i]->size;
84 		if(l_left < low_boundary) {
85 			if(l_right > low_boundary)
86 				low_boundary = l_right;
87 		} if(l_left == low_boundary) {
88 			low_boundary = l_right;
89 		} else { // gap found
90 			region.offset = low_boundary;
91 			region.size = l_left - low_boundary;
92 			unused_regions.push_back(region);
93 			low_boundary = l_right;
94 		}
95 	}
96 	if(low_boundary < filesize) {
97 		region.offset = low_boundary;
98 		region.size = filesize - low_boundary;
99 		unused_regions.push_back(region);
100 	}
101 }
102 
/*
 * Messages about individual index / synonym entries and the fields of their
 * data blocks. Every macro expands to a string literal; the ones containing
 * %s, %c or %d conversions are format strings, and the matching arguments
 * are supplied wherever the macro is used (call sites are outside this
 * header).
 */
#define index_file_truncated_err \
	"Index file is truncated, last record is truncated."
#define incorrect_data_block_size_err \
	"Index item '%s'. Fields do not fit into the data block, incorrect data block size."
#define empty_field_err \
	"Index item '%s'. Empty field in definition data block. Type ID '%c'."
#define invalid_utf8_field_err \
	"Index item '%s'. Invalid field. Type id = '%c'. Invalid utf8 string: '''\n%s\n'''"
#define invalid_utf8_index_item_err \
	"Index item '%s'. Invalid field. Invalid utf8 string: '''\n%s\n'''"
#define invalid_field_content_err \
	"Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''"
/* Shared tail reused by other messages below; it contributes one %s. */
#define invalid_chars_in_textual_data_msg \
	"The text contains either invalid Unicode characters " \
	"or Unicode characters not suitable for textual data (mainly control characters). " \
	"The following characters are prohibited: %s."
/* Adjacent literals are concatenated, so this format takes %s, %c, %s and
 * then the %s of invalid_chars_in_textual_data_msg. */
#define invalid_field_content_chars_err \
	"Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''\n"\
	invalid_chars_in_textual_data_msg
#define syn_file_truncated_err \
	"Synonyms file is truncated, last record is truncated."
#define unknown_type_id_err \
	"Index item '%s'. Unknown type identifier '%c'."
#define empty_word_err \
	"Blank key in index."
#define empty_file_name_err \
	"Blank file name in index."
#define long_word_err \
	"Index item '%s'. Key is too long. Maximum allowed length: %d, key length: %d."
#define word_begin_space_err \
	"Index item '%s'. Key begins with a space character."
#define word_end_space_err \
	"Index item '%s'. Key ends with a space character."
#define word_forbidden_chars_err \
	"Index item '''%s'''\nKey contains forbidden characters."
#define word_invalid_utf8_err \
	"Index item '%s'. Invalid utf8 string."
/* Takes %s for the item plus the %s of invalid_chars_in_textual_data_msg. */
#define word_invalid_char_value_err \
	"Index item '%s'. Invalid item name.\n" \
	invalid_chars_in_textual_data_msg
#define wrong_word_order_err \
	"Wrong key order, first key = '%s', second key = '%s'."
#define wrong_file_order_err \
	"Wrong file order, first file name = '%s', second file name = '%s'."
#define fields_extraction_faild_err \
	"Index item '%s'. Extraction of the fields failed."
/*
 * Messages about locating and loading the dictionary's files, and about
 * mismatches between the values recorded in the .ifo / .rifo file and what
 * the files really contain. Macros containing conversions (%s, %d, %u, %ld)
 * are format strings; the arguments come from wherever the macro is used.
 */
#define unsupported_file_type_err \
	"Unsupported file type. File must have 'ifo' extension. File: '%s'."
#define dictionary_no_loaded_err \
	"Dictionary is not loaded."
#define file_not_found_idx_err \
	"Unable to find index file: '%s'. Error: %s."
#define loading_idx_file_msg \
	"Loading index file: '%s'..."
#define incorrect_idx_file_size_err \
	"Incorrect size of the index file: in .ifo file, idxfilesize=%u, real file size is %u."
/* Note the conversions differ from incorrect_idx_file_size_err:
 * %d and %ld here versus %u and %u there. */
#define incorrect_ridx_file_size_err \
	"Incorrect size of the index file: in .rifo file, ridxfilesize=%d, real file size is %ld."
#define empty_block_err \
	"Index item '%s'. Data block size = 0."
#define incorrect_word_cnt_err \
	"Incorrect number of words: in .ifo file, wordcount=%d, while the real word count is %d."
#define incorrect_syn_word_cnt_err \
	"Incorrect number of words: in .ifo file, synwordcount=%d, while the real synwordcount is %d."
#define duplicate_index_item_err \
	"Multiple index items have the same key = '%s', offset = %d, size = %d."
#define duplicate_syn_item_err \
	"Multiple synonym items with the same key = '%s', index = %d."
#define syn_file_exist_msg \
	".syn file exists but there is no \"synwordcount=\" entry in .ifo file."
#define syn_file_no_found_msg \
	"Unable to find synonyms file '%s'. Error: %s."
#define loading_syn_file_msg \
	"Loading synonyms file: '%s'..."
#define wrong_index_err \
	"Index item '%s'. Wrong index of entry in the index file: %d."
#define load_syn_file_failed_err \
	"Loading synonyms file failed: '%s'."
#define dict_file_not_found_err \
	"Dictionary file does not exist: '%s'. Error: %s."
/* Despite the _err suffix the text is a progress message, like the
 * loading_*_msg macros above. */
#define loading_dict_file_err \
	"Loading dictionary file: '%s'..."
#define open_dict_file_failed_err \
	"Unable open dictionary file '%s'. Error: %s."
#define record_out_of_file_err \
	"Index item '%s'. Incorrect size, offset parameters. Referenced data block is outside dictionary file."
/*
 * Messages about data block layout (overlapping or unreferenced blocks),
 * about resource references found inside fields, and about a file being
 * present both compressed and uncompressed. Macros containing %s, %c or %u
 * conversions are format strings; the arguments come from wherever the macro
 * is used.
 */
/* Takes two item names followed by two (offset, size) pairs printed with %u. */
#define overlapping_data_blocks_msg \
	"Index item '%s' and index item '%s' refer to overlapping but not equal regions (offset, size): " \
	"(%u, %u) and (%u, %u)."
/* The two *_unreferenced_data_blocks_msg texts end with ':' and contain no
 * conversions; the (offset, size) values themselves are not part of them. */
#define unreferenced_data_blocks_msg \
	"Dictionary contains unreferenced data blocks (offset, size):"
#define rdb_unreferenced_data_blocks_msg \
	"Resource database contains unreferenced data blocks (offset, size):"
#define data_block_no_fields_err \
	"Index item '%s'. No fields were extracted."
#define resource_not_found_msg \
	"Index item '%s'. Type id '%c'. The field refers to resource '%s', that is not found in resource storage."
#define resource_invalid_format_empty_line_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Empty resource line."
#define resource_invalid_format_colon_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. ':' is not found."
#define resource_invalid_format_type_blank_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Type is blank."
#define resource_invalid_format_key_blank_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key is blank."
#define resource_invalid_format_unknown_type_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Unknown type."
#define resource_invalid_format_back_spash_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key contains '\\' char."
#define resource_resource_nof_found_msg \
	"Index item '%s'. Type id '%c'. Line '%s'. The field refers to resource '%s', that is not found in resource storage."
#define resource_empty_list_msg \
	"Index item '%s'. Type id '%c'. Empty resource list."
#define two_index_files_msg \
	"Two index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
#define two_dict_files_msg \
	"Two dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
/*
 * Messages specific to the resource storage (the rdb_ prefix): its .rifo
 * values, file names stored in its index, and loading progress / outcome.
 * Macros containing %s or %d conversions are format strings; the arguments
 * come from wherever the macro is used.
 */
#define rdb_filecnt_zero_err \
	"Resource database '%s'. No files. filecount = 0."
#define rdb_ridxfilesize_zero_err \
	"Resource database '%s'. Empty index file size. ridxfilesize = 0."
#define rdb_invalid_file_name_format_back_spash_err \
	"Index item '%s'. Found '\\' character. '/' must be used as directory separator."
#define rdb_invalid_file_name_format_abs_path_err \
	"Index item '%s'. File name must not start with directory separator '/'."
#define rdb_invalid_file_name_format_empty_dir_err \
	"Index item '%s'. Empty directory in file path: '//'."
#define rdb_incorrect_file_cnt \
	"Incorrect number of files: in .rifo file, filecount=%d, while the real file count is %d."
#define rdb_dict_file_not_found_err \
	"Unable to find resource dictionary file: '%s'. Error: %s."
#define rdb_loading_ridx_file_msg \
	"Loading resource index file: '%s'..."
#define rdb_loading_dict_file_msg \
	"Loading resource dictionary file: '%s'..."
#define rdb_loaded_db_msg \
	"Resource storage loaded. Type - database."
#define rdb_load_db_failed_msg \
	"Resource storage load failed. Type - database."
#define rdb_loaded_files_msg \
	"Resource storage loaded. Type - files."
#define rdb_load_files_failed_msg \
	"Resource storage load failed. Type - files."
#define rdb_two_index_files_msg \
	"Two resource index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
#define rdb_two_dict_files_msg \
	"Two resource dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
250 
/*
 * Messages stating how a reported problem was handled; each of the fixed_*
 * texts starts with "The problem was fixed." and names the action taken.
 * None of the fixed_* texts contains a conversion.
 */
#define fixed_ignore_field_msg \
	"The problem was fixed. Ignore the field."
/* Not a fixed_* message: an error text that takes one %s (the file name). */
#define duplicate_file_name \
	"Multiple index items with the same file name: '%s'."
#define fixed_accept_unknown_field_msg \
	"The problem was fixed. Accept unknown field type."
#define fixed_ignore_resource_line_msg \
	"The problem was fixed. Ignore the resource line."
#define fixed_ignore_file_tail_msg \
	"The problem was fixed. Ignore the tail of the file."
#define fixed_ignore_syn_file_msg \
	"The problem was fixed. Ignore the .syn file."
#define fixed_ignore_word_msg \
	"The problem was fixed. Ignore the key."
#define fixed_drop_invalid_char_msg \
	"The problem was fixed. Dropping invalid chars."
#define fixed_word_truncated_msg \
	"The problem was fixed. The key is truncated."
#define fixed_words_reordered_msg \
	"The problem was fixed. Key will be reordered."
#define fixed_process_syn_file_msg \
	"The problem was fixed. Process the .syn file."
#define fixed_data_block_size_change_msg \
	"The problem was fixed. Changed size of the data block."
#define fixed_change_field_size_msg \
	"The problem was fixed. Change field size."
#define fixed_field_take_longest_str_msg \
	"The problem was fixed. Take the longest string."
#define fixed_field_take_zero_term_str_msg \
	"The problem was fixed. Take a zero-terminated string."
#define fixed_trim_spaces \
	"The problem was fixed. Leading and trailing spaces trimmed."
#define fixed_utf8_drop_invalid_char_msg \
	"The problem was fixed. Dropping invalid UTF-8 characters."
285 
286 #endif
287 
288