1 /*
2  * Copyright 2011 kubtek <kubtek@mail.com>
3  *
4  * This file is part of StarDict.
5  *
6  * StarDict is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * StarDict is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 /*
21  * Implementation of class to work with standard StarDict's dictionaries
22  * lookup word, get articles and so on.
23  *
24  * Notice: read doc/StarDictFileFormat for the dictionary
25  * file's format information!
26  */
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30 
31 #include <cstring>
32 #include <glib.h>
33 #include <glib/gi18n.h>
34 #include <glib/gstdio.h>
35 #include <stdlib.h>
36 #include <algorithm>
37 #include <memory>
38 
39 #include "ifo_file.h"
40 #include "edit-distance.h"
41 //#include "kmp.h"
42 #include "mapfile.h"
43 #include "iappdirs.h"
44 
45 #include "stddict.h"
46 #include "utils.h"
47 
stardict_collate(const gchar * str1,const gchar * str2,CollateFunctions func)48 static gint stardict_collate(const gchar *str1, const gchar *str2, CollateFunctions func)
49 {
50 	gint x = utf8_collate(str1, str2, func);
51 	if (x == 0)
52 		return strcmp(str1, str2);
53 	else
54 		return x;
55 }
56 
stardict_server_collate(const gchar * str1,const gchar * str2,CollationLevelType CollationLevel,CollateFunctions func,int servercollatefunc)57 gint stardict_server_collate(const gchar *str1, const gchar *str2, CollationLevelType CollationLevel, CollateFunctions func, int servercollatefunc)
58 {
59 	if (CollationLevel == CollationLevel_NONE)
60 		return stardict_strcmp(str1, str2);
61 	if (CollationLevel == CollationLevel_SINGLE)
62 		return stardict_collate(str1, str2, func);
63 	if (servercollatefunc == 0)
64 		return stardict_strcmp(str1, str2);
65 	return stardict_collate(str1, str2, (CollateFunctions)(servercollatefunc-1));
66 }
67 
68 // not perfect case-insensitive comparison of strings
stardict_strcasecmp(const gchar * s1,const gchar * s2)69 static gint stardict_strcasecmp(const gchar *s1, const gchar *s2)
70 {
71 	gchar *sci1 = g_utf8_casefold(s1, -1);
72 	gchar *sci2 = g_utf8_casefold(s2, -1);
73 	gint res = g_utf8_collate(sci1, sci2);
74 	g_free(sci1);
75 	g_free(sci2);
76 	return res;
77 }
78 
stardict_casecmp(const gchar * s1,const gchar * s2,CollationLevelType CollationLevel,CollateFunctions func,int servercollatefunc)79 static gint stardict_casecmp(const gchar *s1, const gchar *s2, CollationLevelType CollationLevel, CollateFunctions func, int servercollatefunc)
80 {
81 	if (CollationLevel == CollationLevel_NONE)
82 		return stardict_strcasecmp(s1, s2);
83 	if (CollationLevel == CollationLevel_SINGLE)
84 		return utf8_collate(s1, s2, func);
85 	if (servercollatefunc == 0)
86 		return stardict_strcasecmp(s1, s2);
87 	return utf8_collate(s1, s2, (CollateFunctions)(servercollatefunc-1));
88 }
89 
90 /* return the length of the common prefix of two strings in characters
91  * comparison is case-insensitive */
prefix_match(const gchar * s1,const gchar * s2)92 static inline gint prefix_match(const gchar *s1, const gchar *s2)
93 {
94     if(!s1 || !s2)
95         return 0;
96     gint ret=-1;
97     gunichar u1, u2;
98     do {
99         u1 = g_utf8_get_char(s1);
100         u2 = g_utf8_get_char(s2);
101         s1 = g_utf8_next_char(s1);
102         s2 = g_utf8_next_char(s2);
103         ret++;
104     } while (u1 && g_unichar_tolower(u1) == g_unichar_tolower(u2));
105     return ret;
106 }
107 
108 /* check that string str has length allowed for index word
109  * strlen(str) < MAX_INDEX_KEY_SIZE
110  * This function does not read more than MAX_INDEX_KEY_SIZE or buf_size chars,
111  * which one is smaller.
112  * return value:
113  * true - ok,
114  * false - string length exceeded. */
check_key_str_len(const gchar * str,size_t buf_size)115 static bool check_key_str_len(const gchar* str, size_t buf_size)
116 {
117 	size_t max = MAX_INDEX_KEY_SIZE;
118 	if(buf_size < max)
119 		max = buf_size;
120 	for(size_t i = 0; i < max; ++i)
121 		if(!str[i])
122 			return true;
123 	return false;
124 }
125 
bIsVowel(gchar inputchar)126 static inline bool bIsVowel(gchar inputchar)
127 {
128   gchar ch = g_ascii_toupper(inputchar);
129   return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' );
130 }
131 
/* Index that is read from the index file page by page.
 * Words are grouped into pages of ENTR_PER_PAGE entries; the byte offset of
 * every page is kept in oft_file and only one page is held in memory
 * (see page / page_data). */
class offset_index : public index_file {
public:
	offset_index();
	~offset_index();
	/* url - path of the index file, wc - number of words in the index,
	 * fsize - size of the index file in bytes. Returns false on failure. */
	bool load(const std::string& url, gulong wc, gulong fsize,
		  bool CreateCacheFile, CollationLevelType CollationLevel,
		  CollateFunctions _CollateFunction, show_progress_t *sp);
	void get_data(glong idx);
	const gchar *get_key_and_data(glong idx);
private:
	const gchar *get_key(glong idx);
	bool lookup(const char *str, glong &idx, glong &idx_suggest);

	/* number of index entries on a full page */
	static const gint ENTR_PER_PAGE=32;

	/* oft_file.get_wordoffset(page_num) - offset of the first element on the page
	 * number page_num. 0<= page_num <= npages-2
	 * oft_file.get_wordoffset(npages-1) - offset just past the last element
	 * in the index file
	 * oft_file.get_wordoffset(page_num+1) - oft_file.get_wordoffset(page_num)
	 * - size of data on the page number page_num, in bytes. */
	cache_file oft_file;
	/* the index file, opened for reading by load(), closed by the destructor */
	FILE *idxfile;
	/* number of pages = ((wordcount-1)/ENTR_PER_PAGE) + 2
	 * The page number npages-2 always contains at least one element.
	 * It may contain from 1 to ENTR_PER_PAGE elements.
	 * To be exact it contains nentr elements, that may be calculated as follows:
	 * nentr = wordcount%ENTR_PER_PAGE;
	 * if(nentr == 0)
	 *   nentr = ENTR_PER_PAGE;
	 * The page number npages-1 (the last) is always empty. */
	gulong npages;

	// Buffer for one key plus its two guint32 fields (data offset and size),
	// filled by read_first_on_page_key().
	// The length of "word_str" should be less than MAX_INDEX_KEY_SIZE.
	// See doc/StarDictFileFormat.
	gchar wordentry_buf[MAX_INDEX_KEY_SIZE+sizeof(guint32)*2];
	/* a remembered key together with its position */
	struct index_entry {
		glong idx; // page number (for real_last: index of the word)
		std::string keystr;
		void assign(glong i, const std::string& str) {
			idx=i;
			keystr.assign(str);
		}
	};
	/* first - first word on the first page - first word in the index
	 * last - first word on the pre-last page (last page addressing real data)
	 * middle - first word on the middle page
	 * real_last - last word in the index */
	index_entry first, last, middle, real_last;

	/* one parsed index entry; keystr points into page_data */
	struct page_entry {
		gchar *keystr;
		guint32 off, size;
	};
	/* raw bytes of the page that is currently loaded */
	std::vector<gchar> page_data;
	/* the currently loaded page in parsed form */
	struct page_t {
		glong idx; // number of the loaded page, -1 - nothing loaded yet
		page_entry entries[ENTR_PER_PAGE];

		page_t(): idx(-1) {}
		void fill(gchar *data, gint nent, glong idx_);
	} page;
	/* make page number page_idx the current page, returns the number of
	 * entries on that page */
	gulong load_page(glong page_idx);
	const gchar *read_first_on_page_key(glong page_idx);
	const gchar *get_first_on_page_key(glong page_idx);
};
198 
/* class for compressed index (file ends with ".gz")
 * The whole index is kept uncompressed in memory. */
class compressed_index : public index_file {
public:
	compressed_index();
	~compressed_index();
	/* url - path of the compressed index file, wc - number of words in the
	 * index, fsize - size of the uncompressed index in bytes. */
	bool load(const std::string& url, gulong wc, gulong fsize,
		  bool CreateCacheFile, CollationLevelType CollationLevel,
		  CollateFunctions _CollateFunction, show_progress_t *sp);
	void get_data(glong idx);
	const gchar *get_key_and_data(glong idx);
private:
	const gchar *get_key(glong idx);
	bool lookup(const char *str, glong &idx, glong &idx_suggest);

	/* whole uncompressed index file in memory, released with g_free() in the
	 * destructor */
	gchar *idxdatabuf;
	/* pointers to the words-keys in idxdatabuf. Each word is '\0'-terminated and
	 * followed by data offset and size. See ".idx" file format.
	 * wordlist.size() == number of words + 1 */
	std::vector<gchar *> wordlist;
};
220 
offset_index()221 offset_index::offset_index() : oft_file(CacheFileType_oft, COLLATE_FUNC_NONE)
222 {
223 	idxfile = NULL;
224 	npages = 0;
225 }
226 
~offset_index()227 offset_index::~offset_index()
228 {
229 	if (idxfile)
230 		fclose(idxfile);
231 }
232 
fill(gchar * data,gint nent,glong idx_)233 void offset_index::page_t::fill(gchar *data, gint nent, glong idx_)
234 {
235 	idx=idx_;
236 	gchar *p=data;
237 	glong len;
238 	for (gint i=0; i<nent; ++i) {
239 		entries[i].keystr=p;
240 		len=strlen(p);
241 		p+=len+1;
242 		entries[i].off=g_ntohl(get_uint32(p));
243 		p+=sizeof(guint32);
244 		entries[i].size=g_ntohl(get_uint32(p));
245 		p+=sizeof(guint32);
246 	}
247 }
248 
read_first_on_page_key(glong page_idx)249 inline const gchar *offset_index::read_first_on_page_key(glong page_idx)
250 {
251 	g_assert(gulong(page_idx+1) < npages);
252 	fseek(idxfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
253 	guint32 page_size=oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx);
254 	gulong minsize = sizeof(wordentry_buf);
255 	if (page_size < minsize) {
256 		minsize = page_size;
257 	}
258 	size_t fread_size;
259 	fread_size = fread(wordentry_buf, minsize, 1, idxfile);
260 	if (fread_size != 1) {
261 		g_print("fread error!\n");
262 	}
263 	if(!check_key_str_len(wordentry_buf, minsize)) {
264 		wordentry_buf[minsize-1] = '\0';
265 		g_critical("Index key length exceeds allowed limit. Key: %s, "
266 			"max length = %i", wordentry_buf, MAX_INDEX_KEY_SIZE - 1);
267 		return NULL;
268 	}
269 	return wordentry_buf;
270 }
271 
get_first_on_page_key(glong page_idx)272 inline const gchar *offset_index::get_first_on_page_key(glong page_idx)
273 {
274 	if (page_idx<middle.idx) {
275 		if (page_idx==first.idx)
276 			return first.keystr.c_str();
277 		return read_first_on_page_key(page_idx);
278 	} else if (page_idx>middle.idx) {
279 		if (page_idx==last.idx)
280 			return last.keystr.c_str();
281 		return read_first_on_page_key(page_idx);
282 	} else
283 		return middle.keystr.c_str();
284 }
285 
cache_file(CacheFileType _cachefiletype,CollateFunctions _cltfunc)286 cache_file::cache_file(CacheFileType _cachefiletype, CollateFunctions _cltfunc)
287 {
288 	wordoffset = NULL;
289 	npages = 0;
290 	mf = NULL;
291 	cachefiletype = _cachefiletype;
292 	cltfunc = _cltfunc;
293 }
294 
295 
~cache_file()296 cache_file::~cache_file()
297 {
298 	if (mf)
299 		delete mf;
300 	else
301 		g_free(wordoffset);
302 }
303 
/* Text that follows the offsets table in a cache file and identifies the
 * kind of the cache file. */
#define OFFSETFILE_MAGIC_DATA "StarDict's oft file\nversion=2.4.8\n"
#define COLLATIONFILE_MAGIC_DATA "StarDict's clt file\nversion=2.4.8\n"

/* Find a valid cache file for the dictionary file saveurl and map it.
 *
 * Cache file layout, as written by save_cache():
 *   guint32 number of offsets, the offsets (guint32 each), magic text,
 *   "url=<path>", for CacheFileType_clt "\nfunc=<number>", "\n", '\0'.
 *
 * filename - cache file to try first
 * url - file the cache was built for, used for the modification time check
 * saveurl - path that must be recorded in the cache file
 * filedatasize - expected size of the offsets table, in bytes
 * next - number to use for the next candidate file name
 *
 * If filename is a cache file of the right kind but for another saveurl,
 * the next candidate name (see get_next_filename) in the same directory is
 * tried recursively. The search ends at the first name that does not exist.
 *
 * Returns the mapped file, owned by the caller, or NULL if no usable cache
 * file was found. */
MapFile* cache_file::find_and_load_cache_file(const gchar *filename,
	const std::string &url, const std::string &saveurl,
	glong filedatasize, int next) const
{
	stardict_stat_t cachestat;
	if (g_stat(filename, &cachestat)!=0)
		return NULL;
	std::auto_ptr<MapFile> mf(new MapFile);
	if (!mf->open(filename, cachestat.st_size))
		return NULL;
	// size of the count field plus the offsets table
	guint32  word_off_size = (get_uint32(mf->begin()) + 1) * sizeof(guint32);
	// the text part must exist and must be '\0'-terminated, the string
	// functions below rely on that
	if (word_off_size >= static_cast<guint32>(cachestat.st_size) ||
	    *(mf->begin() + cachestat.st_size - 1) != '\0')
		return NULL;

	gchar *p = mf->begin() + word_off_size;
	gboolean has_prefix;
	if (cachefiletype == CacheFileType_oft)
		has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
	else
		has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
	if (!has_prefix)
		return NULL;
	// stop on the last '\n' of the magic text, so that "\nurl=" can match
	// right after it
	if (cachefiletype == CacheFileType_oft)
		p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
	else
		p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
	gchar *p2;
	p2 = strstr(p, "\nurl=");
	if (!p2)
		return NULL;
	p2+=sizeof("\nurl=")-1;
	gchar *p3;
	p3 = strchr(p2, '\n');
	if (!p3)
		return NULL;
	// path recorded in the cache file
	std::string tmpstr(p2, p3-p2);
#ifdef _WIN32
	tmpstr = abs_path_to_data_dir(tmpstr);
#endif
	if (is_equal_paths(saveurl, tmpstr)) {
		// the cache file belongs to our dictionary, check that it is usable
		if (cachefiletype == CacheFileType_clt) {
			// it must have been built with the same collation function
			p2 = strstr(p, "\nfunc=");
			if (!p2)
				return NULL;
			p2 += sizeof("\nfunc=")-1;
			p3 = strchr(p2, '\n');
			if (!p3)
				return NULL;
			tmpstr.assign(p2, p3-p2);
			if (atoi(tmpstr.c_str())!=cltfunc)
				return NULL;
		}

		// file size = offsets table + count field + text part + '\0'
		if (static_cast<gulong>(cachestat.st_size)
			!= static_cast<gulong>(filedatasize + sizeof(guint32) + strlen(mf->begin() + word_off_size) +1))
			return NULL;
		// the cache must not be older than the file it was built for
		stardict_stat_t idxstat;
		if (g_stat(url.c_str(), &idxstat)!=0)
			return NULL;
		if (cachestat.st_mtime<idxstat.st_mtime)
			return NULL;
		//g_print("Using map file: %s\n", filename);
		return mf.release();
	}
	// the cache file belongs to another dictionary, drop it and try the
	// next candidate name
	mf.reset();
	glib::CharStr basename(g_path_get_basename(saveurl.c_str()));
	// split the base name of saveurl at the last '.'
	p = strrchr(get_impl(basename), '.');
	if (!p)
		return NULL;
	*p='\0';
	gchar *extendname = p+1;
	glib::CharStr dirname(g_path_get_dirname(filename));
	glib::CharStr nextfilename(get_next_filename(get_impl(dirname),
		get_impl(basename), next, extendname));
	return find_and_load_cache_file(get_impl(nextfilename), url, saveurl, filedatasize, next+1);
}
384 
load_cache(const std::string & url,const std::string & saveurl,glong filedatasize)385 bool cache_file::load_cache(const std::string& url, const std::string& saveurl,
386 	glong filedatasize)
387 {
388 	g_assert(!wordoffset);
389 	std::string oftfilename;
390 	build_primary_cache_filename(saveurl, oftfilename);
391 	/* First search the file in the dictionary directory, then in the cache
392 	 * directory. */
393 	for (int i=0; i<2; i++) {
394 		if (i==1) {
395 			if (!build_primary_cache_filename_in_user_cache(saveurl, oftfilename, false))
396 				break;
397 		}
398 		mf = find_and_load_cache_file(oftfilename.c_str(), url, saveurl, filedatasize, 2);
399 		if (!mf)
400 			continue;
401 		wordoffset = reinterpret_cast<guint32 *>(mf->begin()) + 1;
402 		npages = get_uint32(mf->begin());
403 		return true;
404 	}
405 	return false;
406 }
407 
build_primary_cache_filename_in_user_cache(const std::string & url,std::string & cachefilename,bool create) const408 bool cache_file::build_primary_cache_filename_in_user_cache(const std::string& url, std::string &cachefilename, bool create) const
409 {
410 	const std::string cache_dir(app_dirs->get_user_cache_dir());
411 	if (create) {
412 		if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) {
413 			if (-1 == g_mkdir_with_parents(cache_dir.c_str(), 0700))
414 				return false;
415 		}
416 	}
417 	if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR))
418 		return false;
419 
420 	gchar *base=g_path_get_basename(url.c_str());
421 	build_primary_cache_filename(build_path(cache_dir, base), cachefilename);
422 	g_free(base);
423 	return true;
424 }
425 
/* Find the cache file that may be (over)written for the dictionary file
 * saveurl and open it for writing.
 *
 * filename - cache file to try first
 * saveurl - path the cache file is registered for
 * next - number to use for the next candidate file name
 * cfilename - receives the name of the last file that was considered
 *
 * The file filename is opened (and truncated) if it does not exist, cannot
 * be mapped, is not a valid cache file of this kind, or is a cache file for
 * saveurl. If it is a valid cache file for another path, it is left alone
 * and the next candidate name (see get_next_filename) is tried recursively.
 *
 * Returns the opened stream, to be closed by the caller, or NULL on failure. */
FILE* cache_file::find_and_open_for_overwrite_cache_file(const gchar *filename, const std::string &saveurl, int next, std::string &cfilename) const
{
	cfilename = filename;
	stardict_stat_t oftstat;
	if (g_stat(filename, &oftstat)!=0) {
		return fopen(filename, "wb");
	}
	MapFile mf;
	if (!mf.open(filename, oftstat.st_size)) {
		return fopen(filename, "wb");
	}
	// size of the count field plus the offsets table
	guint32  word_off_size = (get_uint32(mf.begin()) + 1) * sizeof(guint32);
	// the text part must exist and must be '\0'-terminated, the string
	// functions below rely on that
	if (word_off_size >= static_cast<guint32>(oftstat.st_size) ||
	    *(mf.begin() + oftstat.st_size - 1) != '\0')
		return fopen(filename, "wb");

	gchar *p = mf.begin() + word_off_size;
	bool has_prefix;
	if (cachefiletype == CacheFileType_oft)
		has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
	else
		has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
	if (!has_prefix) {
		return fopen(filename, "wb");
	}
	// stop on the last '\n' of the magic text, so that "\nurl=" can match
	// right after it
	if (cachefiletype == CacheFileType_oft)
		p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
	else
		p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
	gchar *p2;
	p2 = strstr(p, "\nurl=");
	if (!p2) {
		return fopen(filename, "wb");
	}
	p2+=sizeof("\nurl=")-1;
	gchar *p3;
	p3 = strchr(p2, '\n');
	if (!p3) {
		return fopen(filename, "wb");
	}
	// path recorded in the cache file
	std::string tmpstr(p2, p3-p2);
#ifdef _WIN32
	tmpstr = abs_path_to_data_dir(tmpstr);
#endif
	if (is_equal_paths(saveurl, tmpstr)) {
		// our own, outdated cache file - overwrite it
		return fopen(filename, "wb");
	}
	// the file is a cache for another dictionary, keep it and try the next
	// candidate name
	mf.close();
	glib::CharStr basename(g_path_get_basename(saveurl.c_str()));
	// split the base name of saveurl at the last '.'
	p = strrchr(get_impl(basename), '.');
	if (!p)
		return NULL;
	*p='\0';
	gchar *extendname = p+1;
	glib::CharStr dirname(g_path_get_dirname(filename));
	glib::CharStr nextfilename(get_next_filename(get_impl(dirname),
		get_impl(basename), next, extendname));
	return find_and_open_for_overwrite_cache_file(get_impl(nextfilename), saveurl, next+1, cfilename);
}
485 
save_cache(const std::string & saveurl) const486 bool cache_file::save_cache(const std::string& saveurl) const
487 {
488 	std::string oftfilename;
489 	build_primary_cache_filename(saveurl, oftfilename);
490 	for (int i=0;i<2;i++) {
491 		if (i==1) {
492 			if (!build_primary_cache_filename_in_user_cache(saveurl, oftfilename, true))
493 				break;
494 		}
495 		std::string cfilename;
496 		FILE *out= find_and_open_for_overwrite_cache_file(oftfilename.c_str(), saveurl, 2, cfilename);
497 		if (!out)
498 			continue;
499 		guint32 nentries = npages;
500 		fwrite(&nentries, sizeof(nentries), 1, out);
501 		fwrite(wordoffset, sizeof(guint32), npages, out);
502 		if (cachefiletype == CacheFileType_oft)
503 			fwrite(OFFSETFILE_MAGIC_DATA, 1, sizeof(OFFSETFILE_MAGIC_DATA)-1, out);
504 		else
505 			fwrite(COLLATIONFILE_MAGIC_DATA, 1, sizeof(COLLATIONFILE_MAGIC_DATA)-1, out);
506 		fwrite("url=", 1, sizeof("url=")-1, out);
507 #ifdef _WIN32
508 		const std::string url_rel(rel_path_to_data_dir(saveurl));
509 		fwrite(url_rel.c_str(), 1, url_rel.length(), out);
510 #else
511 		fwrite(saveurl.c_str(), 1, saveurl.length(), out);
512 #endif
513 		if (cachefiletype == CacheFileType_clt) {
514 #ifdef _MSC_VER
515 			fprintf_s(out, "\nfunc=%d", cltfunc);
516 #else
517 			fprintf(out, "\nfunc=%d", cltfunc);
518 #endif
519 		}
520 		fwrite("\n", 1, 2, out);
521 		fclose(out);
522 		g_print("Save cache file: %s\n", cfilename.c_str());
523 		return true;
524 	}
525 	return false;
526 }
527 
allocate_wordoffset(size_t _npages)528 void cache_file::allocate_wordoffset(size_t _npages)
529 {
530 	g_assert(!wordoffset);
531 	if(mf) {
532 		delete mf;
533 		mf = NULL;
534 	}
535 	wordoffset = (guint32 *)g_malloc(_npages * sizeof(guint32));
536 	npages = _npages;
537 }
538 
/* Build an alternative cache file name:
 *   <dirname>/<basename>(<num>).<extendname>.oft         for CacheFileType_oft
 *   <dirname>/<basename>(<num>).<extendname>.clt         for CacheFileType_clt
 *   <dirname>/<basename>(<num>).<extendname>.<func>.clt  otherwise
 * where <func> is the number of the collation function.
 * Returns a newly allocated string, free it with g_free(). */
gchar *cache_file::get_next_filename(
	const gchar *dirname, const gchar *basename, int num,
	const gchar *extendname) const
{
	switch (cachefiletype) {
	case CacheFileType_oft:
		return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.oft", dirname, basename, num, extendname);
	case CacheFileType_clt:
		return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.clt", dirname, basename, num, extendname);
	default:
		return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.%d.clt", dirname, basename, num, extendname, cltfunc);
	}
}
550 
build_primary_cache_filename(const std::string & url,std::string & filename) const551 void cache_file::build_primary_cache_filename(const std::string &url,
552 	std::string &filename) const
553 {
554 	if (cachefiletype == CacheFileType_oft) {
555 		filename=url+".oft";
556 	} else if (cachefiletype == CacheFileType_clt) {
557 		filename=url+".clt";
558 	} else {
559 		gchar *func = g_strdup_printf("%d", cltfunc);
560 		filename=url+'.'+func+".clt";
561 		g_free(func);
562 	}
563 }
564 
/* Create a collation-ordered view of the index _idx_file.
 * _cachefiletype - kind of the cache file, must be CacheFileType_clt or
 * CacheFileType_server_clt
 * _CollateFunction - collation function that defines the order */
collation_file::collation_file(idxsyn_file *_idx_file, CacheFileType _cachefiletype,
	CollateFunctions _CollateFunction)
: cache_file(_cachefiletype, _CollateFunction),
	idx_file(_idx_file)
{
	// an ".oft" cache makes no sense for a collation
	g_assert(_cachefiletype == CacheFileType_clt || _cachefiletype == CacheFileType_server_clt);

}
573 
GetWord(glong idx)574 const gchar *collation_file::GetWord(glong idx)
575 {
576 	return idx_file->get_key(get_wordoffset(idx));
577 }
578 
GetOrigIndex(glong cltidx)579 glong collation_file::GetOrigIndex(glong cltidx)
580 {
581 	return get_wordoffset(cltidx);
582 }
583 
lookup(const char * sWord,glong & idx,glong & idx_suggest)584 bool collation_file::lookup(const char *sWord, glong &idx, glong &idx_suggest)
585 {
586 	bool bFound=false;
587 	glong iTo=idx_file->get_word_count()-1;
588 	if (stardict_collate(sWord, GetWord(0), get_CollateFunction())<0) {
589 		idx = 0;
590 		idx_suggest = 0;
591 	} else if (stardict_collate(sWord, GetWord(iTo), get_CollateFunction()) >0) {
592 		idx = INVALID_INDEX;
593 		idx_suggest = iTo;
594 	} else {
595 		glong iThisIndex=0;
596 		glong iFrom=0;
597 		gint cmpint;
598 		while (iFrom<=iTo) {
599 			iThisIndex=(iFrom+iTo)/2;
600 			cmpint = stardict_collate(sWord, GetWord(iThisIndex), get_CollateFunction());
601 			if (cmpint>0)
602 				iFrom=iThisIndex+1;
603 			else if (cmpint<0)
604 				iTo=iThisIndex-1;
605 			else {
606 				bFound=true;
607 				break;
608 			}
609 		}
610 		if (!bFound) {
611 			idx = iFrom;    //next
612 			idx_suggest = iFrom;
613 			gint best, back;
614 			best = prefix_match (sWord, GetWord(idx_suggest));
615 			for (;;) {
616 				if ((iTo=idx_suggest-1) < 0)
617 					break;
618 				back = prefix_match (sWord, GetWord(iTo));
619 				if (!back || back < best)
620 					break;
621 				best = back;
622 				idx_suggest = iTo;
623 			}
624 		} else {
625 			idx = iThisIndex;
626 			idx_suggest = iThisIndex;
627 		}
628 	}
629 	return bFound;
630 }
631 
/* Data passed to sort_collation_index() through its user_data parameter. */
struct sort_collation_index_user_data {
	idxsyn_file *idx_file; // index the sorted numbers refer to
	CollateFunctions cltfunc; // collation function that defines the order
};
636 
sort_collation_index(gconstpointer a,gconstpointer b,gpointer user_data)637 static gint sort_collation_index(gconstpointer a, gconstpointer b, gpointer user_data)
638 {
639 	sort_collation_index_user_data *data = (sort_collation_index_user_data*)user_data;
640 	gchar *str1 = g_strdup(data->idx_file->get_key(*((guint32 *)a)));
641 	const gchar *str2 = data->idx_file->get_key(*((guint32 *)b));
642 	gint x = stardict_collate(str1, str2, data->cltfunc);
643 	g_free(str1);
644 	if (x==0)
645 		return *((guint32 *)a) - *((guint32 *)b);
646 	else
647 		return x;
648 }
649 
idxsyn_file()650 idxsyn_file::idxsyn_file()
651 :
652 	clt_file(NULL),
653 	wordcount(0)
654 {
655 	memset(clt_files, 0, sizeof(clt_files));
656 }
657 
~idxsyn_file()658 idxsyn_file::~idxsyn_file()
659 {
660 	delete clt_file;
661 	for(size_t i=0; i<COLLATE_FUNC_NUMS; ++i)
662 		delete clt_files[i];
663 }
664 
getWord(glong idx,CollationLevelType CollationLevel,int servercollatefunc)665 const gchar *idxsyn_file::getWord(glong idx, CollationLevelType CollationLevel, int servercollatefunc)
666 {
667 	if (CollationLevel == CollationLevel_NONE)
668 		return get_key(idx);
669 	if (CollationLevel == CollationLevel_SINGLE)
670 		return clt_file->GetWord(idx);
671 	if (servercollatefunc == 0)
672 		return get_key(idx);
673 	collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
674 	return clt_files[servercollatefunc-1]->GetWord(idx);
675 }
676 
Lookup(const char * str,glong & idx,glong & idx_suggest,CollationLevelType CollationLevel,int servercollatefunc)677 bool idxsyn_file::Lookup(const char *str, glong &idx, glong &idx_suggest, CollationLevelType CollationLevel, int servercollatefunc)
678 {
679 	if (CollationLevel == CollationLevel_NONE)
680 		return lookup(str, idx, idx_suggest);
681 	if (CollationLevel == CollationLevel_SINGLE)
682 		return clt_file->lookup(str, idx, idx_suggest);
683 	if (servercollatefunc == 0)
684 		return lookup(str, idx, idx_suggest);
685 	collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
686 	return clt_files[servercollatefunc-1]->lookup(str, idx, idx_suggest);
687 }
688 
collate_save_info(const std::string & _url,const std::string & _saveurl)689 void idxsyn_file::collate_save_info(const std::string& _url, const std::string& _saveurl)
690 {
691 	url = _url;
692 	saveurl = _saveurl;
693 }
694 
collate_load(CollateFunctions collf,CollationLevelType CollationLevel,show_progress_t * sp)695 void idxsyn_file::collate_load(CollateFunctions collf, CollationLevelType CollationLevel, show_progress_t *sp)
696 {
697 	g_assert(CollationLevel == CollationLevel_SINGLE || CollationLevel == CollationLevel_MULTI);
698 	if(CollationLevel == CollationLevel_SINGLE) {
699 		if(clt_file)
700 			return;
701 		clt_file = collate_load_impl(url, saveurl, collf, sp, CacheFileType_clt);
702 	} else if(CollationLevel == CollationLevel_MULTI) {
703 		if (clt_files[collf])
704 			return;
705 		clt_files[collf] = collate_load_impl(url, saveurl, collf, sp, CacheFileType_server_clt);
706 	}
707 }
708 
collate_load_impl(const std::string & _url,const std::string & _saveurl,CollateFunctions collf,show_progress_t * sp,CacheFileType CacheType)709 collation_file * idxsyn_file::collate_load_impl(
710 	const std::string& _url, const std::string& _saveurl,
711 	CollateFunctions collf, show_progress_t *sp, CacheFileType CacheType)
712 {
713 	collation_file * _clt_file = new collation_file(this, CacheType, collf);
714 	if (!_clt_file->load_cache(_url, _saveurl, wordcount*sizeof(guint32))) {
715 		if(sp)
716 			sp->notify_about_start(_("Sorting, please wait..."));
717 		_clt_file->allocate_wordoffset(wordcount);
718 		for (glong i=0; i<wordcount; i++)
719 			_clt_file->get_wordoffset(i) = i;
720 		sort_collation_index_user_data data;
721 		data.idx_file = this;
722 		data.cltfunc = collf;
723 		g_qsort_with_data(_clt_file->get_wordoffset(), wordcount, sizeof(guint32), sort_collation_index, &data);
724 		if (!_clt_file->save_cache(_saveurl))
725 			g_printerr("Cache update failed.\n");
726 	}
727 	return _clt_file;
728 }
729 
load(const std::string & url,gulong wc,gulong fsize,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions _CollateFunction,show_progress_t * sp)730 bool offset_index::load(const std::string& url, gulong wc, gulong fsize,
731 			bool CreateCacheFile, CollationLevelType CollationLevel,
732 			CollateFunctions _CollateFunction, show_progress_t *sp)
733 {
734 	wordcount=wc;
735 	npages=(wc-1)/ENTR_PER_PAGE+2;
736 	if (!oft_file.load_cache(url, url, npages*sizeof(guint32))) {
737 		MapFile map_file;
738 		if (!map_file.open(url.c_str(), fsize))
739 			return false;
740 		const gchar *idxdatabuffer=map_file.begin();
741 		/* oft_file.wordoffset[i] holds offset of the i-th page in the index file */
742 		oft_file.allocate_wordoffset(npages);
743 		const gchar *p1 = idxdatabuffer;
744 		gulong index_size;
745 		guint32 j=0;
746 		for (guint32 i=0; i<wc; i++) {
747 			index_size=strlen(p1) +1 + 2*sizeof(guint32);
748 			if (i % ENTR_PER_PAGE==0) {
749 				oft_file.get_wordoffset(j)=p1-idxdatabuffer;
750 				++j;
751 			}
752 			p1 += index_size;
753 		}
754 		oft_file.get_wordoffset(j)=p1-idxdatabuffer;
755 		map_file.close();
756 		if (CreateCacheFile) {
757 			if (!oft_file.save_cache(url))
758 				g_printerr("Cache update failed.\n");
759 		}
760 	}
761 
762 	if (!(idxfile = fopen(url.c_str(), "rb"))) {
763 		return false;
764 	}
765 
766 	first.assign(0, read_first_on_page_key(0));
767 	last.assign(npages-2, read_first_on_page_key(npages-2));
768 	middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
769 	real_last.assign(wc-1, get_key(wc-1));
770 
771 	if (CollationLevel == CollationLevel_NONE) {
772 	} else if (CollationLevel == CollationLevel_SINGLE) {
773 		collate_save_info(url, url);
774 		collate_load(_CollateFunction, CollationLevel_SINGLE, sp);
775 	} else if (CollationLevel == CollationLevel_MULTI) {
776 		collate_save_info(url, url);
777 	}
778 
779 	return true;
780 }
781 
load_page(glong page_idx)782 inline gulong offset_index::load_page(glong page_idx)
783 {
784 	gulong nentr=ENTR_PER_PAGE;
785 	if (page_idx==glong(npages-2))
786 		if ((nentr=wordcount%ENTR_PER_PAGE)==0)
787 			nentr=ENTR_PER_PAGE;
788 
789 
790 	if (page_idx!=page.idx) {
791 		page_data.resize(oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx));
792 		fseek(idxfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
793 		size_t fread_size;
794 		size_t page_data_size = page_data.size();
795 		fread_size = fread(&page_data[0], 1, page_data_size, idxfile);
796 		if (fread_size != page_data_size) {
797 			g_print("fread error!\n");
798 		}
799 		page.fill(&page_data[0], nentr, page_idx);
800 	}
801 
802 	return nentr;
803 }
804 
get_key(glong idx)805 const gchar *offset_index::get_key(glong idx)
806 {
807 	load_page(idx/ENTR_PER_PAGE);
808 	glong idx_in_page=idx%ENTR_PER_PAGE;
809 	wordentry_offset=page.entries[idx_in_page].off;
810 	wordentry_size=page.entries[idx_in_page].size;
811 
812 	return page.entries[idx_in_page].keystr;
813 }
814 
get_data(glong idx)815 void offset_index::get_data(glong idx)
816 {
817 	get_key(idx);
818 }
819 
get_key_and_data(glong idx)820 const gchar *offset_index::get_key_and_data(glong idx)
821 {
822 	return get_key(idx);
823 }
824 
825 /* Search for string str.
826  * Returns true if the string is found and false otherwise.
827  * If the string is found, idx - index of the search string.
828  * If the string is not found, idx - index of the "next" item in the index.
829  * idx == INVALID_INDEX if the search word is greater then the last word of
830  * the index.
831  * idx_suggest - index of the closest word in the index.
832  * It's always a valid index. */
lookup(const char * str,glong & idx,glong & idx_suggest)833 bool offset_index::lookup(const char *str, glong &idx, glong &idx_suggest)
834 {
835 	bool bFound=false;
836 	glong iFrom;
837 	glong iTo=npages-2;
838 	gint cmpint;
839 	glong iThisIndex;
840 	if (stardict_strcmp(str, first.keystr.c_str())<0) {
841 		idx = 0;
842 		idx_suggest = 0;
843 		return false;
844 	} else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
845 		idx = INVALID_INDEX;
846 		idx_suggest = wordcount-1;
847 		return false;
848 	} else {
849 		// find the page number where the search word might be
850 		iFrom=0;
851 		iThisIndex=0;
852 		while (iFrom<=iTo) {
853 			iThisIndex=(iFrom+iTo)/2;
854 			cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
855 			if (cmpint>0)
856 				iFrom=iThisIndex+1;
857 			else if (cmpint<0)
858 				iTo=iThisIndex-1;
859 			else {
860 				bFound=true;
861 				break;
862 			}
863 		}
864 		if (!bFound) {
865 			idx = iTo;    //prev
866 		} else {
867 			idx = iThisIndex;
868 		}
869 	}
870 	if (!bFound) {
871 		// the search word is on the page number idx if it's anywhere
872 		gulong netr=load_page(idx);
873 		iFrom=1; // Needn't search the first word anymore.
874 		iTo=netr-1;
875 		iThisIndex=0;
876 		while (iFrom<=iTo) {
877 			iThisIndex=(iFrom+iTo)/2;
878 			cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
879 			if (cmpint>0)
880 				iFrom=iThisIndex+1;
881 			else if (cmpint<0)
882 				iTo=iThisIndex-1;
883 			else {
884 				bFound=true;
885 				break;
886 			}
887 		}
888 		idx*=ENTR_PER_PAGE;
889 		if (!bFound) {
890 			idx += iFrom;    //next
891 			idx_suggest = idx;
892 			gint best, back;
893 			best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
894 			for (;;) {
895 				if ((iTo=idx_suggest-1) < 0)
896 					break;
897 				if (idx_suggest % ENTR_PER_PAGE == 0)
898 					load_page(iTo / ENTR_PER_PAGE);
899 				back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
900 				if (!back || back < best)
901 					break;
902 				best = back;
903 				idx_suggest = iTo;
904 			}
905 		} else {
906 			idx += iThisIndex;
907 			idx_suggest = idx;
908 		}
909 	} else {
910 		idx*=ENTR_PER_PAGE;
911 		idx_suggest = idx;
912 	}
913 	return bFound;
914 }
915 
/* Start without an index buffer; load() allocates it and the destructor
 * hands it to g_free(). */
compressed_index::compressed_index()
{
	idxdatabuf = NULL;
}
920 
/* Release the uncompressed index data. */
compressed_index::~compressed_index()
{
	// idxdatabuf is NULL if load() was never called; g_free() accepts NULL
	g_free(idxdatabuf);
}
925 
926 /* Parameters:
927  * url - index file path, has suffix ".idx.gz".
928  * wc - number of words in the index
929  * fsize - uncompressed index size
930  * */
load(const std::string & url,gulong wc,gulong fsize,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions _CollateFunction,show_progress_t * sp)931 bool compressed_index::load(const std::string& url, gulong wc, gulong fsize,
932 			  bool CreateCacheFile, CollationLevelType CollationLevel,
933 			  CollateFunctions _CollateFunction, show_progress_t *sp)
934 {
935 	wordcount=wc;
936 	gzFile in = gzopen(url.c_str(), "rb");
937 	if (in == NULL)
938 		return false;
939 
940 	idxdatabuf = (gchar *)g_malloc(fsize);
941 
942 	gulong len = gzread(in, idxdatabuf, fsize);
943 	gzclose(in);
944 	if (len < 0)
945 		return false;
946 
947 	if (len != fsize)
948 		return false;
949 
950 	wordlist.resize(wc+1);
951 	gchar *p1 = idxdatabuf;
952 	guint32 i;
953 	for (i=0; i<wc; i++) {
954 		wordlist[i] = p1;
955 		p1 += strlen(p1) +1 + 2*sizeof(guint32);
956 	}
957 	/* pointer to the next to last word entry */
958 	wordlist[wc] = p1;
959 
960 	if (CollationLevel == CollationLevel_NONE) {
961 	} else {
962 		std::string saveurl = url;
963 		saveurl.erase(saveurl.length()-sizeof(".gz")+1, sizeof(".gz")-1);
964 		if (CollationLevel == CollationLevel_SINGLE) {
965 			collate_save_info(url, saveurl);
966 			collate_load(_CollateFunction, CollationLevel_SINGLE, sp);
967 		} else if (CollationLevel == CollationLevel_MULTI) {
968 			collate_save_info(url, saveurl);
969 		}
970 	}
971 	return true;
972 }
973 
/* Return the key stored for entry idx. idx is used as a plain subscript
 * into wordlist; no range check is made here. */
const gchar *compressed_index::get_key(glong idx)
{
	return wordlist[idx];
}
978 
get_data(glong idx)979 void compressed_index::get_data(glong idx)
980 {
981 	gchar *p1 = wordlist[idx]+strlen(wordlist[idx])+sizeof(gchar);
982 	wordentry_offset = g_ntohl(get_uint32(p1));
983 	p1 += sizeof(guint32);
984 	wordentry_size = g_ntohl(get_uint32(p1));
985 }
986 
/* Decode the data fields of entry idx (see get_data) and return its key. */
const gchar *compressed_index::get_key_and_data(glong idx)
{
	get_data(idx);
	return get_key(idx);
}
992 
lookup(const char * str,glong & idx,glong & idx_suggest)993 bool compressed_index::lookup(const char *str, glong &idx, glong &idx_suggest)
994 {
995 	bool bFound=false;
996 	glong iTo=wordlist.size()-2;
997 
998 	if (stardict_strcmp(str, get_key(0))<0) {
999 		idx = 0;
1000 		idx_suggest = 0;
1001 	} else if (stardict_strcmp(str, get_key(iTo)) >0) {
1002 		idx = INVALID_INDEX;
1003 		idx_suggest = iTo;
1004 	} else {
1005 		glong iThisIndex=0;
1006 		glong iFrom=0;
1007 		gint cmpint;
1008 		while (iFrom<=iTo) {
1009 			iThisIndex=(iFrom+iTo)/2;
1010 			cmpint = stardict_strcmp(str, get_key(iThisIndex));
1011 			if (cmpint>0)
1012 				iFrom=iThisIndex+1;
1013 			else if (cmpint<0)
1014 				iTo=iThisIndex-1;
1015 			else {
1016 				bFound=true;
1017 				break;
1018 			}
1019 		}
1020 		if (!bFound) {
1021 			idx = iFrom;    //next
1022 			idx_suggest = iFrom;
1023 			gint best, back;
1024 			best = prefix_match (str, get_key(idx_suggest));
1025 			for (;;) {
1026 				if ((iTo=idx_suggest-1) < 0)
1027 					break;
1028 				back = prefix_match (str, get_key(iTo));
1029 				if (!back || back < best)
1030 					break;
1031 				best = back;
1032 				idx_suggest = iTo;
1033 			}
1034 		} else {
1035 			idx = iThisIndex;
1036 			idx_suggest = iThisIndex;
1037 		}
1038 	}
1039 	return bFound;
1040 }
1041 
1042 //===================================================================
Create(const std::string & filebasename,const char * mainext,std::string & fullfilename)1043 index_file* index_file::Create(const std::string& filebasename,
1044 		const char* mainext, std::string& fullfilename)
1045 {
1046 	index_file *index = NULL;
1047 
1048 	fullfilename = filebasename + "." + mainext + ".gz";
1049 	if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1050 		index = new compressed_index;
1051 	} else {
1052 		fullfilename = filebasename + "." + mainext;
1053 		index = new offset_index;
1054 	}
1055 	return index;
1056 }
1057 
1058 //===================================================================
fill(gchar * data,gint nent,glong idx_)1059 void synonym_file::page_t::fill(gchar *data, gint nent, glong idx_)
1060 {
1061 	idx=idx_;
1062 	gchar *p=data;
1063 	glong len;
1064 	for (gint i=0; i<nent; ++i) {
1065 		entries[i].keystr=p;
1066 		len=strlen(p);
1067 		p+=len+1;
1068 		entries[i].index=g_ntohl(get_uint32(p));
1069 		p+=sizeof(guint32);
1070 	}
1071 }
1072 
synonym_file()1073 synonym_file::synonym_file() : oft_file(CacheFileType_oft, COLLATE_FUNC_NONE)
1074 {
1075 }
1076 
/* Close the synonym file stream opened by load(), if there is one. */
synonym_file::~synonym_file()
{
	if (synfile)
		fclose(synfile);
}
1082 
read_first_on_page_key(glong page_idx)1083 inline const gchar *synonym_file::read_first_on_page_key(glong page_idx)
1084 {
1085 	fseek(synfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
1086 	guint32 page_size=oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx);
1087 	gulong minsize = sizeof(wordentry_buf);
1088         if (page_size < minsize) {
1089                 minsize = page_size;
1090 	}
1091 	size_t fread_size;
1092 	fread_size = fread(wordentry_buf, minsize, 1, synfile); //TODO: check returned values, deal with word entry that strlen>255.
1093 	if (fread_size != 1) {
1094 		g_print("fread error!\n");
1095 	}
1096 	return wordentry_buf;
1097 }
1098 
get_first_on_page_key(glong page_idx)1099 inline const gchar *synonym_file::get_first_on_page_key(glong page_idx)
1100 {
1101 	if (page_idx<middle.idx) {
1102 		if (page_idx==first.idx)
1103 			return first.keystr.c_str();
1104 		return read_first_on_page_key(page_idx);
1105 	} else if (page_idx>middle.idx) {
1106 		if (page_idx==last.idx)
1107 			return last.keystr.c_str();
1108 		return read_first_on_page_key(page_idx);
1109 	} else
1110 		return middle.keystr.c_str();
1111 }
1112 
load(const std::string & url,gulong wc,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions _CollateFunction,show_progress_t * sp)1113 bool synonym_file::load(const std::string& url, gulong wc, bool CreateCacheFile,
1114 	CollationLevelType CollationLevel, CollateFunctions _CollateFunction,
1115 	show_progress_t *sp)
1116 {
1117 	wordcount=wc;
1118 	npages=(wc-1)/ENTR_PER_PAGE+2;
1119 	if (!oft_file.load_cache(url, url, npages*sizeof(guint32))) {
1120 		stardict_stat_t stats;
1121 		if (g_stat(url.c_str(), &stats) == -1)
1122 			return false;
1123 		MapFile map_file;
1124 		if (!map_file.open(url.c_str(), stats.st_size))
1125 			return false;
1126 		const gchar *syndatabuffer=map_file.begin();
1127 		oft_file.allocate_wordoffset(npages);
1128 		const gchar *p1 = syndatabuffer;
1129 		gulong index_size;
1130 		guint32 j=0;
1131 		for (guint32 i=0; i<wc; i++) {
1132 			index_size=strlen(p1) +1 + sizeof(guint32);
1133 			if (i % ENTR_PER_PAGE==0) {
1134 				oft_file.get_wordoffset(j)=p1-syndatabuffer;
1135 				++j;
1136 			}
1137 			p1 += index_size;
1138 		}
1139 		oft_file.get_wordoffset(j)=p1-syndatabuffer;
1140 		map_file.close();
1141 		if (CreateCacheFile) {
1142 			if (!oft_file.save_cache(url))
1143 				g_printerr("Cache update failed.\n");
1144 		}
1145 	}
1146 
1147 	if (!(synfile = fopen(url.c_str(), "rb"))) {
1148 		return false;
1149 	}
1150 
1151 	first.assign(0, read_first_on_page_key(0));
1152 	last.assign(npages-2, read_first_on_page_key(npages-2));
1153 	middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
1154 	real_last.assign(wc-1, get_key(wc-1));
1155 
1156 	if (CollationLevel == CollationLevel_NONE) {
1157 	} else if (CollationLevel == CollationLevel_SINGLE) {
1158 		collate_save_info(url, url);
1159 		collate_load(_CollateFunction,CollationLevel_SINGLE, sp);
1160 	} else if (CollationLevel == CollationLevel_MULTI) {
1161 		collate_save_info(url, url);
1162 	}
1163 
1164 	return true;
1165 }
1166 
load_page(glong page_idx)1167 inline gulong synonym_file::load_page(glong page_idx)
1168 {
1169 	gulong nentr=ENTR_PER_PAGE;
1170 	if (page_idx==glong(npages-2))
1171 		if ((nentr=wordcount%ENTR_PER_PAGE)==0)
1172 			nentr=ENTR_PER_PAGE;
1173 
1174 
1175 	if (page_idx!=page.idx) {
1176 		page_data.resize(oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx));
1177 		fseek(synfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
1178 		size_t fread_size;
1179 		size_t page_data_size = page_data.size();
1180 		fread_size = fread(&page_data[0], 1, page_data_size, synfile);
1181 		if (fread_size != page_data_size) {
1182 			g_print("fread error!\n");
1183 		}
1184 		page.fill(&page_data[0], nentr, page_idx);
1185 	}
1186 
1187 	return nentr;
1188 }
1189 
get_key(glong idx)1190 const gchar *synonym_file::get_key(glong idx)
1191 {
1192 	load_page(idx/ENTR_PER_PAGE);
1193 	glong idx_in_page=idx%ENTR_PER_PAGE;
1194 	wordentry_index=page.entries[idx_in_page].index;
1195 
1196 	return page.entries[idx_in_page].keystr;
1197 }
1198 
lookup(const char * str,glong & idx,glong & idx_suggest)1199 bool synonym_file::lookup(const char *str, glong &idx, glong &idx_suggest)
1200 {
1201 	bool bFound=false;
1202 	glong iFrom;
1203 	glong iTo=npages-2;
1204 	gint cmpint;
1205 	glong iThisIndex;
1206 	if (stardict_strcmp(str, first.keystr.c_str())<0) {
1207 		idx = 0;
1208 		idx_suggest = 0;
1209 		return false;
1210 	} else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
1211 		idx = INVALID_INDEX;
1212 		idx_suggest = wordcount-1;
1213 		return false;
1214 	} else {
1215 		iFrom=0;
1216 		iThisIndex=0;
1217 		while (iFrom<=iTo) {
1218 			iThisIndex=(iFrom+iTo)/2;
1219 			cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
1220 			if (cmpint>0)
1221 				iFrom=iThisIndex+1;
1222 			else if (cmpint<0)
1223 				iTo=iThisIndex-1;
1224 			else {
1225 				bFound=true;
1226 				break;
1227 			}
1228 		}
1229 		if (!bFound)
1230 			idx = iTo;    //prev
1231 		else
1232 			idx = iThisIndex;
1233 	}
1234 	if (!bFound) {
1235 		gulong netr=load_page(idx);
1236 		iFrom=1; // Needn't search the first word anymore.
1237 		iTo=netr-1;
1238 		iThisIndex=0;
1239 		while (iFrom<=iTo) {
1240 			iThisIndex=(iFrom+iTo)/2;
1241 			cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
1242 			if (cmpint>0)
1243 				iFrom=iThisIndex+1;
1244 			else if (cmpint<0)
1245 				iTo=iThisIndex-1;
1246 			else {
1247 				bFound=true;
1248 				break;
1249 			}
1250 		}
1251 		idx*=ENTR_PER_PAGE;
1252 		if (!bFound) {
1253 			idx += iFrom;    //next
1254 			idx_suggest = idx;
1255 			gint best, back;
1256 			best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
1257 			for (;;) {
1258 				if ((iTo=idx_suggest-1) < 0)
1259 					break;
1260 				if (idx_suggest % ENTR_PER_PAGE == 0)
1261 					load_page(iTo / ENTR_PER_PAGE);
1262 				back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
1263 				if (!back || back < best)
1264 					break;
1265 				best = back;
1266 				idx_suggest = iTo;
1267 			}
1268 		} else {
1269 			idx += iThisIndex;
1270 			idx_suggest = idx;
1271 		}
1272 	} else {
1273 		idx*=ENTR_PER_PAGE;
1274 		idx_suggest = idx;
1275 	}
1276 	return bFound;
1277 }
1278 
1279 //===================================================================
/* Create a dictionary without resource storage; load() assigns storage. */
Dict::Dict()
{
	storage = NULL;
}
1284 
/* Destroy the resource storage object held in storage (may be NULL). */
Dict::~Dict()
{
	delete storage;
}
1289 
load(const std::string & ifofilename,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions CollateFunction,show_progress_t * sp)1290 bool Dict::load(const std::string& ifofilename, bool CreateCacheFile,
1291 	CollationLevelType CollationLevel, CollateFunctions CollateFunction,
1292 	show_progress_t *sp)
1293 {
1294 	gulong idxfilesize;
1295 	glong wordcount, synwordcount;
1296 	if (!load_ifofile(ifofilename, idxfilesize, wordcount, synwordcount))
1297 		return false;
1298 	sp->notify_about_start(_("Loading..."));
1299 
1300 	// ifofilename without extension - base file name
1301 	std::string filebasename
1302 		= ifofilename.substr(0, ifofilename.length()-sizeof(".ifo")+1);
1303 	if(!DictBase::load(filebasename, "dict"))
1304 		return false;
1305 
1306 	std::string fullfilename;
1307 	idx_file.reset(index_file::Create(filebasename, "idx", fullfilename));
1308 	if (!idx_file->load(fullfilename, wordcount, idxfilesize,
1309 			    CreateCacheFile, CollationLevel,
1310 			    CollateFunction, sp))
1311 		return false;
1312 
1313 	if (synwordcount) {
1314 		fullfilename = filebasename + ".syn";
1315 		if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1316 			syn_file.reset(new synonym_file);
1317 			if (!syn_file->load(fullfilename, synwordcount,
1318 					    CreateCacheFile, CollationLevel,
1319 					    CollateFunction, sp))
1320 				return false;
1321 		}
1322 	}
1323 
1324 	gchar *dirname = g_path_get_dirname(ifofilename.c_str());
1325 	storage = ResourceStorage::create(dirname, CreateCacheFile, sp);
1326 	g_free(dirname);
1327 
1328 	g_print("bookname: %s, wordcount %lu\n", bookname.c_str(), wordcount);
1329 	return true;
1330 }
1331 
load_ifofile(const std::string & ifofilename,gulong & idxfilesize,glong & wordcount,glong & synwordcount)1332 bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize, glong &wordcount, glong &synwordcount)
1333 {
1334 	DictInfo dict_info;
1335 	if (!dict_info.load_from_ifo_file(ifofilename, DictInfoType_NormDict))
1336 		return false;
1337 
1338 	ifo_file_name=dict_info.ifo_file_name;
1339 	bookname=dict_info.get_bookname();
1340 
1341 	idxfilesize=dict_info.get_index_file_size();
1342 	wordcount=dict_info.get_wordcount();
1343 	synwordcount=dict_info.get_synwordcount();
1344 
1345 	sametypesequence=dict_info.get_sametypesequence();
1346 	dicttype=dict_info.get_dicttype();
1347 
1348 	return true;
1349 }
1350 
nsynarticles() const1351 glong Dict::nsynarticles() const
1352 {
1353 	if (syn_file.get() == NULL)
1354 		return 0;
1355 	return syn_file->get_word_count();
1356 }
1357 
GetWordPrev(glong idx,glong & pidx,bool isidx,CollationLevelType CollationLevel,int servercollatefunc)1358 bool Dict::GetWordPrev(glong idx, glong &pidx, bool isidx, CollationLevelType CollationLevel, int servercollatefunc)
1359 {
1360 	idxsyn_file *is_file;
1361 	if (isidx)
1362 		is_file = idx_file.get();
1363 	else
1364 		is_file = syn_file.get();
1365 	if (idx==INVALID_INDEX) {
1366 		pidx = is_file->get_word_count()-1;
1367 		return true;
1368 	}
1369 	pidx = idx;
1370 	gchar *cWord = g_strdup(is_file->getWord(pidx, CollationLevel, servercollatefunc));
1371 	const gchar *pWord;
1372 	bool found=false;
1373 	while (pidx>0) {
1374 		pWord = is_file->getWord(pidx-1, CollationLevel, servercollatefunc);
1375 		if (strcmp(pWord, cWord)!=0) {
1376 			found=true;
1377 			break;
1378 		}
1379 		pidx--;
1380 	}
1381 	g_free(cWord);
1382 	if (found) {
1383 		pidx--;
1384 		return true;
1385 	} else {
1386 		return false;
1387 	}
1388 }
1389 
GetWordNext(glong & idx,bool isidx,CollationLevelType CollationLevel,int servercollatefunc)1390 void Dict::GetWordNext(glong &idx, bool isidx, CollationLevelType CollationLevel, int servercollatefunc)
1391 {
1392 	idxsyn_file *is_file;
1393 	if (isidx)
1394 		is_file = idx_file.get();
1395 	else
1396 		is_file = syn_file.get();
1397 	gchar *cWord = g_strdup(is_file->getWord(idx, CollationLevel, servercollatefunc));
1398 	const gchar *pWord;
1399 	bool found=false;
1400 	while (idx < is_file->get_word_count()-1) {
1401 		pWord = is_file->getWord(idx+1, CollationLevel, servercollatefunc);
1402 		if (strcmp(pWord, cWord)!=0) {
1403 			found=true;
1404 			break;
1405 		}
1406 		idx++;
1407 	}
1408 	g_free(cWord);
1409 	if (found)
1410 		idx++;
1411 	else
1412 		idx=INVALID_INDEX;
1413 }
1414 
GetOrigWordCount(glong & idx,bool isidx)1415 gint Dict::GetOrigWordCount(glong& idx, bool isidx)
1416 {
1417 	idxsyn_file *is_file;
1418 	if (isidx)
1419 		is_file = idx_file.get();
1420 	else
1421 		is_file = syn_file.get();
1422 	gchar *cWord = g_strdup(is_file->get_key(idx));
1423 	const gchar *pWord;
1424 	gint count = 1;
1425 	glong idx1 = idx;
1426 	while (idx1>0) {
1427 		pWord = is_file->get_key(idx1-1);
1428 		if (strcmp(pWord, cWord)!=0)
1429 			break;
1430 		count++;
1431 		idx1--;
1432 	}
1433 	glong idx2=idx;
1434 	while (idx2<is_file->get_word_count()-1) {
1435 		pWord = is_file->get_key(idx2+1);
1436 		if (strcmp(pWord, cWord)!=0)
1437 			break;
1438 		count++;
1439 		idx2++;
1440 	}
1441 	idx=idx1;
1442 	g_free(cWord);
1443 	return count;
1444 }
1445 
LookupSynonym(const char * str,glong & synidx,glong & synidx_suggest,CollationLevelType CollationLevel,int servercollatefunc)1446 bool Dict::LookupSynonym(const char *str, glong &synidx, glong &synidx_suggest, CollationLevelType CollationLevel, int servercollatefunc)
1447 {
1448 	if (syn_file.get() == NULL) {
1449 		synidx = UNSET_INDEX;
1450 		synidx_suggest = UNSET_INDEX;
1451 		return false;
1452 	}
1453 	return syn_file->Lookup(str, synidx, synidx_suggest, CollationLevel, servercollatefunc);
1454 }
1455 
LookupWithRule(GPatternSpec * pspec,glong * aIndex,int iBuffLen)1456 bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1457 {
1458 	int iIndexCount=0;
1459 	for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1460 		// Need to deal with same word in index? But this will slow down processing in most case.
1461 		if (g_pattern_match_string(pspec, idx_file->getWord(i, CollationLevel_NONE, 0)))
1462 			aIndex[iIndexCount++]=i;
1463 	aIndex[iIndexCount]= -1; // -1 is the end.
1464 	return (iIndexCount>0);
1465 }
1466 
LookupWithRuleSynonym(GPatternSpec * pspec,glong * aIndex,int iBuffLen)1467 bool Dict::LookupWithRuleSynonym(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1468 {
1469 	if (syn_file.get() == NULL)
1470 		return false;
1471 	int iIndexCount=0;
1472 	for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1473 		// Need to deal with same word in index? But this will slow down processing in most case.
1474 		if (g_pattern_match_string(pspec, syn_file->getWord(i, CollationLevel_NONE, 0)))
1475 			aIndex[iIndexCount++]=i;
1476 	aIndex[iIndexCount]= -1; // -1 is the end.
1477 	return (iIndexCount>0);
1478 }
1479 
LookupWithRegex(GRegex * regex,glong * aIndex,int iBuffLen)1480 bool Dict::LookupWithRegex(GRegex *regex, glong *aIndex, int iBuffLen)
1481 {
1482 	int iIndexCount=0;
1483 	for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1484 		// Need to deal with same word in index? But this will slow down processing in most case.
1485 		if (g_regex_match(regex, idx_file->getWord(i, CollationLevel_NONE, 0), (GRegexMatchFlags)0, NULL))
1486 			aIndex[iIndexCount++]=i;
1487 	aIndex[iIndexCount]= -1; // -1 is the end.
1488 	return (iIndexCount>0);
1489 }
1490 
LookupWithRegexSynonym(GRegex * regex,glong * aIndex,int iBuffLen)1491 bool Dict::LookupWithRegexSynonym(GRegex *regex, glong *aIndex, int iBuffLen)
1492 {
1493 	if (syn_file.get() == NULL)
1494 		return false;
1495 	int iIndexCount=0;
1496 	for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1497 		// Need to deal with same word in index? But this will slow down processing in most case.
1498 		if (g_regex_match(regex, syn_file->getWord(i, CollationLevel_NONE, 0), (GRegexMatchFlags)0, NULL))
1499 			aIndex[iIndexCount++]=i;
1500 	aIndex[iIndexCount]= -1; // -1 is the end.
1501 	return (iIndexCount>0);
1502 }
1503 
1504 //===================================================================
// Definition of the static member Libs::default_show_progress.
// NOTE(review): presumably the fallback used when no progress object is
// supplied - confirm against set_show_progress().
show_progress_t Libs::default_show_progress;
1506 
/* Create an empty dictionary collection.
 *
 * Parameters:
 * sp - progress object, installed through set_show_progress()
 * create_cache_files - stored in CreateCacheFile and later passed to
 *   Dict::load() by load_dict()
 * level, func - collation level and collation function
 * */
Libs::Libs(show_progress_t *sp, bool create_cache_files, CollationLevelType level, CollateFunctions func)
:
	iMaxFuzzyDistance(MAX_FUZZY_DISTANCE),
	show_progress(NULL),
	CreateCacheFile(create_cache_files)
{
#ifdef SD_SERVER_CODE
	root_info_item = NULL;
#endif
	// level and func are stored only after this call
	// NOTE(review): presumably ValidateCollateParams may adjust them - confirm
	ValidateCollateParams(level, func);
	CollationLevel = level;
	CollateFunction = func;
	set_show_progress(sp);
	init_collations();
}
1522 
~Libs()1523 Libs::~Libs()
1524 {
1525 #ifdef SD_SERVER_CODE
1526 	if (root_info_item)
1527 		delete root_info_item;
1528 #endif
1529 	for (std::vector<Dict *>::iterator p=oLib.begin(); p!=oLib.end(); ++p)
1530 		delete *p;
1531 	free_collations();
1532 }
1533 
load_dict(const std::string & url,show_progress_t * sp)1534 bool Libs::load_dict(const std::string& url, show_progress_t *sp)
1535 {
1536 	Dict *lib=new Dict;
1537 	if (lib->load(url, CreateCacheFile, CollationLevel, CollateFunction, sp)) {
1538 		oLib.push_back(lib);
1539 		return true;
1540 	} else {
1541 		delete lib;
1542 		return false;
1543 	}
1544 }
1545 
1546 #ifdef SD_SERVER_CODE
LoadFromXML()1547 void Libs::LoadFromXML()
1548 {
1549 	root_info_item = new DictInfoItem();
1550 	root_info_item->isdir = 1;
1551 	root_info_item->dir = new DictInfoDirItem();
1552 	root_info_item->dir->name='/';
1553 	LoadXMLDir("/usr/share/stardict/dic", root_info_item);
1554 	GenLinkDict(root_info_item);
1555 }
1556 
GenLinkDict(DictInfoItem * info_item)1557 void Libs::GenLinkDict(DictInfoItem *info_item)
1558 {
1559 	std::list<std::list<DictInfoItem *>::iterator> eraselist;
1560 	for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1561 		if ((*i)->isdir == 1) {
1562 			GenLinkDict(*i);
1563 		} else if ((*i)->isdir == 2) {
1564 			std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1565 			uid_iter = uidmap.find(*((*i)->linkuid));
1566 			if (uid_iter!=uidmap.end()) {
1567 				delete (*i)->linkuid;
1568 				(*i)->dict = uid_iter->second;
1569 			} else {
1570 				g_print("Error, linkdict uid not found! %s\n", (*i)->linkuid->c_str());
1571 				delete (*i)->linkuid;
1572 				eraselist.push_back(i);
1573 			}
1574 		}
1575 	}
1576 	for (std::list<std::list<DictInfoItem *>::iterator>::iterator i = eraselist.begin(); i!= eraselist.end(); ++i) {
1577 		info_item->dir->info_item_list.erase(*i);
1578 	}
1579 }
1580 
func_parse_start_element(GMarkupParseContext * context,const gchar * element_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)1581 void Libs::func_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error)
1582 {
1583 	if (strcmp(element_name, "dict")==0) {
1584 		ParseUserData *Data = (ParseUserData *)user_data;
1585 		Data->indict = true;
1586 		Data->path.clear();
1587 		Data->uid.clear();
1588 		Data->level.clear();
1589 		Data->download.clear();
1590 		Data->from.clear();
1591 		Data->to.clear();
1592 	} else if (strcmp(element_name, "linkdict")==0) {
1593 		ParseUserData *Data = (ParseUserData *)user_data;
1594 		Data->inlinkdict = true;
1595 		Data->linkuid.clear();
1596 	}
1597 }
1598 
func_parse_end_element(GMarkupParseContext * context,const gchar * element_name,gpointer user_data,GError ** error)1599 void Libs::func_parse_end_element(GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error)
1600 {
1601 	if (strcmp(element_name, "dict")==0) {
1602 		ParseUserData *Data = (ParseUserData *)user_data;
1603 		Data->indict = false;
1604 		if (!Data->path.empty() && !Data->uid.empty()) {
1605 			std::string url;
1606 			url = Data->dir;
1607 			url += G_DIR_SEPARATOR;
1608 			url += Data->path;
1609 			if (Data->oLibs->load_dict(url, Data->oLibs->show_progress)) {
1610 				DictInfoItem *sub_info_item = new DictInfoItem();
1611 				sub_info_item->isdir = 0;
1612 				sub_info_item->dict = new DictInfoDictItem();
1613 				sub_info_item->dict->uid = Data->uid;
1614 				sub_info_item->dict->download = Data->download;
1615 				sub_info_item->dict->from = Data->from;
1616 				sub_info_item->dict->to = Data->to;
1617 				if (Data->level.empty())
1618 					sub_info_item->dict->level = 0;
1619 				else
1620 					sub_info_item->dict->level = atoi(Data->level.c_str());
1621 				sub_info_item->dict->id = Data->oLibs->oLib.size()-1;
1622 				Data->info_item->dir->info_item_list.push_back(sub_info_item);
1623 				Data->oLibs->uidmap[Data->uid] = sub_info_item->dict;
1624 			}
1625 		}
1626 	} else if (strcmp(element_name, "linkdict")==0) {
1627 		ParseUserData *Data = (ParseUserData *)user_data;
1628 		Data->inlinkdict = false;
1629 		if (!Data->linkuid.empty()) {
1630 			DictInfoItem *sub_info_item = new DictInfoItem();
1631 			sub_info_item->isdir = 2;
1632 			sub_info_item->linkuid = new std::string(Data->linkuid);
1633 			Data->info_item->dir->info_item_list.push_back(sub_info_item);
1634 		}
1635 	}
1636 }
1637 
func_parse_text(GMarkupParseContext * context,const gchar * text,gsize text_len,gpointer user_data,GError ** error)1638 void Libs::func_parse_text(GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
1639 {
1640 	const gchar *element = g_markup_parse_context_get_element(context);
1641 	if (!element)
1642 		return;
1643 	ParseUserData *Data = (ParseUserData *)user_data;
1644 	if (strcmp(element, "subdir")==0) {
1645 		std::string subdir;
1646 		subdir = Data->dir;
1647 		subdir += G_DIR_SEPARATOR;
1648 		subdir.append(text, text_len);
1649 		DictInfoItem *sub_info_item = new DictInfoItem();
1650 		sub_info_item->isdir = 1;
1651 		sub_info_item->dir = new DictInfoDirItem();
1652 		sub_info_item->dir->name.assign(text, text_len);
1653 		Data->oLibs->LoadXMLDir(subdir.c_str(), sub_info_item);
1654 		Data->info_item->dir->info_item_list.push_back(sub_info_item);
1655 	} else if (strcmp(element, "dirname")==0) {
1656 		Data->info_item->dir->dirname.assign(text, text_len);
1657 	} else if (strcmp(element, "path")==0) {
1658 		Data->path.assign(text, text_len);
1659 	} else if (strcmp(element, "uid")==0) {
1660 		if (Data->indict) {
1661 			std::string uid(text, text_len);
1662 			if (uid.find_first_of(' ')!=std::string::npos) {
1663 				g_print("Error: uid contains space! %s: %s\n", Data->dir, uid.c_str());
1664 			} else {
1665 				std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1666 				uid_iter = Data->oLibs->uidmap.find(uid);
1667 				if (uid_iter!=Data->oLibs->uidmap.end()) {
1668 					g_print("Error: uid duplicated! %s: %s\n", Data->dir, uid.c_str());
1669 				} else {
1670 					Data->uid = uid;
1671 				}
1672 			}
1673 		} else if (Data->inlinkdict) {
1674 			Data->linkuid.assign(text, text_len);
1675 		}
1676 	} else if (strcmp(element, "level")==0) {
1677 		Data->level.assign(text, text_len);
1678 	} else if (strcmp(element, "download")==0) {
1679 		Data->download.assign(text, text_len);
1680 	} else if (strcmp(element, "from")==0) {
1681 		Data->from.assign(text, text_len);
1682 	} else if (strcmp(element, "to")==0) {
1683 		Data->to.assign(text, text_len);
1684 	}
1685 }
1686 
LoadXMLDir(const char * dir,DictInfoItem * info_item)1687 void Libs::LoadXMLDir(const char *dir, DictInfoItem *info_item)
1688 {
1689 	std::string filename;
1690 	filename = build_path(dir, "stardictd.xml");
1691 	stardict_stat_t filestat;
1692 	if (g_stat(filename.c_str(), &filestat)!=0)
1693 		return;
1694 	MapFile mf;
1695 	if (!mf.open(filename.c_str(), filestat.st_size))
1696 		return;
1697 	ParseUserData Data;
1698 	Data.oLibs = this;
1699 	Data.dir = dir;
1700 	Data.info_item = info_item;
1701 	Data.indict = false;
1702 	Data.inlinkdict = false;
1703 	GMarkupParser parser;
1704 	parser.start_element = func_parse_start_element;
1705 	parser.end_element = func_parse_end_element;
1706 	parser.text = func_parse_text;
1707 	parser.passthrough = NULL;
1708 	parser.error = NULL;
1709 	GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL);
1710 	g_markup_parse_context_parse(context, mf.begin(), filestat.st_size, NULL);
1711 	g_markup_parse_context_end_parse(context, NULL);
1712 	g_markup_parse_context_free(context);
1713 	mf.close();
1714 	info_item->dir->dictcount = 0;
1715 	for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1716 		if ((*i)->isdir == 1) {
1717 			info_item->dir->dictcount += (*i)->dir->dictcount;
1718 		} else if ((*i)->isdir == 0) {
1719 			info_item->dir->dictcount++;
1720 		}
1721 	}
1722 }
1723 
get_fromto_info()1724 const std::string &Libs::get_fromto_info() {
1725 	if(cache_fromto.empty()){
1726 		std::map<std::string, std::list<FromTo> > map_fromto;
1727 		gen_fromto_info(root_info_item, map_fromto);
1728 		cache_fromto+="<lang>";
1729 		for (std::map<std::string, std::list<FromTo> >::iterator map_it = map_fromto.begin(); map_it != map_fromto.end(); ++map_it){
1730 			cache_fromto+="<from lang=\"";
1731 			cache_fromto+=map_it->first;
1732 			cache_fromto+="\">";
1733 			std::list<FromTo> &fromTo = map_it->second;
1734 			for (std::list<FromTo>::iterator i = fromTo.begin() ; i!= fromTo.end(); ++i){
1735 				cache_fromto+="<to lang=\"";
1736 				cache_fromto+= i->to;
1737 				cache_fromto+="\">";
1738 				std::list<FromToInfo> &fromtoinfo = i->fromto_info;
1739 				for (std::list<FromToInfo>::iterator j = fromtoinfo.begin() ; j!= fromtoinfo.end(); ++j){
1740 					cache_fromto+="<dict><uid>";
1741 					cache_fromto+=j->uid;
1742 					cache_fromto+="</uid><bookname>";
1743 					cache_fromto+= j->bookname;
1744 					cache_fromto+="</bookname></dict>";
1745 				}
1746 				cache_fromto+="</to>";
1747 			}
1748 			cache_fromto+="</from>";
1749 		}
1750 		cache_fromto+="</lang>";
1751 	}
1752 	return cache_fromto;
1753 }
1754 
gen_fromto_info(struct DictInfoItem * info_item,std::map<std::string,std::list<FromTo>> & map_fromto)1755 void Libs::gen_fromto_info(struct DictInfoItem *info_item, std::map<std::string, std::list<FromTo> > &map_fromto) {
1756 	gchar *etext;
1757 	for(std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin() ; i!= info_item->dir->info_item_list.end(); ++i){
1758 		if ((*i)->isdir == 1) {
1759 			gen_fromto_info((*i), map_fromto);
1760 		} else {
1761 			std::string from_str     = (*i)->dict->from;
1762 			std::string to_str       = (*i)->dict->to;
1763 			if(from_str.empty() || to_str.empty()){
1764 				continue;
1765 			}
1766 			std::string uid_str      = (*i)->dict->uid;
1767 			etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1768 			std::string bookname_str = etext;
1769 			g_free(etext);
1770 			std::map<std::string, std::list<FromTo> >::iterator fromto1 = map_fromto.find(from_str);
1771 			if (fromto1==map_fromto.end()) {
1772 				//if an from_str element not already in map,  add new from_str to map
1773 				FromToInfo fromtoinfo;
1774 				fromtoinfo.uid = uid_str;
1775 				fromtoinfo.bookname = bookname_str;
1776 				std::list<FromToInfo> list_fromtoinfo ;
1777 				list_fromtoinfo.push_back(fromtoinfo);
1778 				FromTo new_fromTo;
1779 				new_fromTo.to = to_str;
1780 				new_fromTo.fromto_info = list_fromtoinfo;
1781 				std::list<FromTo> list_fromTo;
1782 				list_fromTo.push_back(new_fromTo);
1783 				map_fromto[from_str] = list_fromTo;
1784 			} else {
1785 				// else if from_str already in map, so comparison to_str and from_to1 , then choose insert.
1786 				std::list<FromTo> &fromTo_list = fromto1->second;
1787 				std::string from_name1 = fromto1->first;
1788 				bool found = false;
1789 				for (std::list<FromTo>::iterator new_fromTo = fromTo_list.begin(); new_fromTo != fromTo_list.end(); ++new_fromTo) {
1790 					if(to_str == new_fromTo->to) {
1791 						std::list<FromToInfo> &fromtoinfo1 = new_fromTo->fromto_info;
1792 						FromToInfo fromtoinfo;
1793 						fromtoinfo.uid = uid_str;
1794 						fromtoinfo.bookname = bookname_str;
1795 						fromtoinfo1.push_back(fromtoinfo);
1796 						found = true;
1797 						break;
1798 					}
1799 				}
1800 				if(!found){
1801 					FromToInfo fromtoinfo;
1802 					fromtoinfo.uid = uid_str;
1803 					fromtoinfo.bookname = bookname_str;
1804 					std::list<FromToInfo> fromtoinfo1;
1805 					fromtoinfo1.push_back(fromtoinfo);
1806 					FromTo fromTo;
1807 					fromTo.to = to_str;
1808 					fromTo.fromto_info = fromtoinfo1;
1809 					fromTo_list.push_back(fromTo);
1810 				}
1811 			}
1812 		}
1813 	}
1814 }
1815 
get_dir_info(const char * path)1816 const std::string *Libs::get_dir_info(const char *path)
1817 {
1818 	if (path[0]!='/')
1819 		return NULL;
1820 	DictInfoItem *info_item = root_info_item;
1821 	std::string item;
1822 	const char *p = path+1;
1823 	const char *p1;
1824 	bool found;
1825 	do {
1826 		p1 = strchr(p, '/');
1827 		if (p1) {
1828 			item.assign(p, p1-p);
1829 			if (!item.empty()) {
1830 				found = false;
1831 				for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1832 					if ((*i)->isdir == 1) {
1833 						if ((*i)->dir->name == item) {
1834 							info_item = (*i);
1835 							found = true;
1836 							break;
1837 						}
1838 					}
1839 				}
1840 				if (!found)
1841 					return NULL;
1842 			}
1843 			p = p1+1;
1844 		}
1845 	} while (p1);
1846 	if (*p)
1847 		return NULL; // Not end by '/'.
1848 	DictInfoDirItem *dir = info_item->dir;
1849 	if (dir->info_string.empty()) {
1850 		dir->info_string += "<parent>";
1851 		dir->info_string += path;
1852 		dir->info_string += "</parent>";
1853 		gchar *etext;
1854 		for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1855 			if ((*i)->isdir == 1) {
1856 				dir->info_string += "<dir><name>";
1857 				dir->info_string += (*i)->dir->name;
1858 				dir->info_string += "</name><dirname>";
1859 				dir->info_string += (*i)->dir->dirname;
1860 				dir->info_string += "</dirname><dictcount>";
1861 				gchar *dictcount = g_strdup_printf("%u", (*i)->dir->dictcount);
1862 				dir->info_string += dictcount;
1863 				g_free(dictcount);
1864 				dir->info_string += "</dictcount></dir>";
1865 			} else {
1866 				dir->info_string += "<dict>";
1867 				if ((*i)->isdir == 2)
1868 					dir->info_string += "<islink>1</islink>";
1869 				if ((*i)->dict->level != 0) {
1870 					dir->info_string += "<level>";
1871 					gchar *level = g_strdup_printf("%u", (*i)->dict->level);
1872 					dir->info_string += level;
1873 					g_free(level);
1874 					dir->info_string += "</level>";
1875 				}
1876 				dir->info_string += "<uid>";
1877 				dir->info_string += (*i)->dict->uid;
1878 				dir->info_string += "</uid><bookname>";
1879 				etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1880 				dir->info_string += etext;
1881 				g_free(etext);
1882 				dir->info_string += "</bookname><wordcount>";
1883 				gchar *wc = g_strdup_printf("%ld", oLib[(*i)->dict->id]->narticles());
1884 				dir->info_string += wc;
1885 				g_free(wc);
1886 				dir->info_string += "</wordcount></dict>";
1887 			}
1888 		}
1889 	}
1890 	return &(dir->info_string);
1891 }
1892 
get_dict_level(const char * uid)1893 int Libs::get_dict_level(const char *uid)
1894 {
1895 	std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1896 	uid_iter = uidmap.find(uid);
1897 	if (uid_iter==uidmap.end())
1898 		return -1;
1899 	return uid_iter->second->level;
1900 }
1901 
get_dicts_list(const char * dictmask,int max_dict_count,int userLevel)1902 std::string Libs::get_dicts_list(const char *dictmask, int max_dict_count, int userLevel)
1903 {
1904 	std::list<std::string> uid_list;
1905 	std::string uid;
1906 	const char *p, *p1;
1907 	p = dictmask;
1908 	do {
1909 		p1 = strchr(p, ' ');
1910 		if (p1) {
1911 			uid.assign(p, p1-p);
1912 			if (!uid.empty())
1913 				uid_list.push_back(uid);
1914 			p = p1+1;
1915 		}
1916 	} while (p1);
1917 	uid = p;
1918 	if (!uid.empty())
1919 		uid_list.push_back(uid);
1920 
1921 	std::string dictmask_str;
1922 	int count = 0;
1923 	const std::string *info_string;
1924 	int level;
1925 	for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
1926 		level = get_dict_level((*i).c_str());
1927 		if (level < 0 || level > userLevel)
1928 			continue;
1929 		info_string = get_dict_info(i->c_str(), true);
1930 		if (info_string) {
1931 			if (count>=max_dict_count)
1932 				break;
1933 			dictmask_str += info_string->c_str();
1934 			count++;
1935 		}
1936 	}
1937 	return dictmask_str;
1938 }
1939 
get_dict_info(const char * uid,bool is_short)1940 const std::string *Libs::get_dict_info(const char *uid, bool is_short)
1941 {
1942 	std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1943 	uid_iter = uidmap.find(uid);
1944 	if (uid_iter==uidmap.end())
1945 		return NULL;
1946 	DictInfoDictItem *dict;
1947 	dict = uid_iter->second;
1948 	if (is_short) {
1949 		if (dict->short_info_string.empty()) {
1950 			gchar *etext;
1951 			dict->short_info_string += "<dict><uid>";
1952 			dict->short_info_string += uid;
1953 			dict->short_info_string += "</uid><bookname>";
1954 			etext = g_markup_escape_text(oLib[dict->id]->dict_name().c_str(), -1);
1955 			dict->short_info_string += etext;
1956 			g_free(etext);
1957 			dict->short_info_string += "</bookname><wordcount>";
1958 			gchar *wc = g_strdup_printf("%ld", oLib[dict->id]->narticles());
1959 			dict->short_info_string += wc;
1960 			g_free(wc);
1961 			dict->short_info_string += "</wordcount></dict>";
1962 		}
1963 		return &(dict->short_info_string);
1964 	} else {
1965 		if (dict->info_string.empty()) {
1966 			gchar *etext;
1967 			DictInfo dict_info;
1968 			if (!dict_info.load_from_ifo_file(oLib[dict->id]->ifofilename(),
1969 				DictInfoType_NormDict))
1970 				return NULL;
1971 			dict->info_string += "<dictinfo><bookname>";
1972 			etext = g_markup_escape_text(dict_info.bookname.c_str(), -1);
1973 			dict->info_string += etext;
1974 			g_free(etext);
1975 			dict->info_string += "</bookname><wordcount>";
1976 			gchar *wc = g_strdup_printf("%u", dict_info.wordcount);
1977 			dict->info_string += wc;
1978 			g_free(wc);
1979 			dict->info_string += "</wordcount>";
1980 			if (dict_info.synwordcount!=0) {
1981 				dict->info_string += "<synwordcount>";
1982 				wc = g_strdup_printf("%u", dict_info.synwordcount);
1983 				dict->info_string += wc;
1984 				g_free(wc);
1985 				dict->info_string += "</synwordcount>";
1986 			}
1987 			dict->info_string += "<author>";
1988 			etext = g_markup_escape_text(dict_info.author.c_str(), -1);
1989 			dict->info_string += etext;
1990 			g_free(etext);
1991 			dict->info_string += "</author><email>";
1992 			etext = g_markup_escape_text(dict_info.email.c_str(), -1);
1993 			dict->info_string += etext;
1994 			g_free(etext);
1995 			dict->info_string += "</email><website>";
1996 			etext = g_markup_escape_text(dict_info.website.c_str(), -1);
1997 			dict->info_string += etext;
1998 			g_free(etext);
1999 			dict->info_string += "</website><description>";
2000 			etext = g_markup_escape_text(dict_info.description.c_str(), -1);
2001 			dict->info_string += etext;
2002 			g_free(etext);
2003 			dict->info_string += "</description><date>";
2004 			etext = g_markup_escape_text(dict_info.date.c_str(), -1);
2005 			dict->info_string += etext;
2006 			g_free(etext);
2007 			dict->info_string += "</date><download>";
2008 			etext = g_markup_escape_text(dict->download.c_str(), -1);
2009 			dict->info_string += etext;
2010 			g_free(etext);
2011 			dict->info_string += "</download></dictinfo>";
2012 		}
2013 		return &(dict->info_string);
2014 	}
2015 }
2016 
SetServerDictMask(std::vector<InstantDictIndex> & dictmask,const char * dicts,int max,int userLevel)2017 void Libs::SetServerDictMask(std::vector<InstantDictIndex> &dictmask, const char *dicts, int max, int userLevel)
2018 {
2019 	InstantDictIndex instance_dict_index;
2020 	instance_dict_index.type = InstantDictType_LOCAL;
2021 	dictmask.clear();
2022 	std::list<std::string> uid_list;
2023 	std::string uid;
2024 	const char *p, *p1;
2025 	p = dicts;
2026 	do {
2027 		p1 = strchr(p, ' ');
2028 		if (p1) {
2029 			uid.assign(p, p1-p);
2030 			if (!uid.empty())
2031 				uid_list.push_back(uid);
2032 			p = p1+1;
2033 		}
2034 	} while (p1);
2035 	uid = p;
2036 	if (!uid.empty())
2037 		uid_list.push_back(uid);
2038 	int count = 0;
2039 	std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
2040 	for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
2041 		uid_iter = uidmap.find(*i);
2042 		if (uid_iter!=uidmap.end()) {
2043 			if (max>=0 && count >= max)
2044 				break;
2045 			if (userLevel>=0 && (unsigned int)userLevel< uid_iter->second->level)
2046 				continue;
2047 			instance_dict_index.index = uid_iter->second->id;
2048 			dictmask.push_back(instance_dict_index);
2049 			count++;
2050 		}
2051 	}
2052 }
2053 
LoadCollateFile(std::vector<InstantDictIndex> & dictmask,CollateFunctions cltfuc)2054 void Libs::LoadCollateFile(std::vector<InstantDictIndex> &dictmask, CollateFunctions cltfuc)
2055 {
2056 	for (std::vector<InstantDictIndex>::iterator i = dictmask.begin(); i!=dictmask.end(); ++i) {
2057 		if ((*i).type == InstantDictType_LOCAL) {
2058 			oLib[(*i).index]->idx_file->collate_load(cltfuc, CollationLevel_MULTI);
2059 			if (oLib[(*i).index]->syn_file.get() != NULL)
2060 				oLib[(*i).index]->syn_file->collate_load(cltfuc, CollationLevel_MULTI);
2061 		}
2062 	}
2063 }
2064 #endif
2065 
2066 #ifdef SD_CLIENT_CODE
find_lib_by_id(const DictItemId & id,size_t & iLib)2067 bool Libs::find_lib_by_id(const DictItemId& id, size_t &iLib)
2068 {
2069 	for (std::vector<Dict *>::size_type i =0; i < oLib.size(); i++) {
2070 		if (oLib[i]->id() == id) {
2071 			iLib = i;
2072 			return true;
2073 		}
2074 	}
2075 	return false;
2076 }
2077 
load(const std::list<std::string> & load_list)2078 void Libs::load(const std::list<std::string> &load_list)
2079 {
2080 	for (std::list<std::string>::const_iterator i = load_list.begin(); i != load_list.end(); ++i) {
2081 		load_dict(*i, show_progress);
2082 	}
2083 }
2084 
reload(const std::list<std::string> & load_list,CollationLevelType NewCollationLevel,CollateFunctions collf)2085 void Libs::reload(const std::list<std::string> &load_list, CollationLevelType NewCollationLevel, CollateFunctions collf)
2086 {
2087 	ValidateCollateParams(NewCollationLevel, collf);
2088 	if (NewCollationLevel == CollationLevel && collf == CollateFunction) {
2089 		std::vector<Dict *> prev(oLib);
2090 		oLib.clear();
2091 		for (std::list<std::string>::const_iterator i = load_list.begin(); i != load_list.end(); ++i) {
2092 			std::vector<Dict *>::iterator it;
2093 			for (it=prev.begin(); it!=prev.end(); ++it) {
2094 				if ((*it)->ifofilename()==*i)
2095 					break;
2096 			}
2097 			if (it==prev.end()) {
2098 				load_dict(*i, show_progress);
2099 			} else {
2100 				Dict *res=*it;
2101 				prev.erase(it);
2102 				oLib.push_back(res);
2103 			}
2104 		}
2105 		for (std::vector<Dict *>::iterator it=prev.begin(); it!=prev.end(); ++it) {
2106 			delete *it;
2107 		}
2108 	} else {
2109 		for (std::vector<Dict *>::iterator it = oLib.begin(); it != oLib.end(); ++it)
2110 			delete *it;
2111 		oLib.clear();
2112 		free_collations();
2113 		CollationLevel = NewCollationLevel;
2114 		CollateFunction = CollateFunctions(collf);
2115 		init_collations();
2116 		load(load_list);
2117 	}
2118 }
2119 #endif
2120 
CltIndexToOrig(glong cltidx,size_t iLib,int servercollatefunc)2121 glong Libs::CltIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2122 {
2123 	if (CollationLevel == CollationLevel_NONE)
2124 		return cltidx;
2125 	if (CollationLevel == CollationLevel_SINGLE) {
2126 		if (cltidx == INVALID_INDEX)
2127 			return cltidx;
2128 		return oLib[iLib]->idx_file->get_clt_file()->GetOrigIndex(cltidx);
2129 	}
2130 	if (servercollatefunc == 0)
2131 		return cltidx;
2132 	if (cltidx == INVALID_INDEX)
2133 		return cltidx;
2134 	oLib[iLib]->idx_file->collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
2135 	return oLib[iLib]->idx_file->get_clt_file(servercollatefunc-1)->GetOrigIndex(cltidx);
2136 }
2137 
CltSynIndexToOrig(glong cltidx,size_t iLib,int servercollatefunc)2138 glong Libs::CltSynIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2139 {
2140 	if (CollationLevel == CollationLevel_NONE)
2141 		return cltidx;
2142 	if (CollationLevel == CollationLevel_SINGLE) {
2143 		if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2144 			return cltidx;
2145 		return oLib[iLib]->syn_file->get_clt_file()->GetOrigIndex(cltidx);
2146 	}
2147 	if (servercollatefunc == 0)
2148 		return cltidx;
2149 	if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2150 		return cltidx;
2151 	oLib[iLib]->syn_file->collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
2152 	return oLib[iLib]->syn_file->get_clt_file(servercollatefunc-1)->GetOrigIndex(cltidx);
2153 }
2154 
GetSuggestWord(const gchar * sWord,CurrentIndex * iCurrent,std::vector<InstantDictIndex> & dictmask,int servercollatefunc)2155 const gchar *Libs::GetSuggestWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2156 {
2157 	const gchar *poCurrentWord = NULL;
2158 	const gchar *word;
2159 	gint best =0;
2160 	gint back;
2161 	std::vector<InstantDictIndex>::size_type iLib;
2162 	std::vector<Dict *>::size_type iRealLib;
2163 	for (iLib=0; iLib < dictmask.size(); iLib++) {
2164 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2165 			continue;
2166 		if(iCurrent[iLib].idx_suggest == INVALID_INDEX || iCurrent[iLib].idx_suggest == UNSET_INDEX)
2167 			continue;
2168 		iRealLib = dictmask[iLib].index;
2169 		if ( poCurrentWord == NULL ) {
2170 			poCurrentWord = poGetWord(iCurrent[iLib].idx_suggest, iRealLib, servercollatefunc);
2171 			best = prefix_match (sWord, poCurrentWord);
2172 		} else {
2173 			word = poGetWord(iCurrent[iLib].idx_suggest, iRealLib, servercollatefunc);
2174 			back = prefix_match (sWord, word);
2175 			if (back > best) {
2176 				best = back;
2177 				poCurrentWord = word;
2178 			} else if (back == best) {
2179 				gint x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2180 				if (x > 0) {
2181 					poCurrentWord = word;
2182 				}
2183 			}
2184 		}
2185 	}
2186 	for (iLib=0; iLib<dictmask.size(); iLib++) {
2187 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2188 			continue;
2189 		if (iCurrent[iLib].synidx_suggest==INVALID_INDEX || iCurrent[iLib].synidx_suggest==UNSET_INDEX)
2190 			continue;
2191 		iRealLib = dictmask[iLib].index;
2192 		if ( poCurrentWord == NULL ) {
2193 			poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx_suggest, iRealLib, servercollatefunc);
2194 			best = prefix_match (sWord, poCurrentWord);
2195 		} else {
2196 			word = poGetSynonymWord(iCurrent[iLib].synidx_suggest, iRealLib, servercollatefunc);
2197 			back = prefix_match (sWord, word);
2198 			if (back > best) {
2199 				best = back;
2200 				poCurrentWord = word;
2201 			} else if (back == best) {
2202 				gint x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2203 				if (x > 0) {
2204 					poCurrentWord = word;
2205 				}
2206 			}
2207 		}
2208 	}
2209 	return poCurrentWord;
2210 }
2211 
poGetCurrentWord(CurrentIndex * iCurrent,std::vector<InstantDictIndex> & dictmask,int servercollatefunc)2212 const gchar *Libs::poGetCurrentWord(CurrentIndex * iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2213 {
2214 	const gchar *poCurrentWord = NULL;
2215 	const gchar *word;
2216 	std::vector<InstantDictIndex>::size_type iLib;
2217 	std::vector<Dict *>::size_type iRealLib;
2218 	for (iLib=0; iLib < dictmask.size(); iLib++) {
2219 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2220 			continue;
2221 		iRealLib = dictmask[iLib].index;
2222 		if (iCurrent[iLib].idx==INVALID_INDEX)
2223 			continue;
2224 		if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2225 			continue;
2226 		if ( poCurrentWord == NULL ) {
2227 			poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2228 		} else {
2229 			word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2230 			gint x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2231 			if (x > 0) {
2232 				poCurrentWord = word;
2233 			}
2234 		}
2235 	}
2236 	for (iLib=0; iLib<dictmask.size(); iLib++) {
2237 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2238 			continue;
2239 		iRealLib = dictmask[iLib].index;
2240 		if (iCurrent[iLib].synidx==UNSET_INDEX)
2241 			continue;
2242 		if (iCurrent[iLib].synidx==INVALID_INDEX)
2243 			continue;
2244 		if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2245 			continue;
2246 		if ( poCurrentWord == NULL ) {
2247 			poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2248 		} else {
2249 			word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2250 			gint x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2251 			if (x > 0) {
2252 				poCurrentWord = word;
2253 			}
2254 		}
2255 	}
2256 	return poCurrentWord;
2257 }
2258 
2259 const gchar *
poGetNextWord(const gchar * sWord,CurrentIndex * iCurrent,std::vector<InstantDictIndex> & dictmask,int servercollatefunc)2260 Libs::poGetNextWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2261 {
2262 	// the input can be:
2263 	// (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback();
2264 	// (NULL,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords();
2265 	const gchar *poCurrentWord = NULL;
2266 	std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2267 	bool isLib = false;
2268 	const gchar *word;
2269 
2270 	std::vector<InstantDictIndex>::size_type iLib;
2271 	std::vector<Dict *>::size_type iRealLib;
2272 	for (iLib=0; iLib < dictmask.size(); iLib++) {
2273 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2274 			continue;
2275 		iRealLib = dictmask[iLib].index;
2276 		if (sWord) {
2277 			oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, CollationLevel, servercollatefunc);
2278 		}
2279 		if (iCurrent[iLib].idx==INVALID_INDEX)
2280 			continue;
2281 		if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2282 			continue;
2283 		if (poCurrentWord == NULL ) {
2284 			poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2285 			iCurrentLib = iLib;
2286 			iCurrentRealLib = iRealLib;
2287 			isLib=true;
2288 		} else {
2289 			gint x;
2290 			word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2291 			x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2292 			if (x > 0) {
2293 				poCurrentWord = word;
2294 				iCurrentLib = iLib;
2295 				iCurrentRealLib = iRealLib;
2296 				isLib=true;
2297 			}
2298 		}
2299 	}
2300 	for (iLib=0; iLib < dictmask.size(); iLib++) {
2301 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2302 			continue;
2303 		iRealLib = dictmask[iLib].index;
2304 		if (sWord) {
2305 			oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, CollationLevel, servercollatefunc);
2306 		}
2307 		if (iCurrent[iLib].synidx==UNSET_INDEX)
2308 			continue;
2309 		if (iCurrent[iLib].synidx==INVALID_INDEX)
2310 			continue;
2311 		if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2312 			continue;
2313 		if (poCurrentWord == NULL ) {
2314 			poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2315 			iCurrentLib = iLib;
2316 			iCurrentRealLib = iRealLib;
2317 			isLib=false;
2318 		} else {
2319 			gint x;
2320 			word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2321 			x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2322 			if (x > 0 ) {
2323 				poCurrentWord = word;
2324 				iCurrentLib = iLib;
2325 				iCurrentRealLib = iRealLib;
2326 				isLib=false;
2327 			}
2328 		}
2329 	}
2330 	if (poCurrentWord) {
2331 		for (iLib=0; iLib < dictmask.size(); iLib++) {
2332 			if (dictmask[iLib].type != InstantDictType_LOCAL)
2333 				continue;
2334 			iRealLib = dictmask[iLib].index;
2335 			if (isLib && (iLib == iCurrentLib))
2336 				continue;
2337 			if (iCurrent[iLib].idx==INVALID_INDEX)
2338 				continue;
2339 			if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2340 				continue;
2341 			word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2342 			if (strcmp(poCurrentWord, word) == 0) {
2343 				GetWordNext(iCurrent[iLib].idx, iRealLib, true, servercollatefunc);
2344 			}
2345 		}
2346 		for (iLib=0; iLib < dictmask.size(); iLib++) {
2347 			if (dictmask[iLib].type != InstantDictType_LOCAL)
2348 				continue;
2349 			iRealLib = dictmask[iLib].index;
2350 			if ((!isLib) && (iLib == iCurrentLib))
2351 				continue;
2352 			if (iCurrent[iLib].synidx==UNSET_INDEX)
2353 				continue;
2354 			if (iCurrent[iLib].synidx==INVALID_INDEX)
2355 				continue;
2356 			if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2357 				continue;
2358 			word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2359 			if (strcmp(poCurrentWord, word) == 0) {
2360 				GetWordNext(iCurrent[iLib].synidx, iRealLib, false, servercollatefunc);
2361 			}
2362 		}
2363 		//GetWordNext will change poCurrentWord's content, so do it at the last.
2364 		if (isLib) {
2365 			GetWordNext(iCurrent[iCurrentLib].idx, iCurrentRealLib, true, servercollatefunc);
2366 		} else {
2367 			GetWordNext(iCurrent[iCurrentLib].synidx, iCurrentRealLib, false, servercollatefunc);
2368 		}
2369 		poCurrentWord = poGetCurrentWord(iCurrent, dictmask, servercollatefunc);
2370 	}
2371 	return poCurrentWord;
2372 }
2373 
2374 const gchar *
poGetPreWord(const gchar * sWord,CurrentIndex * iCurrent,std::vector<InstantDictIndex> & dictmask,int servercollatefunc)2375 Libs::poGetPreWord(const gchar *sWord, CurrentIndex* iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2376 {
2377 	// used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange();
2378 	const gchar *poCurrentWord = NULL;
2379 	std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2380 	bool isLib = false;
2381 
2382 	const gchar *word;
2383 	glong pidx;
2384 	std::vector<InstantDictIndex>::size_type iLib;
2385 	std::vector<Dict *>::size_type iRealLib;
2386 	// lookup in index
2387 	for (iLib=0;iLib<dictmask.size();iLib++) {
2388 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2389 			continue;
2390 		iRealLib = dictmask[iLib].index;
2391 		if (sWord) {
2392 			oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, CollationLevel, servercollatefunc);
2393 		}
2394 		if (iCurrent[iLib].idx!=INVALID_INDEX) {
2395 			if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2396 				continue;
2397 		}
2398 		if ( poCurrentWord == NULL ) {
2399 			if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2400 				poCurrentWord = poGetWord(pidx, iRealLib, servercollatefunc);
2401 				iCurrentLib = iLib;
2402 				iCurrentRealLib = iRealLib;
2403 				isLib=true;
2404 			}
2405 		} else {
2406 			if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2407 				gint x;
2408 				word = poGetWord(pidx, iRealLib, servercollatefunc);
2409 				x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2410 				if (x < 0 ) {
2411 					poCurrentWord = word;
2412 					iCurrentLib = iLib;
2413 					iCurrentRealLib = iRealLib;
2414 					isLib=true;
2415 				}
2416 			}
2417 		}
2418 	}
2419 	// lookup synonyms
2420 	for (iLib=0;iLib<dictmask.size();iLib++) {
2421 		if (dictmask[iLib].type != InstantDictType_LOCAL)
2422 			continue;
2423 		iRealLib = dictmask[iLib].index;
2424 		if (sWord) {
2425 			oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, CollationLevel, servercollatefunc);
2426 		}
2427 		if (iCurrent[iLib].synidx==UNSET_INDEX)
2428 			continue;
2429 		if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2430 			if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2431 				continue;
2432 		}
2433 		if ( poCurrentWord == NULL ) {
2434 			if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2435 				poCurrentWord = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2436 				iCurrentLib = iLib;
2437 				iCurrentRealLib = iRealLib;
2438 				isLib=false;
2439 			}
2440 		} else {
2441 			if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2442 				gint x;
2443 				word = poGetSynonymWord(pidx,iRealLib, servercollatefunc);
2444 				x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2445 				if (x < 0 ) {
2446 					poCurrentWord = word;
2447 					iCurrentLib = iLib;
2448 					iCurrentRealLib = iRealLib;
2449 					isLib=false;
2450 				}
2451 			}
2452 		}
2453 	}
2454 	if (poCurrentWord) {
2455 		/* poCurrentWord - the "previous" word for the sWord word among all word in
2456 		 * all local dictionaries specified by dictmask */
2457 		for (iLib=0;iLib<dictmask.size();iLib++) {
2458 			if (dictmask[iLib].type != InstantDictType_LOCAL)
2459 				continue;
2460 			iRealLib = dictmask[iLib].index;
2461 			if (isLib && (iLib == iCurrentLib))
2462 				continue;
2463 			if (iCurrent[iLib].idx!=INVALID_INDEX) {
2464 				if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2465 					continue;
2466 			}
2467 			if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2468 				word = poGetWord(pidx, iRealLib, servercollatefunc);
2469 				if (strcmp(poCurrentWord, word) == 0) {
2470 					iCurrent[iLib].idx=pidx;
2471 				}
2472 			}
2473 		}
2474 		for (iLib=0;iLib<dictmask.size();iLib++) {
2475 			if (dictmask[iLib].type != InstantDictType_LOCAL)
2476 				continue;
2477 			iRealLib = dictmask[iLib].index;
2478 			if ((!isLib) && (iLib == iCurrentLib))
2479 				continue;
2480 			if (iCurrent[iLib].synidx==UNSET_INDEX)
2481 				continue;
2482 			if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2483 				if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2484 					continue;
2485 			}
2486 			if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2487 				word = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2488 				if (strcmp(poCurrentWord, word) == 0) {
2489 					iCurrent[iLib].synidx=pidx;
2490 				}
2491 			}
2492 		}
2493 		if (isLib) {
2494 			GetWordPrev(iCurrent[iCurrentLib].idx, pidx, iCurrentRealLib, true, servercollatefunc);
2495 			iCurrent[iCurrentLib].idx = pidx;
2496 		} else {
2497 			GetWordPrev(iCurrent[iCurrentLib].synidx, pidx, iCurrentRealLib, false, servercollatefunc);
2498 			iCurrent[iCurrentLib].synidx = pidx;
2499 		}
2500 	}
2501 	return poCurrentWord;
2502 }
2503 
LookupSynonymSimilarWord(const gchar * sWord,glong & iSynonymWordIndex,glong & synidx_suggest,size_t iLib,int servercollatefunc)2504 bool Libs::LookupSynonymSimilarWord(const gchar* sWord, glong &iSynonymWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
2505 {
2506 	if (oLib[iLib]->syn_file.get() == NULL)
2507 		return false;
2508 
2509 	glong iIndex;
2510 	glong iIndex_suggest;
2511 	bool bFound=false;
2512 	gchar *casestr;
2513 	bool bLookup;
2514 
2515 	if (!bFound) {
2516 		// to lower case.
2517 		casestr = g_utf8_strdown(sWord, -1);
2518 		if (strcmp(casestr, sWord)) {
2519 			bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, CollationLevel, servercollatefunc);
2520 			if(bLookup)
2521 				bFound=true;
2522 		}
2523 		g_free(casestr);
2524 		// to upper case.
2525 		if (!bFound) {
2526 			casestr = g_utf8_strup(sWord, -1);
2527 			if (strcmp(casestr, sWord)) {
2528 				bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, CollationLevel, servercollatefunc);
2529 				if(bLookup)
2530 					bFound=true;
2531 			}
2532 			g_free(casestr);
2533 		}
2534 		// Upper the first character and lower others.
2535 		if (!bFound) {
2536 			gchar *nextchar = g_utf8_next_char(sWord);
2537 			gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
2538 			nextchar = g_utf8_strdown(nextchar, -1);
2539 			casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2540 			g_free(firstchar);
2541 			g_free(nextchar);
2542 			if (strcmp(casestr, sWord)) {
2543 				bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, CollationLevel, servercollatefunc);
2544 				if(bLookup)
2545 					bFound=true;
2546 			}
2547 			g_free(casestr);
2548 		}
2549 		if (!bFound) {
2550 			iIndex = iSynonymWordIndex;
2551 			glong pidx;
2552 			const gchar *cword;
2553 			do {
2554 				if (GetWordPrev(iIndex, pidx, iLib, false, servercollatefunc)) {
2555 					cword = poGetSynonymWord(pidx, iLib, servercollatefunc);
2556 					if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2557 						iIndex = pidx;
2558 						bFound=true;
2559 					} else {
2560 						break;
2561 					}
2562 				} else {
2563 					break;
2564 				}
2565 			} while (true);
2566 			if (!bFound) {
2567 				if (iIndex!=INVALID_INDEX) {
2568 					cword = poGetSynonymWord(iIndex, iLib, servercollatefunc);
2569 					if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2570 						bFound=true;
2571 					}
2572 				}
2573 			}
2574 		}
2575 	}
2576 	if (bFound) {
2577 		iSynonymWordIndex = iIndex;
2578 		synidx_suggest = iIndex_suggest;
2579 	}
2580 	return bFound;
2581 }
2582 
2583 /* A helper function for LookupSimilarWord method.
2584  * It accepts too many parameters but simplifies the main function a bit...
2585  * Return value - whether the lookup was successful.
2586  * idx_suggest is updated if a better partial match is found. */
LookupSimilarWordTryWord(const gchar * sTryWord,const gchar * sWord,int servercollatefunc,size_t iLib,glong & iIndex,glong & idx_suggest,gint & best_match)2587 bool Libs::LookupSimilarWordTryWord(const gchar *sTryWord, const gchar *sWord,
2588 	int servercollatefunc, size_t iLib,
2589 	glong &iIndex, glong &idx_suggest, gint &best_match)
2590 {
2591 	glong iIndexSuggest;
2592 	if(oLib[iLib]->Lookup(sTryWord, iIndex, iIndexSuggest, CollationLevel, servercollatefunc)) {
2593 		best_match = g_utf8_strlen(sTryWord, -1);
2594 		idx_suggest = iIndexSuggest;
2595 		return true;
2596 	} else {
2597 		gint cur_match = prefix_match(sWord, poGetWord(iIndexSuggest, iLib, servercollatefunc));
2598 		if(cur_match > best_match) {
2599 			best_match = cur_match;
2600 			idx_suggest = iIndexSuggest;
2601 		}
2602 		return false;
2603 	}
2604 }
2605 
2606 /* Search for a word similar to sWord.
2607  * Return true if a similar word is found.
2608  * If a similar word is found, iWordIndex and idx_suggest point to the found word.
2609  * If a similar word is not found, idx_suggest points to the best partial match
2610  * found so far, iWordIndex does not change.
2611  * Input parameters:
2612  * iWordIndex must be initialized with a valid index. The value is used a basis
2613  * for searching a similar word. iWordIndex may be INVALID_INDEX.
2614  * idx_suggest must be initialized. If it is a valid index, it participates in
2615  * searching for the best partial match. */
LookupSimilarWord(const gchar * sWord,glong & iWordIndex,glong & idx_suggest,size_t iLib,int servercollatefunc)2616 bool Libs::LookupSimilarWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
2617 {
2618 	glong iIndex;
2619 	bool bFound=false;
2620 	gchar *casestr;
2621 	gint best_match = 0;
2622 
2623 	if(idx_suggest != UNSET_INDEX && idx_suggest != INVALID_INDEX) {
2624 		best_match = prefix_match(sWord, poGetWord(idx_suggest, iLib, servercollatefunc));
2625 	}
2626 
2627 	if (!bFound) {
2628 		// to lower case.
2629 		casestr = g_utf8_strdown(sWord, -1);
2630 		if (strcmp(casestr, sWord)) {
2631 			if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2632 				bFound=true;
2633 		}
2634 		g_free(casestr);
2635 		// to upper case.
2636 		if (!bFound) {
2637 			casestr = g_utf8_strup(sWord, -1);
2638 			if (strcmp(casestr, sWord)) {
2639 				if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2640 					bFound=true;
2641 			}
2642 			g_free(casestr);
2643 		}
2644 		// Upper the first character and lower others.
2645 		if (!bFound) {
2646 			gchar *nextchar = g_utf8_next_char(sWord);
2647 			gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
2648 			nextchar = g_utf8_strdown(nextchar, -1);
2649 			casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2650 			g_free(firstchar);
2651 			g_free(nextchar);
2652 			if (strcmp(casestr, sWord)) {
2653 				if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2654 					bFound=true;
2655 			}
2656 			g_free(casestr);
2657 		}
2658 		// compare with the preceding words in the index case-insensitive
2659 		// iWordIndex - the base index
2660 		if (!bFound) {
2661 			iIndex = iWordIndex;
2662 			glong pidx;
2663 			const gchar *cword;
2664 			do {
2665 				if (GetWordPrev(iIndex, pidx, iLib, true, servercollatefunc)) {
2666 					cword = poGetWord(pidx, iLib, servercollatefunc);
2667 					if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2668 						iIndex = pidx;
2669 						bFound=true;
2670 					} else {
2671 						gint cur_match = prefix_match(sWord, cword);
2672 						if(cur_match > best_match) {
2673 							best_match = cur_match;
2674 							idx_suggest = pidx;
2675 						}
2676 						break;
2677 					}
2678 				} else {
2679 					break;
2680 				}
2681 			} while (true);
2682 			if (!bFound) {
2683 				if (iIndex!=INVALID_INDEX) {
2684 					cword = poGetWord(iIndex, iLib, servercollatefunc);
2685 					if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2686 						bFound=true;
2687 					} else {
2688 						gint cur_match = prefix_match(sWord, cword);
2689 						if(cur_match > best_match) {
2690 							best_match = cur_match;
2691 							idx_suggest = iIndex;
2692 						}
2693 					}
2694 				}
2695 			}
2696 			if(bFound) {
2697 				best_match = g_utf8_strlen(poGetWord(iIndex, iLib, servercollatefunc), -1);
2698 				idx_suggest = iIndex;
2699 			}
2700 		}
2701 	}
2702 
2703 	if (IsASCII(sWord)) {
2704 		// If not Found, try other status of sWord.
2705 		size_t iWordLen=strlen(sWord);
2706 		bool isupcase;
2707 
2708 		gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
2709 
2710 		//cut one char "s" or "d"
2711 		if(!bFound && iWordLen>1) {
2712 			isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2);
2713 			if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2714 				strcpy(sNewWord,sWord);
2715 				sNewWord[iWordLen-1]='\0'; // cut "s" or "d"
2716 				if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2717 					bFound=true;
2718 				else if (isupcase || g_ascii_isupper(sWord[0])) {
2719 					casestr = g_ascii_strdown(sNewWord, -1);
2720 					if (strcmp(casestr, sNewWord)) {
2721 						if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2722 							bFound=true;
2723 					}
2724 					g_free(casestr);
2725 				}
2726 			}
2727 		}
2728 
2729 		//cut "ly"
2730 		if(!bFound && iWordLen>2) {
2731 			isupcase = !strncmp(&sWord[iWordLen-2],"LY",2);
2732 			if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) {
2733 				strcpy(sNewWord,sWord);
2734 				sNewWord[iWordLen-2]='\0';  // cut "ly"
2735 				if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4]
2736 				    && !bIsVowel(sNewWord[iWordLen-4]) &&
2737 				    bIsVowel(sNewWord[iWordLen-5])) {//doubled
2738 
2739 					sNewWord[iWordLen-3]='\0';
2740 					if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2741 						bFound=true;
2742 					else {
2743 						if (isupcase || g_ascii_isupper(sWord[0])) {
2744 							casestr = g_ascii_strdown(sNewWord, -1);
2745 							if (strcmp(casestr, sNewWord)) {
2746 								if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2747 									bFound=true;
2748 							}
2749 							g_free(casestr);
2750 						}
2751 						if (!bFound)
2752 							sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore
2753 					}
2754 				}
2755 				if (!bFound) {
2756 					if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2757 						bFound=true;
2758 					else if (isupcase || g_ascii_isupper(sWord[0])) {
2759 						casestr = g_ascii_strdown(sNewWord, -1);
2760 						if (strcmp(casestr, sNewWord)) {
2761 							if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2762 								bFound=true;
2763 						}
2764 						g_free(casestr);
2765 					}
2766 				}
2767 			}
2768 		}
2769 
2770 		//cut "ing"
2771 		if(!bFound && iWordLen>3) {
2772 			isupcase = !strncmp(&sWord[iWordLen-3],"ING",3);
2773 			if (isupcase || !strncmp(&sWord[iWordLen-3],"ing",3) ) {
2774 				strcpy(sNewWord,sWord);
2775 				sNewWord[iWordLen-3]='\0';
2776 				if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5])
2777 				     && !bIsVowel(sNewWord[iWordLen-5]) &&
2778 				     bIsVowel(sNewWord[iWordLen-6])) {  //doubled
2779 					sNewWord[iWordLen-4]='\0';
2780 					if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2781 						bFound=true;
2782 					else {
2783 						if (isupcase || g_ascii_isupper(sWord[0])) {
2784 							casestr = g_ascii_strdown(sNewWord, -1);
2785 							if (strcmp(casestr, sNewWord)) {
2786 								if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2787 									bFound=true;
2788 							}
2789 							g_free(casestr);
2790 						}
2791 						if (!bFound)
2792 							sNewWord[iWordLen-4]=sNewWord[iWordLen-5];  //restore
2793 					}
2794 				}
2795 				if( !bFound ) {
2796 					if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2797 						bFound=true;
2798 					else if (isupcase || g_ascii_isupper(sWord[0])) {
2799 						casestr = g_ascii_strdown(sNewWord, -1);
2800 						if (strcmp(casestr, sNewWord)) {
2801 							if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2802 								bFound=true;
2803 						}
2804 						g_free(casestr);
2805 					}
2806 				}
2807 				if(!bFound) {
2808 					if (isupcase)
2809 						strcat(sNewWord,"E"); // add a char "E"
2810 					else
2811 						strcat(sNewWord,"e"); // add a char "e"
2812 					if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2813 						bFound=true;
2814 					else if (isupcase || g_ascii_isupper(sWord[0])) {
2815 						casestr = g_ascii_strdown(sNewWord, -1);
2816 						if (strcmp(casestr, sNewWord)) {
2817 							if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2818 								bFound=true;
2819 						}
2820 						g_free(casestr);
2821 					}
2822 				}
2823 			}
2824 		}
2825 
2826 		//cut two char "es"
2827 		if(!bFound && iWordLen>3) {
2828 			isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) &&
2829 				    (sWord[iWordLen-3] == 'S' ||
2830 				     sWord[iWordLen-3] == 'X' ||
2831 				     sWord[iWordLen-3] == 'O' ||
2832 				     (iWordLen >4 && sWord[iWordLen-3] == 'H' &&
2833 				      (sWord[iWordLen-4] == 'C' ||
2834 				       sWord[iWordLen-4] == 'S'))));
2835 			if (isupcase ||
2836 			    (!strncmp(&sWord[iWordLen-2],"es",2) &&
2837 			     (sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' ||
2838 			      sWord[iWordLen-3] == 'o' ||
2839 			      (iWordLen >4 && sWord[iWordLen-3] == 'h' &&
2840 			       (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) {
2841 				strcpy(sNewWord,sWord);
2842 				sNewWord[iWordLen-2]='\0';
2843 				if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2844 					bFound=true;
2845 				else if (isupcase || g_ascii_isupper(sWord[0])) {
2846 					casestr = g_ascii_strdown(sNewWord, -1);
2847 					if (strcmp(casestr, sNewWord)) {
2848 						if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2849 							bFound=true;
2850 					}
2851 					g_free(casestr);
2852 				}
2853 			}
2854 		}
2855 
2856 		//cut "ed"
2857 		if (!bFound && iWordLen>3) {
2858 			isupcase = !strncmp(&sWord[iWordLen-2],"ED",2);
2859 			if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2860 				strcpy(sNewWord,sWord);
2861 				sNewWord[iWordLen-2]='\0';
2862 				if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4])
2863 				    && !bIsVowel(sNewWord[iWordLen-4]) &&
2864 				    bIsVowel(sNewWord[iWordLen-5])) {//doubled
2865 					sNewWord[iWordLen-3]='\0';
2866 					if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2867 						bFound=true;
2868 					else {
2869 						if (isupcase || g_ascii_isupper(sWord[0])) {
2870 							casestr = g_ascii_strdown(sNewWord, -1);
2871 							if (strcmp(casestr, sNewWord)) {
2872 								if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2873 									bFound=true;
2874 							}
2875 							g_free(casestr);
2876 						}
2877 						if (!bFound)
2878 							sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore
2879 					}
2880 				}
2881 				if (!bFound) {
2882 					if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2883 						bFound=true;
2884 					else if (isupcase || g_ascii_isupper(sWord[0])) {
2885 						casestr = g_ascii_strdown(sNewWord, -1);
2886 						if (strcmp(casestr, sNewWord)) {
2887 							if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2888 								bFound=true;
2889 						}
2890 						g_free(casestr);
2891 					}
2892 				}
2893 			}
2894 		}
2895 
2896 		// cut "ied" , add "y".
2897 		if (!bFound && iWordLen>3) {
2898 			isupcase = !strncmp(&sWord[iWordLen-3],"IED",3);
2899 			if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) {
2900 				strcpy(sNewWord,sWord);
2901 				sNewWord[iWordLen-3]='\0';
2902 				if (isupcase)
2903 					strcat(sNewWord,"Y"); // add a char "Y"
2904 				else
2905 					strcat(sNewWord,"y"); // add a char "y"
2906 				if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2907 					bFound=true;
2908 				else if (isupcase || g_ascii_isupper(sWord[0])) {
2909 					casestr = g_ascii_strdown(sNewWord, -1);
2910 					if (strcmp(casestr, sNewWord)) {
2911 						if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2912 							bFound=true;
2913 					}
2914 					g_free(casestr);
2915 				}
2916 			}
2917 		}
2918 
2919 		// cut "ies" , add "y".
2920 		if (!bFound && iWordLen>3) {
2921 			isupcase = !strncmp(&sWord[iWordLen-3],"IES",3);
2922 			if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) {
2923 				strcpy(sNewWord,sWord);
2924 				sNewWord[iWordLen-3]='\0';
2925 				if (isupcase)
2926 					strcat(sNewWord,"Y"); // add a char "Y"
2927 				else
2928 					strcat(sNewWord,"y"); // add a char "y"
2929 				if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2930 					bFound=true;
2931 				else if (isupcase || g_ascii_isupper(sWord[0])) {
2932 					casestr = g_ascii_strdown(sNewWord, -1);
2933 					if (strcmp(casestr, sNewWord)) {
2934 						if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2935 							bFound=true;
2936 					}
2937 					g_free(casestr);
2938 				}
2939 			}
2940 		}
2941 
2942 		// cut "er".
2943 		if (!bFound && iWordLen>2) {
2944 			isupcase = !strncmp(&sWord[iWordLen-2],"ER",2);
2945 			if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) {
2946 				strcpy(sNewWord,sWord);
2947 				sNewWord[iWordLen-2]='\0';
2948 				if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2949 					bFound=true;
2950 				else if (isupcase || g_ascii_isupper(sWord[0])) {
2951 					casestr = g_ascii_strdown(sNewWord, -1);
2952 					if (strcmp(casestr, sNewWord)) {
2953 						if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2954 							bFound=true;
2955 					}
2956 					g_free(casestr);
2957 				}
2958 			}
2959 		}
2960 
2961 		// cut "est".
2962 		if (!bFound && iWordLen>3) {
2963 			isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3);
2964 			if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) {
2965 				strcpy(sNewWord,sWord);
2966 				sNewWord[iWordLen-3]='\0';
2967 				if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2968 					bFound=true;
2969 				else if (isupcase || g_ascii_isupper(sWord[0])) {
2970 					casestr = g_ascii_strdown(sNewWord, -1);
2971 					if (strcmp(casestr, sNewWord)) {
2972 						if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2973 							bFound=true;
2974 					}
2975 					g_free(casestr);
2976 				}
2977 			}
2978 		}
2979 
2980 		g_free(sNewWord);
2981 	}
2982 
2983 	if (bFound)
2984 		iWordIndex = iIndex;
2985 #if 0
2986 	else {
2987 		//don't change iWordIndex here.
2988 		//when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words.
2989 		//iWordIndex = INVALID_INDEX;
2990 	}
2991 #endif
2992 	return bFound;
2993 }
2994 
SimpleLookupWord(const gchar * sWord,glong & iWordIndex,glong & idx_suggest,size_t iLib,int servercollatefunc)2995 bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
2996 {
2997 	bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex, idx_suggest, CollationLevel, servercollatefunc);
2998 	if (!bFound)
2999 		bFound = LookupSimilarWord(sWord, iWordIndex, idx_suggest, iLib, servercollatefunc);
3000 	return bFound;
3001 }
3002 
SimpleLookupSynonymWord(const gchar * sWord,glong & iWordIndex,glong & synidx_suggest,size_t iLib,int servercollatefunc)3003 bool Libs::SimpleLookupSynonymWord(const gchar* sWord, glong & iWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
3004 {
3005 	bool bFound = oLib[iLib]->LookupSynonym(sWord, iWordIndex, synidx_suggest, CollationLevel, servercollatefunc);
3006 	if (!bFound)
3007 		bFound = LookupSynonymSimilarWord(sWord, iWordIndex, synidx_suggest, iLib, servercollatefunc);
3008 	return bFound;
3009 }
3010 
/* One slot of the fuzzy-search result list. */
struct Fuzzystruct {
	char *pMatchWord;         // matched word, or NULL while the slot is unused
	int iMatchWordDistance;   // edit distance recorded for this slot
};
3015 
operator <(const Fuzzystruct & lh,const Fuzzystruct & rh)3016 static inline bool operator<(const Fuzzystruct & lh, const Fuzzystruct & rh) {
3017 	if (lh.iMatchWordDistance!=rh.iMatchWordDistance)
3018 		return lh.iMatchWordDistance<rh.iMatchWordDistance;
3019 
3020 	if (lh.pMatchWord && rh.pMatchWord)
3021 		return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0;
3022 
3023 	return false;
3024 }
3025 
unicode_strdown(gunichar * str)3026 static inline void unicode_strdown(gunichar *str)
3027 {
3028 	while (*str) {
3029 		*str=g_unichar_tolower(*str);
3030 		++str;
3031 	}
3032 }
3033 
LookupWithFuzzy(const gchar * sWord,gchar * reslist[],gint reslist_size,std::vector<InstantDictIndex> & dictmask)3034 bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size, std::vector<InstantDictIndex> &dictmask)
3035 {
3036 	if (sWord[0] == '\0')
3037 		return false;
3038 
3039 	std::vector<Fuzzystruct> oFuzzystruct(reslist_size);
3040 
3041 	for (int i=0; i<reslist_size; i++) {
3042 		oFuzzystruct[i].pMatchWord = NULL;
3043 		oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance;
3044 	}
3045 	int iMaxDistance = iMaxFuzzyDistance;
3046 	int iDistance;
3047 	bool Found = false;
3048 	EditDistance oEditDistance;
3049 
3050 	glong iCheckWordLen;
3051 	const char *sCheck;
3052 	gunichar *ucs4_str1, *ucs4_str2;
3053 	glong ucs4_str2_len;
3054 
3055 	ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len);
3056 	unicode_strdown(ucs4_str2);
3057 
3058 	std::vector<Dict *>::size_type iRealLib;
3059 	for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3060 		if (dictmask[iLib].type != InstantDictType_LOCAL)
3061 			continue;
3062 		iRealLib = dictmask[iLib].index;
3063 		for (gint synLib=0; synLib<2; synLib++) {
3064 			if (synLib==1) {
3065 				if (oLib[iRealLib]->syn_file.get()==NULL)
3066 					break;
3067 			}
3068 			show_progress->notify_about_work();
3069 
3070 			//if (stardict_strcmp(sWord, poGetWord(0,iRealLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iRealLib)-1,iRealLib))<=0) {
3071 			//there are Chinese dicts and English dicts...
3072 			if (TRUE) {
3073 				glong iwords;
3074 				if (synLib==0)
3075 					iwords = narticles(iRealLib);
3076 				else
3077 					iwords = nsynarticles(iRealLib);
3078 				for (glong index=0; index<iwords; index++) {
3079 					// Need to deal with same word in index? But this will slow down processing in most case.
3080 					if (synLib==0)
3081 						sCheck = poGetOrigWord(index,iRealLib);
3082 					else
3083 						sCheck = poGetOrigSynonymWord(index,iRealLib);
3084 					// tolower and skip too long or too short words
3085 					iCheckWordLen = g_utf8_strlen(sCheck, -1);
3086 					if (iCheckWordLen-ucs4_str2_len>=iMaxDistance ||
3087 					    ucs4_str2_len-iCheckWordLen>=iMaxDistance)
3088 						continue;
3089 					ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, NULL);
3090 					if (iCheckWordLen > ucs4_str2_len)
3091 						ucs4_str1[ucs4_str2_len]=0;
3092 					unicode_strdown(ucs4_str1);
3093 
3094 					iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance);
3095 					g_free(ucs4_str1);
3096 					if (iDistance<iMaxDistance && iDistance < ucs4_str2_len) {
3097 						// when ucs4_str2_len=1,2 we need less fuzzy.
3098 						Found = true;
3099 						bool bAlreadyInList = false;
3100 						int iMaxDistanceAt=0;
3101 						for (int j=0; j<reslist_size; j++) {
3102 							if (oFuzzystruct[j].pMatchWord &&
3103 							    strcmp(oFuzzystruct[j].pMatchWord,sCheck)==0 ) {//already in list
3104 								bAlreadyInList = true;
3105 								break;
3106 							}
3107 							//find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
3108 							if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance ) {
3109 								iMaxDistanceAt = j;
3110 							}
3111 						}
3112 						if (!bAlreadyInList) {
3113 							if (oFuzzystruct[iMaxDistanceAt].pMatchWord)
3114 								g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord);
3115 							oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck);
3116 							oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance;
3117 							// calc new iMaxDistance
3118 							iMaxDistance = iDistance;
3119 							for (int j=0; j<reslist_size; j++) {
3120 								if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance)
3121 									iMaxDistance = oFuzzystruct[j].iMatchWordDistance;
3122 							} // calc new iMaxDistance
3123 						}   // add to list
3124 					}   // find one
3125 				}   // each word
3126 			}   // ok for search
3127 		}  // synLib
3128 	}   // each lib
3129 	g_free(ucs4_str2);
3130 
3131 	if (Found)// sort with distance
3132 		std::sort(oFuzzystruct.begin(), oFuzzystruct.end());
3133 
3134 	for (gint i=0; i<reslist_size; ++i)
3135 		reslist[i]=oFuzzystruct[i].pMatchWord;
3136 
3137 	return Found;
3138 }
3139 
less_for_compare(const char * lh,const char * rh)3140 static inline bool less_for_compare(const char *lh, const char *rh) {
3141 	return stardict_strcmp(lh, rh)<0;
3142 }
3143 
LookupWithRule(const gchar * word,gchar ** ppMatchWord,std::vector<InstantDictIndex> & dictmask)3144 gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
3145 {
3146 	glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3147 	gint iMatchCount = 0;
3148 	GPatternSpec *pspec = g_pattern_spec_new(word);
3149 
3150 	const gchar * sMatchWord;
3151 	bool bAlreadyInList;
3152 	std::vector<Dict *>::size_type iRealLib;
3153 	for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3154 		//if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3155 		// -iMatchCount,so save time,but may got less result and the word may repeat.
3156 		if (dictmask[iLib].type != InstantDictType_LOCAL)
3157 			continue;
3158 		iRealLib = dictmask[iLib].index;
3159 		if (oLib[iRealLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3160 			show_progress->notify_about_work();
3161 			for (int i=0; aiIndex[i]!=-1; i++) {
3162 				sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3163 				bAlreadyInList = false;
3164 				for (int j=0; j<iMatchCount; j++) {
3165 					if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3166 						bAlreadyInList = true;
3167 						break;
3168 					}
3169 				}
3170 				if (!bAlreadyInList)
3171 					ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3172 			}
3173 		}
3174 		if (oLib[iRealLib]->LookupWithRuleSynonym(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3175 			show_progress->notify_about_work();
3176 			for (int i=0; aiIndex[i]!=-1; i++) {
3177 				sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3178 				bAlreadyInList = false;
3179 				for (int j=0; j<iMatchCount; j++) {
3180 					if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3181 						bAlreadyInList = true;
3182 						break;
3183 					}
3184 				}
3185 				if (!bAlreadyInList)
3186 					ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3187 			}
3188 		}
3189 	}
3190 	g_pattern_spec_free(pspec);
3191 
3192 	if (iMatchCount)// sort it.
3193 		std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
3194 	return iMatchCount;
3195 }
3196 
LookupWithRegex(const gchar * word,gchar ** ppMatchWord,std::vector<InstantDictIndex> & dictmask)3197 gint Libs::LookupWithRegex(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
3198 {
3199 	glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3200 	gint iMatchCount = 0;
3201 	GRegex *regex = g_regex_new(word, G_REGEX_OPTIMIZE, (GRegexMatchFlags)0, NULL);
3202 
3203 	const gchar * sMatchWord;
3204 	bool bAlreadyInList;
3205 	std::vector<Dict *>::size_type iRealLib;
3206 	for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3207 		//if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3208 		// -iMatchCount,so save time,but may got less result and the word may repeat.
3209 		if (dictmask[iLib].type != InstantDictType_LOCAL)
3210 			continue;
3211 		iRealLib = dictmask[iLib].index;
3212 		if (oLib[iRealLib]->LookupWithRegex(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3213 			show_progress->notify_about_work();
3214 			for (int i=0; aiIndex[i]!=-1; i++) {
3215 				sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3216 				bAlreadyInList = false;
3217 				for (int j=0; j<iMatchCount; j++) {
3218 					if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3219 						bAlreadyInList = true;
3220 						break;
3221 					}
3222 				}
3223 				if (!bAlreadyInList)
3224 					ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3225 			}
3226 		}
3227 		if (oLib[iRealLib]->LookupWithRegexSynonym(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3228 			show_progress->notify_about_work();
3229 			for (int i=0; aiIndex[i]!=-1; i++) {
3230 				sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3231 				bAlreadyInList = false;
3232 				for (int j=0; j<iMatchCount; j++) {
3233 					if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3234 						bAlreadyInList = true;
3235 						break;
3236 					}
3237 				}
3238 				if (!bAlreadyInList)
3239 					ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3240 			}
3241 		}
3242 	}
3243 	g_regex_unref(regex);
3244 
3245 	if (iMatchCount)// sort it.
3246 		std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
3247 	return iMatchCount;
3248 }
3249 
LookupData(const gchar * sWord,std::vector<gchar * > * reslist,updateSearchDialog_func search_func,gpointer search_data,bool * cancel,std::vector<InstantDictIndex> & dictmask)3250 bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist, updateSearchDialog_func search_func, gpointer search_data, bool *cancel, std::vector<InstantDictIndex> &dictmask)
3251 {
3252 	std::vector<std::string> SearchWords;
3253 	std::string SearchWord;
3254 	const char *p=sWord;
3255 	while (*p) {
3256 		if (*p=='\\') {
3257 			p++;
3258 			switch (*p) {
3259 			case ' ':
3260 				SearchWord+=' ';
3261 				break;
3262 			case '\\':
3263 				SearchWord+='\\';
3264 				break;
3265 			case 't':
3266 				SearchWord+='\t';
3267 				break;
3268 			case 'n':
3269 				SearchWord+='\n';
3270 				break;
3271 			default:
3272 				SearchWord+=*p;
3273 			}
3274 		} else if (*p == ' ') {
3275 			if (!SearchWord.empty()) {
3276 				SearchWords.push_back(SearchWord);
3277 				SearchWord.clear();
3278 			}
3279 		} else {
3280 			SearchWord+=*p;
3281 		}
3282 		p++;
3283 	}
3284 	if (!SearchWord.empty()) {
3285 		SearchWords.push_back(SearchWord);
3286 		SearchWord.clear();
3287 	}
3288 	if (SearchWords.empty())
3289 		return false;
3290 
3291 	glong search_count=0;
3292 	glong total_count=0;
3293 	if (search_func) {
3294 		for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3295 			if (dictmask[i].type == InstantDictType_LOCAL)
3296 				total_count += narticles(dictmask[i].index);
3297 		}
3298 	}
3299 
3300 	guint32 max_size =0;
3301 	gchar *origin_data = NULL;
3302 	std::vector<InstantDictIndex>::size_type iRealLib;
3303 	for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3304 		if (dictmask[i].type != InstantDictType_LOCAL)
3305 			continue;
3306 		iRealLib = dictmask[i].index;
3307 		if (!oLib[iRealLib]->containSearchData())
3308 			continue;
3309 		const gulong iwords = narticles(iRealLib);
3310 		const gchar *key;
3311 		guint32 offset, size;
3312 		for (gulong j=0; j<iwords; ++j) {
3313 			if (search_func) {
3314 				if (*cancel)
3315 					goto search_out;
3316 				if (search_count % 10000 == 0) {
3317 					search_func(search_data, (gdouble)search_count/(gdouble)total_count);
3318 				}
3319 				search_count++;
3320 			}
3321 			oLib[iRealLib]->get_key_and_data(j, &key, &offset, &size);
3322 			if (size>max_size) {
3323 				origin_data = (gchar *)g_realloc(origin_data, size);
3324 				max_size = size;
3325 			}
3326 			if (oLib[iRealLib]->SearchData(SearchWords, offset, size, origin_data)) {
3327 				if (reslist[i].empty() || strcmp(reslist[i].back(), key))
3328 					reslist[i].push_back(g_strdup(key));
3329 			}
3330 		}
3331 	}
3332 search_out:
3333 	g_free(origin_data);
3334 	//KMP_end();
3335 
3336 	std::vector<InstantDictIndex>::size_type i;
3337 	for (i=0; i<dictmask.size(); ++i)
3338 		if (!reslist[i].empty())
3339 			break;
3340 
3341 	return i!=dictmask.size();
3342 }
3343 
GetStorageType(size_t iLib)3344 StorageType Libs::GetStorageType(size_t iLib)
3345 {
3346 	if (oLib[iLib]->storage == NULL)
3347 		return StorageType_UNKNOWN;
3348 	return oLib[iLib]->storage->get_storage_type();
3349 }
3350 
GetStorageFilePath(size_t iLib,const std::string & key)3351 FileHolder Libs::GetStorageFilePath(size_t iLib, const std::string &key)
3352 {
3353 	if (oLib[iLib]->storage == NULL)
3354 		return FileHolder();
3355 	return oLib[iLib]->storage->get_file_path(key);
3356 }
3357 
GetStorageFileContent(size_t iLib,const std::string & key)3358 const char *Libs::GetStorageFileContent(size_t iLib, const std::string &key)
3359 {
3360 	if (oLib[iLib]->storage == NULL)
3361 		return NULL;
3362 	return oLib[iLib]->storage->get_file_content(key);
3363 }
3364 
init_collations()3365 void Libs::init_collations()
3366 {
3367 	if (CollationLevel == CollationLevel_SINGLE) {
3368 		if (utf8_collate_init(CollateFunction))
3369 			g_print("Init collate function failed!\n");
3370 	} else if (CollationLevel == CollationLevel_MULTI){
3371 		if (utf8_collate_init_all())
3372 			g_print("Init collate functions failed!\n");
3373 	}
3374 }
3375 
free_collations()3376 void Libs::free_collations()
3377 {
3378 	if(CollationLevel == CollationLevel_SINGLE)
3379 		utf8_collate_end(CollateFunction);
3380 	else if(CollationLevel == CollationLevel_MULTI)
3381 		utf8_collate_end_all();
3382 }
3383 
ValidateCollateParams(CollationLevelType & level,CollateFunctions & func)3384 void Libs::ValidateCollateParams(CollationLevelType& level, CollateFunctions& func)
3385 {
3386 	if(level == CollationLevel_SINGLE) {
3387 		if(func == COLLATE_FUNC_NONE) {
3388 			g_print(_("Invalid collate function. Disable collation."));
3389 			level = CollationLevel_NONE;
3390 		}
3391 	} else {
3392 		func = COLLATE_FUNC_NONE;
3393 	}
3394 }
3395