1 /*
2 * Copyright 2011 kubtek <kubtek@mail.com>
3 *
4 * This file is part of StarDict.
5 *
6 * StarDict is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * StarDict is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with StarDict. If not, see <http://www.gnu.org/licenses/>.
18 */
19
/*
 * Implementation of the classes that work with standard StarDict
 * dictionaries: word lookup, article retrieval and so on.
 *
 * Notice: read doc/StarDictFileFormat for information about the
 * dictionary file format!
 */
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30
31 #include <cstring>
32 #include <glib.h>
33 #include <glib/gi18n.h>
34 #include <glib/gstdio.h>
35 #include <stdlib.h>
36 #include <algorithm>
37 #include <memory>
38
39 #include "ifo_file.h"
40 #include "edit-distance.h"
41 //#include "kmp.h"
42 #include "mapfile.h"
43 #include "iappdirs.h"
44
45 #include "stddict.h"
46 #include "utils.h"
47
stardict_collate(const gchar * str1,const gchar * str2,CollateFunctions func)48 static gint stardict_collate(const gchar *str1, const gchar *str2, CollateFunctions func)
49 {
50 gint x = utf8_collate(str1, str2, func);
51 if (x == 0)
52 return strcmp(str1, str2);
53 else
54 return x;
55 }
56
stardict_server_collate(const gchar * str1,const gchar * str2,CollationLevelType CollationLevel,CollateFunctions func,int servercollatefunc)57 gint stardict_server_collate(const gchar *str1, const gchar *str2, CollationLevelType CollationLevel, CollateFunctions func, int servercollatefunc)
58 {
59 if (CollationLevel == CollationLevel_NONE)
60 return stardict_strcmp(str1, str2);
61 if (CollationLevel == CollationLevel_SINGLE)
62 return stardict_collate(str1, str2, func);
63 if (servercollatefunc == 0)
64 return stardict_strcmp(str1, str2);
65 return stardict_collate(str1, str2, (CollateFunctions)(servercollatefunc-1));
66 }
67
68 // not perfect case-insensitive comparison of strings
stardict_strcasecmp(const gchar * s1,const gchar * s2)69 static gint stardict_strcasecmp(const gchar *s1, const gchar *s2)
70 {
71 gchar *sci1 = g_utf8_casefold(s1, -1);
72 gchar *sci2 = g_utf8_casefold(s2, -1);
73 gint res = g_utf8_collate(sci1, sci2);
74 g_free(sci1);
75 g_free(sci2);
76 return res;
77 }
78
stardict_casecmp(const gchar * s1,const gchar * s2,CollationLevelType CollationLevel,CollateFunctions func,int servercollatefunc)79 static gint stardict_casecmp(const gchar *s1, const gchar *s2, CollationLevelType CollationLevel, CollateFunctions func, int servercollatefunc)
80 {
81 if (CollationLevel == CollationLevel_NONE)
82 return stardict_strcasecmp(s1, s2);
83 if (CollationLevel == CollationLevel_SINGLE)
84 return utf8_collate(s1, s2, func);
85 if (servercollatefunc == 0)
86 return stardict_strcasecmp(s1, s2);
87 return utf8_collate(s1, s2, (CollateFunctions)(servercollatefunc-1));
88 }
89
90 /* return the length of the common prefix of two strings in characters
91 * comparison is case-insensitive */
prefix_match(const gchar * s1,const gchar * s2)92 static inline gint prefix_match(const gchar *s1, const gchar *s2)
93 {
94 if(!s1 || !s2)
95 return 0;
96 gint ret=-1;
97 gunichar u1, u2;
98 do {
99 u1 = g_utf8_get_char(s1);
100 u2 = g_utf8_get_char(s2);
101 s1 = g_utf8_next_char(s1);
102 s2 = g_utf8_next_char(s2);
103 ret++;
104 } while (u1 && g_unichar_tolower(u1) == g_unichar_tolower(u2));
105 return ret;
106 }
107
108 /* check that string str has length allowed for index word
109 * strlen(str) < MAX_INDEX_KEY_SIZE
110 * This function does not read more than MAX_INDEX_KEY_SIZE or buf_size chars,
111 * which one is smaller.
112 * return value:
113 * true - ok,
114 * false - string length exceeded. */
check_key_str_len(const gchar * str,size_t buf_size)115 static bool check_key_str_len(const gchar* str, size_t buf_size)
116 {
117 size_t max = MAX_INDEX_KEY_SIZE;
118 if(buf_size < max)
119 max = buf_size;
120 for(size_t i = 0; i < max; ++i)
121 if(!str[i])
122 return true;
123 return false;
124 }
125
bIsVowel(gchar inputchar)126 static inline bool bIsVowel(gchar inputchar)
127 {
128 gchar ch = g_ascii_toupper(inputchar);
129 return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' );
130 }
131
132 class offset_index : public index_file {
133 public:
134 offset_index();
135 ~offset_index();
136 bool load(const std::string& url, gulong wc, gulong fsize,
137 bool CreateCacheFile, CollationLevelType CollationLevel,
138 CollateFunctions _CollateFunction, show_progress_t *sp);
139 void get_data(glong idx);
140 const gchar *get_key_and_data(glong idx);
141 private:
142 const gchar *get_key(glong idx);
143 bool lookup(const char *str, glong &idx, glong &idx_suggest);
144
145 static const gint ENTR_PER_PAGE=32;
146
147 /* oft_file.get_wordoffset(page_num) - offset of the first element on the page
148 * number page_num. 0<= page_num <= npages-2
149 * oft_file.get_wordoffset(npages-1) - offset of the next to the last element
150 * in the index file
151 * oft_file.get_wordoffset(page_num+1) - oft_file.get_wordoffset(page_num)
152 * - size of data on the page number page_num, in bytes. */
153 cache_file oft_file;
154 FILE *idxfile;
155 /* number of pages = ((wordcount-1)/ENTR_PER_PAGE) + 2
156 * The page number npages-2 always contains at least one element.
157 * It may contain from 1 to ENTR_PER_PAGE elements.
158 * To be exact it contains nentr elements, that may be calculated as follows:
159 * nentr = wordcount%ENTR_PER_PAGE;
160 * if(nentr == 0)
161 * nentr = ENTR_PER_PAGE;
162 * The page number npages-1 (the last) is always empty. */
163 gulong npages;
164
165 // The length of "word_str" should be less than MAX_INDEX_KEY_SIZE.
166 // See doc/StarDictFileFormat.
167 gchar wordentry_buf[MAX_INDEX_KEY_SIZE+sizeof(guint32)*2];
168 struct index_entry {
169 glong idx; // page number
170 std::string keystr;
assignoffset_index::index_entry171 void assign(glong i, const std::string& str) {
172 idx=i;
173 keystr.assign(str);
174 }
175 };
176 /* first - first word on the first page - first word in the index
177 * last - first word on the pre-last page (last page addressing real data)
178 * middle - first word on the middle page
179 * read_last - last word in the index */
180 index_entry first, last, middle, real_last;
181
182 struct page_entry {
183 gchar *keystr;
184 guint32 off, size;
185 };
186 std::vector<gchar> page_data;
187 struct page_t {
188 glong idx;
189 page_entry entries[ENTR_PER_PAGE];
190
page_toffset_index::page_t191 page_t(): idx(-1) {}
192 void fill(gchar *data, gint nent, glong idx_);
193 } page;
194 gulong load_page(glong page_idx);
195 const gchar *read_first_on_page_key(glong page_idx);
196 const gchar *get_first_on_page_key(glong page_idx);
197 };
198
199 /* class for compressed index (file ends with ".gz") */
200 class compressed_index : public index_file {
201 public:
202 compressed_index();
203 ~compressed_index();
204 bool load(const std::string& url, gulong wc, gulong fsize,
205 bool CreateCacheFile, CollationLevelType CollationLevel,
206 CollateFunctions _CollateFunction, show_progress_t *sp);
207 void get_data(glong idx);
208 const gchar *get_key_and_data(glong idx);
209 private:
210 const gchar *get_key(glong idx);
211 bool lookup(const char *str, glong &idx, glong &idx_suggest);
212
213 /* whole uncompressed index file in memory */
214 gchar *idxdatabuf;
215 /* pointers to the words-keys in idxdatabuf. Each word is '\0'-terminated and
216 * followed by data offset and size. See ".idx" file format.
217 * wordlist.size() == number of words + 1 */
218 std::vector<gchar *> wordlist;
219 };
220
offset_index()221 offset_index::offset_index() : oft_file(CacheFileType_oft, COLLATE_FUNC_NONE)
222 {
223 idxfile = NULL;
224 npages = 0;
225 }
226
~offset_index()227 offset_index::~offset_index()
228 {
229 if (idxfile)
230 fclose(idxfile);
231 }
232
fill(gchar * data,gint nent,glong idx_)233 void offset_index::page_t::fill(gchar *data, gint nent, glong idx_)
234 {
235 idx=idx_;
236 gchar *p=data;
237 glong len;
238 for (gint i=0; i<nent; ++i) {
239 entries[i].keystr=p;
240 len=strlen(p);
241 p+=len+1;
242 entries[i].off=g_ntohl(get_uint32(p));
243 p+=sizeof(guint32);
244 entries[i].size=g_ntohl(get_uint32(p));
245 p+=sizeof(guint32);
246 }
247 }
248
read_first_on_page_key(glong page_idx)249 inline const gchar *offset_index::read_first_on_page_key(glong page_idx)
250 {
251 g_assert(gulong(page_idx+1) < npages);
252 fseek(idxfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
253 guint32 page_size=oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx);
254 gulong minsize = sizeof(wordentry_buf);
255 if (page_size < minsize) {
256 minsize = page_size;
257 }
258 size_t fread_size;
259 fread_size = fread(wordentry_buf, minsize, 1, idxfile);
260 if (fread_size != 1) {
261 g_print("fread error!\n");
262 }
263 if(!check_key_str_len(wordentry_buf, minsize)) {
264 wordentry_buf[minsize-1] = '\0';
265 g_critical("Index key length exceeds allowed limit. Key: %s, "
266 "max length = %i", wordentry_buf, MAX_INDEX_KEY_SIZE - 1);
267 return NULL;
268 }
269 return wordentry_buf;
270 }
271
get_first_on_page_key(glong page_idx)272 inline const gchar *offset_index::get_first_on_page_key(glong page_idx)
273 {
274 if (page_idx<middle.idx) {
275 if (page_idx==first.idx)
276 return first.keystr.c_str();
277 return read_first_on_page_key(page_idx);
278 } else if (page_idx>middle.idx) {
279 if (page_idx==last.idx)
280 return last.keystr.c_str();
281 return read_first_on_page_key(page_idx);
282 } else
283 return middle.keystr.c_str();
284 }
285
cache_file(CacheFileType _cachefiletype,CollateFunctions _cltfunc)286 cache_file::cache_file(CacheFileType _cachefiletype, CollateFunctions _cltfunc)
287 {
288 wordoffset = NULL;
289 npages = 0;
290 mf = NULL;
291 cachefiletype = _cachefiletype;
292 cltfunc = _cltfunc;
293 }
294
295
~cache_file()296 cache_file::~cache_file()
297 {
298 if (mf)
299 delete mf;
300 else
301 g_free(wordoffset);
302 }
303
/* Text written after the offsets array of a cache file; it is checked when
 * a cache file is loaded. */
#define OFFSETFILE_MAGIC_DATA "StarDict's oft file\nversion=2.4.8\n"
#define COLLATIONFILE_MAGIC_DATA "StarDict's clt file\nversion=2.4.8\n"
306
find_and_load_cache_file(const gchar * filename,const std::string & url,const std::string & saveurl,glong filedatasize,int next) const307 MapFile* cache_file::find_and_load_cache_file(const gchar *filename,
308 const std::string &url, const std::string &saveurl,
309 glong filedatasize, int next) const
310 {
311 stardict_stat_t cachestat;
312 if (g_stat(filename, &cachestat)!=0)
313 return NULL;
314 std::auto_ptr<MapFile> mf(new MapFile);
315 if (!mf->open(filename, cachestat.st_size))
316 return NULL;
317 guint32 word_off_size = (get_uint32(mf->begin()) + 1) * sizeof(guint32);
318 if (word_off_size >= static_cast<guint32>(cachestat.st_size) ||
319 *(mf->begin() + cachestat.st_size - 1) != '\0')
320 return NULL;
321
322 gchar *p = mf->begin() + word_off_size;
323 gboolean has_prefix;
324 if (cachefiletype == CacheFileType_oft)
325 has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
326 else
327 has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
328 if (!has_prefix)
329 return NULL;
330 if (cachefiletype == CacheFileType_oft)
331 p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
332 else
333 p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
334 gchar *p2;
335 p2 = strstr(p, "\nurl=");
336 if (!p2)
337 return NULL;
338 p2+=sizeof("\nurl=")-1;
339 gchar *p3;
340 p3 = strchr(p2, '\n');
341 if (!p3)
342 return NULL;
343 std::string tmpstr(p2, p3-p2);
344 #ifdef _WIN32
345 tmpstr = abs_path_to_data_dir(tmpstr);
346 #endif
347 if (is_equal_paths(saveurl, tmpstr)) {
348 if (cachefiletype == CacheFileType_clt) {
349 p2 = strstr(p, "\nfunc=");
350 if (!p2)
351 return NULL;
352 p2 += sizeof("\nfunc=")-1;
353 p3 = strchr(p2, '\n');
354 if (!p3)
355 return NULL;
356 tmpstr.assign(p2, p3-p2);
357 if (atoi(tmpstr.c_str())!=cltfunc)
358 return NULL;
359 }
360
361 if (static_cast<gulong>(cachestat.st_size)
362 != static_cast<gulong>(filedatasize + sizeof(guint32) + strlen(mf->begin() + word_off_size) +1))
363 return NULL;
364 stardict_stat_t idxstat;
365 if (g_stat(url.c_str(), &idxstat)!=0)
366 return NULL;
367 if (cachestat.st_mtime<idxstat.st_mtime)
368 return NULL;
369 //g_print("Using map file: %s\n", filename);
370 return mf.release();
371 }
372 mf.reset();
373 glib::CharStr basename(g_path_get_basename(saveurl.c_str()));
374 p = strrchr(get_impl(basename), '.');
375 if (!p)
376 return NULL;
377 *p='\0';
378 gchar *extendname = p+1;
379 glib::CharStr dirname(g_path_get_dirname(filename));
380 glib::CharStr nextfilename(get_next_filename(get_impl(dirname),
381 get_impl(basename), next, extendname));
382 return find_and_load_cache_file(get_impl(nextfilename), url, saveurl, filedatasize, next+1);
383 }
384
load_cache(const std::string & url,const std::string & saveurl,glong filedatasize)385 bool cache_file::load_cache(const std::string& url, const std::string& saveurl,
386 glong filedatasize)
387 {
388 g_assert(!wordoffset);
389 std::string oftfilename;
390 build_primary_cache_filename(saveurl, oftfilename);
391 /* First search the file in the dictionary directory, then in the cache
392 * directory. */
393 for (int i=0; i<2; i++) {
394 if (i==1) {
395 if (!build_primary_cache_filename_in_user_cache(saveurl, oftfilename, false))
396 break;
397 }
398 mf = find_and_load_cache_file(oftfilename.c_str(), url, saveurl, filedatasize, 2);
399 if (!mf)
400 continue;
401 wordoffset = reinterpret_cast<guint32 *>(mf->begin()) + 1;
402 npages = get_uint32(mf->begin());
403 return true;
404 }
405 return false;
406 }
407
build_primary_cache_filename_in_user_cache(const std::string & url,std::string & cachefilename,bool create) const408 bool cache_file::build_primary_cache_filename_in_user_cache(const std::string& url, std::string &cachefilename, bool create) const
409 {
410 const std::string cache_dir(app_dirs->get_user_cache_dir());
411 if (create) {
412 if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) {
413 if (-1 == g_mkdir_with_parents(cache_dir.c_str(), 0700))
414 return false;
415 }
416 }
417 if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR))
418 return false;
419
420 gchar *base=g_path_get_basename(url.c_str());
421 build_primary_cache_filename(build_path(cache_dir, base), cachefilename);
422 g_free(base);
423 return true;
424 }
425
find_and_open_for_overwrite_cache_file(const gchar * filename,const std::string & saveurl,int next,std::string & cfilename) const426 FILE* cache_file::find_and_open_for_overwrite_cache_file(const gchar *filename, const std::string &saveurl, int next, std::string &cfilename) const
427 {
428 cfilename = filename;
429 stardict_stat_t oftstat;
430 if (g_stat(filename, &oftstat)!=0) {
431 return fopen(filename, "wb");
432 }
433 MapFile mf;
434 if (!mf.open(filename, oftstat.st_size)) {
435 return fopen(filename, "wb");
436 }
437 guint32 word_off_size = (get_uint32(mf.begin()) + 1) * sizeof(guint32);
438 if (word_off_size >= static_cast<guint32>(oftstat.st_size) ||
439 *(mf.begin() + oftstat.st_size - 1) != '\0')
440 return fopen(filename, "wb");
441
442 gchar *p = mf.begin() + word_off_size;
443 bool has_prefix;
444 if (cachefiletype == CacheFileType_oft)
445 has_prefix = g_str_has_prefix(p, OFFSETFILE_MAGIC_DATA);
446 else
447 has_prefix = g_str_has_prefix(p, COLLATIONFILE_MAGIC_DATA);
448 if (!has_prefix) {
449 return fopen(filename, "wb");
450 }
451 if (cachefiletype == CacheFileType_oft)
452 p+= sizeof(OFFSETFILE_MAGIC_DATA)-1-1;
453 else
454 p+= sizeof(COLLATIONFILE_MAGIC_DATA)-1-1;
455 gchar *p2;
456 p2 = strstr(p, "\nurl=");
457 if (!p2) {
458 return fopen(filename, "wb");
459 }
460 p2+=sizeof("\nurl=")-1;
461 gchar *p3;
462 p3 = strchr(p2, '\n');
463 if (!p3) {
464 return fopen(filename, "wb");
465 }
466 std::string tmpstr(p2, p3-p2);
467 #ifdef _WIN32
468 tmpstr = abs_path_to_data_dir(tmpstr);
469 #endif
470 if (is_equal_paths(saveurl, tmpstr)) {
471 return fopen(filename, "wb");
472 }
473 mf.close();
474 glib::CharStr basename(g_path_get_basename(saveurl.c_str()));
475 p = strrchr(get_impl(basename), '.');
476 if (!p)
477 return NULL;
478 *p='\0';
479 gchar *extendname = p+1;
480 glib::CharStr dirname(g_path_get_dirname(filename));
481 glib::CharStr nextfilename(get_next_filename(get_impl(dirname),
482 get_impl(basename), next, extendname));
483 return find_and_open_for_overwrite_cache_file(get_impl(nextfilename), saveurl, next+1, cfilename);
484 }
485
save_cache(const std::string & saveurl) const486 bool cache_file::save_cache(const std::string& saveurl) const
487 {
488 std::string oftfilename;
489 build_primary_cache_filename(saveurl, oftfilename);
490 for (int i=0;i<2;i++) {
491 if (i==1) {
492 if (!build_primary_cache_filename_in_user_cache(saveurl, oftfilename, true))
493 break;
494 }
495 std::string cfilename;
496 FILE *out= find_and_open_for_overwrite_cache_file(oftfilename.c_str(), saveurl, 2, cfilename);
497 if (!out)
498 continue;
499 guint32 nentries = npages;
500 fwrite(&nentries, sizeof(nentries), 1, out);
501 fwrite(wordoffset, sizeof(guint32), npages, out);
502 if (cachefiletype == CacheFileType_oft)
503 fwrite(OFFSETFILE_MAGIC_DATA, 1, sizeof(OFFSETFILE_MAGIC_DATA)-1, out);
504 else
505 fwrite(COLLATIONFILE_MAGIC_DATA, 1, sizeof(COLLATIONFILE_MAGIC_DATA)-1, out);
506 fwrite("url=", 1, sizeof("url=")-1, out);
507 #ifdef _WIN32
508 const std::string url_rel(rel_path_to_data_dir(saveurl));
509 fwrite(url_rel.c_str(), 1, url_rel.length(), out);
510 #else
511 fwrite(saveurl.c_str(), 1, saveurl.length(), out);
512 #endif
513 if (cachefiletype == CacheFileType_clt) {
514 #ifdef _MSC_VER
515 fprintf_s(out, "\nfunc=%d", cltfunc);
516 #else
517 fprintf(out, "\nfunc=%d", cltfunc);
518 #endif
519 }
520 fwrite("\n", 1, 2, out);
521 fclose(out);
522 g_print("Save cache file: %s\n", cfilename.c_str());
523 return true;
524 }
525 return false;
526 }
527
allocate_wordoffset(size_t _npages)528 void cache_file::allocate_wordoffset(size_t _npages)
529 {
530 g_assert(!wordoffset);
531 if(mf) {
532 delete mf;
533 mf = NULL;
534 }
535 wordoffset = (guint32 *)g_malloc(_npages * sizeof(guint32));
536 npages = _npages;
537 }
538
/* Build an alternative cache file name:
 * oft cache:   <dirname>/<basename>(<num>).<extendname>.oft
 * clt cache:   <dirname>/<basename>(<num>).<extendname>.clt
 * other types: <dirname>/<basename>(<num>).<extendname>.<cltfunc>.clt
 * Return value: newly allocated string, to be released with g_free(). */
gchar *cache_file::get_next_filename(
	const gchar *dirname, const gchar *basename, int num,
	const gchar *extendname) const
{
	switch (cachefiletype) {
	case CacheFileType_oft:
		return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.oft", dirname, basename, num, extendname);
	case CacheFileType_clt:
		return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.clt", dirname, basename, num, extendname);
	default:
		return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s(%d).%s.%d.clt", dirname, basename, num, extendname, cltfunc);
	}
}
550
build_primary_cache_filename(const std::string & url,std::string & filename) const551 void cache_file::build_primary_cache_filename(const std::string &url,
552 std::string &filename) const
553 {
554 if (cachefiletype == CacheFileType_oft) {
555 filename=url+".oft";
556 } else if (cachefiletype == CacheFileType_clt) {
557 filename=url+".clt";
558 } else {
559 gchar *func = g_strdup_printf("%d", cltfunc);
560 filename=url+'.'+func+".clt";
561 g_free(func);
562 }
563 }
564
collation_file(idxsyn_file * _idx_file,CacheFileType _cachefiletype,CollateFunctions _CollateFunction)565 collation_file::collation_file(idxsyn_file *_idx_file, CacheFileType _cachefiletype,
566 CollateFunctions _CollateFunction)
567 : cache_file(_cachefiletype, _CollateFunction),
568 idx_file(_idx_file)
569 {
570 g_assert(_cachefiletype == CacheFileType_clt || _cachefiletype == CacheFileType_server_clt);
571
572 }
573
GetWord(glong idx)574 const gchar *collation_file::GetWord(glong idx)
575 {
576 return idx_file->get_key(get_wordoffset(idx));
577 }
578
GetOrigIndex(glong cltidx)579 glong collation_file::GetOrigIndex(glong cltidx)
580 {
581 return get_wordoffset(cltidx);
582 }
583
lookup(const char * sWord,glong & idx,glong & idx_suggest)584 bool collation_file::lookup(const char *sWord, glong &idx, glong &idx_suggest)
585 {
586 bool bFound=false;
587 glong iTo=idx_file->get_word_count()-1;
588 if (stardict_collate(sWord, GetWord(0), get_CollateFunction())<0) {
589 idx = 0;
590 idx_suggest = 0;
591 } else if (stardict_collate(sWord, GetWord(iTo), get_CollateFunction()) >0) {
592 idx = INVALID_INDEX;
593 idx_suggest = iTo;
594 } else {
595 glong iThisIndex=0;
596 glong iFrom=0;
597 gint cmpint;
598 while (iFrom<=iTo) {
599 iThisIndex=(iFrom+iTo)/2;
600 cmpint = stardict_collate(sWord, GetWord(iThisIndex), get_CollateFunction());
601 if (cmpint>0)
602 iFrom=iThisIndex+1;
603 else if (cmpint<0)
604 iTo=iThisIndex-1;
605 else {
606 bFound=true;
607 break;
608 }
609 }
610 if (!bFound) {
611 idx = iFrom; //next
612 idx_suggest = iFrom;
613 gint best, back;
614 best = prefix_match (sWord, GetWord(idx_suggest));
615 for (;;) {
616 if ((iTo=idx_suggest-1) < 0)
617 break;
618 back = prefix_match (sWord, GetWord(iTo));
619 if (!back || back < best)
620 break;
621 best = back;
622 idx_suggest = iTo;
623 }
624 } else {
625 idx = iThisIndex;
626 idx_suggest = iThisIndex;
627 }
628 }
629 return bFound;
630 }
631
632 struct sort_collation_index_user_data {
633 idxsyn_file *idx_file;
634 CollateFunctions cltfunc;
635 };
636
sort_collation_index(gconstpointer a,gconstpointer b,gpointer user_data)637 static gint sort_collation_index(gconstpointer a, gconstpointer b, gpointer user_data)
638 {
639 sort_collation_index_user_data *data = (sort_collation_index_user_data*)user_data;
640 gchar *str1 = g_strdup(data->idx_file->get_key(*((guint32 *)a)));
641 const gchar *str2 = data->idx_file->get_key(*((guint32 *)b));
642 gint x = stardict_collate(str1, str2, data->cltfunc);
643 g_free(str1);
644 if (x==0)
645 return *((guint32 *)a) - *((guint32 *)b);
646 else
647 return x;
648 }
649
idxsyn_file()650 idxsyn_file::idxsyn_file()
651 :
652 clt_file(NULL),
653 wordcount(0)
654 {
655 memset(clt_files, 0, sizeof(clt_files));
656 }
657
~idxsyn_file()658 idxsyn_file::~idxsyn_file()
659 {
660 delete clt_file;
661 for(size_t i=0; i<COLLATE_FUNC_NUMS; ++i)
662 delete clt_files[i];
663 }
664
getWord(glong idx,CollationLevelType CollationLevel,int servercollatefunc)665 const gchar *idxsyn_file::getWord(glong idx, CollationLevelType CollationLevel, int servercollatefunc)
666 {
667 if (CollationLevel == CollationLevel_NONE)
668 return get_key(idx);
669 if (CollationLevel == CollationLevel_SINGLE)
670 return clt_file->GetWord(idx);
671 if (servercollatefunc == 0)
672 return get_key(idx);
673 collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
674 return clt_files[servercollatefunc-1]->GetWord(idx);
675 }
676
Lookup(const char * str,glong & idx,glong & idx_suggest,CollationLevelType CollationLevel,int servercollatefunc)677 bool idxsyn_file::Lookup(const char *str, glong &idx, glong &idx_suggest, CollationLevelType CollationLevel, int servercollatefunc)
678 {
679 if (CollationLevel == CollationLevel_NONE)
680 return lookup(str, idx, idx_suggest);
681 if (CollationLevel == CollationLevel_SINGLE)
682 return clt_file->lookup(str, idx, idx_suggest);
683 if (servercollatefunc == 0)
684 return lookup(str, idx, idx_suggest);
685 collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
686 return clt_files[servercollatefunc-1]->lookup(str, idx, idx_suggest);
687 }
688
collate_save_info(const std::string & _url,const std::string & _saveurl)689 void idxsyn_file::collate_save_info(const std::string& _url, const std::string& _saveurl)
690 {
691 url = _url;
692 saveurl = _saveurl;
693 }
694
collate_load(CollateFunctions collf,CollationLevelType CollationLevel,show_progress_t * sp)695 void idxsyn_file::collate_load(CollateFunctions collf, CollationLevelType CollationLevel, show_progress_t *sp)
696 {
697 g_assert(CollationLevel == CollationLevel_SINGLE || CollationLevel == CollationLevel_MULTI);
698 if(CollationLevel == CollationLevel_SINGLE) {
699 if(clt_file)
700 return;
701 clt_file = collate_load_impl(url, saveurl, collf, sp, CacheFileType_clt);
702 } else if(CollationLevel == CollationLevel_MULTI) {
703 if (clt_files[collf])
704 return;
705 clt_files[collf] = collate_load_impl(url, saveurl, collf, sp, CacheFileType_server_clt);
706 }
707 }
708
collate_load_impl(const std::string & _url,const std::string & _saveurl,CollateFunctions collf,show_progress_t * sp,CacheFileType CacheType)709 collation_file * idxsyn_file::collate_load_impl(
710 const std::string& _url, const std::string& _saveurl,
711 CollateFunctions collf, show_progress_t *sp, CacheFileType CacheType)
712 {
713 collation_file * _clt_file = new collation_file(this, CacheType, collf);
714 if (!_clt_file->load_cache(_url, _saveurl, wordcount*sizeof(guint32))) {
715 if(sp)
716 sp->notify_about_start(_("Sorting, please wait..."));
717 _clt_file->allocate_wordoffset(wordcount);
718 for (glong i=0; i<wordcount; i++)
719 _clt_file->get_wordoffset(i) = i;
720 sort_collation_index_user_data data;
721 data.idx_file = this;
722 data.cltfunc = collf;
723 g_qsort_with_data(_clt_file->get_wordoffset(), wordcount, sizeof(guint32), sort_collation_index, &data);
724 if (!_clt_file->save_cache(_saveurl))
725 g_printerr("Cache update failed.\n");
726 }
727 return _clt_file;
728 }
729
load(const std::string & url,gulong wc,gulong fsize,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions _CollateFunction,show_progress_t * sp)730 bool offset_index::load(const std::string& url, gulong wc, gulong fsize,
731 bool CreateCacheFile, CollationLevelType CollationLevel,
732 CollateFunctions _CollateFunction, show_progress_t *sp)
733 {
734 wordcount=wc;
735 npages=(wc-1)/ENTR_PER_PAGE+2;
736 if (!oft_file.load_cache(url, url, npages*sizeof(guint32))) {
737 MapFile map_file;
738 if (!map_file.open(url.c_str(), fsize))
739 return false;
740 const gchar *idxdatabuffer=map_file.begin();
741 /* oft_file.wordoffset[i] holds offset of the i-th page in the index file */
742 oft_file.allocate_wordoffset(npages);
743 const gchar *p1 = idxdatabuffer;
744 gulong index_size;
745 guint32 j=0;
746 for (guint32 i=0; i<wc; i++) {
747 index_size=strlen(p1) +1 + 2*sizeof(guint32);
748 if (i % ENTR_PER_PAGE==0) {
749 oft_file.get_wordoffset(j)=p1-idxdatabuffer;
750 ++j;
751 }
752 p1 += index_size;
753 }
754 oft_file.get_wordoffset(j)=p1-idxdatabuffer;
755 map_file.close();
756 if (CreateCacheFile) {
757 if (!oft_file.save_cache(url))
758 g_printerr("Cache update failed.\n");
759 }
760 }
761
762 if (!(idxfile = fopen(url.c_str(), "rb"))) {
763 return false;
764 }
765
766 first.assign(0, read_first_on_page_key(0));
767 last.assign(npages-2, read_first_on_page_key(npages-2));
768 middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
769 real_last.assign(wc-1, get_key(wc-1));
770
771 if (CollationLevel == CollationLevel_NONE) {
772 } else if (CollationLevel == CollationLevel_SINGLE) {
773 collate_save_info(url, url);
774 collate_load(_CollateFunction, CollationLevel_SINGLE, sp);
775 } else if (CollationLevel == CollationLevel_MULTI) {
776 collate_save_info(url, url);
777 }
778
779 return true;
780 }
781
load_page(glong page_idx)782 inline gulong offset_index::load_page(glong page_idx)
783 {
784 gulong nentr=ENTR_PER_PAGE;
785 if (page_idx==glong(npages-2))
786 if ((nentr=wordcount%ENTR_PER_PAGE)==0)
787 nentr=ENTR_PER_PAGE;
788
789
790 if (page_idx!=page.idx) {
791 page_data.resize(oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx));
792 fseek(idxfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
793 size_t fread_size;
794 size_t page_data_size = page_data.size();
795 fread_size = fread(&page_data[0], 1, page_data_size, idxfile);
796 if (fread_size != page_data_size) {
797 g_print("fread error!\n");
798 }
799 page.fill(&page_data[0], nentr, page_idx);
800 }
801
802 return nentr;
803 }
804
get_key(glong idx)805 const gchar *offset_index::get_key(glong idx)
806 {
807 load_page(idx/ENTR_PER_PAGE);
808 glong idx_in_page=idx%ENTR_PER_PAGE;
809 wordentry_offset=page.entries[idx_in_page].off;
810 wordentry_size=page.entries[idx_in_page].size;
811
812 return page.entries[idx_in_page].keystr;
813 }
814
get_data(glong idx)815 void offset_index::get_data(glong idx)
816 {
817 get_key(idx);
818 }
819
get_key_and_data(glong idx)820 const gchar *offset_index::get_key_and_data(glong idx)
821 {
822 return get_key(idx);
823 }
824
825 /* Search for string str.
826 * Returns true if the string is found and false otherwise.
827 * If the string is found, idx - index of the search string.
828 * If the string is not found, idx - index of the "next" item in the index.
829 * idx == INVALID_INDEX if the search word is greater then the last word of
830 * the index.
831 * idx_suggest - index of the closest word in the index.
832 * It's always a valid index. */
lookup(const char * str,glong & idx,glong & idx_suggest)833 bool offset_index::lookup(const char *str, glong &idx, glong &idx_suggest)
834 {
835 bool bFound=false;
836 glong iFrom;
837 glong iTo=npages-2;
838 gint cmpint;
839 glong iThisIndex;
840 if (stardict_strcmp(str, first.keystr.c_str())<0) {
841 idx = 0;
842 idx_suggest = 0;
843 return false;
844 } else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
845 idx = INVALID_INDEX;
846 idx_suggest = wordcount-1;
847 return false;
848 } else {
849 // find the page number where the search word might be
850 iFrom=0;
851 iThisIndex=0;
852 while (iFrom<=iTo) {
853 iThisIndex=(iFrom+iTo)/2;
854 cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
855 if (cmpint>0)
856 iFrom=iThisIndex+1;
857 else if (cmpint<0)
858 iTo=iThisIndex-1;
859 else {
860 bFound=true;
861 break;
862 }
863 }
864 if (!bFound) {
865 idx = iTo; //prev
866 } else {
867 idx = iThisIndex;
868 }
869 }
870 if (!bFound) {
871 // the search word is on the page number idx if it's anywhere
872 gulong netr=load_page(idx);
873 iFrom=1; // Needn't search the first word anymore.
874 iTo=netr-1;
875 iThisIndex=0;
876 while (iFrom<=iTo) {
877 iThisIndex=(iFrom+iTo)/2;
878 cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
879 if (cmpint>0)
880 iFrom=iThisIndex+1;
881 else if (cmpint<0)
882 iTo=iThisIndex-1;
883 else {
884 bFound=true;
885 break;
886 }
887 }
888 idx*=ENTR_PER_PAGE;
889 if (!bFound) {
890 idx += iFrom; //next
891 idx_suggest = idx;
892 gint best, back;
893 best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
894 for (;;) {
895 if ((iTo=idx_suggest-1) < 0)
896 break;
897 if (idx_suggest % ENTR_PER_PAGE == 0)
898 load_page(iTo / ENTR_PER_PAGE);
899 back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
900 if (!back || back < best)
901 break;
902 best = back;
903 idx_suggest = iTo;
904 }
905 } else {
906 idx += iThisIndex;
907 idx_suggest = idx;
908 }
909 } else {
910 idx*=ENTR_PER_PAGE;
911 idx_suggest = idx;
912 }
913 return bFound;
914 }
915
compressed_index()916 compressed_index::compressed_index()
917 {
918 idxdatabuf = NULL;
919 }
920
~compressed_index()921 compressed_index::~compressed_index()
922 {
923 g_free(idxdatabuf);
924 }
925
926 /* Parameters:
927 * url - index file path, has suffix ".idx.gz".
928 * wc - number of words in the index
929 * fsize - uncompressed index size
930 * */
load(const std::string & url,gulong wc,gulong fsize,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions _CollateFunction,show_progress_t * sp)931 bool compressed_index::load(const std::string& url, gulong wc, gulong fsize,
932 bool CreateCacheFile, CollationLevelType CollationLevel,
933 CollateFunctions _CollateFunction, show_progress_t *sp)
934 {
935 wordcount=wc;
936 gzFile in = gzopen(url.c_str(), "rb");
937 if (in == NULL)
938 return false;
939
940 idxdatabuf = (gchar *)g_malloc(fsize);
941
942 gulong len = gzread(in, idxdatabuf, fsize);
943 gzclose(in);
944 if (len < 0)
945 return false;
946
947 if (len != fsize)
948 return false;
949
950 wordlist.resize(wc+1);
951 gchar *p1 = idxdatabuf;
952 guint32 i;
953 for (i=0; i<wc; i++) {
954 wordlist[i] = p1;
955 p1 += strlen(p1) +1 + 2*sizeof(guint32);
956 }
957 /* pointer to the next to last word entry */
958 wordlist[wc] = p1;
959
960 if (CollationLevel == CollationLevel_NONE) {
961 } else {
962 std::string saveurl = url;
963 saveurl.erase(saveurl.length()-sizeof(".gz")+1, sizeof(".gz")-1);
964 if (CollationLevel == CollationLevel_SINGLE) {
965 collate_save_info(url, saveurl);
966 collate_load(_CollateFunction, CollationLevel_SINGLE, sp);
967 } else if (CollationLevel == CollationLevel_MULTI) {
968 collate_save_info(url, saveurl);
969 }
970 }
971 return true;
972 }
973
get_key(glong idx)974 const gchar *compressed_index::get_key(glong idx)
975 {
976 return wordlist[idx];
977 }
978
get_data(glong idx)979 void compressed_index::get_data(glong idx)
980 {
981 gchar *p1 = wordlist[idx]+strlen(wordlist[idx])+sizeof(gchar);
982 wordentry_offset = g_ntohl(get_uint32(p1));
983 p1 += sizeof(guint32);
984 wordentry_size = g_ntohl(get_uint32(p1));
985 }
986
get_key_and_data(glong idx)987 const gchar *compressed_index::get_key_and_data(glong idx)
988 {
989 get_data(idx);
990 return get_key(idx);
991 }
992
lookup(const char * str,glong & idx,glong & idx_suggest)993 bool compressed_index::lookup(const char *str, glong &idx, glong &idx_suggest)
994 {
995 bool bFound=false;
996 glong iTo=wordlist.size()-2;
997
998 if (stardict_strcmp(str, get_key(0))<0) {
999 idx = 0;
1000 idx_suggest = 0;
1001 } else if (stardict_strcmp(str, get_key(iTo)) >0) {
1002 idx = INVALID_INDEX;
1003 idx_suggest = iTo;
1004 } else {
1005 glong iThisIndex=0;
1006 glong iFrom=0;
1007 gint cmpint;
1008 while (iFrom<=iTo) {
1009 iThisIndex=(iFrom+iTo)/2;
1010 cmpint = stardict_strcmp(str, get_key(iThisIndex));
1011 if (cmpint>0)
1012 iFrom=iThisIndex+1;
1013 else if (cmpint<0)
1014 iTo=iThisIndex-1;
1015 else {
1016 bFound=true;
1017 break;
1018 }
1019 }
1020 if (!bFound) {
1021 idx = iFrom; //next
1022 idx_suggest = iFrom;
1023 gint best, back;
1024 best = prefix_match (str, get_key(idx_suggest));
1025 for (;;) {
1026 if ((iTo=idx_suggest-1) < 0)
1027 break;
1028 back = prefix_match (str, get_key(iTo));
1029 if (!back || back < best)
1030 break;
1031 best = back;
1032 idx_suggest = iTo;
1033 }
1034 } else {
1035 idx = iThisIndex;
1036 idx_suggest = iThisIndex;
1037 }
1038 }
1039 return bFound;
1040 }
1041
1042 //===================================================================
Create(const std::string & filebasename,const char * mainext,std::string & fullfilename)1043 index_file* index_file::Create(const std::string& filebasename,
1044 const char* mainext, std::string& fullfilename)
1045 {
1046 index_file *index = NULL;
1047
1048 fullfilename = filebasename + "." + mainext + ".gz";
1049 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1050 index = new compressed_index;
1051 } else {
1052 fullfilename = filebasename + "." + mainext;
1053 index = new offset_index;
1054 }
1055 return index;
1056 }
1057
1058 //===================================================================
fill(gchar * data,gint nent,glong idx_)1059 void synonym_file::page_t::fill(gchar *data, gint nent, glong idx_)
1060 {
1061 idx=idx_;
1062 gchar *p=data;
1063 glong len;
1064 for (gint i=0; i<nent; ++i) {
1065 entries[i].keystr=p;
1066 len=strlen(p);
1067 p+=len+1;
1068 entries[i].index=g_ntohl(get_uint32(p));
1069 p+=sizeof(guint32);
1070 }
1071 }
1072
synonym_file()1073 synonym_file::synonym_file() : oft_file(CacheFileType_oft, COLLATE_FUNC_NONE)
1074 {
1075 }
1076
~synonym_file()1077 synonym_file::~synonym_file()
1078 {
1079 if (synfile)
1080 fclose(synfile);
1081 }
1082
read_first_on_page_key(glong page_idx)1083 inline const gchar *synonym_file::read_first_on_page_key(glong page_idx)
1084 {
1085 fseek(synfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
1086 guint32 page_size=oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx);
1087 gulong minsize = sizeof(wordentry_buf);
1088 if (page_size < minsize) {
1089 minsize = page_size;
1090 }
1091 size_t fread_size;
1092 fread_size = fread(wordentry_buf, minsize, 1, synfile); //TODO: check returned values, deal with word entry that strlen>255.
1093 if (fread_size != 1) {
1094 g_print("fread error!\n");
1095 }
1096 return wordentry_buf;
1097 }
1098
get_first_on_page_key(glong page_idx)1099 inline const gchar *synonym_file::get_first_on_page_key(glong page_idx)
1100 {
1101 if (page_idx<middle.idx) {
1102 if (page_idx==first.idx)
1103 return first.keystr.c_str();
1104 return read_first_on_page_key(page_idx);
1105 } else if (page_idx>middle.idx) {
1106 if (page_idx==last.idx)
1107 return last.keystr.c_str();
1108 return read_first_on_page_key(page_idx);
1109 } else
1110 return middle.keystr.c_str();
1111 }
1112
load(const std::string & url,gulong wc,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions _CollateFunction,show_progress_t * sp)1113 bool synonym_file::load(const std::string& url, gulong wc, bool CreateCacheFile,
1114 CollationLevelType CollationLevel, CollateFunctions _CollateFunction,
1115 show_progress_t *sp)
1116 {
1117 wordcount=wc;
1118 npages=(wc-1)/ENTR_PER_PAGE+2;
1119 if (!oft_file.load_cache(url, url, npages*sizeof(guint32))) {
1120 stardict_stat_t stats;
1121 if (g_stat(url.c_str(), &stats) == -1)
1122 return false;
1123 MapFile map_file;
1124 if (!map_file.open(url.c_str(), stats.st_size))
1125 return false;
1126 const gchar *syndatabuffer=map_file.begin();
1127 oft_file.allocate_wordoffset(npages);
1128 const gchar *p1 = syndatabuffer;
1129 gulong index_size;
1130 guint32 j=0;
1131 for (guint32 i=0; i<wc; i++) {
1132 index_size=strlen(p1) +1 + sizeof(guint32);
1133 if (i % ENTR_PER_PAGE==0) {
1134 oft_file.get_wordoffset(j)=p1-syndatabuffer;
1135 ++j;
1136 }
1137 p1 += index_size;
1138 }
1139 oft_file.get_wordoffset(j)=p1-syndatabuffer;
1140 map_file.close();
1141 if (CreateCacheFile) {
1142 if (!oft_file.save_cache(url))
1143 g_printerr("Cache update failed.\n");
1144 }
1145 }
1146
1147 if (!(synfile = fopen(url.c_str(), "rb"))) {
1148 return false;
1149 }
1150
1151 first.assign(0, read_first_on_page_key(0));
1152 last.assign(npages-2, read_first_on_page_key(npages-2));
1153 middle.assign((npages-2)/2, read_first_on_page_key((npages-2)/2));
1154 real_last.assign(wc-1, get_key(wc-1));
1155
1156 if (CollationLevel == CollationLevel_NONE) {
1157 } else if (CollationLevel == CollationLevel_SINGLE) {
1158 collate_save_info(url, url);
1159 collate_load(_CollateFunction,CollationLevel_SINGLE, sp);
1160 } else if (CollationLevel == CollationLevel_MULTI) {
1161 collate_save_info(url, url);
1162 }
1163
1164 return true;
1165 }
1166
load_page(glong page_idx)1167 inline gulong synonym_file::load_page(glong page_idx)
1168 {
1169 gulong nentr=ENTR_PER_PAGE;
1170 if (page_idx==glong(npages-2))
1171 if ((nentr=wordcount%ENTR_PER_PAGE)==0)
1172 nentr=ENTR_PER_PAGE;
1173
1174
1175 if (page_idx!=page.idx) {
1176 page_data.resize(oft_file.get_wordoffset(page_idx+1)-oft_file.get_wordoffset(page_idx));
1177 fseek(synfile, oft_file.get_wordoffset(page_idx), SEEK_SET);
1178 size_t fread_size;
1179 size_t page_data_size = page_data.size();
1180 fread_size = fread(&page_data[0], 1, page_data_size, synfile);
1181 if (fread_size != page_data_size) {
1182 g_print("fread error!\n");
1183 }
1184 page.fill(&page_data[0], nentr, page_idx);
1185 }
1186
1187 return nentr;
1188 }
1189
get_key(glong idx)1190 const gchar *synonym_file::get_key(glong idx)
1191 {
1192 load_page(idx/ENTR_PER_PAGE);
1193 glong idx_in_page=idx%ENTR_PER_PAGE;
1194 wordentry_index=page.entries[idx_in_page].index;
1195
1196 return page.entries[idx_in_page].keystr;
1197 }
1198
lookup(const char * str,glong & idx,glong & idx_suggest)1199 bool synonym_file::lookup(const char *str, glong &idx, glong &idx_suggest)
1200 {
1201 bool bFound=false;
1202 glong iFrom;
1203 glong iTo=npages-2;
1204 gint cmpint;
1205 glong iThisIndex;
1206 if (stardict_strcmp(str, first.keystr.c_str())<0) {
1207 idx = 0;
1208 idx_suggest = 0;
1209 return false;
1210 } else if (stardict_strcmp(str, real_last.keystr.c_str()) >0) {
1211 idx = INVALID_INDEX;
1212 idx_suggest = wordcount-1;
1213 return false;
1214 } else {
1215 iFrom=0;
1216 iThisIndex=0;
1217 while (iFrom<=iTo) {
1218 iThisIndex=(iFrom+iTo)/2;
1219 cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
1220 if (cmpint>0)
1221 iFrom=iThisIndex+1;
1222 else if (cmpint<0)
1223 iTo=iThisIndex-1;
1224 else {
1225 bFound=true;
1226 break;
1227 }
1228 }
1229 if (!bFound)
1230 idx = iTo; //prev
1231 else
1232 idx = iThisIndex;
1233 }
1234 if (!bFound) {
1235 gulong netr=load_page(idx);
1236 iFrom=1; // Needn't search the first word anymore.
1237 iTo=netr-1;
1238 iThisIndex=0;
1239 while (iFrom<=iTo) {
1240 iThisIndex=(iFrom+iTo)/2;
1241 cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
1242 if (cmpint>0)
1243 iFrom=iThisIndex+1;
1244 else if (cmpint<0)
1245 iTo=iThisIndex-1;
1246 else {
1247 bFound=true;
1248 break;
1249 }
1250 }
1251 idx*=ENTR_PER_PAGE;
1252 if (!bFound) {
1253 idx += iFrom; //next
1254 idx_suggest = idx;
1255 gint best, back;
1256 best = prefix_match (str, page.entries[idx_suggest % ENTR_PER_PAGE].keystr);
1257 for (;;) {
1258 if ((iTo=idx_suggest-1) < 0)
1259 break;
1260 if (idx_suggest % ENTR_PER_PAGE == 0)
1261 load_page(iTo / ENTR_PER_PAGE);
1262 back = prefix_match (str, page.entries[iTo % ENTR_PER_PAGE].keystr);
1263 if (!back || back < best)
1264 break;
1265 best = back;
1266 idx_suggest = iTo;
1267 }
1268 } else {
1269 idx += iThisIndex;
1270 idx_suggest = idx;
1271 }
1272 } else {
1273 idx*=ENTR_PER_PAGE;
1274 idx_suggest = idx;
1275 }
1276 return bFound;
1277 }
1278
1279 //===================================================================
Dict()1280 Dict::Dict()
1281 {
1282 storage = NULL;
1283 }
1284
~Dict()1285 Dict::~Dict()
1286 {
1287 delete storage;
1288 }
1289
load(const std::string & ifofilename,bool CreateCacheFile,CollationLevelType CollationLevel,CollateFunctions CollateFunction,show_progress_t * sp)1290 bool Dict::load(const std::string& ifofilename, bool CreateCacheFile,
1291 CollationLevelType CollationLevel, CollateFunctions CollateFunction,
1292 show_progress_t *sp)
1293 {
1294 gulong idxfilesize;
1295 glong wordcount, synwordcount;
1296 if (!load_ifofile(ifofilename, idxfilesize, wordcount, synwordcount))
1297 return false;
1298 sp->notify_about_start(_("Loading..."));
1299
1300 // ifofilename without extension - base file name
1301 std::string filebasename
1302 = ifofilename.substr(0, ifofilename.length()-sizeof(".ifo")+1);
1303 if(!DictBase::load(filebasename, "dict"))
1304 return false;
1305
1306 std::string fullfilename;
1307 idx_file.reset(index_file::Create(filebasename, "idx", fullfilename));
1308 if (!idx_file->load(fullfilename, wordcount, idxfilesize,
1309 CreateCacheFile, CollationLevel,
1310 CollateFunction, sp))
1311 return false;
1312
1313 if (synwordcount) {
1314 fullfilename = filebasename + ".syn";
1315 if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
1316 syn_file.reset(new synonym_file);
1317 if (!syn_file->load(fullfilename, synwordcount,
1318 CreateCacheFile, CollationLevel,
1319 CollateFunction, sp))
1320 return false;
1321 }
1322 }
1323
1324 gchar *dirname = g_path_get_dirname(ifofilename.c_str());
1325 storage = ResourceStorage::create(dirname, CreateCacheFile, sp);
1326 g_free(dirname);
1327
1328 g_print("bookname: %s, wordcount %lu\n", bookname.c_str(), wordcount);
1329 return true;
1330 }
1331
load_ifofile(const std::string & ifofilename,gulong & idxfilesize,glong & wordcount,glong & synwordcount)1332 bool Dict::load_ifofile(const std::string& ifofilename, gulong &idxfilesize, glong &wordcount, glong &synwordcount)
1333 {
1334 DictInfo dict_info;
1335 if (!dict_info.load_from_ifo_file(ifofilename, DictInfoType_NormDict))
1336 return false;
1337
1338 ifo_file_name=dict_info.ifo_file_name;
1339 bookname=dict_info.get_bookname();
1340
1341 idxfilesize=dict_info.get_index_file_size();
1342 wordcount=dict_info.get_wordcount();
1343 synwordcount=dict_info.get_synwordcount();
1344
1345 sametypesequence=dict_info.get_sametypesequence();
1346 dicttype=dict_info.get_dicttype();
1347
1348 return true;
1349 }
1350
nsynarticles() const1351 glong Dict::nsynarticles() const
1352 {
1353 if (syn_file.get() == NULL)
1354 return 0;
1355 return syn_file->get_word_count();
1356 }
1357
GetWordPrev(glong idx,glong & pidx,bool isidx,CollationLevelType CollationLevel,int servercollatefunc)1358 bool Dict::GetWordPrev(glong idx, glong &pidx, bool isidx, CollationLevelType CollationLevel, int servercollatefunc)
1359 {
1360 idxsyn_file *is_file;
1361 if (isidx)
1362 is_file = idx_file.get();
1363 else
1364 is_file = syn_file.get();
1365 if (idx==INVALID_INDEX) {
1366 pidx = is_file->get_word_count()-1;
1367 return true;
1368 }
1369 pidx = idx;
1370 gchar *cWord = g_strdup(is_file->getWord(pidx, CollationLevel, servercollatefunc));
1371 const gchar *pWord;
1372 bool found=false;
1373 while (pidx>0) {
1374 pWord = is_file->getWord(pidx-1, CollationLevel, servercollatefunc);
1375 if (strcmp(pWord, cWord)!=0) {
1376 found=true;
1377 break;
1378 }
1379 pidx--;
1380 }
1381 g_free(cWord);
1382 if (found) {
1383 pidx--;
1384 return true;
1385 } else {
1386 return false;
1387 }
1388 }
1389
GetWordNext(glong & idx,bool isidx,CollationLevelType CollationLevel,int servercollatefunc)1390 void Dict::GetWordNext(glong &idx, bool isidx, CollationLevelType CollationLevel, int servercollatefunc)
1391 {
1392 idxsyn_file *is_file;
1393 if (isidx)
1394 is_file = idx_file.get();
1395 else
1396 is_file = syn_file.get();
1397 gchar *cWord = g_strdup(is_file->getWord(idx, CollationLevel, servercollatefunc));
1398 const gchar *pWord;
1399 bool found=false;
1400 while (idx < is_file->get_word_count()-1) {
1401 pWord = is_file->getWord(idx+1, CollationLevel, servercollatefunc);
1402 if (strcmp(pWord, cWord)!=0) {
1403 found=true;
1404 break;
1405 }
1406 idx++;
1407 }
1408 g_free(cWord);
1409 if (found)
1410 idx++;
1411 else
1412 idx=INVALID_INDEX;
1413 }
1414
GetOrigWordCount(glong & idx,bool isidx)1415 gint Dict::GetOrigWordCount(glong& idx, bool isidx)
1416 {
1417 idxsyn_file *is_file;
1418 if (isidx)
1419 is_file = idx_file.get();
1420 else
1421 is_file = syn_file.get();
1422 gchar *cWord = g_strdup(is_file->get_key(idx));
1423 const gchar *pWord;
1424 gint count = 1;
1425 glong idx1 = idx;
1426 while (idx1>0) {
1427 pWord = is_file->get_key(idx1-1);
1428 if (strcmp(pWord, cWord)!=0)
1429 break;
1430 count++;
1431 idx1--;
1432 }
1433 glong idx2=idx;
1434 while (idx2<is_file->get_word_count()-1) {
1435 pWord = is_file->get_key(idx2+1);
1436 if (strcmp(pWord, cWord)!=0)
1437 break;
1438 count++;
1439 idx2++;
1440 }
1441 idx=idx1;
1442 g_free(cWord);
1443 return count;
1444 }
1445
LookupSynonym(const char * str,glong & synidx,glong & synidx_suggest,CollationLevelType CollationLevel,int servercollatefunc)1446 bool Dict::LookupSynonym(const char *str, glong &synidx, glong &synidx_suggest, CollationLevelType CollationLevel, int servercollatefunc)
1447 {
1448 if (syn_file.get() == NULL) {
1449 synidx = UNSET_INDEX;
1450 synidx_suggest = UNSET_INDEX;
1451 return false;
1452 }
1453 return syn_file->Lookup(str, synidx, synidx_suggest, CollationLevel, servercollatefunc);
1454 }
1455
LookupWithRule(GPatternSpec * pspec,glong * aIndex,int iBuffLen)1456 bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1457 {
1458 int iIndexCount=0;
1459 for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1460 // Need to deal with same word in index? But this will slow down processing in most case.
1461 if (g_pattern_match_string(pspec, idx_file->getWord(i, CollationLevel_NONE, 0)))
1462 aIndex[iIndexCount++]=i;
1463 aIndex[iIndexCount]= -1; // -1 is the end.
1464 return (iIndexCount>0);
1465 }
1466
LookupWithRuleSynonym(GPatternSpec * pspec,glong * aIndex,int iBuffLen)1467 bool Dict::LookupWithRuleSynonym(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
1468 {
1469 if (syn_file.get() == NULL)
1470 return false;
1471 int iIndexCount=0;
1472 for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1473 // Need to deal with same word in index? But this will slow down processing in most case.
1474 if (g_pattern_match_string(pspec, syn_file->getWord(i, CollationLevel_NONE, 0)))
1475 aIndex[iIndexCount++]=i;
1476 aIndex[iIndexCount]= -1; // -1 is the end.
1477 return (iIndexCount>0);
1478 }
1479
LookupWithRegex(GRegex * regex,glong * aIndex,int iBuffLen)1480 bool Dict::LookupWithRegex(GRegex *regex, glong *aIndex, int iBuffLen)
1481 {
1482 int iIndexCount=0;
1483 for (glong i=0; i<narticles() && iIndexCount<iBuffLen-1; i++)
1484 // Need to deal with same word in index? But this will slow down processing in most case.
1485 if (g_regex_match(regex, idx_file->getWord(i, CollationLevel_NONE, 0), (GRegexMatchFlags)0, NULL))
1486 aIndex[iIndexCount++]=i;
1487 aIndex[iIndexCount]= -1; // -1 is the end.
1488 return (iIndexCount>0);
1489 }
1490
LookupWithRegexSynonym(GRegex * regex,glong * aIndex,int iBuffLen)1491 bool Dict::LookupWithRegexSynonym(GRegex *regex, glong *aIndex, int iBuffLen)
1492 {
1493 if (syn_file.get() == NULL)
1494 return false;
1495 int iIndexCount=0;
1496 for (glong i=0; i<nsynarticles() && iIndexCount<iBuffLen-1; i++)
1497 // Need to deal with same word in index? But this will slow down processing in most case.
1498 if (g_regex_match(regex, syn_file->getWord(i, CollationLevel_NONE, 0), (GRegexMatchFlags)0, NULL))
1499 aIndex[iIndexCount++]=i;
1500 aIndex[iIndexCount]= -1; // -1 is the end.
1501 return (iIndexCount>0);
1502 }
1503
1504 //===================================================================
1505 show_progress_t Libs::default_show_progress;
1506
Libs(show_progress_t * sp,bool create_cache_files,CollationLevelType level,CollateFunctions func)1507 Libs::Libs(show_progress_t *sp, bool create_cache_files, CollationLevelType level, CollateFunctions func)
1508 :
1509 iMaxFuzzyDistance(MAX_FUZZY_DISTANCE),
1510 show_progress(NULL),
1511 CreateCacheFile(create_cache_files)
1512 {
1513 #ifdef SD_SERVER_CODE
1514 root_info_item = NULL;
1515 #endif
1516 ValidateCollateParams(level, func);
1517 CollationLevel = level;
1518 CollateFunction = func;
1519 set_show_progress(sp);
1520 init_collations();
1521 }
1522
~Libs()1523 Libs::~Libs()
1524 {
1525 #ifdef SD_SERVER_CODE
1526 if (root_info_item)
1527 delete root_info_item;
1528 #endif
1529 for (std::vector<Dict *>::iterator p=oLib.begin(); p!=oLib.end(); ++p)
1530 delete *p;
1531 free_collations();
1532 }
1533
load_dict(const std::string & url,show_progress_t * sp)1534 bool Libs::load_dict(const std::string& url, show_progress_t *sp)
1535 {
1536 Dict *lib=new Dict;
1537 if (lib->load(url, CreateCacheFile, CollationLevel, CollateFunction, sp)) {
1538 oLib.push_back(lib);
1539 return true;
1540 } else {
1541 delete lib;
1542 return false;
1543 }
1544 }
1545
1546 #ifdef SD_SERVER_CODE
LoadFromXML()1547 void Libs::LoadFromXML()
1548 {
1549 root_info_item = new DictInfoItem();
1550 root_info_item->isdir = 1;
1551 root_info_item->dir = new DictInfoDirItem();
1552 root_info_item->dir->name='/';
1553 LoadXMLDir("/usr/share/stardict/dic", root_info_item);
1554 GenLinkDict(root_info_item);
1555 }
1556
GenLinkDict(DictInfoItem * info_item)1557 void Libs::GenLinkDict(DictInfoItem *info_item)
1558 {
1559 std::list<std::list<DictInfoItem *>::iterator> eraselist;
1560 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1561 if ((*i)->isdir == 1) {
1562 GenLinkDict(*i);
1563 } else if ((*i)->isdir == 2) {
1564 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1565 uid_iter = uidmap.find(*((*i)->linkuid));
1566 if (uid_iter!=uidmap.end()) {
1567 delete (*i)->linkuid;
1568 (*i)->dict = uid_iter->second;
1569 } else {
1570 g_print("Error, linkdict uid not found! %s\n", (*i)->linkuid->c_str());
1571 delete (*i)->linkuid;
1572 eraselist.push_back(i);
1573 }
1574 }
1575 }
1576 for (std::list<std::list<DictInfoItem *>::iterator>::iterator i = eraselist.begin(); i!= eraselist.end(); ++i) {
1577 info_item->dir->info_item_list.erase(*i);
1578 }
1579 }
1580
func_parse_start_element(GMarkupParseContext * context,const gchar * element_name,const gchar ** attribute_names,const gchar ** attribute_values,gpointer user_data,GError ** error)1581 void Libs::func_parse_start_element(GMarkupParseContext *context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values, gpointer user_data, GError **error)
1582 {
1583 if (strcmp(element_name, "dict")==0) {
1584 ParseUserData *Data = (ParseUserData *)user_data;
1585 Data->indict = true;
1586 Data->path.clear();
1587 Data->uid.clear();
1588 Data->level.clear();
1589 Data->download.clear();
1590 Data->from.clear();
1591 Data->to.clear();
1592 } else if (strcmp(element_name, "linkdict")==0) {
1593 ParseUserData *Data = (ParseUserData *)user_data;
1594 Data->inlinkdict = true;
1595 Data->linkuid.clear();
1596 }
1597 }
1598
func_parse_end_element(GMarkupParseContext * context,const gchar * element_name,gpointer user_data,GError ** error)1599 void Libs::func_parse_end_element(GMarkupParseContext *context, const gchar *element_name, gpointer user_data, GError **error)
1600 {
1601 if (strcmp(element_name, "dict")==0) {
1602 ParseUserData *Data = (ParseUserData *)user_data;
1603 Data->indict = false;
1604 if (!Data->path.empty() && !Data->uid.empty()) {
1605 std::string url;
1606 url = Data->dir;
1607 url += G_DIR_SEPARATOR;
1608 url += Data->path;
1609 if (Data->oLibs->load_dict(url, Data->oLibs->show_progress)) {
1610 DictInfoItem *sub_info_item = new DictInfoItem();
1611 sub_info_item->isdir = 0;
1612 sub_info_item->dict = new DictInfoDictItem();
1613 sub_info_item->dict->uid = Data->uid;
1614 sub_info_item->dict->download = Data->download;
1615 sub_info_item->dict->from = Data->from;
1616 sub_info_item->dict->to = Data->to;
1617 if (Data->level.empty())
1618 sub_info_item->dict->level = 0;
1619 else
1620 sub_info_item->dict->level = atoi(Data->level.c_str());
1621 sub_info_item->dict->id = Data->oLibs->oLib.size()-1;
1622 Data->info_item->dir->info_item_list.push_back(sub_info_item);
1623 Data->oLibs->uidmap[Data->uid] = sub_info_item->dict;
1624 }
1625 }
1626 } else if (strcmp(element_name, "linkdict")==0) {
1627 ParseUserData *Data = (ParseUserData *)user_data;
1628 Data->inlinkdict = false;
1629 if (!Data->linkuid.empty()) {
1630 DictInfoItem *sub_info_item = new DictInfoItem();
1631 sub_info_item->isdir = 2;
1632 sub_info_item->linkuid = new std::string(Data->linkuid);
1633 Data->info_item->dir->info_item_list.push_back(sub_info_item);
1634 }
1635 }
1636 }
1637
func_parse_text(GMarkupParseContext * context,const gchar * text,gsize text_len,gpointer user_data,GError ** error)1638 void Libs::func_parse_text(GMarkupParseContext *context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
1639 {
1640 const gchar *element = g_markup_parse_context_get_element(context);
1641 if (!element)
1642 return;
1643 ParseUserData *Data = (ParseUserData *)user_data;
1644 if (strcmp(element, "subdir")==0) {
1645 std::string subdir;
1646 subdir = Data->dir;
1647 subdir += G_DIR_SEPARATOR;
1648 subdir.append(text, text_len);
1649 DictInfoItem *sub_info_item = new DictInfoItem();
1650 sub_info_item->isdir = 1;
1651 sub_info_item->dir = new DictInfoDirItem();
1652 sub_info_item->dir->name.assign(text, text_len);
1653 Data->oLibs->LoadXMLDir(subdir.c_str(), sub_info_item);
1654 Data->info_item->dir->info_item_list.push_back(sub_info_item);
1655 } else if (strcmp(element, "dirname")==0) {
1656 Data->info_item->dir->dirname.assign(text, text_len);
1657 } else if (strcmp(element, "path")==0) {
1658 Data->path.assign(text, text_len);
1659 } else if (strcmp(element, "uid")==0) {
1660 if (Data->indict) {
1661 std::string uid(text, text_len);
1662 if (uid.find_first_of(' ')!=std::string::npos) {
1663 g_print("Error: uid contains space! %s: %s\n", Data->dir, uid.c_str());
1664 } else {
1665 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1666 uid_iter = Data->oLibs->uidmap.find(uid);
1667 if (uid_iter!=Data->oLibs->uidmap.end()) {
1668 g_print("Error: uid duplicated! %s: %s\n", Data->dir, uid.c_str());
1669 } else {
1670 Data->uid = uid;
1671 }
1672 }
1673 } else if (Data->inlinkdict) {
1674 Data->linkuid.assign(text, text_len);
1675 }
1676 } else if (strcmp(element, "level")==0) {
1677 Data->level.assign(text, text_len);
1678 } else if (strcmp(element, "download")==0) {
1679 Data->download.assign(text, text_len);
1680 } else if (strcmp(element, "from")==0) {
1681 Data->from.assign(text, text_len);
1682 } else if (strcmp(element, "to")==0) {
1683 Data->to.assign(text, text_len);
1684 }
1685 }
1686
LoadXMLDir(const char * dir,DictInfoItem * info_item)1687 void Libs::LoadXMLDir(const char *dir, DictInfoItem *info_item)
1688 {
1689 std::string filename;
1690 filename = build_path(dir, "stardictd.xml");
1691 stardict_stat_t filestat;
1692 if (g_stat(filename.c_str(), &filestat)!=0)
1693 return;
1694 MapFile mf;
1695 if (!mf.open(filename.c_str(), filestat.st_size))
1696 return;
1697 ParseUserData Data;
1698 Data.oLibs = this;
1699 Data.dir = dir;
1700 Data.info_item = info_item;
1701 Data.indict = false;
1702 Data.inlinkdict = false;
1703 GMarkupParser parser;
1704 parser.start_element = func_parse_start_element;
1705 parser.end_element = func_parse_end_element;
1706 parser.text = func_parse_text;
1707 parser.passthrough = NULL;
1708 parser.error = NULL;
1709 GMarkupParseContext* context = g_markup_parse_context_new(&parser, (GMarkupParseFlags)0, &Data, NULL);
1710 g_markup_parse_context_parse(context, mf.begin(), filestat.st_size, NULL);
1711 g_markup_parse_context_end_parse(context, NULL);
1712 g_markup_parse_context_free(context);
1713 mf.close();
1714 info_item->dir->dictcount = 0;
1715 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1716 if ((*i)->isdir == 1) {
1717 info_item->dir->dictcount += (*i)->dir->dictcount;
1718 } else if ((*i)->isdir == 0) {
1719 info_item->dir->dictcount++;
1720 }
1721 }
1722 }
1723
get_fromto_info()1724 const std::string &Libs::get_fromto_info() {
1725 if(cache_fromto.empty()){
1726 std::map<std::string, std::list<FromTo> > map_fromto;
1727 gen_fromto_info(root_info_item, map_fromto);
1728 cache_fromto+="<lang>";
1729 for (std::map<std::string, std::list<FromTo> >::iterator map_it = map_fromto.begin(); map_it != map_fromto.end(); ++map_it){
1730 cache_fromto+="<from lang=\"";
1731 cache_fromto+=map_it->first;
1732 cache_fromto+="\">";
1733 std::list<FromTo> &fromTo = map_it->second;
1734 for (std::list<FromTo>::iterator i = fromTo.begin() ; i!= fromTo.end(); ++i){
1735 cache_fromto+="<to lang=\"";
1736 cache_fromto+= i->to;
1737 cache_fromto+="\">";
1738 std::list<FromToInfo> &fromtoinfo = i->fromto_info;
1739 for (std::list<FromToInfo>::iterator j = fromtoinfo.begin() ; j!= fromtoinfo.end(); ++j){
1740 cache_fromto+="<dict><uid>";
1741 cache_fromto+=j->uid;
1742 cache_fromto+="</uid><bookname>";
1743 cache_fromto+= j->bookname;
1744 cache_fromto+="</bookname></dict>";
1745 }
1746 cache_fromto+="</to>";
1747 }
1748 cache_fromto+="</from>";
1749 }
1750 cache_fromto+="</lang>";
1751 }
1752 return cache_fromto;
1753 }
1754
gen_fromto_info(struct DictInfoItem * info_item,std::map<std::string,std::list<FromTo>> & map_fromto)1755 void Libs::gen_fromto_info(struct DictInfoItem *info_item, std::map<std::string, std::list<FromTo> > &map_fromto) {
1756 gchar *etext;
1757 for(std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin() ; i!= info_item->dir->info_item_list.end(); ++i){
1758 if ((*i)->isdir == 1) {
1759 gen_fromto_info((*i), map_fromto);
1760 } else {
1761 std::string from_str = (*i)->dict->from;
1762 std::string to_str = (*i)->dict->to;
1763 if(from_str.empty() || to_str.empty()){
1764 continue;
1765 }
1766 std::string uid_str = (*i)->dict->uid;
1767 etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1768 std::string bookname_str = etext;
1769 g_free(etext);
1770 std::map<std::string, std::list<FromTo> >::iterator fromto1 = map_fromto.find(from_str);
1771 if (fromto1==map_fromto.end()) {
1772 //if an from_str element not already in map, add new from_str to map
1773 FromToInfo fromtoinfo;
1774 fromtoinfo.uid = uid_str;
1775 fromtoinfo.bookname = bookname_str;
1776 std::list<FromToInfo> list_fromtoinfo ;
1777 list_fromtoinfo.push_back(fromtoinfo);
1778 FromTo new_fromTo;
1779 new_fromTo.to = to_str;
1780 new_fromTo.fromto_info = list_fromtoinfo;
1781 std::list<FromTo> list_fromTo;
1782 list_fromTo.push_back(new_fromTo);
1783 map_fromto[from_str] = list_fromTo;
1784 } else {
1785 // else if from_str already in map, so comparison to_str and from_to1 , then choose insert.
1786 std::list<FromTo> &fromTo_list = fromto1->second;
1787 std::string from_name1 = fromto1->first;
1788 bool found = false;
1789 for (std::list<FromTo>::iterator new_fromTo = fromTo_list.begin(); new_fromTo != fromTo_list.end(); ++new_fromTo) {
1790 if(to_str == new_fromTo->to) {
1791 std::list<FromToInfo> &fromtoinfo1 = new_fromTo->fromto_info;
1792 FromToInfo fromtoinfo;
1793 fromtoinfo.uid = uid_str;
1794 fromtoinfo.bookname = bookname_str;
1795 fromtoinfo1.push_back(fromtoinfo);
1796 found = true;
1797 break;
1798 }
1799 }
1800 if(!found){
1801 FromToInfo fromtoinfo;
1802 fromtoinfo.uid = uid_str;
1803 fromtoinfo.bookname = bookname_str;
1804 std::list<FromToInfo> fromtoinfo1;
1805 fromtoinfo1.push_back(fromtoinfo);
1806 FromTo fromTo;
1807 fromTo.to = to_str;
1808 fromTo.fromto_info = fromtoinfo1;
1809 fromTo_list.push_back(fromTo);
1810 }
1811 }
1812 }
1813 }
1814 }
1815
get_dir_info(const char * path)1816 const std::string *Libs::get_dir_info(const char *path)
1817 {
1818 if (path[0]!='/')
1819 return NULL;
1820 DictInfoItem *info_item = root_info_item;
1821 std::string item;
1822 const char *p = path+1;
1823 const char *p1;
1824 bool found;
1825 do {
1826 p1 = strchr(p, '/');
1827 if (p1) {
1828 item.assign(p, p1-p);
1829 if (!item.empty()) {
1830 found = false;
1831 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1832 if ((*i)->isdir == 1) {
1833 if ((*i)->dir->name == item) {
1834 info_item = (*i);
1835 found = true;
1836 break;
1837 }
1838 }
1839 }
1840 if (!found)
1841 return NULL;
1842 }
1843 p = p1+1;
1844 }
1845 } while (p1);
1846 if (*p)
1847 return NULL; // Not end by '/'.
1848 DictInfoDirItem *dir = info_item->dir;
1849 if (dir->info_string.empty()) {
1850 dir->info_string += "<parent>";
1851 dir->info_string += path;
1852 dir->info_string += "</parent>";
1853 gchar *etext;
1854 for (std::list<DictInfoItem *>::iterator i = info_item->dir->info_item_list.begin(); i!= info_item->dir->info_item_list.end(); ++i) {
1855 if ((*i)->isdir == 1) {
1856 dir->info_string += "<dir><name>";
1857 dir->info_string += (*i)->dir->name;
1858 dir->info_string += "</name><dirname>";
1859 dir->info_string += (*i)->dir->dirname;
1860 dir->info_string += "</dirname><dictcount>";
1861 gchar *dictcount = g_strdup_printf("%u", (*i)->dir->dictcount);
1862 dir->info_string += dictcount;
1863 g_free(dictcount);
1864 dir->info_string += "</dictcount></dir>";
1865 } else {
1866 dir->info_string += "<dict>";
1867 if ((*i)->isdir == 2)
1868 dir->info_string += "<islink>1</islink>";
1869 if ((*i)->dict->level != 0) {
1870 dir->info_string += "<level>";
1871 gchar *level = g_strdup_printf("%u", (*i)->dict->level);
1872 dir->info_string += level;
1873 g_free(level);
1874 dir->info_string += "</level>";
1875 }
1876 dir->info_string += "<uid>";
1877 dir->info_string += (*i)->dict->uid;
1878 dir->info_string += "</uid><bookname>";
1879 etext = g_markup_escape_text(oLib[(*i)->dict->id]->dict_name().c_str(), -1);
1880 dir->info_string += etext;
1881 g_free(etext);
1882 dir->info_string += "</bookname><wordcount>";
1883 gchar *wc = g_strdup_printf("%ld", oLib[(*i)->dict->id]->narticles());
1884 dir->info_string += wc;
1885 g_free(wc);
1886 dir->info_string += "</wordcount></dict>";
1887 }
1888 }
1889 }
1890 return &(dir->info_string);
1891 }
1892
get_dict_level(const char * uid)1893 int Libs::get_dict_level(const char *uid)
1894 {
1895 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1896 uid_iter = uidmap.find(uid);
1897 if (uid_iter==uidmap.end())
1898 return -1;
1899 return uid_iter->second->level;
1900 }
1901
get_dicts_list(const char * dictmask,int max_dict_count,int userLevel)1902 std::string Libs::get_dicts_list(const char *dictmask, int max_dict_count, int userLevel)
1903 {
1904 std::list<std::string> uid_list;
1905 std::string uid;
1906 const char *p, *p1;
1907 p = dictmask;
1908 do {
1909 p1 = strchr(p, ' ');
1910 if (p1) {
1911 uid.assign(p, p1-p);
1912 if (!uid.empty())
1913 uid_list.push_back(uid);
1914 p = p1+1;
1915 }
1916 } while (p1);
1917 uid = p;
1918 if (!uid.empty())
1919 uid_list.push_back(uid);
1920
1921 std::string dictmask_str;
1922 int count = 0;
1923 const std::string *info_string;
1924 int level;
1925 for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
1926 level = get_dict_level((*i).c_str());
1927 if (level < 0 || level > userLevel)
1928 continue;
1929 info_string = get_dict_info(i->c_str(), true);
1930 if (info_string) {
1931 if (count>=max_dict_count)
1932 break;
1933 dictmask_str += info_string->c_str();
1934 count++;
1935 }
1936 }
1937 return dictmask_str;
1938 }
1939
get_dict_info(const char * uid,bool is_short)1940 const std::string *Libs::get_dict_info(const char *uid, bool is_short)
1941 {
1942 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
1943 uid_iter = uidmap.find(uid);
1944 if (uid_iter==uidmap.end())
1945 return NULL;
1946 DictInfoDictItem *dict;
1947 dict = uid_iter->second;
1948 if (is_short) {
1949 if (dict->short_info_string.empty()) {
1950 gchar *etext;
1951 dict->short_info_string += "<dict><uid>";
1952 dict->short_info_string += uid;
1953 dict->short_info_string += "</uid><bookname>";
1954 etext = g_markup_escape_text(oLib[dict->id]->dict_name().c_str(), -1);
1955 dict->short_info_string += etext;
1956 g_free(etext);
1957 dict->short_info_string += "</bookname><wordcount>";
1958 gchar *wc = g_strdup_printf("%ld", oLib[dict->id]->narticles());
1959 dict->short_info_string += wc;
1960 g_free(wc);
1961 dict->short_info_string += "</wordcount></dict>";
1962 }
1963 return &(dict->short_info_string);
1964 } else {
1965 if (dict->info_string.empty()) {
1966 gchar *etext;
1967 DictInfo dict_info;
1968 if (!dict_info.load_from_ifo_file(oLib[dict->id]->ifofilename(),
1969 DictInfoType_NormDict))
1970 return NULL;
1971 dict->info_string += "<dictinfo><bookname>";
1972 etext = g_markup_escape_text(dict_info.bookname.c_str(), -1);
1973 dict->info_string += etext;
1974 g_free(etext);
1975 dict->info_string += "</bookname><wordcount>";
1976 gchar *wc = g_strdup_printf("%u", dict_info.wordcount);
1977 dict->info_string += wc;
1978 g_free(wc);
1979 dict->info_string += "</wordcount>";
1980 if (dict_info.synwordcount!=0) {
1981 dict->info_string += "<synwordcount>";
1982 wc = g_strdup_printf("%u", dict_info.synwordcount);
1983 dict->info_string += wc;
1984 g_free(wc);
1985 dict->info_string += "</synwordcount>";
1986 }
1987 dict->info_string += "<author>";
1988 etext = g_markup_escape_text(dict_info.author.c_str(), -1);
1989 dict->info_string += etext;
1990 g_free(etext);
1991 dict->info_string += "</author><email>";
1992 etext = g_markup_escape_text(dict_info.email.c_str(), -1);
1993 dict->info_string += etext;
1994 g_free(etext);
1995 dict->info_string += "</email><website>";
1996 etext = g_markup_escape_text(dict_info.website.c_str(), -1);
1997 dict->info_string += etext;
1998 g_free(etext);
1999 dict->info_string += "</website><description>";
2000 etext = g_markup_escape_text(dict_info.description.c_str(), -1);
2001 dict->info_string += etext;
2002 g_free(etext);
2003 dict->info_string += "</description><date>";
2004 etext = g_markup_escape_text(dict_info.date.c_str(), -1);
2005 dict->info_string += etext;
2006 g_free(etext);
2007 dict->info_string += "</date><download>";
2008 etext = g_markup_escape_text(dict->download.c_str(), -1);
2009 dict->info_string += etext;
2010 g_free(etext);
2011 dict->info_string += "</download></dictinfo>";
2012 }
2013 return &(dict->info_string);
2014 }
2015 }
2016
SetServerDictMask(std::vector<InstantDictIndex> & dictmask,const char * dicts,int max,int userLevel)2017 void Libs::SetServerDictMask(std::vector<InstantDictIndex> &dictmask, const char *dicts, int max, int userLevel)
2018 {
2019 InstantDictIndex instance_dict_index;
2020 instance_dict_index.type = InstantDictType_LOCAL;
2021 dictmask.clear();
2022 std::list<std::string> uid_list;
2023 std::string uid;
2024 const char *p, *p1;
2025 p = dicts;
2026 do {
2027 p1 = strchr(p, ' ');
2028 if (p1) {
2029 uid.assign(p, p1-p);
2030 if (!uid.empty())
2031 uid_list.push_back(uid);
2032 p = p1+1;
2033 }
2034 } while (p1);
2035 uid = p;
2036 if (!uid.empty())
2037 uid_list.push_back(uid);
2038 int count = 0;
2039 std::map<std::string, DictInfoDictItem *>::iterator uid_iter;
2040 for (std::list<std::string>::iterator i = uid_list.begin(); i!= uid_list.end(); ++i) {
2041 uid_iter = uidmap.find(*i);
2042 if (uid_iter!=uidmap.end()) {
2043 if (max>=0 && count >= max)
2044 break;
2045 if (userLevel>=0 && (unsigned int)userLevel< uid_iter->second->level)
2046 continue;
2047 instance_dict_index.index = uid_iter->second->id;
2048 dictmask.push_back(instance_dict_index);
2049 count++;
2050 }
2051 }
2052 }
2053
LoadCollateFile(std::vector<InstantDictIndex> & dictmask,CollateFunctions cltfuc)2054 void Libs::LoadCollateFile(std::vector<InstantDictIndex> &dictmask, CollateFunctions cltfuc)
2055 {
2056 for (std::vector<InstantDictIndex>::iterator i = dictmask.begin(); i!=dictmask.end(); ++i) {
2057 if ((*i).type == InstantDictType_LOCAL) {
2058 oLib[(*i).index]->idx_file->collate_load(cltfuc, CollationLevel_MULTI);
2059 if (oLib[(*i).index]->syn_file.get() != NULL)
2060 oLib[(*i).index]->syn_file->collate_load(cltfuc, CollationLevel_MULTI);
2061 }
2062 }
2063 }
2064 #endif
2065
2066 #ifdef SD_CLIENT_CODE
find_lib_by_id(const DictItemId & id,size_t & iLib)2067 bool Libs::find_lib_by_id(const DictItemId& id, size_t &iLib)
2068 {
2069 for (std::vector<Dict *>::size_type i =0; i < oLib.size(); i++) {
2070 if (oLib[i]->id() == id) {
2071 iLib = i;
2072 return true;
2073 }
2074 }
2075 return false;
2076 }
2077
load(const std::list<std::string> & load_list)2078 void Libs::load(const std::list<std::string> &load_list)
2079 {
2080 for (std::list<std::string>::const_iterator i = load_list.begin(); i != load_list.end(); ++i) {
2081 load_dict(*i, show_progress);
2082 }
2083 }
2084
reload(const std::list<std::string> & load_list,CollationLevelType NewCollationLevel,CollateFunctions collf)2085 void Libs::reload(const std::list<std::string> &load_list, CollationLevelType NewCollationLevel, CollateFunctions collf)
2086 {
2087 ValidateCollateParams(NewCollationLevel, collf);
2088 if (NewCollationLevel == CollationLevel && collf == CollateFunction) {
2089 std::vector<Dict *> prev(oLib);
2090 oLib.clear();
2091 for (std::list<std::string>::const_iterator i = load_list.begin(); i != load_list.end(); ++i) {
2092 std::vector<Dict *>::iterator it;
2093 for (it=prev.begin(); it!=prev.end(); ++it) {
2094 if ((*it)->ifofilename()==*i)
2095 break;
2096 }
2097 if (it==prev.end()) {
2098 load_dict(*i, show_progress);
2099 } else {
2100 Dict *res=*it;
2101 prev.erase(it);
2102 oLib.push_back(res);
2103 }
2104 }
2105 for (std::vector<Dict *>::iterator it=prev.begin(); it!=prev.end(); ++it) {
2106 delete *it;
2107 }
2108 } else {
2109 for (std::vector<Dict *>::iterator it = oLib.begin(); it != oLib.end(); ++it)
2110 delete *it;
2111 oLib.clear();
2112 free_collations();
2113 CollationLevel = NewCollationLevel;
2114 CollateFunction = CollateFunctions(collf);
2115 init_collations();
2116 load(load_list);
2117 }
2118 }
2119 #endif
2120
CltIndexToOrig(glong cltidx,size_t iLib,int servercollatefunc)2121 glong Libs::CltIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2122 {
2123 if (CollationLevel == CollationLevel_NONE)
2124 return cltidx;
2125 if (CollationLevel == CollationLevel_SINGLE) {
2126 if (cltidx == INVALID_INDEX)
2127 return cltidx;
2128 return oLib[iLib]->idx_file->get_clt_file()->GetOrigIndex(cltidx);
2129 }
2130 if (servercollatefunc == 0)
2131 return cltidx;
2132 if (cltidx == INVALID_INDEX)
2133 return cltidx;
2134 oLib[iLib]->idx_file->collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
2135 return oLib[iLib]->idx_file->get_clt_file(servercollatefunc-1)->GetOrigIndex(cltidx);
2136 }
2137
CltSynIndexToOrig(glong cltidx,size_t iLib,int servercollatefunc)2138 glong Libs::CltSynIndexToOrig(glong cltidx, size_t iLib, int servercollatefunc)
2139 {
2140 if (CollationLevel == CollationLevel_NONE)
2141 return cltidx;
2142 if (CollationLevel == CollationLevel_SINGLE) {
2143 if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2144 return cltidx;
2145 return oLib[iLib]->syn_file->get_clt_file()->GetOrigIndex(cltidx);
2146 }
2147 if (servercollatefunc == 0)
2148 return cltidx;
2149 if (cltidx == UNSET_INDEX || cltidx == INVALID_INDEX)
2150 return cltidx;
2151 oLib[iLib]->syn_file->collate_load((CollateFunctions)(servercollatefunc-1), CollationLevel_MULTI);
2152 return oLib[iLib]->syn_file->get_clt_file(servercollatefunc-1)->GetOrigIndex(cltidx);
2153 }
2154
/* Pick one suggestion for sWord out of the per-dictionary suggestions stored
 * in iCurrent (idx_suggest for the main index, synidx_suggest for synonyms).
 *
 * Only local entries of dictmask whose suggestion index is neither
 * INVALID_INDEX nor UNSET_INDEX take part. The candidate with the largest
 * prefix_match() against sWord wins; on equal scores the one that
 * stardict_server_collate() orders first wins. Returns NULL when there is no
 * candidate at all. */
const gchar *Libs::GetSuggestWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
{
	typedef std::vector<InstantDictIndex>::size_type MaskPos;

	const gchar *chosen = NULL;
	gint chosenScore = 0;

	// Candidates coming from the main indexes.
	for (MaskPos slot = 0; slot < dictmask.size(); ++slot) {
		if (dictmask[slot].type != InstantDictType_LOCAL)
			continue;
		CurrentIndex &cur = iCurrent[slot];
		if (cur.idx_suggest == INVALID_INDEX || cur.idx_suggest == UNSET_INDEX)
			continue;
		std::vector<Dict *>::size_type lib = dictmask[slot].index;
		const gchar *candidate = poGetWord(cur.idx_suggest, lib, servercollatefunc);
		const gint score = prefix_match (sWord, candidate);
		if (chosen == NULL || score > chosenScore) {
			chosen = candidate;
			chosenScore = score;
		} else if (score == chosenScore
			&& stardict_server_collate(chosen, candidate, CollationLevel, CollateFunction, servercollatefunc) > 0) {
			chosen = candidate;
		}
	}

	// Candidates coming from the synonym indexes.
	for (MaskPos slot = 0; slot < dictmask.size(); ++slot) {
		if (dictmask[slot].type != InstantDictType_LOCAL)
			continue;
		CurrentIndex &cur = iCurrent[slot];
		if (cur.synidx_suggest == INVALID_INDEX || cur.synidx_suggest == UNSET_INDEX)
			continue;
		std::vector<Dict *>::size_type lib = dictmask[slot].index;
		const gchar *candidate = poGetSynonymWord(cur.synidx_suggest, lib, servercollatefunc);
		const gint score = prefix_match (sWord, candidate);
		if (chosen == NULL || score > chosenScore) {
			chosen = candidate;
			chosenScore = score;
		} else if (score == chosenScore
			&& stardict_server_collate(chosen, candidate, CollationLevel, CollateFunction, servercollatefunc) > 0) {
			chosen = candidate;
		}
	}
	return chosen;
}
2211
poGetCurrentWord(CurrentIndex * iCurrent,std::vector<InstantDictIndex> & dictmask,int servercollatefunc)2212 const gchar *Libs::poGetCurrentWord(CurrentIndex * iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2213 {
2214 const gchar *poCurrentWord = NULL;
2215 const gchar *word;
2216 std::vector<InstantDictIndex>::size_type iLib;
2217 std::vector<Dict *>::size_type iRealLib;
2218 for (iLib=0; iLib < dictmask.size(); iLib++) {
2219 if (dictmask[iLib].type != InstantDictType_LOCAL)
2220 continue;
2221 iRealLib = dictmask[iLib].index;
2222 if (iCurrent[iLib].idx==INVALID_INDEX)
2223 continue;
2224 if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2225 continue;
2226 if ( poCurrentWord == NULL ) {
2227 poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2228 } else {
2229 word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2230 gint x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2231 if (x > 0) {
2232 poCurrentWord = word;
2233 }
2234 }
2235 }
2236 for (iLib=0; iLib<dictmask.size(); iLib++) {
2237 if (dictmask[iLib].type != InstantDictType_LOCAL)
2238 continue;
2239 iRealLib = dictmask[iLib].index;
2240 if (iCurrent[iLib].synidx==UNSET_INDEX)
2241 continue;
2242 if (iCurrent[iLib].synidx==INVALID_INDEX)
2243 continue;
2244 if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2245 continue;
2246 if ( poCurrentWord == NULL ) {
2247 poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2248 } else {
2249 word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2250 gint x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2251 if (x > 0) {
2252 poCurrentWord = word;
2253 }
2254 }
2255 }
2256 return poCurrentWord;
2257 }
2258
2259 const gchar *
poGetNextWord(const gchar * sWord,CurrentIndex * iCurrent,std::vector<InstantDictIndex> & dictmask,int servercollatefunc)2260 Libs::poGetNextWord(const gchar *sWord, CurrentIndex *iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2261 {
2262 // the input can be:
2263 // (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback();
2264 // (NULL,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. used by AppCore::ListWords();
2265 const gchar *poCurrentWord = NULL;
2266 std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2267 bool isLib = false;
2268 const gchar *word;
2269
2270 std::vector<InstantDictIndex>::size_type iLib;
2271 std::vector<Dict *>::size_type iRealLib;
2272 for (iLib=0; iLib < dictmask.size(); iLib++) {
2273 if (dictmask[iLib].type != InstantDictType_LOCAL)
2274 continue;
2275 iRealLib = dictmask[iLib].index;
2276 if (sWord) {
2277 oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, CollationLevel, servercollatefunc);
2278 }
2279 if (iCurrent[iLib].idx==INVALID_INDEX)
2280 continue;
2281 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2282 continue;
2283 if (poCurrentWord == NULL ) {
2284 poCurrentWord = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2285 iCurrentLib = iLib;
2286 iCurrentRealLib = iRealLib;
2287 isLib=true;
2288 } else {
2289 gint x;
2290 word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2291 x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2292 if (x > 0) {
2293 poCurrentWord = word;
2294 iCurrentLib = iLib;
2295 iCurrentRealLib = iRealLib;
2296 isLib=true;
2297 }
2298 }
2299 }
2300 for (iLib=0; iLib < dictmask.size(); iLib++) {
2301 if (dictmask[iLib].type != InstantDictType_LOCAL)
2302 continue;
2303 iRealLib = dictmask[iLib].index;
2304 if (sWord) {
2305 oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, CollationLevel, servercollatefunc);
2306 }
2307 if (iCurrent[iLib].synidx==UNSET_INDEX)
2308 continue;
2309 if (iCurrent[iLib].synidx==INVALID_INDEX)
2310 continue;
2311 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2312 continue;
2313 if (poCurrentWord == NULL ) {
2314 poCurrentWord = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2315 iCurrentLib = iLib;
2316 iCurrentRealLib = iRealLib;
2317 isLib=false;
2318 } else {
2319 gint x;
2320 word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2321 x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2322 if (x > 0 ) {
2323 poCurrentWord = word;
2324 iCurrentLib = iLib;
2325 iCurrentRealLib = iRealLib;
2326 isLib=false;
2327 }
2328 }
2329 }
2330 if (poCurrentWord) {
2331 for (iLib=0; iLib < dictmask.size(); iLib++) {
2332 if (dictmask[iLib].type != InstantDictType_LOCAL)
2333 continue;
2334 iRealLib = dictmask[iLib].index;
2335 if (isLib && (iLib == iCurrentLib))
2336 continue;
2337 if (iCurrent[iLib].idx==INVALID_INDEX)
2338 continue;
2339 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<0)
2340 continue;
2341 word = poGetWord(iCurrent[iLib].idx, iRealLib, servercollatefunc);
2342 if (strcmp(poCurrentWord, word) == 0) {
2343 GetWordNext(iCurrent[iLib].idx, iRealLib, true, servercollatefunc);
2344 }
2345 }
2346 for (iLib=0; iLib < dictmask.size(); iLib++) {
2347 if (dictmask[iLib].type != InstantDictType_LOCAL)
2348 continue;
2349 iRealLib = dictmask[iLib].index;
2350 if ((!isLib) && (iLib == iCurrentLib))
2351 continue;
2352 if (iCurrent[iLib].synidx==UNSET_INDEX)
2353 continue;
2354 if (iCurrent[iLib].synidx==INVALID_INDEX)
2355 continue;
2356 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<0)
2357 continue;
2358 word = poGetSynonymWord(iCurrent[iLib].synidx, iRealLib, servercollatefunc);
2359 if (strcmp(poCurrentWord, word) == 0) {
2360 GetWordNext(iCurrent[iLib].synidx, iRealLib, false, servercollatefunc);
2361 }
2362 }
2363 //GetWordNext will change poCurrentWord's content, so do it at the last.
2364 if (isLib) {
2365 GetWordNext(iCurrent[iCurrentLib].idx, iCurrentRealLib, true, servercollatefunc);
2366 } else {
2367 GetWordNext(iCurrent[iCurrentLib].synidx, iCurrentRealLib, false, servercollatefunc);
2368 }
2369 poCurrentWord = poGetCurrentWord(iCurrent, dictmask, servercollatefunc);
2370 }
2371 return poCurrentWord;
2372 }
2373
2374 const gchar *
poGetPreWord(const gchar * sWord,CurrentIndex * iCurrent,std::vector<InstantDictIndex> & dictmask,int servercollatefunc)2375 Libs::poGetPreWord(const gchar *sWord, CurrentIndex* iCurrent, std::vector<InstantDictIndex> &dictmask, int servercollatefunc)
2376 {
2377 // used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange();
2378 const gchar *poCurrentWord = NULL;
2379 std::vector<Dict *>::size_type iCurrentLib=0, iCurrentRealLib=0;
2380 bool isLib = false;
2381
2382 const gchar *word;
2383 glong pidx;
2384 std::vector<InstantDictIndex>::size_type iLib;
2385 std::vector<Dict *>::size_type iRealLib;
2386 // lookup in index
2387 for (iLib=0;iLib<dictmask.size();iLib++) {
2388 if (dictmask[iLib].type != InstantDictType_LOCAL)
2389 continue;
2390 iRealLib = dictmask[iLib].index;
2391 if (sWord) {
2392 oLib[iRealLib]->Lookup(sWord, iCurrent[iLib].idx, iCurrent[iLib].idx_suggest, CollationLevel, servercollatefunc);
2393 }
2394 if (iCurrent[iLib].idx!=INVALID_INDEX) {
2395 if ( iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2396 continue;
2397 }
2398 if ( poCurrentWord == NULL ) {
2399 if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2400 poCurrentWord = poGetWord(pidx, iRealLib, servercollatefunc);
2401 iCurrentLib = iLib;
2402 iCurrentRealLib = iRealLib;
2403 isLib=true;
2404 }
2405 } else {
2406 if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2407 gint x;
2408 word = poGetWord(pidx, iRealLib, servercollatefunc);
2409 x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2410 if (x < 0 ) {
2411 poCurrentWord = word;
2412 iCurrentLib = iLib;
2413 iCurrentRealLib = iRealLib;
2414 isLib=true;
2415 }
2416 }
2417 }
2418 }
2419 // lookup synonyms
2420 for (iLib=0;iLib<dictmask.size();iLib++) {
2421 if (dictmask[iLib].type != InstantDictType_LOCAL)
2422 continue;
2423 iRealLib = dictmask[iLib].index;
2424 if (sWord) {
2425 oLib[iRealLib]->LookupSynonym(sWord, iCurrent[iLib].synidx, iCurrent[iLib].synidx_suggest, CollationLevel, servercollatefunc);
2426 }
2427 if (iCurrent[iLib].synidx==UNSET_INDEX)
2428 continue;
2429 if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2430 if ( iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2431 continue;
2432 }
2433 if ( poCurrentWord == NULL ) {
2434 if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2435 poCurrentWord = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2436 iCurrentLib = iLib;
2437 iCurrentRealLib = iRealLib;
2438 isLib=false;
2439 }
2440 } else {
2441 if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2442 gint x;
2443 word = poGetSynonymWord(pidx,iRealLib, servercollatefunc);
2444 x = stardict_server_collate(poCurrentWord, word, CollationLevel, CollateFunction, servercollatefunc);
2445 if (x < 0 ) {
2446 poCurrentWord = word;
2447 iCurrentLib = iLib;
2448 iCurrentRealLib = iRealLib;
2449 isLib=false;
2450 }
2451 }
2452 }
2453 }
2454 if (poCurrentWord) {
2455 /* poCurrentWord - the "previous" word for the sWord word among all word in
2456 * all local dictionaries specified by dictmask */
2457 for (iLib=0;iLib<dictmask.size();iLib++) {
2458 if (dictmask[iLib].type != InstantDictType_LOCAL)
2459 continue;
2460 iRealLib = dictmask[iLib].index;
2461 if (isLib && (iLib == iCurrentLib))
2462 continue;
2463 if (iCurrent[iLib].idx!=INVALID_INDEX) {
2464 if (iCurrent[iLib].idx>=narticles(iRealLib) || iCurrent[iLib].idx<=0)
2465 continue;
2466 }
2467 if (GetWordPrev(iCurrent[iLib].idx, pidx, iRealLib, true, servercollatefunc)) {
2468 word = poGetWord(pidx, iRealLib, servercollatefunc);
2469 if (strcmp(poCurrentWord, word) == 0) {
2470 iCurrent[iLib].idx=pidx;
2471 }
2472 }
2473 }
2474 for (iLib=0;iLib<dictmask.size();iLib++) {
2475 if (dictmask[iLib].type != InstantDictType_LOCAL)
2476 continue;
2477 iRealLib = dictmask[iLib].index;
2478 if ((!isLib) && (iLib == iCurrentLib))
2479 continue;
2480 if (iCurrent[iLib].synidx==UNSET_INDEX)
2481 continue;
2482 if (iCurrent[iLib].synidx!=INVALID_INDEX) {
2483 if (iCurrent[iLib].synidx>=nsynarticles(iRealLib) || iCurrent[iLib].synidx<=0)
2484 continue;
2485 }
2486 if (GetWordPrev(iCurrent[iLib].synidx, pidx, iRealLib, false, servercollatefunc)) {
2487 word = poGetSynonymWord(pidx, iRealLib, servercollatefunc);
2488 if (strcmp(poCurrentWord, word) == 0) {
2489 iCurrent[iLib].synidx=pidx;
2490 }
2491 }
2492 }
2493 if (isLib) {
2494 GetWordPrev(iCurrent[iCurrentLib].idx, pidx, iCurrentRealLib, true, servercollatefunc);
2495 iCurrent[iCurrentLib].idx = pidx;
2496 } else {
2497 GetWordPrev(iCurrent[iCurrentLib].synidx, pidx, iCurrentRealLib, false, servercollatefunc);
2498 iCurrent[iCurrentLib].synidx = pidx;
2499 }
2500 }
2501 return poCurrentWord;
2502 }
2503
LookupSynonymSimilarWord(const gchar * sWord,glong & iSynonymWordIndex,glong & synidx_suggest,size_t iLib,int servercollatefunc)2504 bool Libs::LookupSynonymSimilarWord(const gchar* sWord, glong &iSynonymWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
2505 {
2506 if (oLib[iLib]->syn_file.get() == NULL)
2507 return false;
2508
2509 glong iIndex;
2510 glong iIndex_suggest;
2511 bool bFound=false;
2512 gchar *casestr;
2513 bool bLookup;
2514
2515 if (!bFound) {
2516 // to lower case.
2517 casestr = g_utf8_strdown(sWord, -1);
2518 if (strcmp(casestr, sWord)) {
2519 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, CollationLevel, servercollatefunc);
2520 if(bLookup)
2521 bFound=true;
2522 }
2523 g_free(casestr);
2524 // to upper case.
2525 if (!bFound) {
2526 casestr = g_utf8_strup(sWord, -1);
2527 if (strcmp(casestr, sWord)) {
2528 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, CollationLevel, servercollatefunc);
2529 if(bLookup)
2530 bFound=true;
2531 }
2532 g_free(casestr);
2533 }
2534 // Upper the first character and lower others.
2535 if (!bFound) {
2536 gchar *nextchar = g_utf8_next_char(sWord);
2537 gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
2538 nextchar = g_utf8_strdown(nextchar, -1);
2539 casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2540 g_free(firstchar);
2541 g_free(nextchar);
2542 if (strcmp(casestr, sWord)) {
2543 bLookup = oLib[iLib]->LookupSynonym(casestr, iIndex, iIndex_suggest, CollationLevel, servercollatefunc);
2544 if(bLookup)
2545 bFound=true;
2546 }
2547 g_free(casestr);
2548 }
2549 if (!bFound) {
2550 iIndex = iSynonymWordIndex;
2551 glong pidx;
2552 const gchar *cword;
2553 do {
2554 if (GetWordPrev(iIndex, pidx, iLib, false, servercollatefunc)) {
2555 cword = poGetSynonymWord(pidx, iLib, servercollatefunc);
2556 if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2557 iIndex = pidx;
2558 bFound=true;
2559 } else {
2560 break;
2561 }
2562 } else {
2563 break;
2564 }
2565 } while (true);
2566 if (!bFound) {
2567 if (iIndex!=INVALID_INDEX) {
2568 cword = poGetSynonymWord(iIndex, iLib, servercollatefunc);
2569 if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2570 bFound=true;
2571 }
2572 }
2573 }
2574 }
2575 }
2576 if (bFound) {
2577 iSynonymWordIndex = iIndex;
2578 synidx_suggest = iIndex_suggest;
2579 }
2580 return bFound;
2581 }
2582
2583 /* A helper function for LookupSimilarWord method.
2584 * It accepts too many parameters but simplifies the main function a bit...
2585 * Return value - whether the lookup was successful.
2586 * idx_suggest is updated if a better partial match is found. */
LookupSimilarWordTryWord(const gchar * sTryWord,const gchar * sWord,int servercollatefunc,size_t iLib,glong & iIndex,glong & idx_suggest,gint & best_match)2587 bool Libs::LookupSimilarWordTryWord(const gchar *sTryWord, const gchar *sWord,
2588 int servercollatefunc, size_t iLib,
2589 glong &iIndex, glong &idx_suggest, gint &best_match)
2590 {
2591 glong iIndexSuggest;
2592 if(oLib[iLib]->Lookup(sTryWord, iIndex, iIndexSuggest, CollationLevel, servercollatefunc)) {
2593 best_match = g_utf8_strlen(sTryWord, -1);
2594 idx_suggest = iIndexSuggest;
2595 return true;
2596 } else {
2597 gint cur_match = prefix_match(sWord, poGetWord(iIndexSuggest, iLib, servercollatefunc));
2598 if(cur_match > best_match) {
2599 best_match = cur_match;
2600 idx_suggest = iIndexSuggest;
2601 }
2602 return false;
2603 }
2604 }
2605
2606 /* Search for a word similar to sWord.
2607 * Return true if a similar word is found.
2608 * If a similar word is found, iWordIndex and idx_suggest point to the found word.
2609 * If a similar word is not found, idx_suggest points to the best partial match
2610 * found so far, iWordIndex does not change.
2611 * Input parameters:
 * iWordIndex must be initialized with a valid index. The value is used as a basis
2613 * for searching a similar word. iWordIndex may be INVALID_INDEX.
2614 * idx_suggest must be initialized. If it is a valid index, it participates in
2615 * searching for the best partial match. */
LookupSimilarWord(const gchar * sWord,glong & iWordIndex,glong & idx_suggest,size_t iLib,int servercollatefunc)2616 bool Libs::LookupSimilarWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
2617 {
2618 glong iIndex;
2619 bool bFound=false;
2620 gchar *casestr;
2621 gint best_match = 0;
2622
2623 if(idx_suggest != UNSET_INDEX && idx_suggest != INVALID_INDEX) {
2624 best_match = prefix_match(sWord, poGetWord(idx_suggest, iLib, servercollatefunc));
2625 }
2626
2627 if (!bFound) {
2628 // to lower case.
2629 casestr = g_utf8_strdown(sWord, -1);
2630 if (strcmp(casestr, sWord)) {
2631 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2632 bFound=true;
2633 }
2634 g_free(casestr);
2635 // to upper case.
2636 if (!bFound) {
2637 casestr = g_utf8_strup(sWord, -1);
2638 if (strcmp(casestr, sWord)) {
2639 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2640 bFound=true;
2641 }
2642 g_free(casestr);
2643 }
2644 // Upper the first character and lower others.
2645 if (!bFound) {
2646 gchar *nextchar = g_utf8_next_char(sWord);
2647 gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
2648 nextchar = g_utf8_strdown(nextchar, -1);
2649 casestr = g_strdup_printf("%s%s", firstchar, nextchar);
2650 g_free(firstchar);
2651 g_free(nextchar);
2652 if (strcmp(casestr, sWord)) {
2653 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2654 bFound=true;
2655 }
2656 g_free(casestr);
2657 }
2658 // compare with the preceding words in the index case-insensitive
2659 // iWordIndex - the base index
2660 if (!bFound) {
2661 iIndex = iWordIndex;
2662 glong pidx;
2663 const gchar *cword;
2664 do {
2665 if (GetWordPrev(iIndex, pidx, iLib, true, servercollatefunc)) {
2666 cword = poGetWord(pidx, iLib, servercollatefunc);
2667 if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2668 iIndex = pidx;
2669 bFound=true;
2670 } else {
2671 gint cur_match = prefix_match(sWord, cword);
2672 if(cur_match > best_match) {
2673 best_match = cur_match;
2674 idx_suggest = pidx;
2675 }
2676 break;
2677 }
2678 } else {
2679 break;
2680 }
2681 } while (true);
2682 if (!bFound) {
2683 if (iIndex!=INVALID_INDEX) {
2684 cword = poGetWord(iIndex, iLib, servercollatefunc);
2685 if (stardict_casecmp(cword, sWord, CollationLevel, CollateFunction, servercollatefunc)==0) {
2686 bFound=true;
2687 } else {
2688 gint cur_match = prefix_match(sWord, cword);
2689 if(cur_match > best_match) {
2690 best_match = cur_match;
2691 idx_suggest = iIndex;
2692 }
2693 }
2694 }
2695 }
2696 if(bFound) {
2697 best_match = g_utf8_strlen(poGetWord(iIndex, iLib, servercollatefunc), -1);
2698 idx_suggest = iIndex;
2699 }
2700 }
2701 }
2702
2703 if (IsASCII(sWord)) {
2704 // If not Found, try other status of sWord.
2705 size_t iWordLen=strlen(sWord);
2706 bool isupcase;
2707
2708 gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
2709
2710 //cut one char "s" or "d"
2711 if(!bFound && iWordLen>1) {
2712 isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2);
2713 if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2714 strcpy(sNewWord,sWord);
2715 sNewWord[iWordLen-1]='\0'; // cut "s" or "d"
2716 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2717 bFound=true;
2718 else if (isupcase || g_ascii_isupper(sWord[0])) {
2719 casestr = g_ascii_strdown(sNewWord, -1);
2720 if (strcmp(casestr, sNewWord)) {
2721 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2722 bFound=true;
2723 }
2724 g_free(casestr);
2725 }
2726 }
2727 }
2728
2729 //cut "ly"
2730 if(!bFound && iWordLen>2) {
2731 isupcase = !strncmp(&sWord[iWordLen-2],"LY",2);
2732 if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) {
2733 strcpy(sNewWord,sWord);
2734 sNewWord[iWordLen-2]='\0'; // cut "ly"
2735 if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4]
2736 && !bIsVowel(sNewWord[iWordLen-4]) &&
2737 bIsVowel(sNewWord[iWordLen-5])) {//doubled
2738
2739 sNewWord[iWordLen-3]='\0';
2740 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2741 bFound=true;
2742 else {
2743 if (isupcase || g_ascii_isupper(sWord[0])) {
2744 casestr = g_ascii_strdown(sNewWord, -1);
2745 if (strcmp(casestr, sNewWord)) {
2746 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2747 bFound=true;
2748 }
2749 g_free(casestr);
2750 }
2751 if (!bFound)
2752 sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore
2753 }
2754 }
2755 if (!bFound) {
2756 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2757 bFound=true;
2758 else if (isupcase || g_ascii_isupper(sWord[0])) {
2759 casestr = g_ascii_strdown(sNewWord, -1);
2760 if (strcmp(casestr, sNewWord)) {
2761 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2762 bFound=true;
2763 }
2764 g_free(casestr);
2765 }
2766 }
2767 }
2768 }
2769
2770 //cut "ing"
2771 if(!bFound && iWordLen>3) {
2772 isupcase = !strncmp(&sWord[iWordLen-3],"ING",3);
2773 if (isupcase || !strncmp(&sWord[iWordLen-3],"ing",3) ) {
2774 strcpy(sNewWord,sWord);
2775 sNewWord[iWordLen-3]='\0';
2776 if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5])
2777 && !bIsVowel(sNewWord[iWordLen-5]) &&
2778 bIsVowel(sNewWord[iWordLen-6])) { //doubled
2779 sNewWord[iWordLen-4]='\0';
2780 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2781 bFound=true;
2782 else {
2783 if (isupcase || g_ascii_isupper(sWord[0])) {
2784 casestr = g_ascii_strdown(sNewWord, -1);
2785 if (strcmp(casestr, sNewWord)) {
2786 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2787 bFound=true;
2788 }
2789 g_free(casestr);
2790 }
2791 if (!bFound)
2792 sNewWord[iWordLen-4]=sNewWord[iWordLen-5]; //restore
2793 }
2794 }
2795 if( !bFound ) {
2796 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2797 bFound=true;
2798 else if (isupcase || g_ascii_isupper(sWord[0])) {
2799 casestr = g_ascii_strdown(sNewWord, -1);
2800 if (strcmp(casestr, sNewWord)) {
2801 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2802 bFound=true;
2803 }
2804 g_free(casestr);
2805 }
2806 }
2807 if(!bFound) {
2808 if (isupcase)
2809 strcat(sNewWord,"E"); // add a char "E"
2810 else
2811 strcat(sNewWord,"e"); // add a char "e"
2812 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2813 bFound=true;
2814 else if (isupcase || g_ascii_isupper(sWord[0])) {
2815 casestr = g_ascii_strdown(sNewWord, -1);
2816 if (strcmp(casestr, sNewWord)) {
2817 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2818 bFound=true;
2819 }
2820 g_free(casestr);
2821 }
2822 }
2823 }
2824 }
2825
2826 //cut two char "es"
2827 if(!bFound && iWordLen>3) {
2828 isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) &&
2829 (sWord[iWordLen-3] == 'S' ||
2830 sWord[iWordLen-3] == 'X' ||
2831 sWord[iWordLen-3] == 'O' ||
2832 (iWordLen >4 && sWord[iWordLen-3] == 'H' &&
2833 (sWord[iWordLen-4] == 'C' ||
2834 sWord[iWordLen-4] == 'S'))));
2835 if (isupcase ||
2836 (!strncmp(&sWord[iWordLen-2],"es",2) &&
2837 (sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' ||
2838 sWord[iWordLen-3] == 'o' ||
2839 (iWordLen >4 && sWord[iWordLen-3] == 'h' &&
2840 (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) {
2841 strcpy(sNewWord,sWord);
2842 sNewWord[iWordLen-2]='\0';
2843 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2844 bFound=true;
2845 else if (isupcase || g_ascii_isupper(sWord[0])) {
2846 casestr = g_ascii_strdown(sNewWord, -1);
2847 if (strcmp(casestr, sNewWord)) {
2848 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2849 bFound=true;
2850 }
2851 g_free(casestr);
2852 }
2853 }
2854 }
2855
2856 //cut "ed"
2857 if (!bFound && iWordLen>3) {
2858 isupcase = !strncmp(&sWord[iWordLen-2],"ED",2);
2859 if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) {
2860 strcpy(sNewWord,sWord);
2861 sNewWord[iWordLen-2]='\0';
2862 if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4])
2863 && !bIsVowel(sNewWord[iWordLen-4]) &&
2864 bIsVowel(sNewWord[iWordLen-5])) {//doubled
2865 sNewWord[iWordLen-3]='\0';
2866 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2867 bFound=true;
2868 else {
2869 if (isupcase || g_ascii_isupper(sWord[0])) {
2870 casestr = g_ascii_strdown(sNewWord, -1);
2871 if (strcmp(casestr, sNewWord)) {
2872 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2873 bFound=true;
2874 }
2875 g_free(casestr);
2876 }
2877 if (!bFound)
2878 sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore
2879 }
2880 }
2881 if (!bFound) {
2882 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2883 bFound=true;
2884 else if (isupcase || g_ascii_isupper(sWord[0])) {
2885 casestr = g_ascii_strdown(sNewWord, -1);
2886 if (strcmp(casestr, sNewWord)) {
2887 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2888 bFound=true;
2889 }
2890 g_free(casestr);
2891 }
2892 }
2893 }
2894 }
2895
2896 // cut "ied" , add "y".
2897 if (!bFound && iWordLen>3) {
2898 isupcase = !strncmp(&sWord[iWordLen-3],"IED",3);
2899 if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) {
2900 strcpy(sNewWord,sWord);
2901 sNewWord[iWordLen-3]='\0';
2902 if (isupcase)
2903 strcat(sNewWord,"Y"); // add a char "Y"
2904 else
2905 strcat(sNewWord,"y"); // add a char "y"
2906 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2907 bFound=true;
2908 else if (isupcase || g_ascii_isupper(sWord[0])) {
2909 casestr = g_ascii_strdown(sNewWord, -1);
2910 if (strcmp(casestr, sNewWord)) {
2911 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2912 bFound=true;
2913 }
2914 g_free(casestr);
2915 }
2916 }
2917 }
2918
2919 // cut "ies" , add "y".
2920 if (!bFound && iWordLen>3) {
2921 isupcase = !strncmp(&sWord[iWordLen-3],"IES",3);
2922 if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) {
2923 strcpy(sNewWord,sWord);
2924 sNewWord[iWordLen-3]='\0';
2925 if (isupcase)
2926 strcat(sNewWord,"Y"); // add a char "Y"
2927 else
2928 strcat(sNewWord,"y"); // add a char "y"
2929 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2930 bFound=true;
2931 else if (isupcase || g_ascii_isupper(sWord[0])) {
2932 casestr = g_ascii_strdown(sNewWord, -1);
2933 if (strcmp(casestr, sNewWord)) {
2934 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2935 bFound=true;
2936 }
2937 g_free(casestr);
2938 }
2939 }
2940 }
2941
2942 // cut "er".
2943 if (!bFound && iWordLen>2) {
2944 isupcase = !strncmp(&sWord[iWordLen-2],"ER",2);
2945 if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) {
2946 strcpy(sNewWord,sWord);
2947 sNewWord[iWordLen-2]='\0';
2948 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2949 bFound=true;
2950 else if (isupcase || g_ascii_isupper(sWord[0])) {
2951 casestr = g_ascii_strdown(sNewWord, -1);
2952 if (strcmp(casestr, sNewWord)) {
2953 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2954 bFound=true;
2955 }
2956 g_free(casestr);
2957 }
2958 }
2959 }
2960
2961 // cut "est".
2962 if (!bFound && iWordLen>3) {
2963 isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3);
2964 if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) {
2965 strcpy(sNewWord,sWord);
2966 sNewWord[iWordLen-3]='\0';
2967 if(LookupSimilarWordTryWord(sNewWord, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2968 bFound=true;
2969 else if (isupcase || g_ascii_isupper(sWord[0])) {
2970 casestr = g_ascii_strdown(sNewWord, -1);
2971 if (strcmp(casestr, sNewWord)) {
2972 if(LookupSimilarWordTryWord(casestr, sWord, servercollatefunc, iLib, iIndex, idx_suggest, best_match))
2973 bFound=true;
2974 }
2975 g_free(casestr);
2976 }
2977 }
2978 }
2979
2980 g_free(sNewWord);
2981 }
2982
2983 if (bFound)
2984 iWordIndex = iIndex;
2985 #if 0
2986 else {
2987 //don't change iWordIndex here.
2988 //when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words.
2989 //iWordIndex = INVALID_INDEX;
2990 }
2991 #endif
2992 return bFound;
2993 }
2994
SimpleLookupWord(const gchar * sWord,glong & iWordIndex,glong & idx_suggest,size_t iLib,int servercollatefunc)2995 bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, glong &idx_suggest, size_t iLib, int servercollatefunc)
2996 {
2997 bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex, idx_suggest, CollationLevel, servercollatefunc);
2998 if (!bFound)
2999 bFound = LookupSimilarWord(sWord, iWordIndex, idx_suggest, iLib, servercollatefunc);
3000 return bFound;
3001 }
3002
SimpleLookupSynonymWord(const gchar * sWord,glong & iWordIndex,glong & synidx_suggest,size_t iLib,int servercollatefunc)3003 bool Libs::SimpleLookupSynonymWord(const gchar* sWord, glong & iWordIndex, glong &synidx_suggest, size_t iLib, int servercollatefunc)
3004 {
3005 bool bFound = oLib[iLib]->LookupSynonym(sWord, iWordIndex, synidx_suggest, CollationLevel, servercollatefunc);
3006 if (!bFound)
3007 bFound = LookupSynonymSimilarWord(sWord, iWordIndex, synidx_suggest, iLib, servercollatefunc);
3008 return bFound;
3009 }
3010
/* One slot of the candidate table used by the fuzzy lookup. */
struct Fuzzystruct {
	// Candidate word held by this slot; NULL while the slot is unused.
	char *pMatchWord;
	// Edit distance recorded for pMatchWord.
	int iMatchWordDistance;
};
3015
operator <(const Fuzzystruct & lh,const Fuzzystruct & rh)3016 static inline bool operator<(const Fuzzystruct & lh, const Fuzzystruct & rh) {
3017 if (lh.iMatchWordDistance!=rh.iMatchWordDistance)
3018 return lh.iMatchWordDistance<rh.iMatchWordDistance;
3019
3020 if (lh.pMatchWord && rh.pMatchWord)
3021 return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0;
3022
3023 return false;
3024 }
3025
unicode_strdown(gunichar * str)3026 static inline void unicode_strdown(gunichar *str)
3027 {
3028 while (*str) {
3029 *str=g_unichar_tolower(*str);
3030 ++str;
3031 }
3032 }
3033
LookupWithFuzzy(const gchar * sWord,gchar * reslist[],gint reslist_size,std::vector<InstantDictIndex> & dictmask)3034 bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size, std::vector<InstantDictIndex> &dictmask)
3035 {
3036 if (sWord[0] == '\0')
3037 return false;
3038
3039 std::vector<Fuzzystruct> oFuzzystruct(reslist_size);
3040
3041 for (int i=0; i<reslist_size; i++) {
3042 oFuzzystruct[i].pMatchWord = NULL;
3043 oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance;
3044 }
3045 int iMaxDistance = iMaxFuzzyDistance;
3046 int iDistance;
3047 bool Found = false;
3048 EditDistance oEditDistance;
3049
3050 glong iCheckWordLen;
3051 const char *sCheck;
3052 gunichar *ucs4_str1, *ucs4_str2;
3053 glong ucs4_str2_len;
3054
3055 ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len);
3056 unicode_strdown(ucs4_str2);
3057
3058 std::vector<Dict *>::size_type iRealLib;
3059 for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3060 if (dictmask[iLib].type != InstantDictType_LOCAL)
3061 continue;
3062 iRealLib = dictmask[iLib].index;
3063 for (gint synLib=0; synLib<2; synLib++) {
3064 if (synLib==1) {
3065 if (oLib[iRealLib]->syn_file.get()==NULL)
3066 break;
3067 }
3068 show_progress->notify_about_work();
3069
3070 //if (stardict_strcmp(sWord, poGetWord(0,iRealLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iRealLib)-1,iRealLib))<=0) {
3071 //there are Chinese dicts and English dicts...
3072 if (TRUE) {
3073 glong iwords;
3074 if (synLib==0)
3075 iwords = narticles(iRealLib);
3076 else
3077 iwords = nsynarticles(iRealLib);
3078 for (glong index=0; index<iwords; index++) {
3079 // Need to deal with same word in index? But this will slow down processing in most case.
3080 if (synLib==0)
3081 sCheck = poGetOrigWord(index,iRealLib);
3082 else
3083 sCheck = poGetOrigSynonymWord(index,iRealLib);
3084 // tolower and skip too long or too short words
3085 iCheckWordLen = g_utf8_strlen(sCheck, -1);
3086 if (iCheckWordLen-ucs4_str2_len>=iMaxDistance ||
3087 ucs4_str2_len-iCheckWordLen>=iMaxDistance)
3088 continue;
3089 ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, NULL);
3090 if (iCheckWordLen > ucs4_str2_len)
3091 ucs4_str1[ucs4_str2_len]=0;
3092 unicode_strdown(ucs4_str1);
3093
3094 iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance);
3095 g_free(ucs4_str1);
3096 if (iDistance<iMaxDistance && iDistance < ucs4_str2_len) {
3097 // when ucs4_str2_len=1,2 we need less fuzzy.
3098 Found = true;
3099 bool bAlreadyInList = false;
3100 int iMaxDistanceAt=0;
3101 for (int j=0; j<reslist_size; j++) {
3102 if (oFuzzystruct[j].pMatchWord &&
3103 strcmp(oFuzzystruct[j].pMatchWord,sCheck)==0 ) {//already in list
3104 bAlreadyInList = true;
3105 break;
3106 }
3107 //find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
3108 if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance ) {
3109 iMaxDistanceAt = j;
3110 }
3111 }
3112 if (!bAlreadyInList) {
3113 if (oFuzzystruct[iMaxDistanceAt].pMatchWord)
3114 g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord);
3115 oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck);
3116 oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance;
3117 // calc new iMaxDistance
3118 iMaxDistance = iDistance;
3119 for (int j=0; j<reslist_size; j++) {
3120 if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance)
3121 iMaxDistance = oFuzzystruct[j].iMatchWordDistance;
3122 } // calc new iMaxDistance
3123 } // add to list
3124 } // find one
3125 } // each word
3126 } // ok for search
3127 } // synLib
3128 } // each lib
3129 g_free(ucs4_str2);
3130
3131 if (Found)// sort with distance
3132 std::sort(oFuzzystruct.begin(), oFuzzystruct.end());
3133
3134 for (gint i=0; i<reslist_size; ++i)
3135 reslist[i]=oFuzzystruct[i].pMatchWord;
3136
3137 return Found;
3138 }
3139
less_for_compare(const char * lh,const char * rh)3140 static inline bool less_for_compare(const char *lh, const char *rh) {
3141 return stardict_strcmp(lh, rh)<0;
3142 }
3143
LookupWithRule(const gchar * word,gchar ** ppMatchWord,std::vector<InstantDictIndex> & dictmask)3144 gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
3145 {
3146 glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3147 gint iMatchCount = 0;
3148 GPatternSpec *pspec = g_pattern_spec_new(word);
3149
3150 const gchar * sMatchWord;
3151 bool bAlreadyInList;
3152 std::vector<Dict *>::size_type iRealLib;
3153 for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3154 //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3155 // -iMatchCount,so save time,but may got less result and the word may repeat.
3156 if (dictmask[iLib].type != InstantDictType_LOCAL)
3157 continue;
3158 iRealLib = dictmask[iLib].index;
3159 if (oLib[iRealLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3160 show_progress->notify_about_work();
3161 for (int i=0; aiIndex[i]!=-1; i++) {
3162 sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3163 bAlreadyInList = false;
3164 for (int j=0; j<iMatchCount; j++) {
3165 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3166 bAlreadyInList = true;
3167 break;
3168 }
3169 }
3170 if (!bAlreadyInList)
3171 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3172 }
3173 }
3174 if (oLib[iRealLib]->LookupWithRuleSynonym(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3175 show_progress->notify_about_work();
3176 for (int i=0; aiIndex[i]!=-1; i++) {
3177 sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3178 bAlreadyInList = false;
3179 for (int j=0; j<iMatchCount; j++) {
3180 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3181 bAlreadyInList = true;
3182 break;
3183 }
3184 }
3185 if (!bAlreadyInList)
3186 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3187 }
3188 }
3189 }
3190 g_pattern_spec_free(pspec);
3191
3192 if (iMatchCount)// sort it.
3193 std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
3194 return iMatchCount;
3195 }
3196
LookupWithRegex(const gchar * word,gchar ** ppMatchWord,std::vector<InstantDictIndex> & dictmask)3197 gint Libs::LookupWithRegex(const gchar *word, gchar **ppMatchWord, std::vector<InstantDictIndex> &dictmask)
3198 {
3199 glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1];
3200 gint iMatchCount = 0;
3201 GRegex *regex = g_regex_new(word, G_REGEX_OPTIMIZE, (GRegexMatchFlags)0, NULL);
3202
3203 const gchar * sMatchWord;
3204 bool bAlreadyInList;
3205 std::vector<Dict *>::size_type iRealLib;
3206 for (std::vector<InstantDictIndex>::size_type iLib=0; iLib<dictmask.size(); iLib++) {
3207 //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib))
3208 // -iMatchCount,so save time,but may got less result and the word may repeat.
3209 if (dictmask[iLib].type != InstantDictType_LOCAL)
3210 continue;
3211 iRealLib = dictmask[iLib].index;
3212 if (oLib[iRealLib]->LookupWithRegex(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3213 show_progress->notify_about_work();
3214 for (int i=0; aiIndex[i]!=-1; i++) {
3215 sMatchWord = poGetOrigWord(aiIndex[i],iRealLib);
3216 bAlreadyInList = false;
3217 for (int j=0; j<iMatchCount; j++) {
3218 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3219 bAlreadyInList = true;
3220 break;
3221 }
3222 }
3223 if (!bAlreadyInList)
3224 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3225 }
3226 }
3227 if (oLib[iRealLib]->LookupWithRegexSynonym(regex, aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) {
3228 show_progress->notify_about_work();
3229 for (int i=0; aiIndex[i]!=-1; i++) {
3230 sMatchWord = poGetOrigSynonymWord(aiIndex[i],iRealLib);
3231 bAlreadyInList = false;
3232 for (int j=0; j<iMatchCount; j++) {
3233 if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list
3234 bAlreadyInList = true;
3235 break;
3236 }
3237 }
3238 if (!bAlreadyInList)
3239 ppMatchWord[iMatchCount++] = g_strdup(sMatchWord);
3240 }
3241 }
3242 }
3243 g_regex_unref(regex);
3244
3245 if (iMatchCount)// sort it.
3246 std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare);
3247 return iMatchCount;
3248 }
3249
LookupData(const gchar * sWord,std::vector<gchar * > * reslist,updateSearchDialog_func search_func,gpointer search_data,bool * cancel,std::vector<InstantDictIndex> & dictmask)3250 bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist, updateSearchDialog_func search_func, gpointer search_data, bool *cancel, std::vector<InstantDictIndex> &dictmask)
3251 {
3252 std::vector<std::string> SearchWords;
3253 std::string SearchWord;
3254 const char *p=sWord;
3255 while (*p) {
3256 if (*p=='\\') {
3257 p++;
3258 switch (*p) {
3259 case ' ':
3260 SearchWord+=' ';
3261 break;
3262 case '\\':
3263 SearchWord+='\\';
3264 break;
3265 case 't':
3266 SearchWord+='\t';
3267 break;
3268 case 'n':
3269 SearchWord+='\n';
3270 break;
3271 default:
3272 SearchWord+=*p;
3273 }
3274 } else if (*p == ' ') {
3275 if (!SearchWord.empty()) {
3276 SearchWords.push_back(SearchWord);
3277 SearchWord.clear();
3278 }
3279 } else {
3280 SearchWord+=*p;
3281 }
3282 p++;
3283 }
3284 if (!SearchWord.empty()) {
3285 SearchWords.push_back(SearchWord);
3286 SearchWord.clear();
3287 }
3288 if (SearchWords.empty())
3289 return false;
3290
3291 glong search_count=0;
3292 glong total_count=0;
3293 if (search_func) {
3294 for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3295 if (dictmask[i].type == InstantDictType_LOCAL)
3296 total_count += narticles(dictmask[i].index);
3297 }
3298 }
3299
3300 guint32 max_size =0;
3301 gchar *origin_data = NULL;
3302 std::vector<InstantDictIndex>::size_type iRealLib;
3303 for (std::vector<InstantDictIndex>::size_type i=0; i<dictmask.size(); ++i) {
3304 if (dictmask[i].type != InstantDictType_LOCAL)
3305 continue;
3306 iRealLib = dictmask[i].index;
3307 if (!oLib[iRealLib]->containSearchData())
3308 continue;
3309 const gulong iwords = narticles(iRealLib);
3310 const gchar *key;
3311 guint32 offset, size;
3312 for (gulong j=0; j<iwords; ++j) {
3313 if (search_func) {
3314 if (*cancel)
3315 goto search_out;
3316 if (search_count % 10000 == 0) {
3317 search_func(search_data, (gdouble)search_count/(gdouble)total_count);
3318 }
3319 search_count++;
3320 }
3321 oLib[iRealLib]->get_key_and_data(j, &key, &offset, &size);
3322 if (size>max_size) {
3323 origin_data = (gchar *)g_realloc(origin_data, size);
3324 max_size = size;
3325 }
3326 if (oLib[iRealLib]->SearchData(SearchWords, offset, size, origin_data)) {
3327 if (reslist[i].empty() || strcmp(reslist[i].back(), key))
3328 reslist[i].push_back(g_strdup(key));
3329 }
3330 }
3331 }
3332 search_out:
3333 g_free(origin_data);
3334 //KMP_end();
3335
3336 std::vector<InstantDictIndex>::size_type i;
3337 for (i=0; i<dictmask.size(); ++i)
3338 if (!reslist[i].empty())
3339 break;
3340
3341 return i!=dictmask.size();
3342 }
3343
GetStorageType(size_t iLib)3344 StorageType Libs::GetStorageType(size_t iLib)
3345 {
3346 if (oLib[iLib]->storage == NULL)
3347 return StorageType_UNKNOWN;
3348 return oLib[iLib]->storage->get_storage_type();
3349 }
3350
GetStorageFilePath(size_t iLib,const std::string & key)3351 FileHolder Libs::GetStorageFilePath(size_t iLib, const std::string &key)
3352 {
3353 if (oLib[iLib]->storage == NULL)
3354 return FileHolder();
3355 return oLib[iLib]->storage->get_file_path(key);
3356 }
3357
GetStorageFileContent(size_t iLib,const std::string & key)3358 const char *Libs::GetStorageFileContent(size_t iLib, const std::string &key)
3359 {
3360 if (oLib[iLib]->storage == NULL)
3361 return NULL;
3362 return oLib[iLib]->storage->get_file_content(key);
3363 }
3364
init_collations()3365 void Libs::init_collations()
3366 {
3367 if (CollationLevel == CollationLevel_SINGLE) {
3368 if (utf8_collate_init(CollateFunction))
3369 g_print("Init collate function failed!\n");
3370 } else if (CollationLevel == CollationLevel_MULTI){
3371 if (utf8_collate_init_all())
3372 g_print("Init collate functions failed!\n");
3373 }
3374 }
3375
free_collations()3376 void Libs::free_collations()
3377 {
3378 if(CollationLevel == CollationLevel_SINGLE)
3379 utf8_collate_end(CollateFunction);
3380 else if(CollationLevel == CollationLevel_MULTI)
3381 utf8_collate_end_all();
3382 }
3383
ValidateCollateParams(CollationLevelType & level,CollateFunctions & func)3384 void Libs::ValidateCollateParams(CollationLevelType& level, CollateFunctions& func)
3385 {
3386 if(level == CollationLevel_SINGLE) {
3387 if(func == COLLATE_FUNC_NONE) {
3388 g_print(_("Invalid collate function. Disable collation."));
3389 level = CollationLevel_NONE;
3390 }
3391 } else {
3392 func = COLLATE_FUNC_NONE;
3393 }
3394 }
3395