1 /** @file scim_pinyin_phrase.h
2  * definition of PinyinPhrase, PinyinPhraseLib and related classes.
3  */
4 
5 /*
6  * Smart Pinyin Input Method
7  *
8  * Copyright (c) 2005 James Su <suzhe@tsinghua.org.cn>
9  *
10  * $Id: scim_pinyin_phrase.h,v 1.2 2006/01/13 06:31:46 suzhe Exp $
11  *
12  */
13 
14 #if !defined (__SCIM_PINYIN_PHRASE_H)
15 #define __SCIM_PINYIN_PHRASE_H
16 
17 using namespace scim;
18 
19 class PinyinPhrase;
20 class PinyinPhraseLib;
21 
22 class PinyinPhraseLessThan;
23 class PinyinPhraseEqualTo;
24 
25 class PinyinPhraseLessThanByOffset;
26 class PinyinPhraseEqualToByOffset;
27 class PinyinPhraseLessThanByOffsetSP;
28 class PinyinPhraseEqualToByOffsetSP;
29 
30 typedef std::pair <uint32, uint32> PinyinPhraseOffsetPair;
31 typedef std::vector <PinyinPhraseOffsetPair> PinyinPhraseOffsetVector;
32 
33 typedef bool (*PinyinPhraseValidatorFunc) (const PinyinPhrase &phrase);
34 
35 //////////////////////////////////////////////////////////////////////////////
36 //declaration of PinyinPhrase class
37 class PinyinPhrase
38 {
39 	PinyinPhraseLib *m_lib;
40 
41 	uint32 m_phrase_offset;
42 	uint32 m_pinyin_offset;
43 
44 public:
PinyinPhrase(PinyinPhraseLib * lib,uint32 phrase_offset,uint32 pinyin_offset)45 	PinyinPhrase (PinyinPhraseLib *lib,
46 				  uint32 phrase_offset,
47 				  uint32 pinyin_offset)
48 		: m_lib (lib),
49 		  m_phrase_offset (phrase_offset),
50 		  m_pinyin_offset (pinyin_offset) { }
51 
52 	bool valid () const;
53 
is_enable()54 	bool is_enable () const { return valid () && get_phrase ().is_enable (); }
55 
56 	uint32 check_attribute (uint32 attr = SCIM_PHRASE_ATTR_MASK_ALL) const {
57 		return get_phrase ().check_attribute (attr);
58 	}
59 
toggle_attribute(uint32 attr,uint32 value)60 	void toggle_attribute (uint32 attr, uint32 value) {
61 		get_phrase ().toggle_attribute (attr, value);
62 	}
63 
length()64 	uint32 length () const { return get_phrase ().length (); }
65 
frequency()66 	uint32 frequency () const { return get_phrase ().frequency (); }
67 
refresh(uint32 shift)68 	void refresh (uint32 shift) { get_phrase ().refresh (shift); }
69 
70 	void set_frequency (uint32 freq = 0) { get_phrase ().set_frequency (freq); }
71 
72 	ucs4_t operator [] (uint32 index) const { return get_phrase () [index]; }
73 
enable()74 	void enable () { get_phrase ().enable (); }
75 
disable()76 	void disable () { get_phrase ().disable (); }
77 
Phrase()78 	operator Phrase () const { return get_phrase (); }
79 
get_lib()80 	const PinyinPhraseLib * get_lib () const { return m_lib; }
81 
get_phrase_offset()82 	uint32 get_phrase_offset () const { return m_phrase_offset; }
83 
get_pinyin_offset()84 	uint32 get_pinyin_offset () const { return m_pinyin_offset; }
85 
86 	PinyinKey get_key (uint32 index) const;
87 
88 	Phrase get_phrase () const;
89 };
90 
91 //////////////////////////////////////////////////////////////////////////////
//declaration of PinyinPhrase comparison classes
93 class PinyinPhraseLessThan
94 	: public std::binary_function <PinyinPhrase, PinyinPhrase, bool>
95 {
96 	PinyinKeyLessThan m_less;
97 public:
PinyinPhraseLessThan(const PinyinCustomSettings & custom)98 	PinyinPhraseLessThan (const PinyinCustomSettings &custom)
99 		: m_less (custom) {}
100 
PinyinPhraseLessThan(const PinyinKeyLessThan & le)101 	PinyinPhraseLessThan (const PinyinKeyLessThan &le)
102 		: m_less (le) {}
103 
104 	bool operator () (const PinyinPhrase &lhs, const PinyinPhrase &rhs) const;
105 };
106 
107 class PinyinPhraseEqualTo
108 	: public std::binary_function <PinyinPhrase, PinyinPhrase, bool>
109 {
110 	PinyinKeyEqualTo m_equal;
111 public:
PinyinPhraseEqualTo(const PinyinCustomSettings & custom)112 	PinyinPhraseEqualTo (const PinyinCustomSettings &custom)
113 		: m_equal (custom) {}
114 
PinyinPhraseEqualTo(const PinyinKeyEqualTo & eq)115 	PinyinPhraseEqualTo (const PinyinKeyEqualTo &eq)
116 		: m_equal (eq) {}
117 
118 	bool operator () (const PinyinPhrase &lhs, const PinyinPhrase &rhs) const;
119 };
120 
121 class PinyinPhraseLessThanByOffset
122 	: public std::binary_function < std::pair <uint32, uint32>,
123 									std::pair <uint32, uint32>, bool >
124 {
125 	PinyinPhraseLib *m_lib;
126 	PinyinKeyLessThan m_less;
127 public:
PinyinPhraseLessThanByOffset(PinyinPhraseLib * lib,const PinyinCustomSettings & custom)128 	PinyinPhraseLessThanByOffset (PinyinPhraseLib *lib,
129 								  const PinyinCustomSettings &custom)
130 		: m_lib (lib), m_less (custom) { }
131 
PinyinPhraseLessThanByOffset(PinyinPhraseLib * lib,const PinyinKeyLessThan & le)132 	PinyinPhraseLessThanByOffset (PinyinPhraseLib *lib,
133 								  const PinyinKeyLessThan &le)
134 		: m_lib (lib), m_less (le) { }
135 
136 	bool operator () (const std::pair <uint32, uint32> & lhs,
137 					  const std::pair <uint32, uint32> & rhs) const;
138 };
139 
140 class PinyinPhraseEqualToByOffset
141 	: public std::binary_function < std::pair <uint32, uint32>,
142 									std::pair <uint32, uint32>, bool >
143 {
144 	PinyinPhraseLib *m_lib;
145 	PinyinKeyEqualTo m_equal;
146 public:
PinyinPhraseEqualToByOffset(PinyinPhraseLib * lib,const PinyinCustomSettings & custom)147 	PinyinPhraseEqualToByOffset (PinyinPhraseLib *lib,
148 								 const PinyinCustomSettings &custom)
149 		: m_lib (lib), m_equal (custom) { }
150 
PinyinPhraseEqualToByOffset(PinyinPhraseLib * lib,const PinyinKeyEqualTo & eq)151 	PinyinPhraseEqualToByOffset (PinyinPhraseLib *lib,
152 								 const PinyinKeyEqualTo &eq)
153 		: m_lib (lib), m_equal (eq) { }
154 
155 	bool operator () (const std::pair <uint32, uint32> & lhs,
156 					  const std::pair <uint32, uint32> & rhs) const;
157 };
158 
159 class PinyinPhrasePinyinLessThanByOffset
160 	: public std::binary_function < std::pair <uint32, uint32>,
161 									std::pair <uint32, uint32>, bool >
162 {
163 	PinyinPhraseLib *m_lib;
164 	PinyinKeyLessThan m_less;
165 public:
PinyinPhrasePinyinLessThanByOffset(PinyinPhraseLib * lib,const PinyinKeyLessThan & le)166 	PinyinPhrasePinyinLessThanByOffset (PinyinPhraseLib *lib,
167 										const PinyinKeyLessThan &le)
168 		: m_lib (lib), m_less (le) { }
169 
PinyinPhrasePinyinLessThanByOffset(PinyinPhraseLib * lib,const PinyinCustomSettings & custom)170 	PinyinPhrasePinyinLessThanByOffset (PinyinPhraseLib *lib,
171 										const PinyinCustomSettings &custom)
172 		: m_lib (lib), m_less (custom) { }
173 
174 	bool operator () (const std::pair <uint32, uint32> & lhs,
175 					  const std::pair <uint32, uint32> & rhs) const;
176 };
177 
178 class PinyinPhrasePhraseLessThanByOffset
179 	: public std::binary_function < std::pair <uint32, uint32>,
180 									std::pair <uint32, uint32>, bool >
181 {
182 	PinyinPhraseLib *m_lib;
183 public:
PinyinPhrasePhraseLessThanByOffset(PinyinPhraseLib * lib)184 	PinyinPhrasePhraseLessThanByOffset (PinyinPhraseLib *lib)
185 		: m_lib (lib) { }
186 
187 	bool operator () (const std::pair <uint32, uint32> & lhs,
188 					  const std::pair <uint32, uint32> & rhs) const;
189 };
190 
191 //a PinyinPhraseVector with a PinyinKey
192 class PinyinPhraseEntry
193 {
194 	struct PinyinPhraseEntryImpl
195 	{
196 		PinyinKey m_key;
197 		PinyinPhraseOffsetVector m_phrases;
198 		int m_ref;
199 
PinyinPhraseEntryImplPinyinPhraseEntryImpl200 		PinyinPhraseEntryImpl (PinyinKey key, const PinyinPhraseOffsetVector &vec)
201 			: m_key (key), m_phrases (vec), m_ref (1) { }
202 
refPinyinPhraseEntryImpl203 		void ref () { m_ref ++; }
unrefPinyinPhraseEntryImpl204 		void unref () { if ((--m_ref) == 0) delete this; }
205 	};
206 
207 	PinyinPhraseEntryImpl *m_impl;
208 
209 public:
PinyinPhraseEntry()210 	PinyinPhraseEntry () {
211 		m_impl = new PinyinPhraseEntryImpl (PinyinKey (), PinyinPhraseOffsetVector ());
212 	}
213 
PinyinPhraseEntry(PinyinKey key)214 	PinyinPhraseEntry (PinyinKey key) {
215 		m_impl = new PinyinPhraseEntryImpl (key, PinyinPhraseOffsetVector ());
216 	}
217 
PinyinPhraseEntry(PinyinKey key,const PinyinPhraseOffsetVector & vec)218 	PinyinPhraseEntry (PinyinKey key, const PinyinPhraseOffsetVector &vec) {
219 		m_impl = new PinyinPhraseEntryImpl (key, vec);
220 	}
221 
PinyinPhraseEntry(const PinyinPhraseEntry & entry)222 	PinyinPhraseEntry (const PinyinPhraseEntry &entry) {
223 		m_impl = entry.m_impl;
224 		m_impl->ref ();
225 	}
226 
~PinyinPhraseEntry()227 	~PinyinPhraseEntry () {
228 		m_impl->unref ();
229 	}
230 
231 	PinyinPhraseEntry& operator = (const PinyinPhraseEntry &entry) {
232 		if (this != &entry) {
233 			m_impl->unref ();
234 			m_impl = entry.m_impl;
235 			m_impl->ref ();
236 		}
237 		return *this;
238 	}
239 
PinyinKey()240 	operator PinyinKey () const { return m_impl->m_key; }
241 
get_vector()242 	PinyinPhraseOffsetVector & get_vector () {
243 		if (m_impl->m_ref > 1) {
244 			PinyinPhraseEntryImpl *impl =
245 				new PinyinPhraseEntryImpl (m_impl->m_key, m_impl->m_phrases);
246 			m_impl->unref ();
247 			m_impl = impl;
248 		}
249 		return m_impl->m_phrases;
250 	}
251 
compact_memory()252 	void compact_memory () {
253 		if (m_impl)
254 			PinyinPhraseOffsetVector (m_impl->m_phrases).swap (m_impl->m_phrases);
255 	}
256 };
257 
258 //Definition of PinyinPhraseLib
259 class PinyinPhraseLib
260 {
261 	typedef std::vector<PinyinPhraseEntry> PinyinPhraseTable;
262 
263 	PinyinTable                  *m_pinyin_table;
264 	const PinyinValidator        *m_validator;
265 
266 	PinyinKeyLessThan             m_pinyin_key_less;
267 	PinyinKeyEqualTo              m_pinyin_key_equal;
268 
269 	PinyinPhraseLessThanByOffset  m_pinyin_phrase_less_by_offset;
270 	PinyinPhraseEqualToByOffset   m_pinyin_phrase_equal_by_offset;
271 
272 	PinyinKeyVector               m_pinyin_lib;
273 
274 	//to speed up phrase looking up, the phrases are divided to several groups
275 	//the first level of group is divided by phrase length, this level uses vector.
276 	//the second level is divided by the PinyinKey of the first Hanzi, this level uses map.
277 	PinyinPhraseTable m_phrases [SCIM_PHRASE_MAX_LENGTH];
278 
279 	PhraseLib                     m_phrase_lib;
280 
281 	friend class PinyinPhrase;
282 	friend class PinyinPhraseLessThanByOffset;
283 	friend class PinyinPhraseEqualToByOffset;
284 	friend class PinyinPhraseLessThanByOffsetSP;
285 	friend class PinyinPhraseEqualToByOffsetSP;
286 
287 	friend class PinyinPhrasePinyinLessThanByOffset;
288 	friend class PinyinPhrasePhraseLessThanByOffset;
289 public:
290 	PinyinPhraseLib (const PinyinCustomSettings &custom,
291 					 const PinyinValidator *validator,
292 					 PinyinTable *pinyin_table,
293 					 const char *libfile = NULL,
294 					 const char *pylibfile = NULL,
295 					 const char *idxfile = NULL);
296 	PinyinPhraseLib (const PinyinCustomSettings &custom,
297 					 const PinyinValidator *validator,
298 					 PinyinTable *pinyin_table,
299 					 std::istream &is_lib,
300 					 std::istream &is_pylib,
301 					 std::istream &is_idx);
302 
303 	bool output (std::ostream &os_lib, std::ostream &os_pylib, std::ostream &os_idx, bool binary = false);
304 	bool input (std::istream &is_lib, std::istream &is_pylib, std::istream &is_idx);
305 	bool input (std::istream &is_lib);
306 
307 	bool load_lib (const char *libfile, const char *pylibfile = NULL, const char *idxfile = NULL);
308 	bool save_lib (const char *libfile, const char *pylibfile = NULL, const char *idxfile = NULL, bool binary = false);
309 
valid()310 	bool valid () const {
311 		return m_validator != NULL && m_pinyin_table != NULL;
312 	}
313 
314 	void update_custom_settings (const PinyinCustomSettings &custom,
315 								 const PinyinValidator *validator);
316 
317 	// find all phrases according to the key string
318 	// if noshorter == true then do not find the phrases shorter than keys
319 	// if nolonger == true than do not find the phrases longer than keys
320 	int find_phrases (PhraseVector &vec,
321 					  const char *keys,
322 					  bool noshorter = false,
323 					  bool nolonger = false);
324 
325 	int find_phrases (PhraseVector &vec,
326 					  const PinyinKeyVector &keys,
327 					  bool noshorter = false,
328 					  bool nolonger = false);
329 
330 	int find_phrases (PhraseVector &vec,
331 					  const PinyinParsedKeyVector &keys,
332 					  bool noshorter = false,
333 					  bool nolonger = false);
334 
335 	int find_phrases (PhraseVector &vec,
336 					  const PinyinKeyVector::const_iterator &begin,
337 					  const PinyinKeyVector::const_iterator &end,
338 					  int minlen = 1,
339 					  int maxlen = -1);
340 
341 	int find_phrases (PhraseVector &vec,
342 					  const PinyinParsedKeyVector::const_iterator &begin,
343 					  const PinyinParsedKeyVector::const_iterator &end,
344 					  int minlen = 1,
345 					  int maxlen = -1);
346 
347 	void refresh (const Phrase &phrase, uint32 shift = 24) {
348 		m_phrase_lib.refresh (phrase, shift);
349 	}
350 
number_of_phrases()351 	uint32 number_of_phrases () const {
352 		return m_phrase_lib.number_of_phrases ();
353 	}
354 
clear()355 	void clear () {
356 		clear_phrase_index ();
357 		m_phrase_lib.clear ();
358 		m_pinyin_lib.clear ();
359 	}
360 
361 
362 	void refine_library (PinyinPhraseValidatorFunc pinyin_phrase_validator = 0);
363 
find(const Phrase & phrase)364 	Phrase find (const Phrase &phrase) {
365 		return m_phrase_lib.find (phrase);
366 	}
367 
find(const WideString & phrase)368 	Phrase find (const WideString &phrase) {
369 		return m_phrase_lib.find (phrase);
370 	}
371 
append(const Phrase & phrase)372 	Phrase append (const Phrase &phrase) {
373 		return append (phrase, PinyinKeyVector ());
374 	}
375 
append(const WideString & phrase)376 	Phrase append (const WideString &phrase) {
377 		return append (phrase, PinyinKeyVector ());
378 	}
379 
380 	Phrase append (const Phrase &phrase, const PinyinKeyVector &keys);
381 	Phrase append (const WideString &phrase, const PinyinKeyVector &keys);
382 
383 	void dump_content (std::ostream &os, int minlen = 1, int maxlen = SCIM_PHRASE_MAX_LENGTH);
384 
set_burst_stack_size(uint32 size)385 	void set_burst_stack_size (uint32 size) {
386 		m_phrase_lib.set_burst_stack_size (size);
387 	}
388 
389 	uint32 get_phrase_relation (const Phrase & first, const Phrase & second, bool local = true) {
390 		return m_phrase_lib.get_phrase_relation (first, second, local);
391 	}
392 
393 	void set_phrase_relation (const Phrase & first, const Phrase & second, uint32 relation = 0) {
394 		m_phrase_lib.set_phrase_relation (first, second, relation);
395 	}
396 
397 	void refresh_phrase_relation (const Phrase & first, const Phrase & second, uint32 shift = 16) {
398 		m_phrase_lib.refresh_phrase_relation (first, second, shift);
399 	}
400 
401 	void optimize_phrase_relation_map (uint32 max_size = 131072) {
402 		m_phrase_lib.optimize_phrase_relation_map (max_size);
403 	}
404 
405 	void optimize_phrase_frequencies (uint32 max_freq = (SCIM_PHRASE_MAX_FREQUENCY >> 1));
406 
407 private:
get_phrase(uint32 phrase_offset)408 	Phrase get_phrase (uint32 phrase_offset) {
409 		return Phrase (&m_phrase_lib, phrase_offset);
410 	}
411 
valid_pinyin_phrase(uint32 phrase_offset,uint32 pinyin_offset)412 	bool valid_pinyin_phrase (uint32 phrase_offset, uint32 pinyin_offset) {
413 		Phrase phrase (&m_phrase_lib, phrase_offset);
414 		return phrase.valid () &&
415 				pinyin_offset <= m_pinyin_lib.size () - phrase.length ();
416 	}
417 
get_pinyin_key(uint32 pinyin_offset)418 	PinyinKey get_pinyin_key (uint32 pinyin_offset) const {
419 		return m_pinyin_lib [pinyin_offset];
420 	}
421 
422 	bool input_pinyin_lib  (const PinyinValidator &validator, std::istream &is);
423 	bool output_pinyin_lib (std::ostream &os, bool binary = false);
424 
425 private:
426 	void compact_memory ();
427 
428 	void sort_phrase_tables ();
429 	void refine_phrase_index (PinyinPhraseValidatorFunc pinyin_phrase_validator);
430 	void refine_pinyin_lib ();
431 	void clear_phrase_index ();
432 
433 	bool output_indexes (std::ostream &os, bool binary = false);
434 	bool input_indexes (std::istream &is);
435 
436 	void create_pinyin_index ();
437 
438 	uint32 count_phrase_number ();
439 
440 	bool insert_pinyin_phrase_into_index (uint32 phrase_index, uint32 pinyin_index);
441 	bool insert_phrase_into_index (const Phrase &phrase, const PinyinKeyVector &keys);
442 
443 	template<class T>
444 	void for_each_phrase (T &op);
445 
446 	template<class T>
447 	void for_each_phrase_level_one (uint32 len, T &op);
448 
449 	template<class T>
450 	void for_each_phrase_level_two (const PinyinPhraseTable::iterator &begin,
451 									const PinyinPhraseTable::iterator &end,
452 									T &op);
453 	template<class T>
454 	void for_each_phrase_level_three (const PinyinPhraseOffsetVector::iterator &begin,
455 									  const PinyinPhraseOffsetVector::iterator &end,
456 									  T &op);
457 
458 	void find_phrases_impl (PhraseVector &pv,
459 							const PinyinPhraseOffsetVector::iterator &begin,
460 							const PinyinPhraseOffsetVector::iterator &end,
461 							const PinyinKeyVector::const_iterator &key_begin,
462 							const PinyinKeyVector::const_iterator &key_pos,
463 							const PinyinKeyVector::const_iterator &Key_end);
464 };
465 
466 class PinyinPhraseLessThanByOffsetSP
467 {
468 	PinyinPhraseLib *m_lib;
469 	const PinyinKeyLessThan &m_less;
470 	uint32 m_pos;
471 public:
472 	PinyinPhraseLessThanByOffsetSP (PinyinPhraseLib *lib,
473 									const PinyinKeyLessThan &less,
474 									uint32 pos = 0)
m_lib(lib)475 		: m_lib (lib), m_less (less), m_pos (pos) { }
476 
operator()477 	bool operator () (const std::pair <uint32, uint32> &lhs,
478 					  const std::pair <uint32, uint32> &rhs) const {
479 		if (m_less (m_lib->get_pinyin_key (lhs.second + m_pos),
480 					m_lib->get_pinyin_key (rhs.second + m_pos)))
481 			return true;
482 		return false;
483 	}
484 
operator()485 	bool operator () (const std::pair <uint32, uint32> &lhs,
486 					  const PinyinKeyVector &rhs) const {
487 		if (m_less (m_lib->get_pinyin_key (lhs.second + m_pos), rhs [m_pos]))
488 			return true;
489 		return false;
490 	}
491 
operator()492 	bool operator () (const PinyinKeyVector &lhs,
493 					  const std::pair <uint32, uint32> &rhs) const {
494 		if (m_less (lhs [m_pos], m_lib->get_pinyin_key (rhs.second + m_pos)))
495 			return true;
496 		return false;
497 	}
498 
operator()499 	bool operator () (const std::pair <uint32, uint32> &lhs,
500 					  const PinyinKey &rhs) const {
501 		if (m_less (m_lib->get_pinyin_key (lhs.second + m_pos), rhs))
502 			return true;
503 		return false;
504 	}
505 
operator()506 	bool operator () (const PinyinKey &lhs,
507 					  const std::pair <uint32, uint32> &rhs) const {
508 		if (m_less (lhs, m_lib->get_pinyin_key (rhs.second + m_pos)))
509 			return true;
510 		return false;
511 	}
512 };
513 
514 class PinyinPhraseEqualToByOffsetSP
515 {
516 	PinyinPhraseLib *m_lib;
517 	const PinyinKeyEqualTo &m_equal;
518 	uint32 m_pos;
519 public:
520 	PinyinPhraseEqualToByOffsetSP (PinyinPhraseLib *lib,
521 								   const PinyinKeyEqualTo &equal,
522 								   uint32 pos = 0)
m_lib(lib)523 		: m_lib (lib), m_equal (equal), m_pos (pos) { }
524 
operator()525 	bool operator () (const std::pair <uint32, uint32> &lhs,
526 					  const std::pair <uint32, uint32> &rhs) const {
527 		if (m_equal (m_lib->get_pinyin_key (lhs.second + m_pos),
528 					 m_lib->get_pinyin_key (rhs.second + m_pos)))
529 			return true;
530 		return false;
531 	}
532 
operator()533 	bool operator () (const std::pair <uint32, uint32> &lhs,
534 					  const PinyinKeyVector &rhs) const {
535 		if (m_equal (m_lib->get_pinyin_key (lhs.second + m_pos), rhs [m_pos]))
536 			return true;
537 		return false;
538 	}
539 
operator()540 	bool operator () (const PinyinKeyVector &lhs,
541 					  const std::pair <uint32, uint32> &rhs) const {
542 		if (m_equal (lhs [m_pos], m_lib->get_pinyin_key (rhs.second + m_pos)))
543 			return true;
544 		return false;
545 	}
546 
operator()547 	bool operator () (const std::pair <uint32, uint32> &lhs,
548 					  const PinyinKey &rhs) const {
549 		if (m_equal (m_lib->get_pinyin_key (lhs.second + m_pos), rhs))
550 			return true;
551 		return false;
552 	}
553 
operator()554 	bool operator() (const PinyinKey &lhs,
555 					 const std::pair <uint32, uint32> &rhs) const {
556 		if (m_equal (lhs, m_lib->get_pinyin_key (rhs.second + m_pos)))
557 			return true;
558 		return false;
559 	}
560 };
561 
562 //Implementation of pinyin phrase less than and equal to operator.
563 inline bool
operator()564 PinyinPhraseLessThanByOffset::operator () (const std::pair <uint32, uint32> & lhs,
565 										   const std::pair <uint32, uint32> & rhs) const
566 {
567 	if (m_lib->get_phrase (lhs.first) < m_lib->get_phrase (rhs.first))
568 		return true;
569 	else if (m_lib->get_phrase (lhs.first) == m_lib->get_phrase (rhs.first)) {
570 		for (uint32 i=0; i<m_lib->get_phrase (lhs.first).length (); i++) {
571 			if (m_less (m_lib->get_pinyin_key (lhs.second + i),
572 						m_lib->get_pinyin_key (rhs.second + i)))
573 				return true;
574 			else if (m_less (m_lib->get_pinyin_key (rhs.second + i),
575 							 m_lib->get_pinyin_key (lhs.second + i)))
576 				return false;
577 		}
578 	}
579 	return false;
580 }
581 
582 inline bool
operator()583 PinyinPhraseEqualToByOffset::operator () (const std::pair <uint32, uint32> & lhs,
584 										  const std::pair <uint32, uint32> & rhs) const
585 {
586 	if (lhs.first == rhs.first && lhs.second == rhs.second)
587 		return true;
588 	else if (m_lib->get_phrase (lhs.first) != m_lib->get_phrase (rhs.first))
589 		return false;
590 	else {
591 		for (uint32 i=0; i<m_lib->get_phrase (lhs.first).length (); i++)
592 			if (!m_equal (m_lib->get_pinyin_key (lhs.second + i),
593 						  m_lib->get_pinyin_key (rhs.second + i)))
594 				return false;
595 	}
596 	return true;
597 }
598 
599 inline bool
operator()600 PinyinPhrasePinyinLessThanByOffset::operator () (const std::pair <uint32, uint32> & lhs,
601 												 const std::pair <uint32, uint32> & rhs) const
602 {
603 	for (uint32 i=0; i<m_lib->get_phrase (lhs.first).length (); i++) {
604 		if (m_less (m_lib->get_pinyin_key (lhs.second + i),
605 					m_lib->get_pinyin_key (rhs.second + i)))
606 			return true;
607 		else if (m_less (m_lib->get_pinyin_key (rhs.second + i),
608 						 m_lib->get_pinyin_key (lhs.second + i)))
609 			return false;
610 	}
611 
612 	return m_lib->get_phrase (lhs.first) < m_lib->get_phrase (rhs.first);
613 }
614 
615 inline bool
operator()616 PinyinPhrasePhraseLessThanByOffset::operator () (const std::pair <uint32, uint32> & lhs,
617 												 const std::pair <uint32, uint32> & rhs) const
618 {
619 	return m_lib->get_phrase (lhs.first) < m_lib->get_phrase (rhs.first);
620 }
621 
622 //Implementation of some PinyinPhraseLib members.
623 template<class T> void
for_each_phrase(T & op)624 PinyinPhraseLib::for_each_phrase (T &op)
625 {
626 	for (uint32 i=0; i<SCIM_PHRASE_MAX_LENGTH; i++)
627 		for_each_phrase_level_two (m_phrases[i].begin(), m_phrases[i].end(), op);
628 }
629 
630 template<class T> void
for_each_phrase_level_one(uint32 len,T & op)631 PinyinPhraseLib::for_each_phrase_level_one (uint32 len, T &op)
632 {
633 	if (len > 0 && len <= SCIM_PHRASE_MAX_LENGTH)
634 		for_each_phrase_level_two (m_phrases[len-1].begin(), m_phrases[len-1].end(), op);
635 }
636 
637 template<class T> void
for_each_phrase_level_two(const PinyinPhraseTable::iterator & begin,const PinyinPhraseTable::iterator & end,T & op)638 PinyinPhraseLib::for_each_phrase_level_two (const PinyinPhraseTable::iterator &begin,
639 											const PinyinPhraseTable::iterator &end,
640 											T &op)
641 {
642 	for (PinyinPhraseTable::iterator i=begin; i!=end; i++)
643 		for_each_phrase_level_three (
644 						i->get_vector ().begin(),
645 						i->get_vector ().end(),
646 						op);
647 }
648 
649 template<class T> void
for_each_phrase_level_three(const PinyinPhraseOffsetVector::iterator & begin,const PinyinPhraseOffsetVector::iterator & end,T & op)650 PinyinPhraseLib::for_each_phrase_level_three (const PinyinPhraseOffsetVector::iterator &begin,
651 												  const PinyinPhraseOffsetVector::iterator &end,
652 												  T &op)
653 {
654 	for (PinyinPhraseOffsetVector::iterator i=begin; i!=end; i++)
655 		if (valid_pinyin_phrase (i->first, i->second))
656 			op (PinyinPhrase (this, i->first, i->second));
657 }
658 
659 
// Implementation of some PinyinPhrase members.
661 inline bool
valid()662 PinyinPhrase::valid () const
663 {
664 	return m_lib != NULL &&
665 			m_lib->valid_pinyin_phrase (m_phrase_offset, m_pinyin_offset);
666 }
667 
668 inline PinyinKey
get_key(uint32 index)669 PinyinPhrase::get_key (uint32 index) const
670 {
671 	if (valid () && index < length ())
672 		return m_lib->get_pinyin_key (m_pinyin_offset + index);
673 	return PinyinKey ();
674 }
675 
676 inline Phrase
get_phrase()677 PinyinPhrase::get_phrase () const
678 {
679 	if (m_lib != NULL)
680 		return m_lib->get_phrase (m_phrase_offset);
681 	return Phrase ();
682 }
683 #endif
684 /*
685 vi:ts=4:nowrap:ai
686 */
687