1 /*
2 * libpinyin
3 * Library to deal with pinyin.
4 *
5 * Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <assert.h>
22 #include <string.h>
23 #include "phrase_large_table2.h"
24
25
26 /* class definition */
27
28 namespace pinyin{
29
30 class PhraseLengthIndexLevel2{
31 protected:
32 GArray * m_phrase_array_indexes;
33 public:
34 PhraseLengthIndexLevel2();
35 ~PhraseLengthIndexLevel2();
36
37 /* load/store method */
38 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
39 bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
40
41 /* search method */
42 int search(int phrase_length, /* in */ const ucs4_t phrase[],
43 /* out */ PhraseTokens tokens) const;
44
45 /* add_index/remove_index method */
46 int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
47 /* in */ phrase_token_t token);
48 int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
49 /* in */ phrase_token_t token);
50
51 /* get length method */
52 int get_length() const;
53
54 /* mask out method */
55 bool mask_out(phrase_token_t mask, phrase_token_t value);
56 };
57
58
59 template<size_t phrase_length>
60 struct PhraseIndexItem2{
61 phrase_token_t m_token;
62 ucs4_t m_phrase[phrase_length];
63 public:
64 PhraseIndexItem2<phrase_length>(const ucs4_t phrase[], phrase_token_t token){
65 memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
66 m_token = token;
67 }
68 };
69
70
71 template<size_t phrase_length>
72 class PhraseArrayIndexLevel2{
73 protected:
74 typedef PhraseIndexItem2<phrase_length> IndexItem;
75
76 protected:
77 MemoryChunk m_chunk;
78 public:
79 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
80 bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
81
82 /* search method */
83 int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
84
85 /* add_index/remove_index method */
86 int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
87 int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
88
89 /* get length method */
90 int get_length() const;
91
92 /* mask out method */
93 bool mask_out(phrase_token_t mask, phrase_token_t value);
94 };
95
96 };
97
98 using namespace pinyin;
99
100 /* class implementation */
101
102 template<size_t phrase_length>
phrase_compare2(const PhraseIndexItem2<phrase_length> & lhs,const PhraseIndexItem2<phrase_length> & rhs)103 static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
104 const PhraseIndexItem2<phrase_length> &rhs){
105 ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase;
106 ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase;
107
108 return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length);
109 }
110
111 template<size_t phrase_length>
phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,const PhraseIndexItem2<phrase_length> & rhs)112 static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
113 const PhraseIndexItem2<phrase_length> & rhs){
114 return 0 > phrase_compare2(lhs, rhs);
115 }
116
PhraseBitmapIndexLevel2()117 PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
118 memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes));
119 }
120
reset()121 void PhraseBitmapIndexLevel2::reset(){
122 for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){
123 PhraseLengthIndexLevel2 * & length_array =
124 m_phrase_length_indexes[i];
125 if ( length_array )
126 delete length_array;
127 length_array = NULL;
128 }
129 }
130
131
132 /* search method */
133
search(int phrase_length,const ucs4_t phrase[],PhraseTokens tokens) const134 int PhraseBitmapIndexLevel2::search(int phrase_length,
135 /* in */ const ucs4_t phrase[],
136 /* out */ PhraseTokens tokens) const {
137 assert(phrase_length > 0);
138
139 int result = SEARCH_NONE;
140 /* use the first 8-bit of the lower 16-bit for bitmap index,
141 * as most the higher 16-bit are zero.
142 */
143 guint8 first_key = (phrase[0] & 0xFF00) >> 8;
144
145 PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key];
146 if ( phrase_array )
147 return phrase_array->search(phrase_length, phrase, tokens);
148 return result;
149 }
150
PhraseLengthIndexLevel2()151 PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
152 m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
153 }
154
~PhraseLengthIndexLevel2()155 PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
156 #define CASE(len) case len: \
157 { \
158 PhraseArrayIndexLevel2<len> * & array = g_array_index \
159 (m_phrase_array_indexes, \
160 PhraseArrayIndexLevel2<len> *, len - 1); \
161 if ( array ) { \
162 delete array; \
163 array = NULL; \
164 } \
165 break; \
166 }
167
168 for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
169 switch (i){
170 CASE(1);
171 CASE(2);
172 CASE(3);
173 CASE(4);
174 CASE(5);
175 CASE(6);
176 CASE(7);
177 CASE(8);
178 CASE(9);
179 CASE(10);
180 CASE(11);
181 CASE(12);
182 CASE(13);
183 CASE(14);
184 CASE(15);
185 CASE(16);
186 default:
187 assert(false);
188 }
189 }
190 g_array_free(m_phrase_array_indexes, TRUE);
191 #undef CASE
192 }
193
search(int phrase_length,const ucs4_t phrase[],PhraseTokens tokens) const194 int PhraseLengthIndexLevel2::search(int phrase_length,
195 /* in */ const ucs4_t phrase[],
196 /* out */ PhraseTokens tokens) const {
197 int result = SEARCH_NONE;
198 if ((int) m_phrase_array_indexes->len < phrase_length)
199 return result;
200 if ((int) m_phrase_array_indexes->len > phrase_length)
201 result |= SEARCH_CONTINUED;
202
203 #define CASE(len) case len: \
204 { \
205 PhraseArrayIndexLevel2<len> * array = g_array_index \
206 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
207 if ( !array ) \
208 return result; \
209 result |= array->search(phrase, tokens); \
210 return result; \
211 }
212
213 switch ( phrase_length ){
214 CASE(1);
215 CASE(2);
216 CASE(3);
217 CASE(4);
218 CASE(5);
219 CASE(6);
220 CASE(7);
221 CASE(8);
222 CASE(9);
223 CASE(10);
224 CASE(11);
225 CASE(12);
226 CASE(13);
227 CASE(14);
228 CASE(15);
229 CASE(16);
230 default:
231 assert(false);
232 }
233 #undef CASE
234 }
235
236 template<size_t phrase_length>
search(const ucs4_t phrase[],PhraseTokens tokens) const237 int PhraseArrayIndexLevel2<phrase_length>::search
238 (/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
239 int result = SEARCH_NONE;
240
241 IndexItem * chunk_begin = NULL, * chunk_end = NULL;
242 chunk_begin = (IndexItem *) m_chunk.begin();
243 chunk_end = (IndexItem *) m_chunk.end();
244
245 /* do the search */
246 IndexItem search_elem(phrase, -1);
247 std_lite::pair<IndexItem *, IndexItem *> range;
248 range = std_lite::equal_range
249 (chunk_begin, chunk_end, search_elem,
250 phrase_less_than2<phrase_length>);
251
252 const IndexItem * const begin = range.first;
253 const IndexItem * const end = range.second;
254 if (begin == end)
255 return result;
256
257 const IndexItem * iter = NULL;
258 GArray * array = NULL;
259
260 for (iter = begin; iter != end; ++iter) {
261 phrase_token_t token = iter->m_token;
262
263 /* filter out disabled sub phrase indices. */
264 array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
265 if (NULL == array)
266 continue;
267
268 result |= SEARCH_OK;
269
270 g_array_append_val(array, token);
271 }
272
273 return result;
274 }
275
276
277 /* add/remove index method */
278
add_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)279 int PhraseBitmapIndexLevel2::add_index(int phrase_length,
280 /* in */ const ucs4_t phrase[],
281 /* in */ phrase_token_t token){
282 guint8 first_key = (phrase[0] & 0xFF00) >> 8;
283
284 PhraseLengthIndexLevel2 * & length_array =
285 m_phrase_length_indexes[first_key];
286
287 if ( !length_array ){
288 length_array = new PhraseLengthIndexLevel2();
289 }
290 return length_array->add_index(phrase_length, phrase, token);
291 }
292
remove_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)293 int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
294 /* in */ const ucs4_t phrase[],
295 /* in */ phrase_token_t token){
296 guint8 first_key = (phrase[0] & 0xFF00) >> 8;
297
298 PhraseLengthIndexLevel2 * & length_array =
299 m_phrase_length_indexes[first_key];
300
301 if (NULL == length_array)
302 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
303
304 int retval = length_array->remove_index(phrase_length, phrase, token);
305
306 /* remove empty array. */
307 if (0 == length_array->get_length()) {
308 delete length_array;
309 length_array = NULL;
310 }
311
312 return retval;
313 }
314
add_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)315 int PhraseLengthIndexLevel2::add_index(int phrase_length,
316 /* in */ const ucs4_t phrase[],
317 /* in */ phrase_token_t token) {
318 if (phrase_length >= MAX_PHRASE_LENGTH)
319 return ERROR_PHRASE_TOO_LONG;
320
321 if ((int) m_phrase_array_indexes->len < phrase_length)
322 g_array_set_size(m_phrase_array_indexes, phrase_length);
323
324 #define CASE(len) case len: \
325 { \
326 PhraseArrayIndexLevel2<len> * & array = g_array_index \
327 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
328 if ( !array ) \
329 array = new PhraseArrayIndexLevel2<len>; \
330 return array->add_index(phrase, token); \
331 }
332
333 switch(phrase_length){
334 CASE(1);
335 CASE(2);
336 CASE(3);
337 CASE(4);
338 CASE(5);
339 CASE(6);
340 CASE(7);
341 CASE(8);
342 CASE(9);
343 CASE(10);
344 CASE(11);
345 CASE(12);
346 CASE(13);
347 CASE(14);
348 CASE(15);
349 CASE(16);
350 default:
351 assert(false);
352 }
353
354 #undef CASE
355 }
356
remove_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)357 int PhraseLengthIndexLevel2::remove_index(int phrase_length,
358 /* in */ const ucs4_t phrase[],
359 /* in */ phrase_token_t token) {
360 if (phrase_length >= MAX_PHRASE_LENGTH)
361 return ERROR_PHRASE_TOO_LONG;
362
363 if ((int) m_phrase_array_indexes->len < phrase_length)
364 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
365
366 #define CASE(len) case len: \
367 { \
368 PhraseArrayIndexLevel2<len> * & array = g_array_index \
369 (m_phrase_array_indexes, \
370 PhraseArrayIndexLevel2<len> *, len - 1); \
371 if (NULL == array) \
372 return ERROR_REMOVE_ITEM_DONOT_EXISTS; \
373 int retval = array->remove_index(phrase, token); \
374 \
375 /* remove empty array. */ \
376 if (0 == array->get_length()) { \
377 delete array; \
378 array = NULL; \
379 \
380 /* shrink self array. */ \
381 g_array_set_size(m_phrase_array_indexes, \
382 get_length()); \
383 } \
384 return retval; \
385 }
386
387 switch(phrase_length){
388 CASE(1);
389 CASE(2);
390 CASE(3);
391 CASE(4);
392 CASE(5);
393 CASE(6);
394 CASE(7);
395 CASE(8);
396 CASE(9);
397 CASE(10);
398 CASE(11);
399 CASE(12);
400 CASE(13);
401 CASE(14);
402 CASE(15);
403 CASE(16);
404 default:
405 assert(false);
406 }
407 #undef CASE
408 }
409
410 template<size_t phrase_length>
add_index(const ucs4_t phrase[],phrase_token_t token)411 int PhraseArrayIndexLevel2<phrase_length>::add_index
412 (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){
413 IndexItem * begin, * end;
414
415 IndexItem add_elem(phrase, token);
416 begin = (IndexItem *) m_chunk.begin();
417 end = (IndexItem *) m_chunk.end();
418
419 std_lite::pair<IndexItem *, IndexItem *> range;
420 range = std_lite::equal_range
421 (begin, end, add_elem, phrase_less_than2<phrase_length>);
422
423 IndexItem * cur_elem;
424 for (cur_elem = range.first;
425 cur_elem != range.second; ++cur_elem) {
426 if (cur_elem->m_token == token)
427 return ERROR_INSERT_ITEM_EXISTS;
428 if (cur_elem->m_token > token)
429 break;
430 }
431
432 int offset = (cur_elem - begin) * sizeof(IndexItem);
433 m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
434 return ERROR_OK;
435 }
436
437 template<size_t phrase_length>
remove_index(const ucs4_t phrase[],phrase_token_t token)438 int PhraseArrayIndexLevel2<phrase_length>::remove_index
439 (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
440 IndexItem * begin, * end;
441
442 IndexItem remove_elem(phrase, token);
443 begin = (IndexItem *) m_chunk.begin();
444 end = (IndexItem *) m_chunk.end();
445
446 std_lite::pair<IndexItem *, IndexItem *> range;
447 range = std_lite::equal_range
448 (begin, end, remove_elem, phrase_less_than2<phrase_length>);
449
450 IndexItem * cur_elem;
451 for (cur_elem = range.first;
452 cur_elem != range.second; ++cur_elem) {
453 if (cur_elem->m_token == token)
454 break;
455 }
456
457 if (cur_elem == range.second)
458 return ERROR_REMOVE_ITEM_DONOT_EXISTS;
459
460 int offset = (cur_elem - begin) * sizeof(IndexItem);
461 m_chunk.remove_content(offset, sizeof(IndexItem));
462 return ERROR_OK;
463 }
464
465
466 /* load text method */
467
load_text(FILE * infile)468 bool PhraseLargeTable2::load_text(FILE * infile){
469 char pinyin[256];
470 char phrase[256];
471 phrase_token_t token;
472 size_t freq;
473
474 while (!feof(infile)) {
475 int num = fscanf(infile, "%255s %255s %u %ld",
476 pinyin, phrase, &token, &freq);
477
478 if (4 != num)
479 continue;
480
481 if (feof(infile))
482 break;
483
484 glong phrase_len = g_utf8_strlen(phrase, -1);
485 ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
486 add_index(phrase_len, new_phrase, token);
487
488 g_free(new_phrase);
489 }
490 return true;
491 }
492
493
494 /* load/store method */
495
load(MemoryChunk * chunk,table_offset_t offset,table_offset_t end)496 bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk,
497 table_offset_t offset,
498 table_offset_t end){
499 reset();
500 char * buf_begin = (char *) chunk->begin();
501 table_offset_t phrase_begin, phrase_end;
502 table_offset_t * index = (table_offset_t *) (buf_begin + offset);
503 phrase_end = *index;
504
505 for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
506 phrase_begin = phrase_end;
507 index++;
508 phrase_end = *index;
509 if ( phrase_begin == phrase_end ) //null pointer
510 continue;
511
512 /* after reset() all phrases are null pointer. */
513 PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2;
514 m_phrase_length_indexes[i] = phrases;
515
516 phrases->load(chunk, phrase_begin, phrase_end - 1);
517 assert( phrase_end <= end );
518 assert( *(buf_begin + phrase_end - 1) == c_separate);
519 }
520 offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
521 assert( c_separate == *(buf_begin + offset) );
522 return true;
523 }
524
store(MemoryChunk * new_chunk,table_offset_t offset,table_offset_t & end)525 bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk,
526 table_offset_t offset,
527 table_offset_t & end){
528 table_offset_t phrase_end;
529 table_offset_t index = offset;
530 offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
531 //add '#'
532 new_chunk->set_content(offset, &c_separate, sizeof(char));
533 offset +=sizeof(char);
534 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
535 index += sizeof(table_offset_t);
536 for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
537 PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i];
538 if ( !phrases ) { //null pointer
539 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
540 index += sizeof(table_offset_t);
541 continue;
542 }
543 phrases->store(new_chunk, offset, phrase_end); //has a end '#'
544 offset = phrase_end;
545 //add '#'
546 new_chunk->set_content(offset, &c_separate, sizeof(char));
547 offset += sizeof(char);
548 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
549 index += sizeof(table_offset_t);
550 }
551 end = offset;
552 return true;
553 }
554
load(MemoryChunk * chunk,table_offset_t offset,table_offset_t end)555 bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
556 table_offset_t offset,
557 table_offset_t end) {
558 char * buf_begin = (char *) chunk->begin();
559 guint32 nindex = *((guint32 *)(buf_begin + offset));
560 table_offset_t * index = (table_offset_t *)
561 (buf_begin + offset + sizeof(guint32));
562
563 table_offset_t phrase_begin, phrase_end = *index;
564 g_array_set_size(m_phrase_array_indexes, 0);
565 for (size_t i = 1; i <= nindex; ++i) {
566 phrase_begin = phrase_end;
567 index++;
568 phrase_end = *index;
569 if ( phrase_begin == phrase_end ){
570 void * null = NULL;
571 g_array_append_val(m_phrase_array_indexes, null);
572 continue;
573 }
574
575 #define CASE(len) case len: \
576 { \
577 PhraseArrayIndexLevel2<len> * phrase = \
578 new PhraseArrayIndexLevel2<len>; \
579 phrase->load(chunk, phrase_begin, phrase_end - 1); \
580 assert( *(buf_begin + phrase_end - 1) == c_separate ); \
581 assert( phrase_end <= end ); \
582 g_array_append_val(m_phrase_array_indexes, phrase); \
583 break; \
584 }
585 switch ( i ){
586 CASE(1);
587 CASE(2);
588 CASE(3);
589 CASE(4);
590 CASE(5);
591 CASE(6);
592 CASE(7);
593 CASE(8);
594 CASE(9);
595 CASE(10);
596 CASE(11);
597 CASE(12);
598 CASE(13);
599 CASE(14);
600 CASE(15);
601 CASE(16);
602 default:
603 assert(false);
604 }
605 #undef CASE
606 }
607 offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
608 assert ( c_separate == * (buf_begin + offset) );
609 return true;
610 }
611
store(MemoryChunk * new_chunk,table_offset_t offset,table_offset_t & end)612 bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk,
613 table_offset_t offset,
614 table_offset_t & end) {
615 guint32 nindex = m_phrase_array_indexes->len;
616 new_chunk->set_content(offset, &nindex, sizeof(guint32));
617 table_offset_t index = offset + sizeof(guint32);
618
619 offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
620 new_chunk->set_content(offset, &c_separate, sizeof(char));
621 offset += sizeof(char);
622 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
623 index += sizeof(table_offset_t);
624
625 table_offset_t phrase_end;
626 for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
627 #define CASE(len) case len: \
628 { \
629 PhraseArrayIndexLevel2<len> * phrase = g_array_index \
630 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
631 if ( !phrase ){ \
632 new_chunk->set_content \
633 (index, &offset, sizeof(table_offset_t)); \
634 index += sizeof(table_offset_t); \
635 continue; \
636 } \
637 phrase->store(new_chunk, offset, phrase_end); \
638 offset = phrase_end; \
639 break; \
640 }
641 switch ( i ){
642 CASE(1);
643 CASE(2);
644 CASE(3);
645 CASE(4);
646 CASE(5);
647 CASE(6);
648 CASE(7);
649 CASE(8);
650 CASE(9);
651 CASE(10);
652 CASE(11);
653 CASE(12);
654 CASE(13);
655 CASE(14);
656 CASE(15);
657 CASE(16);
658 default:
659 assert(false);
660 }
661 //add '#'
662 new_chunk->set_content(offset, &c_separate, sizeof(char));
663 offset += sizeof(char);
664 new_chunk->set_content(index, &offset, sizeof(table_offset_t));
665 index += sizeof(table_offset_t);
666
667 #undef CASE
668 }
669 end = offset;
670 return true;
671 }
672
673 template<size_t phrase_length>
674 bool PhraseArrayIndexLevel2<phrase_length>::
load(MemoryChunk * chunk,table_offset_t offset,table_offset_t end)675 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
676 char * buf_begin = (char *) chunk->begin();
677 m_chunk.set_chunk(buf_begin + offset, end - offset, NULL);
678 return true;
679 }
680
681 template<size_t phrase_length>
682 bool PhraseArrayIndexLevel2<phrase_length>::
store(MemoryChunk * new_chunk,table_offset_t offset,table_offset_t & end)683 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
684 new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
685 end = offset + m_chunk.size();
686 return true;
687 }
688
689
690 /* get length method */
691
get_length() const692 int PhraseLengthIndexLevel2::get_length() const {
693 int length = m_phrase_array_indexes->len;
694
695 /* trim trailing zero. */
696 for (int i = length - 1; i >= 0; --i) {
697 void * array = g_array_index(m_phrase_array_indexes, void *, i);
698
699 if (NULL != array)
700 break;
701
702 --length;
703 }
704
705 return length;
706 }
707
708 template<size_t phrase_length>
get_length() const709 int PhraseArrayIndexLevel2<phrase_length>::get_length() const {
710 IndexItem * chunk_begin = NULL, * chunk_end = NULL;
711 chunk_begin = (IndexItem *) m_chunk.begin();
712 chunk_end = (IndexItem *) m_chunk.end();
713
714 return chunk_end - chunk_begin;
715 }
716
717
718 /* mask out method */
719
mask_out(phrase_token_t mask,phrase_token_t value)720 bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask,
721 phrase_token_t value){
722 for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
723 PhraseLengthIndexLevel2 * & length_array =
724 m_phrase_length_indexes[i];
725
726 if (NULL == length_array)
727 continue;
728
729 length_array->mask_out(mask, value);
730
731 if (0 == length_array->get_length()) {
732 delete length_array;
733 length_array = NULL;
734 }
735 }
736
737 return true;
738 }
739
mask_out(phrase_token_t mask,phrase_token_t value)740 bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask,
741 phrase_token_t value){
742 #define CASE(len) case len: \
743 { \
744 PhraseArrayIndexLevel2<len> * & array = g_array_index \
745 (m_phrase_array_indexes, \
746 PhraseArrayIndexLevel2<len> *, len - 1); \
747 \
748 if (NULL == array) \
749 continue; \
750 \
751 array->mask_out(mask, value); \
752 \
753 if (0 == array->get_length()) { \
754 delete array; \
755 array = NULL; \
756 } \
757 break; \
758 }
759
760 for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
761 switch (i) {
762 CASE(1);
763 CASE(2);
764 CASE(3);
765 CASE(4);
766 CASE(5);
767 CASE(6);
768 CASE(7);
769 CASE(8);
770 CASE(9);
771 CASE(10);
772 CASE(11);
773 CASE(12);
774 CASE(13);
775 CASE(14);
776 CASE(15);
777 CASE(16);
778 default:
779 assert(false);
780 }
781 }
782 /* shrink self array. */
783 g_array_set_size(m_phrase_array_indexes, get_length());
784 #undef CASE
785 return true;
786 }
787
788 template<size_t phrase_length>
mask_out(phrase_token_t mask,phrase_token_t value)789 bool PhraseArrayIndexLevel2<phrase_length>::mask_out
790 (phrase_token_t mask, phrase_token_t value) {
791 IndexItem * begin = NULL, * end = NULL;
792 begin = (IndexItem *) m_chunk.begin();
793 end = (IndexItem *) m_chunk.end();
794
795 for (IndexItem * cur = begin; cur != end; ++cur) {
796 if ((cur->m_token & mask) != value)
797 continue;
798
799 int offset = (cur - begin) * sizeof(IndexItem);
800 m_chunk.remove_content(offset, sizeof(IndexItem));
801
802 /* update chunk end. */
803 end = (IndexItem *) m_chunk.end();
804 --cur;
805 }
806
807 return true;
808 }
809