/*
 *  libpinyin
 *  Library to deal with pinyin.
 *
 *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
20 
21 #include <assert.h>
22 #include <string.h>
23 #include "phrase_large_table2.h"
24 
25 
/* class definition */
27 
28 namespace pinyin{
29 
30 class PhraseLengthIndexLevel2{
31 protected:
32     GArray * m_phrase_array_indexes;
33 public:
34     PhraseLengthIndexLevel2();
35     ~PhraseLengthIndexLevel2();
36 
37     /* load/store method */
38     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
39     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
40 
41     /* search method */
42     int search(int phrase_length, /* in */ const ucs4_t phrase[],
43                /* out */ PhraseTokens tokens) const;
44 
45     /* add_index/remove_index method */
46     int add_index(int phrase_length, /* in */ const ucs4_t phrase[],
47                   /* in */ phrase_token_t token);
48     int remove_index(int phrase_length, /* in */ const ucs4_t phrase[],
49                      /* in */ phrase_token_t token);
50 
51     /* get length method */
52     int get_length() const;
53 
54     /* mask out method */
55     bool mask_out(phrase_token_t mask, phrase_token_t value);
56 };
57 
58 
59 template<size_t phrase_length>
60 struct PhraseIndexItem2{
61     phrase_token_t m_token;
62     ucs4_t m_phrase[phrase_length];
63 public:
64     PhraseIndexItem2<phrase_length>(const ucs4_t phrase[], phrase_token_t token){
65         memmove(m_phrase, phrase, sizeof(ucs4_t) * phrase_length);
66         m_token = token;
67     }
68 };
69 
70 
71 template<size_t phrase_length>
72 class PhraseArrayIndexLevel2{
73 protected:
74     typedef PhraseIndexItem2<phrase_length> IndexItem;
75 
76 protected:
77     MemoryChunk m_chunk;
78 public:
79     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
80     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
81 
82     /* search method */
83     int search(/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const;
84 
85     /* add_index/remove_index method */
86     int add_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
87     int remove_index(/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
88 
89     /* get length method */
90     int get_length() const;
91 
92     /* mask out method */
93     bool mask_out(phrase_token_t mask, phrase_token_t value);
94 };
95 
96 };
97 
98 using namespace pinyin;
99 
/* class implementation */
101 
102 template<size_t phrase_length>
phrase_compare2(const PhraseIndexItem2<phrase_length> & lhs,const PhraseIndexItem2<phrase_length> & rhs)103 static int phrase_compare2(const PhraseIndexItem2<phrase_length> &lhs,
104                            const PhraseIndexItem2<phrase_length> &rhs){
105     ucs4_t * phrase_lhs = (ucs4_t *) lhs.m_phrase;
106     ucs4_t * phrase_rhs = (ucs4_t *) rhs.m_phrase;
107 
108     return memcmp(phrase_lhs, phrase_rhs, sizeof(ucs4_t) * phrase_length);
109 }
110 
111 template<size_t phrase_length>
phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,const PhraseIndexItem2<phrase_length> & rhs)112 static bool phrase_less_than2(const PhraseIndexItem2<phrase_length> & lhs,
113                               const PhraseIndexItem2<phrase_length> & rhs){
114     return 0 > phrase_compare2(lhs, rhs);
115 }
116 
PhraseBitmapIndexLevel2()117 PhraseBitmapIndexLevel2::PhraseBitmapIndexLevel2(){
118     memset(m_phrase_length_indexes, 0, sizeof(m_phrase_length_indexes));
119 }
120 
reset()121 void PhraseBitmapIndexLevel2::reset(){
122     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; i++){
123         PhraseLengthIndexLevel2 * & length_array =
124             m_phrase_length_indexes[i];
125         if ( length_array )
126             delete length_array;
127         length_array = NULL;
128     }
129 }
130 
131 
/* search method */
133 
search(int phrase_length,const ucs4_t phrase[],PhraseTokens tokens) const134 int PhraseBitmapIndexLevel2::search(int phrase_length,
135                                     /* in */ const ucs4_t phrase[],
136                                     /* out */ PhraseTokens tokens) const {
137     assert(phrase_length > 0);
138 
139     int result = SEARCH_NONE;
140     /* use the first 8-bit of the lower 16-bit for bitmap index,
141      * as most the higher 16-bit are zero.
142      */
143     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
144 
145     PhraseLengthIndexLevel2 * phrase_array = m_phrase_length_indexes[first_key];
146     if ( phrase_array )
147         return phrase_array->search(phrase_length, phrase, tokens);
148     return result;
149 }
150 
PhraseLengthIndexLevel2()151 PhraseLengthIndexLevel2::PhraseLengthIndexLevel2(){
152     m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *));
153 }
154 
~PhraseLengthIndexLevel2()155 PhraseLengthIndexLevel2::~PhraseLengthIndexLevel2(){
156 #define CASE(len) case len:                                             \
157     {                                                                   \
158         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
159             (m_phrase_array_indexes,                                    \
160              PhraseArrayIndexLevel2<len> *, len - 1);                   \
161         if ( array ) {                                                  \
162             delete array;                                               \
163             array = NULL;                                               \
164         }                                                               \
165         break;                                                          \
166     }
167 
168     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i){
169         switch (i){
170 	    CASE(1);
171 	    CASE(2);
172 	    CASE(3);
173 	    CASE(4);
174 	    CASE(5);
175 	    CASE(6);
176 	    CASE(7);
177 	    CASE(8);
178 	    CASE(9);
179 	    CASE(10);
180 	    CASE(11);
181 	    CASE(12);
182 	    CASE(13);
183 	    CASE(14);
184 	    CASE(15);
185 	    CASE(16);
186 	default:
187 	    assert(false);
188         }
189     }
190     g_array_free(m_phrase_array_indexes, TRUE);
191 #undef CASE
192 }
193 
search(int phrase_length,const ucs4_t phrase[],PhraseTokens tokens) const194 int PhraseLengthIndexLevel2::search(int phrase_length,
195                                     /* in */ const ucs4_t phrase[],
196                                     /* out */ PhraseTokens tokens) const {
197     int result = SEARCH_NONE;
198     if ((int) m_phrase_array_indexes->len < phrase_length)
199         return result;
200     if ((int) m_phrase_array_indexes->len > phrase_length)
201         result |= SEARCH_CONTINUED;
202 
203 #define CASE(len) case len:                                             \
204     {                                                                   \
205         PhraseArrayIndexLevel2<len> * array = g_array_index             \
206             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
207         if ( !array )                                                   \
208             return result;                                              \
209         result |= array->search(phrase, tokens);                        \
210         return result;                                                  \
211     }
212 
213     switch ( phrase_length ){
214 	CASE(1);
215 	CASE(2);
216 	CASE(3);
217 	CASE(4);
218 	CASE(5);
219 	CASE(6);
220 	CASE(7);
221 	CASE(8);
222 	CASE(9);
223 	CASE(10);
224 	CASE(11);
225 	CASE(12);
226 	CASE(13);
227 	CASE(14);
228 	CASE(15);
229 	CASE(16);
230     default:
231 	assert(false);
232     }
233 #undef CASE
234 }
235 
236 template<size_t phrase_length>
search(const ucs4_t phrase[],PhraseTokens tokens) const237 int PhraseArrayIndexLevel2<phrase_length>::search
238 (/* in */ const ucs4_t phrase[], /* out */ PhraseTokens tokens) const {
239     int result = SEARCH_NONE;
240 
241     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
242     chunk_begin = (IndexItem *) m_chunk.begin();
243     chunk_end = (IndexItem *) m_chunk.end();
244 
245     /* do the search */
246     IndexItem search_elem(phrase, -1);
247     std_lite::pair<IndexItem *, IndexItem *> range;
248     range = std_lite::equal_range
249         (chunk_begin, chunk_end, search_elem,
250          phrase_less_than2<phrase_length>);
251 
252     const IndexItem * const begin = range.first;
253     const IndexItem * const end = range.second;
254     if (begin == end)
255         return result;
256 
257     const IndexItem * iter = NULL;
258     GArray * array = NULL;
259 
260     for (iter = begin; iter != end; ++iter) {
261         phrase_token_t token = iter->m_token;
262 
263         /* filter out disabled sub phrase indices. */
264         array = tokens[PHRASE_INDEX_LIBRARY_INDEX(token)];
265         if (NULL == array)
266             continue;
267 
268         result |= SEARCH_OK;
269 
270         g_array_append_val(array, token);
271     }
272 
273     return result;
274 }
275 
276 
/* add/remove index method */
278 
add_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)279 int PhraseBitmapIndexLevel2::add_index(int phrase_length,
280                                        /* in */ const ucs4_t phrase[],
281                                        /* in */ phrase_token_t token){
282     guint8 first_key =  (phrase[0] & 0xFF00) >> 8;
283 
284     PhraseLengthIndexLevel2 * & length_array =
285         m_phrase_length_indexes[first_key];
286 
287     if ( !length_array ){
288         length_array = new PhraseLengthIndexLevel2();
289     }
290     return length_array->add_index(phrase_length, phrase, token);
291 }
292 
remove_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)293 int PhraseBitmapIndexLevel2::remove_index(int phrase_length,
294                                          /* in */ const ucs4_t phrase[],
295                                          /* in */ phrase_token_t token){
296     guint8 first_key = (phrase[0] & 0xFF00) >> 8;
297 
298     PhraseLengthIndexLevel2 * & length_array =
299         m_phrase_length_indexes[first_key];
300 
301     if (NULL == length_array)
302         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
303 
304     int retval = length_array->remove_index(phrase_length, phrase, token);
305 
306     /* remove empty array. */
307     if (0 == length_array->get_length()) {
308         delete length_array;
309         length_array = NULL;
310     }
311 
312     return retval;
313 }
314 
add_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)315 int PhraseLengthIndexLevel2::add_index(int phrase_length,
316                                        /* in */ const ucs4_t phrase[],
317                                        /* in */ phrase_token_t token) {
318     if (phrase_length >= MAX_PHRASE_LENGTH)
319         return ERROR_PHRASE_TOO_LONG;
320 
321     if ((int) m_phrase_array_indexes->len < phrase_length)
322         g_array_set_size(m_phrase_array_indexes, phrase_length);
323 
324 #define CASE(len) case len:                                             \
325     {                                                                   \
326         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
327             (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
328         if ( !array )                                                   \
329             array = new PhraseArrayIndexLevel2<len>;                    \
330         return array->add_index(phrase, token);                         \
331     }
332 
333     switch(phrase_length){
334 	CASE(1);
335 	CASE(2);
336 	CASE(3);
337 	CASE(4);
338 	CASE(5);
339 	CASE(6);
340 	CASE(7);
341 	CASE(8);
342 	CASE(9);
343 	CASE(10);
344 	CASE(11);
345 	CASE(12);
346 	CASE(13);
347 	CASE(14);
348 	CASE(15);
349         CASE(16);
350     default:
351 	assert(false);
352     }
353 
354 #undef CASE
355 }
356 
remove_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)357 int PhraseLengthIndexLevel2::remove_index(int phrase_length,
358                                           /* in */ const ucs4_t phrase[],
359                                           /* in */ phrase_token_t token) {
360     if (phrase_length >= MAX_PHRASE_LENGTH)
361         return ERROR_PHRASE_TOO_LONG;
362 
363     if ((int) m_phrase_array_indexes->len < phrase_length)
364         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
365 
366 #define CASE(len) case len:                                             \
367     {                                                                   \
368         PhraseArrayIndexLevel2<len> * & array = g_array_index           \
369             (m_phrase_array_indexes,                                    \
370              PhraseArrayIndexLevel2<len> *, len - 1);                   \
371         if (NULL == array)                                              \
372             return ERROR_REMOVE_ITEM_DONOT_EXISTS;                      \
373         int retval = array->remove_index(phrase, token);                \
374                                                                         \
375         /* remove empty array. */                                       \
376         if (0 == array->get_length()) {                                 \
377             delete array;                                               \
378             array = NULL;                                               \
379                                                                         \
380             /* shrink self array. */                                    \
381             g_array_set_size(m_phrase_array_indexes,                    \
382                              get_length());                             \
383         }                                                               \
384         return retval;                                                  \
385     }
386 
387     switch(phrase_length){
388 	CASE(1);
389 	CASE(2);
390 	CASE(3);
391 	CASE(4);
392 	CASE(5);
393 	CASE(6);
394 	CASE(7);
395 	CASE(8);
396 	CASE(9);
397 	CASE(10);
398 	CASE(11);
399 	CASE(12);
400 	CASE(13);
401 	CASE(14);
402 	CASE(15);
403 	CASE(16);
404     default:
405 	assert(false);
406     }
407 #undef CASE
408 }
409 
410 template<size_t phrase_length>
add_index(const ucs4_t phrase[],phrase_token_t token)411 int PhraseArrayIndexLevel2<phrase_length>::add_index
412 (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token){
413     IndexItem * begin, * end;
414 
415     IndexItem add_elem(phrase, token);
416     begin = (IndexItem *) m_chunk.begin();
417     end   = (IndexItem *) m_chunk.end();
418 
419     std_lite::pair<IndexItem *, IndexItem *> range;
420     range = std_lite::equal_range
421         (begin, end, add_elem, phrase_less_than2<phrase_length>);
422 
423     IndexItem * cur_elem;
424     for (cur_elem = range.first;
425          cur_elem != range.second; ++cur_elem) {
426         if (cur_elem->m_token == token)
427             return ERROR_INSERT_ITEM_EXISTS;
428         if (cur_elem->m_token > token)
429             break;
430     }
431 
432     int offset = (cur_elem - begin) * sizeof(IndexItem);
433     m_chunk.insert_content(offset, &add_elem, sizeof(IndexItem));
434     return ERROR_OK;
435 }
436 
437 template<size_t phrase_length>
remove_index(const ucs4_t phrase[],phrase_token_t token)438 int PhraseArrayIndexLevel2<phrase_length>::remove_index
439 (/* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
440     IndexItem * begin, * end;
441 
442     IndexItem remove_elem(phrase, token);
443     begin = (IndexItem *) m_chunk.begin();
444     end   = (IndexItem *) m_chunk.end();
445 
446     std_lite::pair<IndexItem *, IndexItem *> range;
447     range = std_lite::equal_range
448         (begin, end, remove_elem, phrase_less_than2<phrase_length>);
449 
450     IndexItem * cur_elem;
451     for (cur_elem = range.first;
452          cur_elem != range.second; ++cur_elem) {
453         if (cur_elem->m_token == token)
454             break;
455     }
456 
457     if (cur_elem == range.second)
458         return ERROR_REMOVE_ITEM_DONOT_EXISTS;
459 
460     int offset = (cur_elem - begin) * sizeof(IndexItem);
461     m_chunk.remove_content(offset, sizeof(IndexItem));
462     return ERROR_OK;
463 }
464 
465 
/* load text method */
467 
load_text(FILE * infile)468 bool PhraseLargeTable2::load_text(FILE * infile){
469     char pinyin[256];
470     char phrase[256];
471     phrase_token_t token;
472     size_t freq;
473 
474     while (!feof(infile)) {
475         int num = fscanf(infile, "%255s %255s %u %ld",
476                          pinyin, phrase, &token, &freq);
477 
478         if (4 != num)
479             continue;
480 
481         if (feof(infile))
482             break;
483 
484         glong phrase_len = g_utf8_strlen(phrase, -1);
485         ucs4_t * new_phrase = g_utf8_to_ucs4(phrase, -1, NULL, NULL, NULL);
486         add_index(phrase_len, new_phrase, token);
487 
488         g_free(new_phrase);
489     }
490     return true;
491 }
492 
493 
/* load/store method */
495 
load(MemoryChunk * chunk,table_offset_t offset,table_offset_t end)496 bool PhraseBitmapIndexLevel2::load(MemoryChunk * chunk,
497                                    table_offset_t offset,
498                                    table_offset_t end){
499     reset();
500     char * buf_begin = (char *) chunk->begin();
501     table_offset_t phrase_begin, phrase_end;
502     table_offset_t * index = (table_offset_t *) (buf_begin + offset);
503     phrase_end = *index;
504 
505     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
506         phrase_begin = phrase_end;
507         index++;
508         phrase_end = *index;
509         if ( phrase_begin == phrase_end ) //null pointer
510             continue;
511 
512         /* after reset() all phrases are null pointer. */
513         PhraseLengthIndexLevel2 * phrases = new PhraseLengthIndexLevel2;
514         m_phrase_length_indexes[i] = phrases;
515 
516         phrases->load(chunk, phrase_begin, phrase_end - 1);
517         assert( phrase_end <= end );
518         assert( *(buf_begin + phrase_end - 1) == c_separate);
519     }
520     offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
521     assert( c_separate == *(buf_begin + offset) );
522     return true;
523 }
524 
store(MemoryChunk * new_chunk,table_offset_t offset,table_offset_t & end)525 bool PhraseBitmapIndexLevel2::store(MemoryChunk * new_chunk,
526                                     table_offset_t offset,
527                                     table_offset_t & end){
528     table_offset_t phrase_end;
529     table_offset_t index = offset;
530     offset += (PHRASE_NUMBER_OF_BITMAP_INDEX + 1) * sizeof(table_offset_t);
531     //add '#'
532     new_chunk->set_content(offset, &c_separate, sizeof(char));
533     offset +=sizeof(char);
534     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
535     index += sizeof(table_offset_t);
536     for ( size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
537         PhraseLengthIndexLevel2 * phrases = m_phrase_length_indexes[i];
538         if ( !phrases ) { //null pointer
539             new_chunk->set_content(index, &offset, sizeof(table_offset_t));
540             index += sizeof(table_offset_t);
541             continue;
542         }
543         phrases->store(new_chunk, offset, phrase_end); //has a end '#'
544         offset = phrase_end;
545         //add '#'
546         new_chunk->set_content(offset, &c_separate, sizeof(char));
547         offset += sizeof(char);
548         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
549         index += sizeof(table_offset_t);
550     }
551     end = offset;
552     return true;
553 }
554 
load(MemoryChunk * chunk,table_offset_t offset,table_offset_t end)555 bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
556                                    table_offset_t offset,
557                                    table_offset_t end) {
558     char * buf_begin = (char *) chunk->begin();
559     guint32 nindex = *((guint32 *)(buf_begin + offset));
560     table_offset_t * index = (table_offset_t *)
561         (buf_begin + offset + sizeof(guint32));
562 
563     table_offset_t phrase_begin, phrase_end = *index;
564     g_array_set_size(m_phrase_array_indexes, 0);
565     for (size_t i = 1; i <= nindex; ++i) {
566         phrase_begin = phrase_end;
567         index++;
568         phrase_end = *index;
569         if ( phrase_begin == phrase_end ){
570             void * null = NULL;
571             g_array_append_val(m_phrase_array_indexes, null);
572             continue;
573         }
574 
575 #define CASE(len) case len:                                             \
576         {                                                               \
577             PhraseArrayIndexLevel2<len> * phrase =                      \
578                 new PhraseArrayIndexLevel2<len>;                        \
579             phrase->load(chunk, phrase_begin, phrase_end - 1);          \
580             assert( *(buf_begin + phrase_end - 1) == c_separate );      \
581             assert( phrase_end <= end );                                \
582             g_array_append_val(m_phrase_array_indexes, phrase);         \
583             break;                                                      \
584         }
585         switch ( i ){
586 	    CASE(1);
587 	    CASE(2);
588 	    CASE(3);
589 	    CASE(4);
590 	    CASE(5);
591 	    CASE(6);
592 	    CASE(7);
593 	    CASE(8);
594 	    CASE(9);
595 	    CASE(10);
596 	    CASE(11);
597 	    CASE(12);
598 	    CASE(13);
599 	    CASE(14);
600 	    CASE(15);
601 	    CASE(16);
602 	default:
603 	    assert(false);
604         }
605 #undef CASE
606     }
607     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
608     assert ( c_separate == * (buf_begin + offset) );
609     return true;
610 }
611 
store(MemoryChunk * new_chunk,table_offset_t offset,table_offset_t & end)612 bool PhraseLengthIndexLevel2::store(MemoryChunk * new_chunk,
613                                     table_offset_t offset,
614                                     table_offset_t & end) {
615     guint32 nindex = m_phrase_array_indexes->len;
616     new_chunk->set_content(offset, &nindex, sizeof(guint32));
617     table_offset_t index = offset + sizeof(guint32);
618 
619     offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t);
620     new_chunk->set_content(offset, &c_separate, sizeof(char));
621     offset += sizeof(char);
622     new_chunk->set_content(index, &offset, sizeof(table_offset_t));
623     index += sizeof(table_offset_t);
624 
625     table_offset_t phrase_end;
626     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
627 #define CASE(len) case len:                                             \
628         {                                                               \
629             PhraseArrayIndexLevel2<len> * phrase = g_array_index        \
630                 (m_phrase_array_indexes, PhraseArrayIndexLevel2<len> *, len - 1); \
631             if ( !phrase ){                                             \
632                 new_chunk->set_content                                  \
633                     (index, &offset, sizeof(table_offset_t));           \
634                 index += sizeof(table_offset_t);                        \
635                 continue;                                               \
636             }                                                           \
637             phrase->store(new_chunk, offset, phrase_end);               \
638             offset = phrase_end;                                        \
639             break;                                                      \
640         }
641         switch ( i ){
642 	    CASE(1);
643 	    CASE(2);
644 	    CASE(3);
645 	    CASE(4);
646 	    CASE(5);
647 	    CASE(6);
648 	    CASE(7);
649 	    CASE(8);
650 	    CASE(9);
651 	    CASE(10);
652 	    CASE(11);
653 	    CASE(12);
654 	    CASE(13);
655 	    CASE(14);
656 	    CASE(15);
657 	    CASE(16);
658 	default:
659 	    assert(false);
660         }
661         //add '#'
662         new_chunk->set_content(offset, &c_separate, sizeof(char));
663         offset += sizeof(char);
664         new_chunk->set_content(index, &offset, sizeof(table_offset_t));
665         index += sizeof(table_offset_t);
666 
667 #undef CASE
668     }
669     end = offset;
670     return true;
671 }
672 
673 template<size_t phrase_length>
674 bool PhraseArrayIndexLevel2<phrase_length>::
load(MemoryChunk * chunk,table_offset_t offset,table_offset_t end)675 load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
676     char * buf_begin = (char *) chunk->begin();
677     m_chunk.set_chunk(buf_begin + offset, end - offset, NULL);
678     return true;
679 }
680 
681 template<size_t phrase_length>
682 bool PhraseArrayIndexLevel2<phrase_length>::
store(MemoryChunk * new_chunk,table_offset_t offset,table_offset_t & end)683 store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
684     new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
685     end = offset + m_chunk.size();
686     return true;
687 }
688 
689 
/* get length method */
691 
get_length() const692 int PhraseLengthIndexLevel2::get_length() const {
693     int length = m_phrase_array_indexes->len;
694 
695     /* trim trailing zero. */
696     for (int i = length - 1; i >= 0; --i) {
697         void * array = g_array_index(m_phrase_array_indexes, void *, i);
698 
699         if (NULL != array)
700             break;
701 
702         --length;
703     }
704 
705     return length;
706 }
707 
708 template<size_t phrase_length>
get_length() const709 int PhraseArrayIndexLevel2<phrase_length>::get_length() const {
710     IndexItem * chunk_begin = NULL, * chunk_end = NULL;
711     chunk_begin = (IndexItem *) m_chunk.begin();
712     chunk_end = (IndexItem *) m_chunk.end();
713 
714     return chunk_end - chunk_begin;
715 }
716 
717 
/* mask out method */
719 
mask_out(phrase_token_t mask,phrase_token_t value)720 bool PhraseBitmapIndexLevel2::mask_out(phrase_token_t mask,
721                                        phrase_token_t value){
722     for (size_t i = 0; i < PHRASE_NUMBER_OF_BITMAP_INDEX; ++i) {
723         PhraseLengthIndexLevel2 * & length_array =
724             m_phrase_length_indexes[i];
725 
726         if (NULL == length_array)
727             continue;
728 
729         length_array->mask_out(mask, value);
730 
731         if (0 == length_array->get_length()) {
732             delete length_array;
733             length_array = NULL;
734         }
735     }
736 
737     return true;
738 }
739 
mask_out(phrase_token_t mask,phrase_token_t value)740 bool PhraseLengthIndexLevel2::mask_out(phrase_token_t mask,
741                                        phrase_token_t value){
742 #define CASE(len) case len:                                     \
743     {                                                           \
744         PhraseArrayIndexLevel2<len> * & array = g_array_index   \
745             (m_phrase_array_indexes,                            \
746              PhraseArrayIndexLevel2<len> *, len - 1);           \
747                                                                 \
748         if (NULL == array)                                      \
749             continue;                                           \
750                                                                 \
751         array->mask_out(mask, value);                           \
752                                                                 \
753         if (0 == array->get_length()) {                         \
754             delete array;                                       \
755             array = NULL;                                       \
756         }                                                       \
757         break;                                                  \
758     }
759 
760     for (size_t i = 1; i <= m_phrase_array_indexes->len; ++i) {
761         switch (i) {
762 	    CASE(1);
763 	    CASE(2);
764 	    CASE(3);
765 	    CASE(4);
766 	    CASE(5);
767 	    CASE(6);
768 	    CASE(7);
769 	    CASE(8);
770 	    CASE(9);
771 	    CASE(10);
772 	    CASE(11);
773 	    CASE(12);
774 	    CASE(13);
775 	    CASE(14);
776 	    CASE(15);
777 	    CASE(16);
778 	default:
779 	    assert(false);
780         }
781     }
782     /* shrink self array. */
783     g_array_set_size(m_phrase_array_indexes, get_length());
784 #undef CASE
785     return true;
786 }
787 
788 template<size_t phrase_length>
mask_out(phrase_token_t mask,phrase_token_t value)789 bool PhraseArrayIndexLevel2<phrase_length>::mask_out
790 (phrase_token_t mask, phrase_token_t value) {
791     IndexItem * begin = NULL, * end = NULL;
792     begin = (IndexItem *) m_chunk.begin();
793     end = (IndexItem *) m_chunk.end();
794 
795     for (IndexItem * cur = begin; cur != end; ++cur) {
796         if ((cur->m_token & mask) != value)
797             continue;
798 
799         int offset = (cur - begin) * sizeof(IndexItem);
800         m_chunk.remove_content(offset, sizeof(IndexItem));
801 
802         /* update chunk end. */
803         end = (IndexItem *) m_chunk.end();
804         --cur;
805     }
806 
807     return true;
808 }
809