1 /*
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *
5  *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
6  *
7  *  This program is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #ifndef PHRASE_LARGE_TABLE2_H
22 #define PHRASE_LARGE_TABLE2_H
23 
24 #include <stdio.h>
25 #include "novel_types.h"
26 #include "memory_chunk.h"
27 
28 namespace pinyin{
29 
30 const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8);
31 
32 class PhraseLengthIndexLevel2;
33 
34 class PhraseBitmapIndexLevel2{
35 protected:
36     PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX];
37     /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */
38     void reset();
39 public:
40     PhraseBitmapIndexLevel2();
~PhraseBitmapIndexLevel2()41     ~PhraseBitmapIndexLevel2(){
42         reset();
43     }
44 
45     /* load/store method */
46     bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end);
47     bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end);
48 
49     /* search method */
50     int search(int phrase_length, /* in */ const ucs4_t phrase[],
51                /* out */ PhraseTokens tokens) const;
52 
53     /* add_index/remove_index method */
54     int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
55 
56     int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token);
57 
58     /* mask out method */
59     bool mask_out(phrase_token_t mask, phrase_token_t value);
60 };
61 
62 
63 class PhraseLargeTable2{
64 protected:
65     PhraseBitmapIndexLevel2 m_bitmap_table;
66     MemoryChunk * m_chunk;
67 
reset()68     void reset(){
69         if ( m_chunk ){
70             delete m_chunk;
71             m_chunk = NULL;
72         }
73     }
74 public:
PhraseLargeTable2()75     PhraseLargeTable2(){
76         m_chunk = NULL;
77     }
78 
~PhraseLargeTable2()79     ~PhraseLargeTable2(){
80         reset();
81     }
82 
83     /* load/store method */
load(MemoryChunk * chunk)84     bool load(MemoryChunk * chunk){
85         reset();
86         m_chunk = chunk;
87         return m_bitmap_table.load(chunk, 0, chunk->size());
88     }
89 
store(MemoryChunk * new_chunk)90     bool store(MemoryChunk * new_chunk){
91         table_offset_t end;
92         return m_bitmap_table.store(new_chunk, 0, end);
93     }
94 
95     bool load_text(FILE * file);
96 
97     /* search method */
search(int phrase_length,const ucs4_t phrase[],PhraseTokens tokens)98     int search(int phrase_length, /* in */ const ucs4_t phrase[],
99                /* out */ PhraseTokens tokens) const {
100         return m_bitmap_table.search(phrase_length, phrase, tokens);
101     }
102 
103     /* add_index/remove_index method */
add_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)104     int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
105         return m_bitmap_table.add_index(phrase_length, phrase, token);
106     }
107 
remove_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)108     int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) {
109         return m_bitmap_table.remove_index(phrase_length, phrase, token);
110     }
111 
112     /* mask out method */
mask_out(phrase_token_t mask,phrase_token_t value)113     bool mask_out(phrase_token_t mask, phrase_token_t value) {
114         return m_bitmap_table.mask_out(mask, value);
115     }
116 };
117 
118 
119 };
120 
121 #endif
122