1 /* 2 * libpinyin 3 * Library to deal with pinyin. 4 * 5 * Copyright (C) 2012 Peng Wu <alexepico@gmail.com> 6 * 7 * This program is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 #ifndef PHRASE_LARGE_TABLE2_H 22 #define PHRASE_LARGE_TABLE2_H 23 24 #include <stdio.h> 25 #include "novel_types.h" 26 #include "memory_chunk.h" 27 28 namespace pinyin{ 29 30 const size_t PHRASE_NUMBER_OF_BITMAP_INDEX = 1<<(sizeof(ucs4_t) / 4 * 8); 31 32 class PhraseLengthIndexLevel2; 33 34 class PhraseBitmapIndexLevel2{ 35 protected: 36 PhraseLengthIndexLevel2 * m_phrase_length_indexes[PHRASE_NUMBER_OF_BITMAP_INDEX]; 37 /* use the third byte of ucs4_t for class PhraseLengthIndexLevel2. */ 38 void reset(); 39 public: 40 PhraseBitmapIndexLevel2(); ~PhraseBitmapIndexLevel2()41 ~PhraseBitmapIndexLevel2(){ 42 reset(); 43 } 44 45 /* load/store method */ 46 bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); 47 bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); 48 49 /* search method */ 50 int search(int phrase_length, /* in */ const ucs4_t phrase[], 51 /* out */ PhraseTokens tokens) const; 52 53 /* add_index/remove_index method */ 54 int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); 55 56 int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token); 57 58 /* mask out method */ 59 bool mask_out(phrase_token_t mask, phrase_token_t value); 60 }; 61 62 63 class PhraseLargeTable2{ 64 protected: 65 PhraseBitmapIndexLevel2 m_bitmap_table; 66 MemoryChunk * m_chunk; 67 reset()68 void reset(){ 69 if ( m_chunk ){ 70 delete m_chunk; 71 m_chunk = NULL; 72 } 73 } 74 public: PhraseLargeTable2()75 PhraseLargeTable2(){ 76 m_chunk = NULL; 77 } 78 ~PhraseLargeTable2()79 ~PhraseLargeTable2(){ 80 reset(); 81 } 82 83 /* load/store method */ load(MemoryChunk * chunk)84 bool load(MemoryChunk * chunk){ 85 reset(); 86 m_chunk = chunk; 87 return m_bitmap_table.load(chunk, 0, chunk->size()); 88 } 89 store(MemoryChunk * new_chunk)90 bool store(MemoryChunk * new_chunk){ 91 table_offset_t end; 92 return m_bitmap_table.store(new_chunk, 0, end); 93 } 94 95 bool load_text(FILE * file); 96 97 /* search method */ search(int phrase_length,const ucs4_t phrase[],PhraseTokens tokens)98 int search(int phrase_length, /* in */ const ucs4_t phrase[], 99 /* out */ PhraseTokens tokens) const { 100 return m_bitmap_table.search(phrase_length, phrase, tokens); 101 } 102 103 /* add_index/remove_index method */ add_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)104 int add_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { 105 return m_bitmap_table.add_index(phrase_length, phrase, token); 106 } 107 remove_index(int phrase_length,const ucs4_t phrase[],phrase_token_t token)108 int remove_index(int phrase_length, /* in */ const ucs4_t phrase[], /* in */ phrase_token_t token) { 109 return m_bitmap_table.remove_index(phrase_length, phrase, token); 110 } 111 112 /* mask out method */ mask_out(phrase_token_t mask,phrase_token_t value)113 bool mask_out(phrase_token_t mask, phrase_token_t value) { 114 return m_bitmap_table.mask_out(mask, value); 115 } 116 }; 117 118 119 }; 120 121 #endif 122