1 /* 2 * libpinyin 3 * Library to deal with pinyin. 4 * 5 * Copyright (C) 2006-2007 Peng Wu 6 * 7 * This program is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 /* 22 * This header file contains novel types designed for pinyin processing. 23 */ 24 25 26 #ifndef NOVEL_TYPES_H 27 #define NOVEL_TYPES_H 28 29 #include <glib.h> 30 31 G_BEGIN_DECLS 32 33 typedef guint32 phrase_token_t; 34 typedef gunichar ucs4_t; 35 36 /* 37 * Phrase Index Library Definition 38 * Reserve 4-bits for future usage. 39 */ 40 41 #define PHRASE_MASK 0x00FFFFFF 42 #define PHRASE_INDEX_LIBRARY_MASK 0x0F000000 43 #define PHRASE_INDEX_LIBRARY_COUNT (1<<4) 44 #define PHRASE_INDEX_LIBRARY_INDEX(token) ((token&PHRASE_INDEX_LIBRARY_MASK)>>24) 45 #define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token) \ 46 ( ( (phrase_index<<24) & PHRASE_INDEX_LIBRARY_MASK)|(token & PHRASE_MASK)) 47 48 49 /* 50 * PhraseIndexRanges definitions 51 */ 52 53 struct PhraseIndexRange{ 54 phrase_token_t m_range_begin; 55 phrase_token_t m_range_end; /* pass the last item like stl */ 56 }; 57 58 /* Array of PhraseIndexRange */ 59 typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT]; 60 /* Array of Token */ 61 typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT]; 62 63 64 /* 65 * PinYin Table Definition 66 */ 67 68 69 /* For both PinYin Table and Phrase Table */ 70 enum SearchResult{ 71 SEARCH_NONE = 0x00, /* found nothing */ 72 SEARCH_OK = 0x01 , /* found items */ 73 SEARCH_CONTINUED = 0x02 /* has longer word in the storage to search */ 74 }; 75 76 /* For Phrase Index */ 77 enum ErrorResult{ 78 ERROR_OK = 0, /* operate ok */ 79 ERROR_INSERT_ITEM_EXISTS, /* item already exists */ 80 ERROR_REMOVE_ITEM_DONOT_EXISTS, /* item don't exists */ 81 ERROR_PHRASE_TOO_LONG, /* the phrase is too long */ 82 ERROR_NO_SUB_PHRASE_INDEX, /* sub phrase index is not loaded */ 83 ERROR_NO_ITEM, /* item has a null slot */ 84 ERROR_OUT_OF_RANGE, /* beyond the end of the sub phrase index */ 85 ERROR_FILE_CORRUPTION, /* file is corrupted */ 86 ERROR_INTEGER_OVERFLOW, /* integer is overflowed */ 87 ERROR_ALREADY_EXISTS, /* the sub phrase already exists. */ 88 ERROR_NO_USER_TABLE /* the user table is not loaded. */ 89 }; 90 91 /* For N-gram */ 92 enum ATTACH_FLAG{ 93 ATTACH_READONLY = 1, 94 ATTACH_READWRITE = 0x1 << 1, 95 ATTACH_CREATE = 0x1 << 2, 96 }; 97 98 /* 99 * n-gram Definition 100 * no B parameter(there are duplicated items in uni-gram and bi-gram) 101 * used in system n-gram and user n-gram. 102 * using delta technique. 103 */ 104 105 struct BigramPhraseItem{ 106 phrase_token_t m_token; 107 gfloat m_freq; /* P(W2|W1) */ 108 }; 109 110 struct BigramPhraseItemWithCount{ 111 phrase_token_t m_token; 112 guint32 m_count; 113 gfloat m_freq; /* P(W2|W1) */ 114 }; 115 116 typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */ 117 typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */ 118 119 #define MAX_PHRASE_LENGTH 16 120 121 const phrase_token_t null_token = 0; 122 const phrase_token_t sentence_start = 1; 123 const phrase_token_t token_min = 0; 124 const phrase_token_t token_max = UINT_MAX; 125 126 const char c_separate = '#'; 127 128 typedef guint32 table_offset_t; 129 130 typedef double parameter_t; 131 132 /* Array of ChewingKey/ChewingKeyRest */ 133 typedef GArray * ChewingKeyVector; 134 typedef GArray * ChewingKeyRestVector; 135 136 /* Array of phrase_token_t */ 137 typedef GArray * TokenVector; 138 typedef TokenVector MatchResult; 139 140 /* Array of lookup_constraint_t */ 141 typedef GArray * CandidateConstraints; 142 143 typedef guint32 pinyin_option_t; 144 145 typedef guint32 pinyin_standard_option_t; 146 147 typedef guint32 pinyin_fuzzy_option_t; 148 149 typedef guint32 pinyin_correct_option_t; 150 151 typedef enum { 152 /* for default tables. */ 153 RESERVED = 0, 154 GB_DICTIONARY = 1, 155 TSI_DICTIONARY = 1, 156 GBK_DICTIONARY = 2, 157 OPENGRAM_DICTIONARY = 3, 158 MERGED_DICTIONARY = 4, 159 ADDON_DICTIONARY = 5, 160 NETWORK_DICTIONARY = 6, 161 USER_DICTIONARY = 7, 162 } PHRASE_INDEX_LIBRARIES; 163 164 G_END_DECLS 165 166 #endif 167