1 /*
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *
5  *  Copyright (C) 2006-2007 Peng Wu
6  *
7  *  This program is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 /*
22  * This header file contains novel types designed for pinyin processing.
23  */
24 
25 
26 #ifndef NOVEL_TYPES_H
27 #define NOVEL_TYPES_H
28 
29 #include <glib.h>
30 
31 G_BEGIN_DECLS
32 
/* Identifier of a phrase: an unsigned 32-bit value (see the PHRASE_* masks
 * below for how its bits are split). */
typedef guint32 phrase_token_t;
/* A single character, stored in GLib's gunichar type. */
typedef gunichar ucs4_t;
35 
/*
 *  Phrase Index Library Definition
 *
 *  Bit layout of a 32-bit phrase token:
 *    bits  0-23  phrase part of the token   (PHRASE_MASK)
 *    bits 24-27  phrase index library number (PHRASE_INDEX_LIBRARY_MASK)
 *    bits 28-31  reserved for future usage
 */

#define PHRASE_MASK  0x00FFFFFF
#define PHRASE_INDEX_LIBRARY_MASK 0x0F000000
/* Number of distinct library numbers that fit in the 4-bit library field. */
#define PHRASE_INDEX_LIBRARY_COUNT (1<<4)

/*
 * Extract the library number (0 .. PHRASE_INDEX_LIBRARY_COUNT - 1) from a
 * token.  The argument is parenthesized so that expressions using operators
 * of lower precedence than '&' (e.g. a | b) are evaluated as a whole.
 */
#define PHRASE_INDEX_LIBRARY_INDEX(token)                               \
    (((token) & PHRASE_INDEX_LIBRARY_MASK) >> 24)

/*
 * Combine a library number and the low 24 bits of `token` into a full token.
 * Bits of `phrase_index` that fall outside the 4-bit library field and bits
 * of `token` above bit 23 are masked off.  Both arguments are parenthesized
 * so that expressions such as `idx + 1` expand correctly.
 */
#define PHRASE_INDEX_MAKE_TOKEN(phrase_index, token)                    \
    ((((phrase_index) << 24) & PHRASE_INDEX_LIBRARY_MASK) |             \
     ((token) & PHRASE_MASK))
47 
48 
/*
 *  PhraseIndexRanges definitions
 */

/* A half-open range [m_range_begin, m_range_end) of phrase tokens. */
struct PhraseIndexRange{
    phrase_token_t m_range_begin;
    phrase_token_t m_range_end; /* one past the last item, like an STL end iterator */
};

/* PHRASE_INDEX_LIBRARY_COUNT slots, each a GArray of PhraseIndexRange
 * (presumably one slot per phrase index library - confirm against callers). */
typedef GArray * PhraseIndexRanges[PHRASE_INDEX_LIBRARY_COUNT];
/* PHRASE_INDEX_LIBRARY_COUNT slots, each a GArray of tokens
 * (presumably phrase_token_t elements - confirm against callers). */
typedef GArray * PhraseTokens[PHRASE_INDEX_LIBRARY_COUNT];
62 
63 
64 /*
65  *  PinYin Table Definition
66  */
67 
68 
/* Search outcome codes shared by the PinYin Table and the Phrase Table. */
enum SearchResult{
    SEARCH_NONE      = 0,       /* nothing was found */
    SEARCH_OK        = 1 << 0,  /* matching items were found */
    SEARCH_CONTINUED = 1 << 1   /* the storage holds longer words still to search */
};
75 
/* Status codes reported by Phrase Index operations. */
enum ErrorResult{
    ERROR_OK                       = 0,  /* the operation succeeded */
    ERROR_INSERT_ITEM_EXISTS       = 1,  /* the item already exists */
    ERROR_REMOVE_ITEM_DONOT_EXISTS = 2,  /* the item does not exist */
    ERROR_PHRASE_TOO_LONG          = 3,  /* the phrase is too long */
    ERROR_NO_SUB_PHRASE_INDEX      = 4,  /* the sub phrase index is not loaded */
    ERROR_NO_ITEM                  = 5,  /* the item has a null slot */
    ERROR_OUT_OF_RANGE             = 6,  /* beyond the end of the sub phrase index */
    ERROR_FILE_CORRUPTION          = 7,  /* the file is corrupted */
    ERROR_INTEGER_OVERFLOW         = 8,  /* an integer overflowed */
    ERROR_ALREADY_EXISTS           = 9,  /* the sub phrase already exists */
    ERROR_NO_USER_TABLE            = 10  /* the user table is not loaded */
};
90 
/* Attach-mode flags for N-gram; each flag occupies its own bit. */
enum ATTACH_FLAG{
    ATTACH_READONLY  = 1 << 0,
    ATTACH_READWRITE = 1 << 1,
    ATTACH_CREATE    = 1 << 2
};
97 
/*
 *  n-gram Definition
 *  No B parameter (there are duplicated items in uni-gram and bi-gram).
 *  Used in both the system n-gram and the user n-gram.
 *  Uses the delta technique.
 */

/* One bi-gram entry: a following token and its conditional probability. */
struct BigramPhraseItem{
    phrase_token_t m_token;
    gfloat         m_freq; /* P(W2|W1) */
};

/* Same as BigramPhraseItem, with an additional 32-bit count field. */
struct BigramPhraseItemWithCount{
    phrase_token_t m_token;
    guint32        m_count;
    gfloat         m_freq; /* P(W2|W1) */
};

typedef GArray * BigramPhraseArray; /* Array of BigramPhraseItem */
typedef GArray * BigramPhraseWithCountArray; /* Array of BigramPhraseItemWithCount */
118 
119 #define MAX_PHRASE_LENGTH 16
120 
121 const phrase_token_t null_token = 0;
122 const phrase_token_t sentence_start = 1;
123 const phrase_token_t token_min = 0;
124 const phrase_token_t token_max = UINT_MAX;
125 
126 const char c_separate = '#';
127 
/* Unsigned 32-bit offset type. */
typedef guint32 table_offset_t;

/* Double-precision parameter type. */
typedef double parameter_t;

/* Array of ChewingKey/ChewingKeyRest */
typedef GArray * ChewingKeyVector;
typedef GArray * ChewingKeyRestVector;

/* Array of phrase_token_t */
typedef GArray * TokenVector;
/* MatchResult is the same type as TokenVector. */
typedef TokenVector MatchResult;

/* Array of lookup_constraint_t */
typedef GArray * CandidateConstraints;

/* Option types, each an unsigned 32-bit value
 * (presumably bit flags - confirm against the option definitions). */
typedef guint32 pinyin_option_t;

typedef guint32 pinyin_standard_option_t;

typedef guint32 pinyin_fuzzy_option_t;

typedef guint32 pinyin_correct_option_t;
150 
/*
 * Numbers assigned to the default tables.  All values are below
 * PHRASE_INDEX_LIBRARY_COUNT.  TSI_DICTIONARY has the same value as
 * GB_DICTIONARY.
 */
typedef enum {
    RESERVED            = 0,
    GB_DICTIONARY       = 1,
    TSI_DICTIONARY      = GB_DICTIONARY,
    GBK_DICTIONARY      = 2,
    OPENGRAM_DICTIONARY = 3,
    MERGED_DICTIONARY   = 4,
    ADDON_DICTIONARY    = 5,
    NETWORK_DICTIONARY  = 6,
    USER_DICTIONARY     = 7
} PHRASE_INDEX_LIBRARIES;
163 
164 G_END_DECLS
165 
166 #endif
167