1 #ifndef MARISA_BASE_H_
2 #define MARISA_BASE_H_
3
4 // Old Visual C++ does not provide stdint.h.
5 #ifndef _MSC_VER
6 #include <stdint.h>
7 #endif // _MSC_VER
8
9 #ifdef __cplusplus
10 #include <cstddef>
11 #else // __cplusplus
12 #include <stddef.h>
13 #endif // __cplusplus
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif // __cplusplus
18
// Fixed-width unsigned integer types used by the library.
#ifndef _MSC_VER
// Alias the exact-width types from <stdint.h>.
typedef uint8_t marisa_uint8;
typedef uint16_t marisa_uint16;
typedef uint32_t marisa_uint32;
typedef uint64_t marisa_uint64;
#else  // _MSC_VER
// Visual C++: use the compiler's sized integer keywords instead.
typedef unsigned __int8 marisa_uint8;
typedef unsigned __int16 marisa_uint16;
typedef unsigned __int32 marisa_uint32;
typedef unsigned __int64 marisa_uint64;
#endif  // _MSC_VER
30
// MARISA_WORD_SIZE is 64 when a compiler-predefined macro identifies a 64-bit
// target and 32 otherwise. Besides the per-architecture macros, the generic
// LP64 data-model macros (__LP64__ / _LP64) are checked so that 64-bit
// targets not named explicitly here (e.g. riscv64) are also detected.
#if defined(_WIN64) || defined(__LP64__) || defined(_LP64) || \
    defined(__amd64__) || defined(__x86_64__) || defined(__ia64__) || \
    defined(__ppc64__) || defined(__powerpc64__) || defined(__sparc64__) || \
    defined(__mips64__) || defined(__aarch64__) || defined(__s390x__)
 #define MARISA_WORD_SIZE 64
#else  // defined(_WIN64), etc.
 #define MARISA_WORD_SIZE 32
#endif  // defined(_WIN64), etc.
39
40 //#define MARISA_WORD_SIZE (sizeof(void *) * 8)
41
// Maximum value of each unsigned type. Converting -1 to an unsigned type
// yields the value with every bit set, i.e. that type's maximum.
#define MARISA_UINT8_MAX  ((marisa_uint8)-1)
#define MARISA_UINT16_MAX ((marisa_uint16)-1)
#define MARISA_UINT32_MAX ((marisa_uint32)-1)
#define MARISA_UINT64_MAX ((marisa_uint64)-1)
#define MARISA_SIZE_MAX   ((size_t)-1)

// Sentinel values. Link and key IDs use the 32-bit maximum; the "extra"
// sentinel is that maximum shifted right by 8 bits (0x00FFFFFF).
#define MARISA_INVALID_LINK_ID MARISA_UINT32_MAX
#define MARISA_INVALID_KEY_ID  MARISA_UINT32_MAX
#define MARISA_INVALID_EXTRA   (MARISA_UINT32_MAX >> 8)
51
// Error codes are defined as members of marisa_error_code. This library throws
// an exception with one of the error codes when an error occurs.
//
// The last enumerator is deliberately not followed by a comma: a trailing
// comma in an enumerator list is rejected by C89 and C++03 compilers.
typedef enum marisa_error_code_ {
  // MARISA_OK means that a requested operation has succeeded. In practice, an
  // exception never has MARISA_OK because it is not an error.
  MARISA_OK = 0,

  // MARISA_STATE_ERROR means that an object was not ready for a requested
  // operation. For example, an operation to modify a fixed vector throws an
  // exception with MARISA_STATE_ERROR.
  MARISA_STATE_ERROR = 1,

  // MARISA_NULL_ERROR means that an invalid NULL pointer has been given.
  MARISA_NULL_ERROR = 2,

  // MARISA_BOUND_ERROR means that an operation has tried to access an
  // out-of-range address.
  MARISA_BOUND_ERROR = 3,

  // MARISA_RANGE_ERROR means that an out-of-range value has appeared during
  // an operation.
  MARISA_RANGE_ERROR = 4,

  // MARISA_CODE_ERROR means that an undefined code has appeared during an
  // operation.
  MARISA_CODE_ERROR = 5,

  // MARISA_RESET_ERROR means that a smart pointer has tried to reset itself.
  MARISA_RESET_ERROR = 6,

  // MARISA_SIZE_ERROR means that a size has exceeded a library limitation.
  MARISA_SIZE_ERROR = 7,

  // MARISA_MEMORY_ERROR means that a memory allocation has failed.
  MARISA_MEMORY_ERROR = 8,

  // MARISA_IO_ERROR means that an I/O operation has failed.
  MARISA_IO_ERROR = 9,

  // MARISA_FORMAT_ERROR means that input was in an invalid format.
  MARISA_FORMAT_ERROR = 10
} marisa_error_code;
93
// Min/max values, flags and masks for dictionary settings are defined below.
// Please note that unspecified settings will be replaced with the default
// settings. For example, 0 is equivalent to (MARISA_DEFAULT_NUM_TRIES |
// MARISA_DEFAULT_CACHE | MARISA_DEFAULT_TAIL | MARISA_DEFAULT_ORDER).
98
// A dictionary consists of 3 tries by default. Usually more tries make a
// dictionary more space-efficient but less time-efficient.
//
// No comma follows the last enumerator, because a trailing comma in an
// enumerator list is rejected by C89 and C++03 compilers.
typedef enum marisa_num_tries_ {
  MARISA_MIN_NUM_TRIES     = 0x00001,
  MARISA_MAX_NUM_TRIES     = 0x0007F,
  MARISA_DEFAULT_NUM_TRIES = 0x00003
} marisa_num_tries;
106
// Search functions are accelerated with a cache. marisa_cache_level lists the
// available cache size options: a larger cache enables faster search but
// takes more space.
//
// Each level is a distinct single bit in the range 0x00080..0x00800.
typedef enum marisa_cache_level_ {
  MARISA_HUGE_CACHE    = 0x00080,
  MARISA_LARGE_CACHE   = 0x00100,
  MARISA_NORMAL_CACHE  = 0x00200,
  MARISA_SMALL_CACHE   = 0x00400,
  MARISA_TINY_CACHE    = 0x00800,

  // Level used when no cache level is specified.
  MARISA_DEFAULT_CACHE = MARISA_NORMAL_CACHE
} marisa_cache_level;
118
// This library provides 2 kinds of TAIL implementations.
//
// No comma follows the last enumerator, because a trailing comma in an
// enumerator list is rejected by C89 and C++03 compilers.
typedef enum marisa_tail_mode_ {
  // MARISA_TEXT_TAIL merges last labels as zero-terminated strings. So, it is
  // available if and only if the last labels do not contain a NUL character
  // ('\0'). If MARISA_TEXT_TAIL is specified and a NUL character exists in
  // the last labels, the setting is automatically switched to
  // MARISA_BINARY_TAIL.
  MARISA_TEXT_TAIL = 0x01000,

  // MARISA_BINARY_TAIL also merges last labels but as byte sequences. It uses
  // a bit vector to detect the end of a sequence, instead of NUL characters.
  // So, MARISA_BINARY_TAIL requires a larger space if the average length of
  // labels is greater than 8.
  MARISA_BINARY_TAIL = 0x02000,

  // Mode used when no tail mode is specified.
  MARISA_DEFAULT_TAIL = MARISA_TEXT_TAIL
} marisa_tail_mode;
135
// The arrangement of nodes affects the time cost of matching and the order of
// predictive search.
//
// No comma follows the last enumerator, because a trailing comma in an
// enumerator list is rejected by C89 and C++03 compilers.
typedef enum marisa_node_order_ {
  // MARISA_LABEL_ORDER arranges nodes in ascending label order.
  // MARISA_LABEL_ORDER is useful if an application needs to predict keys in
  // label order.
  MARISA_LABEL_ORDER = 0x10000,

  // MARISA_WEIGHT_ORDER arranges nodes in descending weight order.
  // MARISA_WEIGHT_ORDER is generally a better choice because it enables
  // faster matching.
  MARISA_WEIGHT_ORDER = 0x20000,

  // Order used when no node order is specified.
  MARISA_DEFAULT_ORDER = MARISA_WEIGHT_ORDER
} marisa_node_order;
151
// Bit masks that split a configuration value into its fields. The four field
// masks are disjoint, and MARISA_CONFIG_MASK is their union (0xFFFFF).
typedef enum marisa_config_mask_ {
  MARISA_NUM_TRIES_MASK   = 0x0007F,  // bits 0-6
  MARISA_CACHE_LEVEL_MASK = 0x00F80,  // bits 7-11
  MARISA_TAIL_MODE_MASK   = 0x0F000,  // bits 12-15
  MARISA_NODE_ORDER_MASK  = 0xF0000,  // bits 16-19
  MARISA_CONFIG_MASK      = MARISA_NUM_TRIES_MASK | MARISA_CACHE_LEVEL_MASK |
                            MARISA_TAIL_MODE_MASK | MARISA_NODE_ORDER_MASK
} marisa_config_mask;
159
160 #ifdef __cplusplus
161 } // extern "C"
162 #endif // __cplusplus
163
164 #ifdef __cplusplus
165 namespace marisa {
166
167 typedef ::marisa_uint8 UInt8;
168 typedef ::marisa_uint16 UInt16;
169 typedef ::marisa_uint32 UInt32;
170 typedef ::marisa_uint64 UInt64;
171
172 typedef ::marisa_error_code ErrorCode;
173
174 typedef ::marisa_cache_level CacheLevel;
175 typedef ::marisa_tail_mode TailMode;
176 typedef ::marisa_node_order NodeOrder;
177
// Exchanges the values of lhs and rhs.
//
// The exchange goes through a temporary copy, so T must be copy-constructible
// and copy-assignable: one copy construction and two copy assignments are
// performed. Passing the same object for both arguments leaves it unchanged
// for types whose self-assignment is a no-op.
template <typename T>
inline void swap(T &lhs, T &rhs) {
  T temp = lhs;  // Keep the old value of lhs before it is overwritten.
  lhs = rhs;
  rhs = temp;
}
184
185 } // namespace marisa
186 #endif // __cplusplus
187
188 #ifdef __cplusplus
189 #include "marisa/exception.h"
190 #include "marisa/scoped-ptr.h"
191 #include "marisa/scoped-array.h"
192 #endif // __cplusplus
193
194 #endif // MARISA_BASE_H_
195