1 #ifndef MXTEXTTOOLS_H
2 #define MXTEXTTOOLS_H
3 /*
4   mxTextTools -- Fast text manipulation routines
5 
6   Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
7   Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com
8 */
9 
/* Name of the extension module.  It has to be identical to the suffix
   of the module's init function. */
#define MXTEXTTOOLS_MODULE "mxTextTools"
12 
13 #include "mxbmse.h"
14 #ifdef MXFASTSEARCH
15 # include "private/mxfse.h"
16 #endif
17 
18 /* Include generic mx extension header file */
19 #include "mxh.h"
20 
/* MXTEXTTOOLS_EXTERNALIZE decorates the type object declarations in this
   header.  It maps to MX_EXPORT while MX_BUILDING_MXTEXTTOOLS is defined
   and to MX_IMPORT in every other translation unit. */
#ifndef MX_BUILDING_MXTEXTTOOLS
# define MXTEXTTOOLS_EXTERNALIZE MX_IMPORT
#else
# define MXTEXTTOOLS_EXTERNALIZE MX_EXPORT
#endif
26 
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
30 
31 /* --- Text Search Object ---------------------------------------*/
32 
/* Algorithm values (cf. the `algorithm` member of mxTextSearchObject) */
#define MXTEXTSEARCH_BOYERMOORE   0
#define MXTEXTSEARCH_FASTSEARCH   1
#define MXTEXTSEARCH_TRIVIAL      2
37 
38 typedef struct {
39     PyObject_HEAD
40     PyObject *match;        /* Match string object */
41     PyObject *translate;    /* Translate string object or NULL */
42     int algorithm;          /* Algorithm to be used */
43     void *data;             /* Internal data used by the algorithm or NULL */
44 } mxTextSearchObject;
45 
46 MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTextSearch_Type;
47 
48 #define mxTextSearch_Check(v) \
49         (Py_TYPE((v)) == &mxTextSearch_Type)
50 
51 /* Exporting these APIs for mxTextTools internal use only ! */
52 
53 extern
54 Py_ssize_t mxTextSearch_MatchLength(PyObject *self);
55 
56 extern
57 Py_ssize_t mxTextSearch_SearchBuffer(PyObject *self,
58 			      char *text,
59 			      Py_ssize_t start,
60 			      Py_ssize_t stop,
61 			      Py_ssize_t *sliceleft,
62 			      Py_ssize_t *sliceright);
63 
/* Py_UNICODE counterpart of mxTextSearch_SearchBuffer(); it takes the same
   parameter list except for the text pointer type and is only declared
   when HAVE_UNICODE is defined. */
#if defined(HAVE_UNICODE)
extern Py_ssize_t mxTextSearch_SearchUnicode(
    PyObject *self,
    Py_UNICODE *text,
    Py_ssize_t start,
    Py_ssize_t stop,
    Py_ssize_t *sliceleft,
    Py_ssize_t *sliceright);
#endif
73 
74 /* --- Character Set Object -------------------------------------*/
75 
/* Mode values (cf. the `mode` member of mxCharSetObject) */
#define MXCHARSET_8BITMODE   0
#define MXCHARSET_UCS2MODE   1
#define MXCHARSET_UCS4MODE   2
80 
81 typedef struct {
82     PyObject_HEAD
83     PyObject *definition;           /* Character set definition */
84     int mode;                       /* Operation mode:
85                                         0 - 8-bit character lookup
86                                         1 - UCS-2 Unicode lookup
87                                         2 - UCS-4 Unicode lookup
88                                     */
89     void *lookup;                   /* Lookup table */
90 } mxCharSetObject;
91 
92 MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxCharSet_Type;
93 
94 #define mxCharSet_Check(v) \
95         (Py_TYPE((v)) == &mxCharSet_Type)
96 
97 
98 /* Exporting these APIs for mxTextTools internal use only ! */
99 
100 extern
101 int mxCharSet_ContainsChar(PyObject *self,
102 			   register unsigned char ch);
103 
104 #ifdef HAVE_UNICODE
105 extern
106 int mxCharSet_ContainsUnicodeChar(PyObject *self,
107 				  register Py_UNICODE ch);
108 #endif
109 
110 extern
111 Py_ssize_t mxCharSet_Match(PyObject *self,
112 		    PyObject *text,
113 		    Py_ssize_t start,
114 		    Py_ssize_t stop,
115 		    int direction);
116 
117 /* --- Tag Table Object -----------------------------------------*/
118 
119 typedef struct {
120     PyObject *tagobj;			/* Tag object to assign, call,
121 					   append, etc. or NULL */
122     int cmd;				/* Command integer */
123     int flags;				/* Command flags */
124     PyObject *args;			/* Command arguments */
125     int jne;				/* Non-match jump offset */
126     int je;				/* Match jump offset */
127 } mxTagTableEntry;
128 
/* Table type values (cf. the `tabletype` member of mxTagTableObject) */
#define MXTAGTABLE_STRINGTYPE    0
#define MXTAGTABLE_UNICODETYPE   1
131 
132 typedef struct {
133     PyObject_VAR_HEAD
134     PyObject *definition;       /* Reference to the original
135                                    table definition or NULL;
136                                    needed for caching */
137     int tabletype;              /* Type of compiled table:
138                                    0 - 8-bit string args
139                                    1 - Unicode args */
140     int numentries;             /* number of allocated entries */
141     mxTagTableEntry entry[1];   /* Variable length array of
142                                    mxTagTableEntry fields */
143 } mxTagTableObject;
144 
145 MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTagTable_Type;
146 
147 #define mxTagTable_Check(v) \
148         (Py_TYPE((v)) == &mxTagTable_Type)
149 
150 #define mxTagTable_Type(v) \
151 	(((mxTagTableObject *)(v))->tabletype)
152 #define mxTagTable_Definition(v) \
153 	(((mxTagTableObject *)(v))->definition)
154 
155 /* Exporting these APIs for mxTextTools internal use only ! */
156 extern
157 PyObject *mxTagTable_New(PyObject *definition,
158 			 int tabletype,
159 			 int cacheable);
160 
161 /* --- Tagging Engine -------------------------------------------*/
162 
163 /* Exporting these APIs for mxTextTools internal use only ! */
164 
165 /* mxTextTools_TaggingEngine(): a table driven parser engine
166 
167    - return codes: rc = 2: match ok; rc = 1: match failed; rc = 0: error
168    - doesn't check type of passed arguments !
169    - doesn't increment reference counts of passed objects !
170 */
171 
172 extern
173 int mxTextTools_TaggingEngine(PyObject *textobj,
174 			      Py_ssize_t text_start,
175 			      Py_ssize_t text_stop,
176 			      mxTagTableObject *table,
177 			      PyObject *taglist,
178 			      PyObject *context,
179 			      Py_ssize_t *next);
180 
181 extern
182 int mxTextTools_UnicodeTaggingEngine(PyObject *textobj,
183 				     Py_ssize_t text_start,
184 				     Py_ssize_t text_stop,
185 				     mxTagTableObject *table,
186 				     PyObject *taglist,
187 				     PyObject *context,
188 				     Py_ssize_t *next);
189 
/* Command integers for cmd; see Constants/TagTable.py for details */

/* Low-level string matching commands.  They all use the same simple
   logic:
   - match has to be a string
   - they only modify x (the current position in text)
*/

/* 11-15 */
#define MATCH_ALLIN          11
#define MATCH_ALLNOTIN       12
#define MATCH_IS             13
#define MATCH_ISIN           14
#define MATCH_ISNOTIN        15

/* 21-23 */
#define MATCH_WORD           21
#define MATCH_WORDSTART      22
#define MATCH_WORDEND        23

/* 31-32 */
#define MATCH_ALLINSET       31
#define MATCH_ISINSET        32

/* 41-42 */
#define MATCH_ALLINCHARSET   41
#define MATCH_ISINCHARSET    42

/* Upper bound of the low-level command range */
#define MATCH_MAX_LOWLEVEL   99
213 
/* Jumps and other low-level special commands */

#define MATCH_FAIL           100
#define MATCH_JUMP           MATCH_FAIL   /* alias: same value as MATCH_FAIL */
#define MATCH_EOF            101
#define MATCH_SKIP           102
#define MATCH_MOVE           103
#define MATCH_JUMPTARGET     104

/* Upper bound of the special command range */
#define MATCH_MAX_SPECIALS   199
226 
/* Higher-level special commands (listed in numeric order) */
#define MATCH_CALL             201
#define MATCH_CALLARG          202
#define MATCH_TABLE            203
#define MATCH_TABLEINLIST      204
#define MATCH_LOOP             205
#define MATCH_LOOPCONTROL      206
#define MATCH_SUBTABLE         207
#define MATCH_SUBTABLEINLIST   208

/* Higher-level string matching */
#define MATCH_SWORDSTART       211
#define MATCH_SWORDEND         212
#define MATCH_SFINDWORD        213
#define MATCH_NOWORD           MATCH_SWORDSTART   /* alias: same value as
                                                     MATCH_SWORDSTART */
243 
/* Special argument integers.

   Negative values are wrapped in parentheses so that each macro expands
   to a single primary expression regardless of the surrounding
   operators; the values themselves are unchanged. */
#define MATCH_JUMP_TO             0
#define MATCH_JUMP_MATCHOK        1000000
#define MATCH_JUMP_MATCHFAIL      (-1000000)
#define MATCH_MOVE_EOF            (-1)
#define MATCH_MOVE_BOF            0
#define MATCH_FAIL_HERE           1
#define MATCH_THISTABLE           999
#define MATCH_LOOPCONTROL_BREAK   0
#define MATCH_LOOPCONTROL_RESET   (-1)
254 
/* Flags set in cmd (>=256); one bit each, starting at bit 8 */
#define MATCH_CALLTAG        0x0100   /* bit  8 */
#define MATCH_APPENDTAG      0x0200   /* bit  9 */
#define MATCH_APPENDTAGOBJ   0x0400   /* bit 10 */
#define MATCH_APPENDMATCH    0x0800   /* bit 11 */
#define MATCH_LOOKAHEAD      0x1000   /* bit 12 */
261 
262 /* EOF */
263 #ifdef __cplusplus
264 }
265 #endif
266 #endif
267