1 #ifndef MXTEXTTOOLS_H 2 #define MXTEXTTOOLS_H 3 /* 4 mxTextTools -- Fast text manipulation routines 5 6 Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com 7 Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com 8 */ 9 10 /* The extension's name; must be the same as the init function's suffix */ 11 #define MXTEXTTOOLS_MODULE "mxTextTools" 12 13 #include "mxbmse.h" 14 #ifdef MXFASTSEARCH 15 # include "private/mxfse.h" 16 #endif 17 18 /* Include generic mx extension header file */ 19 #include "mxh.h" 20 21 #ifdef MX_BUILDING_MXTEXTTOOLS 22 # define MXTEXTTOOLS_EXTERNALIZE MX_EXPORT 23 #else 24 # define MXTEXTTOOLS_EXTERNALIZE MX_IMPORT 25 #endif 26 27 #ifdef __cplusplus 28 extern "C" { 29 #endif 30 31 /* --- Text Search Object ---------------------------------------*/ 32 33 /* Algorithm values */ 34 #define MXTEXTSEARCH_BOYERMOORE 0 35 #define MXTEXTSEARCH_FASTSEARCH 1 36 #define MXTEXTSEARCH_TRIVIAL 2 37 38 typedef struct { 39 PyObject_HEAD 40 PyObject *match; /* Match string object */ 41 PyObject *translate; /* Translate string object or NULL */ 42 int algorithm; /* Algorithm to be used */ 43 void *data; /* Internal data used by the algorithm or NULL */ 44 } mxTextSearchObject; 45 46 MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTextSearch_Type; 47 48 #define mxTextSearch_Check(v) \ 49 (Py_TYPE((v)) == &mxTextSearch_Type) 50 51 /* Exporting these APIs for mxTextTools internal use only ! */ 52 53 extern 54 Py_ssize_t mxTextSearch_MatchLength(PyObject *self); 55 56 extern 57 Py_ssize_t mxTextSearch_SearchBuffer(PyObject *self, 58 char *text, 59 Py_ssize_t start, 60 Py_ssize_t stop, 61 Py_ssize_t *sliceleft, 62 Py_ssize_t *sliceright); 63 64 #ifdef HAVE_UNICODE 65 extern 66 Py_ssize_t mxTextSearch_SearchUnicode(PyObject *self, 67 Py_UNICODE *text, 68 Py_ssize_t start, 69 Py_ssize_t stop, 70 Py_ssize_t *sliceleft, 71 Py_ssize_t *sliceright); 72 #endif 73 74 /* --- Character Set Object -------------------------------------*/ 75 76 /* Mode values */ 77 #define MXCHARSET_8BITMODE 0 78 #define MXCHARSET_UCS2MODE 1 79 #define MXCHARSET_UCS4MODE 2 80 81 typedef struct { 82 PyObject_HEAD 83 PyObject *definition; /* Character set definition */ 84 int mode; /* Operation mode: 85 0 - 8-bit character lookup 86 1 - UCS-2 Unicode lookup 87 2 - UCS-4 Unicode lookup 88 */ 89 void *lookup; /* Lookup table */ 90 } mxCharSetObject; 91 92 MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxCharSet_Type; 93 94 #define mxCharSet_Check(v) \ 95 (Py_TYPE((v)) == &mxCharSet_Type) 96 97 98 /* Exporting these APIs for mxTextTools internal use only ! */ 99 100 extern 101 int mxCharSet_ContainsChar(PyObject *self, 102 register unsigned char ch); 103 104 #ifdef HAVE_UNICODE 105 extern 106 int mxCharSet_ContainsUnicodeChar(PyObject *self, 107 register Py_UNICODE ch); 108 #endif 109 110 extern 111 Py_ssize_t mxCharSet_Match(PyObject *self, 112 PyObject *text, 113 Py_ssize_t start, 114 Py_ssize_t stop, 115 int direction); 116 117 /* --- Tag Table Object -----------------------------------------*/ 118 119 typedef struct { 120 PyObject *tagobj; /* Tag object to assign, call, 121 append, etc. or NULL */ 122 int cmd; /* Command integer */ 123 int flags; /* Command flags */ 124 PyObject *args; /* Command arguments */ 125 int jne; /* Non-match jump offset */ 126 int je; /* Match jump offset */ 127 } mxTagTableEntry; 128 129 #define MXTAGTABLE_STRINGTYPE 0 130 #define MXTAGTABLE_UNICODETYPE 1 131 132 typedef struct { 133 PyObject_VAR_HEAD 134 PyObject *definition; /* Reference to the original 135 table definition or NULL; 136 needed for caching */ 137 int tabletype; /* Type of compiled table: 138 0 - 8-bit string args 139 1 - Unicode args */ 140 int numentries; /* number of allocated entries */ 141 mxTagTableEntry entry[1]; /* Variable length array of 142 mxTagTableEntry fields */ 143 } mxTagTableObject; 144 145 MXTEXTTOOLS_EXTERNALIZE(PyTypeObject) mxTagTable_Type; 146 147 #define mxTagTable_Check(v) \ 148 (Py_TYPE((v)) == &mxTagTable_Type) 149 150 #define mxTagTable_Type(v) \ 151 (((mxTagTableObject *)(v))->tabletype) 152 #define mxTagTable_Definition(v) \ 153 (((mxTagTableObject *)(v))->definition) 154 155 /* Exporting these APIs for mxTextTools internal use only ! */ 156 extern 157 PyObject *mxTagTable_New(PyObject *definition, 158 int tabletype, 159 int cacheable); 160 161 /* --- Tagging Engine -------------------------------------------*/ 162 163 /* Exporting these APIs for mxTextTools internal use only ! */ 164 165 /* mxTextTools_TaggingEngine(): a table driven parser engine 166 167 - return codes: rc = 2: match ok; rc = 1: match failed; rc = 0: error 168 - doesn't check type of passed arguments ! 169 - doesn't increment reference counts of passed objects ! 170 */ 171 172 extern 173 int mxTextTools_TaggingEngine(PyObject *textobj, 174 Py_ssize_t text_start, 175 Py_ssize_t text_stop, 176 mxTagTableObject *table, 177 PyObject *taglist, 178 PyObject *context, 179 Py_ssize_t *next); 180 181 extern 182 int mxTextTools_UnicodeTaggingEngine(PyObject *textobj, 183 Py_ssize_t text_start, 184 Py_ssize_t text_stop, 185 mxTagTableObject *table, 186 PyObject *taglist, 187 PyObject *context, 188 Py_ssize_t *next); 189 190 /* Command integers for cmd; see Constants/TagTable.py for details */ 191 192 /* Low-level string matching, using the same simple logic: 193 - match has to be a string 194 - they only modify x (the current position in text) 195 */ 196 #define MATCH_ALLIN 11 197 #define MATCH_ALLNOTIN 12 198 #define MATCH_IS 13 199 #define MATCH_ISIN 14 200 #define MATCH_ISNOTIN 15 201 202 #define MATCH_WORD 21 203 #define MATCH_WORDSTART 22 204 #define MATCH_WORDEND 23 205 206 #define MATCH_ALLINSET 31 207 #define MATCH_ISINSET 32 208 209 #define MATCH_ALLINCHARSET 41 210 #define MATCH_ISINCHARSET 42 211 212 #define MATCH_MAX_LOWLEVEL 99 213 214 /* Jumps and other low-level special commands */ 215 216 #define MATCH_FAIL 100 217 #define MATCH_JUMP MATCH_FAIL 218 219 #define MATCH_EOF 101 220 #define MATCH_SKIP 102 221 #define MATCH_MOVE 103 222 223 #define MATCH_JUMPTARGET 104 224 225 #define MATCH_MAX_SPECIALS 199 226 227 /* Higher-level string matching */ 228 229 #define MATCH_SWORDSTART 211 230 #define MATCH_SWORDEND 212 231 #define MATCH_SFINDWORD 213 232 #define MATCH_NOWORD MATCH_SWORDSTART 233 234 /* Higher-level special commands */ 235 #define MATCH_CALL 201 236 #define MATCH_CALLARG 202 237 #define MATCH_TABLE 203 238 #define MATCH_SUBTABLE 207 239 #define MATCH_TABLEINLIST 204 240 #define MATCH_SUBTABLEINLIST 208 241 #define MATCH_LOOP 205 242 #define MATCH_LOOPCONTROL 206 243 244 /* Special argument integers */ 245 #define MATCH_JUMP_TO 0 246 #define MATCH_JUMP_MATCHOK 1000000 247 #define MATCH_JUMP_MATCHFAIL -1000000 248 #define MATCH_MOVE_EOF -1 249 #define MATCH_MOVE_BOF 0 250 #define MATCH_FAIL_HERE 1 251 #define MATCH_THISTABLE 999 252 #define MATCH_LOOPCONTROL_BREAK 0 253 #define MATCH_LOOPCONTROL_RESET -1 254 255 /* Flags set in cmd (>=256) */ 256 #define MATCH_CALLTAG (1 << 8) 257 #define MATCH_APPENDTAG (1 << 9) 258 #define MATCH_APPENDTAGOBJ (1 << 10) 259 #define MATCH_APPENDMATCH (1 << 11) 260 #define MATCH_LOOKAHEAD (1 << 12) 261 262 /* EOF */ 263 #ifdef __cplusplus 264 } 265 #endif 266 #endif 267