1 /* 2 ** $Id: lptypes.h,v 1.11 2015/03/04 16:38:00 roberto Exp $ 3 ** LPeg - PEG pattern matching for Lua 4 ** Copyright 2007-2014, Lua.org & PUC-Rio (see 'lpeg.html' for license) 5 ** written by Roberto Ierusalimschy 6 */ 7 8 /* 9 "Amalgamated" version for LuaTeX written by Scarso Luigi. 10 */ 11 12 13 #if !defined(lptypes_h) 14 #define lptypes_h 15 16 17 #if !defined(LPEG_DEBUG) 18 #define NDEBUG 19 #endif 20 21 #include <assert.h> 22 #include <limits.h> 23 #include <ctype.h> 24 #include <stdio.h> 25 #include <string.h> 26 27 #include "lauxlib.h" 28 #include "lua.h" 29 30 31 32 #define VERSION "0.12.2" 33 34 35 #define PATTERN_T "lpeg-pattern" 36 #define MAXSTACKIDX "lpeg-maxstack" 37 38 39 /* 40 ** compatibility with Lua 5.2 41 */ 42 #if (LUA_VERSION_NUM >= 502) 43 44 #undef lua_equal 45 #define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ) 46 47 #undef lua_getfenv 48 #define lua_getfenv lua_getuservalue 49 #undef lua_setfenv 50 #define lua_setfenv lua_setuservalue 51 52 #undef lua_objlen 53 #define lua_objlen lua_rawlen 54 55 #undef luaL_register 56 #define luaL_register(L,n,f) \ 57 { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); } 58 59 #endif 60 61 62 /* default maximum size for call/backtrack stack */ 63 #if !defined(MAXBACK) 64 #define MAXBACK 100 65 #endif 66 67 68 /* maximum number of rules in a grammar */ 69 #if !defined(MAXRULES) 70 #define MAXRULES 1000 71 #endif 72 73 74 75 /* initial size for capture's list */ 76 #define INITCAPSIZE 32 77 78 79 /* index, on Lua stack, for subject */ 80 #define SUBJIDX 2 81 82 /* number of fixed arguments to 'match' (before capture arguments) */ 83 #define FIXEDARGS 3 84 85 /* index, on Lua stack, for capture list */ 86 #define caplistidx(ptop) ((ptop) + 2) 87 88 /* index, on Lua stack, for pattern's ktable */ 89 #define ktableidx(ptop) ((ptop) + 3) 90 91 /* index, on Lua stack, for backtracking stack */ 92 #define stackidx(ptop) ((ptop) + 4) 93 94 95 96 typedef unsigned char byte; 97 98 99 #define BITSPERCHAR 8 100 101 #define CHARSETSIZE ((UCHAR_MAX/BITSPERCHAR) + 1) 102 103 104 105 typedef struct Charset { 106 byte cs[CHARSETSIZE]; 107 } Charset; 108 109 110 111 #define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} } 112 113 /* access to charset */ 114 #define treebuffer(t) ((byte *)((t) + 1)) 115 116 /* number of slots needed for 'n' bytes */ 117 #define bytes2slots(n) (((n) - 1) / sizeof(TTree) + 1) 118 119 /* set 'b' bit in charset 'cs' */ 120 #define setchar(cs,b) ((cs)[(b) >> 3] |= (1 << ((b) & 7))) 121 122 123 /* 124 ** in capture instructions, 'kind' of capture and its offset are 125 ** packed in field 'aux', 4 bits for each 126 */ 127 #define getkind(op) ((op)->i.aux & 0xF) 128 #define getoff(op) (((op)->i.aux >> 4) & 0xF) 129 #define joinkindoff(k,o) ((k) | ((o) << 4)) 130 131 #define MAXOFF 0xF 132 #define MAXAUX 0xFF 133 134 135 /* maximum number of bytes to look behind */ 136 #define MAXBEHIND MAXAUX 137 138 139 /* maximum size (in elements) for a pattern */ 140 #define MAXPATTSIZE (SHRT_MAX - 10) 141 142 143 /* size (in elements) for an instruction plus extra l bytes */ 144 #define instsize(l) (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1) 145 146 147 /* size (in elements) for a ISet instruction */ 148 #define CHARSETINSTSIZE instsize(CHARSETSIZE) 149 150 /* size (in elements) for a IFunc instruction */ 151 #define funcinstsize(p) ((p)->i.aux + 2) 152 153 154 155 #define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7)))) 156 157 158 #endif 159 160 /* 161 ** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $ 162 */ 163 164 #if !defined(lpcap_h) 165 #define lpcap_h 166 167 168 /* #include "lptypes.h"*/ 169 170 171 /* kinds of captures */ 172 typedef enum CapKind { 173 Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, 174 Cquery, Cstring, Cnum, Csubst, Cfold, Cruntime, Cgroup 175 } CapKind; 176 177 178 typedef struct Capture { 179 const char *s; /* subject position */ 180 unsigned short idx; /* extra info (group name, arg index, etc.) */ 181 byte kind; /* kind of capture */ 182 byte siz; /* size of full capture + 1 (0 = not a full capture) */ 183 } Capture; 184 185 186 typedef struct CapState { 187 Capture *cap; /* current capture */ 188 Capture *ocap; /* (original) capture list */ 189 lua_State *L; 190 int ptop; /* index of last argument to 'match' */ 191 const char *s; /* original string */ 192 int valuecached; /* value stored in cache slot */ 193 } CapState; 194 195 196 int runtimecap (CapState *cs, Capture *close, const char *s, int *rem); 197 int getcaptures (lua_State *L, const char *s, const char *r, int ptop); 198 int finddyncap (Capture *cap, Capture *last); 199 200 #endif 201 202 203 /* 204 ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ 205 */ 206 207 #if !defined(lptree_h) 208 #define lptree_h 209 210 211 /* #include "lptypes.h" */ 212 213 214 /* 215 ** types of trees 216 */ 217 typedef enum TTag { 218 TChar = 0, TSet, TAny, /* standard PEG elements */ 219 TTrue, TFalse, 220 TRep, 221 TSeq, TChoice, 222 TNot, TAnd, 223 TCall, 224 TOpenCall, 225 TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */ 226 TGrammar, /* sib1 is initial (and first) rule */ 227 TBehind, /* match behind */ 228 TCapture, /* regular capture */ 229 TRunTime /* run-time capture */ 230 } TTag; 231 232 /* number of siblings for each tree */ 233 extern const byte numsiblings[]; 234 235 236 /* 237 ** Tree trees 238 ** The first sibling of a tree (if there is one) is immediately after 239 ** the tree. A reference to a second sibling (ps) is its position 240 ** relative to the position of the tree itself. A key in ktable 241 ** uses the (unique) address of the original tree that created that 242 ** entry. NULL means no data. 243 */ 244 typedef struct TTree { 245 byte tag; 246 byte cap; /* kind of capture (if it is a capture) */ 247 unsigned short key; /* key in ktable for Lua data (0 if no key) */ 248 union { 249 int ps; /* occasional second sibling */ 250 int n; /* occasional counter */ 251 } u; 252 } TTree; 253 254 255 /* 256 ** A complete pattern has its tree plus, if already compiled, 257 ** its corresponding code 258 */ 259 typedef struct Pattern { 260 union Instruction *code; 261 int codesize; 262 TTree tree[1]; 263 } Pattern; 264 265 266 /* number of siblings for each tree */ 267 extern const byte numsiblings[]; 268 269 /* access to siblings */ 270 #define sib1(t) ((t) + 1) 271 #define sib2(t) ((t) + (t)->u.ps) 272 273 274 275 276 277 278 #endif 279 280 /* 281 ** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $ 282 */ 283 284 #if !defined(lpvm_h) 285 #define lpvm_h 286 287 /* #include "lpcap.h"*/ 288 289 290 /* Virtual Machine's instructions */ 291 typedef enum Opcode { 292 IAny, /* if no char, fail */ 293 IChar, /* if char != aux, fail */ 294 ISet, /* if char not in buff, fail */ 295 ITestAny, /* in no char, jump to 'offset' */ 296 ITestChar, /* if char != aux, jump to 'offset' */ 297 ITestSet, /* if char not in buff, jump to 'offset' */ 298 ISpan, /* read a span of chars in buff */ 299 IBehind, /* walk back 'aux' characters (fail if not possible) */ 300 IRet, /* return from a rule */ 301 IEnd, /* end of pattern */ 302 IChoice, /* stack a choice; next fail will jump to 'offset' */ 303 IJmp, /* jump to 'offset' */ 304 ICall, /* call rule at 'offset' */ 305 IOpenCall, /* call rule number 'key' (must be closed to a ICall) */ 306 ICommit, /* pop choice and jump to 'offset' */ 307 IPartialCommit, /* update top choice to current position and jump */ 308 IBackCommit, /* "fails" but jump to its own 'offset' */ 309 IFailTwice, /* pop one choice and then fail */ 310 IFail, /* go back to saved state on choice and jump to saved offset */ 311 IGiveup, /* internal use */ 312 IFullCapture, /* complete capture of last 'off' chars */ 313 IOpenCapture, /* start a capture */ 314 ICloseCapture, 315 ICloseRunTime 316 } Opcode; 317 318 319 320 typedef union Instruction { 321 struct Inst { 322 byte code; 323 byte aux; 324 short key; 325 } i; 326 int offset; 327 byte buff[1]; 328 } Instruction; 329 330 331 void printpatt (Instruction *p, int n); 332 const char *match (lua_State *L, const char *o, const char *s, const char *e, 333 Instruction *op, Capture *capture, int ptop); 334 335 336 #endif 337 338 /* 339 ** $Id: lpcode.h,v 1.6 2013/11/28 14:56:02 roberto Exp $ 340 */ 341 342 #if !defined(lpcode_h) 343 #define lpcode_h 344 345 /* #include "lua.h"*/ 346 347 /* #include "lptypes.h"*/ 348 /* #include "lptree.h"*/ 349 /* #include "lpvm.h"*/ 350 351 int tocharset (TTree *tree, Charset *cs); 352 int checkaux (TTree *tree, int pred); 353 int fixedlenx (TTree *tree, int count, int len); 354 int hascaptures (TTree *tree); 355 int lp_gc (lua_State *L); 356 Instruction *compile (lua_State *L, Pattern *p); 357 void realloccode (lua_State *L, Pattern *p, int nsize); 358 int sizei (const Instruction *i); 359 360 361 #define PEnullable 0 362 #define PEnofail 1 363 364 #define nofail(t) checkaux(t, PEnofail) 365 #define nullable(t) checkaux(t, PEnullable) 366 367 #define fixedlen(t) fixedlenx(t, 0, 0) 368 369 370 371 #endif 372 /* 373 ** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto Exp $ 374 */ 375 376 377 #if !defined(lpprint_h) 378 #define lpprint_h 379 380 381 /* #include "lptree.h"*/ 382 /* #include "lpvm.h"*/ 383 384 385 #if defined(LPEG_DEBUG) 386 387 void printpatt (Instruction *p, int n); 388 void printtree (TTree *tree, int ident); 389 void printktable (lua_State *L, int idx); 390 void printcharset (const byte *st); 391 void printcaplist (Capture *cap, Capture *limit); 392 393 #else 394 395 #define printktable(L,idx) \ 396 luaL_error(L, "function only implemented in debug mode") 397 #define printtree(tree,i) \ 398 luaL_error(L, "function only implemented in debug mode") 399 #define printpatt(p,n) \ 400 luaL_error(L, "function only implemented in debug mode") 401 402 #endif 403 404 405 #endif 406 407