/* Bacula(R) - The Network Backup Solution Copyright (C) 2000-2020 Kern Sibbald The original author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. You may use this file and others of this release according to the license defined in the LICENSE file, which includes the Affero General Public License, v3.0 ("AGPLv3") and some additional permissions and terms pursuant to its AGPLv3 Section 7. This notice must be preserved when any source code is conveyed and/or propagated. Bacula(R) is a registered trademark of Kern Sibbald. */ /* regexpr.c * * Author: Tatu Ylonen * * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland * * Permission to use, copy, modify, distribute, and sell this software * and its documentation for any purpose is hereby granted without * fee, provided that the above copyright notice appear in all copies. * This software is provided "as is" without express or implied * warranty. * * Created: Thu Sep 26 17:14:05 1991 ylo * Last modified: Mon Nov 4 17:06:48 1991 ylo * Ported to Think C: 19 Jan 1992 guido@cwi.nl * * This code draws many ideas from the regular expression packages by * Henry Spencer of the University of Toronto and Richard Stallman of * the Free Software Foundation. * * Emacs-specific code and syntax table code is almost directly borrowed * from GNU regexp. * * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April * 1997 Thanks for bug reports and ideas from Andrew Kuchling, Tim * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and * probably one or two others that I'm forgetting. * * This file modified to work with Bacula and C++ by * Kern Sibbald, April 2006 * * This file modified to work with REG_ICASE and Bacula by * Eric Bollengier April 2007 */ #include "bacula.h" #include "bregex.h" #define set_error(x) bufp->errmsg=((char *)(x)) #define got_error bufp->errmsg!=NULL /* The original code blithely assumed that sizeof(short) == 2. Not * always true. Original instances of "(short)x" were replaced by * SHORT(x), where SHORT is #defined below. */ #define SHORT(x) ((x) & 0x8000 ? (x) - 0x10000 : (x)) /* The stack implementation is taken from an idea by Andrew Kuchling. * It's a doubly linked list of arrays. The advantages of this over a * simple linked list are that the number of mallocs required are * reduced. It also makes it possible to statically allocate enough * space so that small patterns don't ever need to call malloc. * * The advantages over a single array is that is periodically * realloced when more space is needed is that we avoid ever copying * the stack. */ /* item_t is the basic stack element. Defined as a union of * structures so that both registers, failure points, and counters can * be pushed/popped from the stack. There's nothing built into the * item to keep track of whether a certain stack item is a register, a * failure point, or a counter. */ typedef union item_t { struct { int num; int level; unsigned char *start; unsigned char *end; } reg; struct { int count; int level; int phantom; unsigned char *code; unsigned char *text; } fail; struct { int num; int level; int count; } cntr; } item_t; #define B_STACK_PAGE_SIZE 256 #define NUM_REGISTERS 256 /* A 'page' of stack items. */ typedef struct item_page_t { item_t items[B_STACK_PAGE_SIZE]; struct item_page_t *prev; struct item_page_t *next; } item_page_t; typedef struct match_state { /* The number of registers that have been pushed onto the stack * since the last failure point. */ int count; /* Used to control when registers need to be pushed onto the * stack. */ int level; /* The number of failure points on the stack. */ int point; /* Storage for the registers. Each register consists of two * pointers to characters. So register N is represented as * start[N] and end[N]. The pointers must be converted to * offsets from the beginning of the string before returning the * registers to the calling program. */ unsigned char *start[NUM_REGISTERS]; unsigned char *end[NUM_REGISTERS]; /* Keeps track of whether a register has changed recently. */ int changed[NUM_REGISTERS]; /* Structure to encapsulate the stack. */ struct { /* index into the current page. If index == 0 and you need * to pop an item, move to the previous page and set index * = B_STACK_PAGE_SIZE - 1. Otherwise decrement index to * push a page. If index == B_STACK_PAGE_SIZE and you need * to push a page move to the next page and set index = * 0. If there is no new next page, allocate a new page * and link it in. Otherwise, increment index to push a * page. */ int index; item_page_t *current; /* Pointer to the current page. */ item_page_t first; /* First page is statically allocated. */ } stack; } match_state; /* Initialize a state object */ /* #define NEW_STATE(state) \ */ /* memset(&state, 0, (void *)(&state.stack) - (void *)(&state)); \ */ /* state.stack.current = &state.stack.first; \ */ /* state.stack.first.prev = NULL; \ */ /* state.stack.first.next = NULL; \ */ /* state.stack.index = 0; \ */ /* state.level = 1 */ #define NEW_STATE(state, nregs) \ { \ int i; \ for (i = 0; i < nregs; i++) \ { \ state.start[i] = NULL; \ state.end[i] = NULL; \ state.changed[i] = 0; \ } \ state.stack.current = &state.stack.first; \ state.stack.first.prev = NULL; \ state.stack.first.next = NULL; \ state.stack.index = 0; \ state.level = 1; \ state.count = 0; \ state.level = 0; \ state.point = 0; \ } /* Free any memory that might have been malloc'd */ #define FREE_STATE(state) \ while(state.stack.first.next != NULL) \ { \ state.stack.current = state.stack.first.next; \ state.stack.first.next = state.stack.current->next; \ free(state.stack.current); \ } /* Discard the top 'count' stack items. */ #define STACK_DISCARD(stack, count, on_error) \ stack.index -= count; \ while (stack.index < 0) \ { \ if (stack.current->prev == NULL) \ on_error; \ stack.current = stack.current->prev; \ stack.index += B_STACK_PAGE_SIZE; \ } /* Store a pointer to the previous item on the stack. Used to pop an * item off of the stack. */ #define STACK_PREV(stack, top, on_error) \ if (stack.index == 0) \ { \ if (stack.current->prev == NULL) \ on_error; \ stack.current = stack.current->prev; \ stack.index = B_STACK_PAGE_SIZE - 1; \ } \ else \ { \ stack.index--; \ } \ top = &(stack.current->items[stack.index]) /* Store a pointer to the next item on the stack. Used to push an item * on to the stack. */ #define STACK_NEXT(stack, top, on_error) \ if (stack.index == B_STACK_PAGE_SIZE) \ { \ if (stack.current->next == NULL) \ { \ stack.current->next = (item_page_t *)malloc(sizeof(item_page_t)); \ if (stack.current->next == NULL) \ on_error; \ stack.current->next->prev = stack.current; \ stack.current->next->next = NULL; \ } \ stack.current = stack.current->next; \ stack.index = 0; \ } \ top = &(stack.current->items[stack.index++]) /* Store a pointer to the item that is 'count' items back in the * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to * STACK_TOP(stack, top, on_error). */ #define STACK_BACK(stack, top, count, on_error) \ { \ int index; \ item_page_t *current; \ current = stack.current; \ index = stack.index - (count); \ while (index < 0) \ { \ if (current->prev == NULL) \ on_error; \ current = current->prev; \ index += B_STACK_PAGE_SIZE; \ } \ top = &(current->items[index]); \ } /* Store a pointer to the top item on the stack. Execute the * 'on_error' code if there are no items on the stack. */ #define STACK_TOP(stack, top, on_error) \ if (stack.index == 0) \ { \ if (stack.current->prev == NULL) \ on_error; \ top = &(stack.current->prev->items[B_STACK_PAGE_SIZE - 1]); \ } \ else \ { \ top = &(stack.current->items[stack.index - 1]); \ } /* Test to see if the stack is empty */ #define STACK_EMPTY(stack) ((stack.index == 0) && \ (stack.current->prev == NULL)) /* Return the start of register 'reg' */ #define GET_REG_START(state, reg) (state.start[reg]) /* Return the end of register 'reg' */ #define GET_REG_END(state, reg) (state.end[reg]) /* Set the start of register 'reg'. If the state of the register needs * saving, push it on the stack. */ #define SET_REG_START(state, reg, text, on_error) \ if(state.changed[reg] < state.level) \ { \ item_t *item; \ STACK_NEXT(state.stack, item, on_error); \ item->reg.num = reg; \ item->reg.start = state.start[reg]; \ item->reg.end = state.end[reg]; \ item->reg.level = state.changed[reg]; \ state.changed[reg] = state.level; \ state.count++; \ } \ state.start[reg] = text /* Set the end of register 'reg'. If the state of the register needs * saving, push it on the stack. */ #define SET_REG_END(state, reg, text, on_error) \ if(state.changed[reg] < state.level) \ { \ item_t *item; \ STACK_NEXT(state.stack, item, on_error); \ item->reg.num = reg; \ item->reg.start = state.start[reg]; \ item->reg.end = state.end[reg]; \ item->reg.level = state.changed[reg]; \ state.changed[reg] = state.level; \ state.count++; \ } \ state.end[reg] = text #define PUSH_FAILURE(state, xcode, xtext, on_error) \ { \ item_t *item; \ STACK_NEXT(state.stack, item, on_error); \ item->fail.code = xcode; \ item->fail.text = xtext; \ item->fail.count = state.count; \ item->fail.level = state.level; \ item->fail.phantom = 0; \ state.count = 0; \ state.level++; \ state.point++; \ } /* Update the last failure point with a new position in the text. */ #define UPDATE_FAILURE(state, xtext, on_error) \ { \ item_t *item; \ STACK_BACK(state.stack, item, state.count + 1, on_error); \ if (!item->fail.phantom) \ { \ item_t *item2; \ STACK_NEXT(state.stack, item2, on_error); \ item2->fail.code = item->fail.code; \ item2->fail.text = xtext; \ item2->fail.count = state.count; \ item2->fail.level = state.level; \ item2->fail.phantom = 1; \ state.count = 0; \ state.level++; \ state.point++; \ } \ else \ { \ STACK_DISCARD(state.stack, state.count, on_error); \ STACK_TOP(state.stack, item, on_error); \ item->fail.text = xtext; \ state.count = 0; \ state.level++; \ } \ } #define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \ { \ item_t *item; \ do \ { \ while(state.count > 0) \ { \ STACK_PREV(state.stack, item, on_error); \ state.start[item->reg.num] = item->reg.start; \ state.end[item->reg.num] = item->reg.end; \ state.changed[item->reg.num] = item->reg.level; \ state.count--; \ } \ STACK_PREV(state.stack, item, on_empty); \ xcode = item->fail.code; \ xtext = item->fail.text; \ state.count = item->fail.count; \ state.level = item->fail.level; \ state.point--; \ } \ while (item->fail.text == NULL); \ } enum regexp_compiled_ops { /* opcodes for compiled regexp */ Cend, /* end of pattern reached */ Cbol, /* beginning of line */ Ceol, /* end of line */ Cset, /* character set. Followed by 32 bytes of set. */ Cexact, /* followed by a byte to match */ Canychar, /* matches any character except newline */ Cstart_memory, /* set register start addr (followed by reg number) */ Cend_memory, /* set register end addr (followed by reg number) */ Cmatch_memory, /* match a duplicate of reg contents (regnum follows) */ Cjump, /* followed by two bytes (lsb,msb) of displacement. */ Cstar_jump, /* will change to jump/update_failure_jump at runtime */ Cfailure_jump, /* jump to addr on failure */ Cupdate_failure_jump, /* update topmost failure point and jump */ Cdummy_failure_jump, /* push a dummy failure point and jump */ Cbegbuf, /* match at beginning of buffer */ Cendbuf, /* match at end of buffer */ Cwordbeg, /* match at beginning of word */ Cwordend, /* match at end of word */ Cwordbound, /* match if at word boundary */ Cnotwordbound, /* match if not at word boundary */ Csyntaxspec, /* matches syntax code (1 byte follows) */ Cnotsyntaxspec, /* matches if syntax code does not match (1 byte follows) */ Crepeat1 }; enum regexp_syntax_op { /* syntax codes for plain and quoted characters */ Rend, /* special code for end of regexp */ Rnormal, /* normal character */ Ranychar, /* any character except newline */ Rquote, /* the quote character */ Rbol, /* match beginning of line */ Reol, /* match end of line */ Roptional, /* match preceding expression optionally */ Rstar, /* match preceding expr zero or more times */ Rplus, /* match preceding expr one or more times */ Ror, /* match either of alternatives */ Ropenpar, /* opening parenthesis */ Rclosepar, /* closing parenthesis */ Rmemory, /* match memory register */ Rextended_memory, /* \vnn to match registers 10-99 */ Ropenset, /* open set. Internal syntax hard-coded below. */ /* the following are gnu extensions to "normal" regexp syntax */ Rbegbuf, /* beginning of buffer */ Rendbuf, /* end of buffer */ Rwordchar, /* word character */ Rnotwordchar, /* not word character */ Rwordbeg, /* beginning of word */ Rwordend, /* end of word */ Rwordbound, /* word bound */ Rnotwordbound, /* not word bound */ Rnum_ops }; static int re_compile_initialized = 0; static int regexp_syntax = RE_SYNTAX_EGREP; int re_syntax = RE_SYNTAX_EGREP; /* Exported copy of regexp_syntax */ static unsigned char plain_ops[256]; static unsigned char quoted_ops[256]; static unsigned char precedences[Rnum_ops]; static int regexp_context_indep_ops; static int regexp_ansi_sequences; #define NUM_LEVELS 5 /* number of precedence levels in use */ #define MAX_NESTING 100 /* max nesting level of operators */ #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)] unsigned char re_syntax_table[256]; void re_compile_initialize(void) { int a; static int syntax_table_inited = 0; if (!syntax_table_inited) { syntax_table_inited = 1; memset(re_syntax_table, 0, 256); for (a = 'a'; a <= 'z'; a++) re_syntax_table[a] = Sword; for (a = 'A'; a <= 'Z'; a++) re_syntax_table[a] = Sword; for (a = '0'; a <= '9'; a++) re_syntax_table[a] = Sword | Sdigit | Shexdigit; for (a = '0'; a <= '7'; a++) re_syntax_table[a] |= Soctaldigit; for (a = 'A'; a <= 'F'; a++) re_syntax_table[a] |= Shexdigit; for (a = 'a'; a <= 'f'; a++) re_syntax_table[a] |= Shexdigit; re_syntax_table[(int)'_'] = Sword; for (a = 9; a <= 13; a++) re_syntax_table[a] = Swhitespace; re_syntax_table[(int)' '] = Swhitespace; } re_compile_initialized = 1; for (a = 0; a < 256; a++) { plain_ops[a] = Rnormal; quoted_ops[a] = Rnormal; } for (a = '0'; a <= '9'; a++) quoted_ops[a] = Rmemory; plain_ops[(int)'\134'] = Rquote; if (regexp_syntax & RE_NO_BK_PARENS) { plain_ops[(int)'('] = Ropenpar; plain_ops[(int)')'] = Rclosepar; } else { quoted_ops[(int)'('] = Ropenpar; quoted_ops[(int)')'] = Rclosepar; } if (regexp_syntax & RE_NO_BK_VBAR) { plain_ops[(int)'\174'] = Ror; } else { quoted_ops[(int)'\174'] = Ror; } plain_ops[(int)'*'] = Rstar; if (regexp_syntax & RE_BK_PLUS_QM) { quoted_ops[(int)'+'] = Rplus; quoted_ops[(int)'?'] = Roptional; } else { plain_ops[(int)'+'] = Rplus; plain_ops[(int)'?'] = Roptional; } if (regexp_syntax & RE_NEWLINE_OR) { plain_ops[(int)'\n'] = Ror; } plain_ops[(int)'\133'] = Ropenset; plain_ops[(int)'\136'] = Rbol; plain_ops[(int)'$'] = Reol; plain_ops[(int)'.'] = Ranychar; if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS)) { quoted_ops[(int)'w'] = Rwordchar; quoted_ops[(int)'W'] = Rnotwordchar; quoted_ops[(int)'<'] = Rwordbeg; quoted_ops[(int)'>'] = Rwordend; quoted_ops[(int)'b'] = Rwordbound; quoted_ops[(int)'B'] = Rnotwordbound; quoted_ops[(int)'`'] = Rbegbuf; quoted_ops[(int)'\''] = Rendbuf; } if (regexp_syntax & RE_ANSI_HEX) { quoted_ops[(int)'v'] = Rextended_memory; } for (a = 0; a < Rnum_ops; a++) { precedences[a] = 4; } if (regexp_syntax & RE_TIGHT_VBAR) { precedences[Ror] = 3; precedences[Rbol] = 2; precedences[Reol] = 2; } else { precedences[Ror] = 2; precedences[Rbol] = 3; precedences[Reol] = 3; } precedences[Rclosepar] = 1; precedences[Rend] = 0; regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0; } int re_set_syntax(int syntax) { int ret; ret = regexp_syntax; regexp_syntax = syntax; re_syntax = syntax; /* Exported copy */ re_compile_initialize(); return ret; } static int hex_char_to_decimal(int ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; return 16; } static void re_compile_fastmap_aux(regex_t * bufp, unsigned char *code, int pos, unsigned char *visited, unsigned char *can_be_null, unsigned char *fastmap) { int a; int b; int syntaxcode; if (visited[pos]) return; /* we have already been here */ visited[pos] = 1; for (;;) { switch (code[pos++]) { case Cend: *can_be_null = 1; return; case Cbol: case Cbegbuf: case Cendbuf: case Cwordbeg: case Cwordend: case Cwordbound: case Cnotwordbound: for (a = 0; a < 256; a++) fastmap[a] = 1; break; case Csyntaxspec: syntaxcode = code[pos++]; for (a = 0; a < 256; a++) if (SYNTAX(a) & syntaxcode) fastmap[a] = 1; return; case Cnotsyntaxspec: syntaxcode = code[pos++]; for (a = 0; a < 256; a++) if (!(SYNTAX(a) & syntaxcode)) fastmap[a] = 1; return; case Ceol: fastmap[(int)'\n'] = 1; if (*can_be_null == 0) *can_be_null = 2; /* can match null, but only at end of buffer */ return; case Cset: for (a = 0; a < 256 / 8; a++) if (code[pos + a] != 0) for (b = 0; b < 8; b++) if (code[pos + a] & (1 << b)) fastmap[(a << 3) + b] = 1; pos += 256 / 8; return; case Cexact: fastmap[(unsigned char)code[pos]] = 1; return; case Canychar: for (a = 0; a < 256; a++) if (a != '\n') fastmap[a] = 1; return; case Cstart_memory: case Cend_memory: pos++; break; case Cmatch_memory: for (a = 0; a < 256; a++) fastmap[a] = 1; *can_be_null = 1; return; case Cjump: case Cdummy_failure_jump: case Cupdate_failure_jump: case Cstar_jump: a = (unsigned char)code[pos++]; a |= (unsigned char)code[pos++] << 8; pos += (int)SHORT(a); if (visited[pos]) { /* argh... the regexp contains empty loops. This is not good, as this may cause a failure stack overflow when matching. Oh well. */ /* this path leads nowhere; pursue other paths. */ return; } visited[pos] = 1; break; case Cfailure_jump: a = (unsigned char)code[pos++]; a |= (unsigned char)code[pos++] << 8; a = pos + (int)SHORT(a); re_compile_fastmap_aux(bufp, code, a, visited, can_be_null, fastmap); break; case Crepeat1: pos += 2; break; default: set_error("Unknown regex opcode: memory corrupted?"); return; } } } static int re_do_compile_fastmap(regex_t * bufp, unsigned char *buffer, int used, int pos, unsigned char *can_be_null, unsigned char *fastmap) { unsigned char small_visited[512], *visited; if (used <= (int)sizeof(small_visited)) visited = small_visited; else { visited = (unsigned char *)malloc(used); if (!visited) return 0; } *can_be_null = 0; memset(fastmap, 0, 256); memset(visited, 0, used); re_compile_fastmap_aux(bufp, buffer, pos, visited, can_be_null, fastmap); if (visited != small_visited) free(visited); return 1; } void re_compile_fastmap(regex_t * bufp) { if (!bufp->fastmap || bufp->fastmap_accurate) return; // assert(bufp->used > 0); if (!re_do_compile_fastmap(bufp, bufp->buffer, bufp->used, 0, &bufp->can_be_null, bufp->fastmap)) return; if (got_error) return; if (bufp->buffer[0] == Cbol) bufp->anchor = 1; /* begline */ else if (bufp->buffer[0] == Cbegbuf) bufp->anchor = 2; /* begbuf */ else bufp->anchor = 0; /* none */ bufp->fastmap_accurate = 1; } /* * star is coded as: * 1: failure_jump 2 * ... code for operand of star * star_jump 1 * 2: ... code after star * * We change the star_jump to update_failure_jump if we can determine * that it is safe to do so; otherwise we change it to an ordinary * jump. * * plus is coded as * * jump 2 * 1: failure_jump 3 * 2: ... code for operand of plus * star_jump 1 * 3: ... code after plus * * For star_jump considerations this is processed identically to star. * */ static int re_optimize_star_jump(regex_t * bufp, unsigned char *code) { unsigned char map[256]; unsigned char can_be_null; unsigned char *p1; unsigned char *p2; unsigned char ch; int a; int b; int num_instructions = 0; a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; a = (int)SHORT(a); p1 = code + a + 3; /* skip the failure_jump */ /* Check that the jump is within the pattern */ if (p1 < bufp->buffer || bufp->buffer + bufp->used < p1) { set_error("Regex VM jump out of bounds (failure_jump opt)"); return 0; } // assert(p1[-3] == Cfailure_jump); p2 = code; /* p1 points inside loop, p2 points to after loop */ if (!re_do_compile_fastmap(bufp, bufp->buffer, bufp->used, (int)(p2 - bufp->buffer), &can_be_null, map)) goto make_normal_jump; /* If we might introduce a new update point inside the * loop, we can't optimize because then update_jump would * update a wrong failure point. Thus we have to be * quite careful here. */ /* loop until we find something that consumes a character */ for (;;) { num_instructions++; switch (*p1++) { case Cbol: case Ceol: case Cbegbuf: case Cendbuf: case Cwordbeg: case Cwordend: case Cwordbound: case Cnotwordbound: continue; case Cstart_memory: case Cend_memory: p1++; continue; case Cexact: ch = (unsigned char)*p1++; if (map[(int)ch]) goto make_normal_jump; break; case Canychar: for (b = 0; b < 256; b++) if (b != '\n' && map[b]) goto make_normal_jump; break; case Cset: for (b = 0; b < 256; b++) if ((p1[b >> 3] & (1 << (b & 7))) && map[b]) goto make_normal_jump; p1 += 256 / 8; break; default: goto make_normal_jump; } break; } /* now we know that we can't backtrack. */ while (p1 != p2 - 3) { num_instructions++; switch (*p1++) { case Cend: return 0; case Cbol: case Ceol: case Canychar: case Cbegbuf: case Cendbuf: case Cwordbeg: case Cwordend: case Cwordbound: case Cnotwordbound: break; case Cset: p1 += 256 / 8; break; case Cexact: case Cstart_memory: case Cend_memory: case Cmatch_memory: case Csyntaxspec: case Cnotsyntaxspec: p1++; break; case Cjump: case Cstar_jump: case Cfailure_jump: case Cupdate_failure_jump: case Cdummy_failure_jump: goto make_normal_jump; default: return 0; } } /* make_update_jump: */ code -= 3; a += 3; /* jump to after the Cfailure_jump */ code[0] = Cupdate_failure_jump; code[1] = a & 0xff; code[2] = a >> 8; if (num_instructions > 1) return 1; // assert(num_instructions == 1); /* if the only instruction matches a single character, we can do * better */ p1 = code + 3 + a; /* start of sole instruction */ if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar || *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec) code[0] = Crepeat1; return 1; make_normal_jump: code -= 3; *code = Cjump; return 1; } static int re_optimize(regex_t * bufp) { unsigned char *code; code = bufp->buffer; while (1) { switch (*code++) { case Cend: return 1; case Canychar: case Cbol: case Ceol: case Cbegbuf: case Cendbuf: case Cwordbeg: case Cwordend: case Cwordbound: case Cnotwordbound: break; case Cset: code += 256 / 8; break; case Cexact: case Cstart_memory: case Cend_memory: case Cmatch_memory: case Csyntaxspec: case Cnotsyntaxspec: code++; break; case Cstar_jump: if (!re_optimize_star_jump(bufp, code)) { return 0; } /* fall through */ case Cupdate_failure_jump: case Cjump: case Cdummy_failure_jump: case Cfailure_jump: case Crepeat1: code += 2; break; default: return 0; } } } #define NEXTCHAR(var) \ { \ if (pos >= size) \ goto ends_prematurely; \ (var) = regex[pos]; \ pos++; \ } #define ALLOC(amount) \ { \ if (pattern_offset+(amount) > alloc) \ { \ alloc += 256 + (amount); \ pattern = (unsigned char *)realloc(pattern, alloc); \ if (!pattern) \ goto out_of_memory; \ } \ } #define STORE(ch) pattern[pattern_offset++] = (ch) #define CURRENT_LEVEL_START (starts[starts_base + current_level]) #define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset #define PUSH_LEVEL_STARTS \ if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \ starts_base += NUM_LEVELS; \ else \ goto too_complex \ #define POP_LEVEL_STARTS starts_base -= NUM_LEVELS #define PUT_ADDR(offset,addr) \ { \ int disp = (addr) - (offset) - 2; \ pattern[(offset)] = disp & 0xff; \ pattern[(offset)+1] = (disp>>8) & 0xff; \ } #define INSERT_JUMP(pos,type,addr) \ { \ int a, p = (pos), t = (type), ad = (addr); \ for (a = pattern_offset - 1; a >= p; a--) \ pattern[a + 3] = pattern[a]; \ pattern[p] = t; \ PUT_ADDR(p+1,ad); \ pattern_offset += 3; \ } #define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7)) #define SET_FIELDS \ { \ bufp->allocated = alloc; \ bufp->buffer = pattern; \ bufp->used = pattern_offset; \ } #define GETHEX(var) \ { \ unsigned char gethex_ch, gethex_value; \ NEXTCHAR(gethex_ch); \ gethex_value = hex_char_to_decimal(gethex_ch); \ if (gethex_value == 16) \ goto hex_error; \ NEXTCHAR(gethex_ch); \ gethex_ch = hex_char_to_decimal(gethex_ch); \ if (gethex_ch == 16) \ goto hex_error; \ (var) = gethex_value * 16 + gethex_ch; \ } #define ANSI_TRANSLATE(ch) \ { \ switch (ch) \ { \ case 'a': \ case 'A': \ { \ ch = 7; /* audible bell */ \ break; \ } \ case 'b': \ case 'B': \ { \ ch = 8; /* backspace */ \ break; \ } \ case 'f': \ case 'F': \ { \ ch = 12; /* form feed */ \ break; \ } \ case 'n': \ case 'N': \ { \ ch = 10; /* line feed */ \ break; \ } \ case 'r': \ case 'R': \ { \ ch = 13; /* carriage return */ \ break; \ } \ case 't': \ case 'T': \ { \ ch = 9; /* tab */ \ break; \ } \ case 'v': \ case 'V': \ { \ ch = 11; /* vertical tab */ \ break; \ } \ case 'x': /* hex code */ \ case 'X': \ { \ GETHEX(ch); \ break; \ } \ default: \ { \ /* other characters passed through */ \ if (translate) \ ch = translate[(unsigned char)ch]; \ break; \ } \ } \ } const char *re_compile_pattern(regex_t * bufp, unsigned char *regex) { int a; int pos; int op; int current_level; int level; int opcode; int pattern_offset = 0, alloc; int starts[NUM_LEVELS * MAX_NESTING]; int starts_base; int future_jumps[MAX_NESTING]; int num_jumps; unsigned char ch = '\0'; unsigned char *pattern; unsigned char *translate; int next_register; int paren_depth; int num_open_registers; int open_registers[RE_NREGS]; int beginning_context; int size = strlen((char *)regex); if (!re_compile_initialized) re_compile_initialize(); bufp->used = 0; bufp->fastmap_accurate = 0; bufp->uses_registers = 1; bufp->num_registers = 1; translate = bufp->translate; pattern = bufp->buffer; alloc = bufp->allocated; if (alloc == 0 || pattern == NULL) { alloc = 256; bufp->buffer = pattern = (unsigned char *)malloc(alloc); if (!pattern) goto out_of_memory; } pattern_offset = 0; starts_base = 0; num_jumps = 0; current_level = 0; SET_LEVEL_START; num_open_registers = 0; next_register = 1; paren_depth = 0; beginning_context = 1; op = -1; /* we use Rend dummy to ensure that pending jumps are updated (due to low priority of Rend) before exiting the loop. */ pos = 0; while (op != Rend) { if (pos >= size) op = Rend; else { NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; op = plain_ops[(unsigned char)ch]; if (op == Rquote) { NEXTCHAR(ch); op = quoted_ops[(unsigned char)ch]; if (op == Rnormal && regexp_ansi_sequences) ANSI_TRANSLATE(ch); } } level = precedences[op]; /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n", ch, op, level, current_level, CURRENT_LEVEL_START); */ if (level > current_level) { for (current_level++; current_level < level; current_level++) SET_LEVEL_START; SET_LEVEL_START; } else if (level < current_level) { current_level = level; for (; num_jumps > 0 && future_jumps[num_jumps - 1] >= CURRENT_LEVEL_START; num_jumps--) PUT_ADDR(future_jumps[num_jumps - 1], pattern_offset); } switch (op) { case Rend: break; case Rnormal: normal_char: opcode = Cexact; store_opcode_and_arg: /* opcode & ch must be set */ SET_LEVEL_START; ALLOC(2); STORE(opcode); STORE(ch); break; case Ranychar: opcode = Canychar; store_opcode: SET_LEVEL_START; ALLOC(1); STORE(opcode); break; case Rquote: set_error("Rquote"); /*NOTREACHED*/ case Rbol: if (!beginning_context) { if (regexp_context_indep_ops) goto op_error; else goto normal_char; } opcode = Cbol; goto store_opcode; case Reol: if (!((pos >= size) || ((regexp_syntax & RE_NO_BK_VBAR) ? (regex[pos] == '\174') : (pos + 1 < size && regex[pos] == '\134' && regex[pos + 1] == '\174')) || ((regexp_syntax & RE_NO_BK_PARENS) ? (regex[pos] == ')') : (pos + 1 < size && regex[pos] == '\134' && regex[pos + 1] == ')')))) { if (regexp_context_indep_ops) goto op_error; else goto normal_char; } opcode = Ceol; goto store_opcode; /* NOTREACHED */ break; case Roptional: if (beginning_context) { if (regexp_context_indep_ops) goto op_error; else goto normal_char; } if (CURRENT_LEVEL_START == pattern_offset) break; /* ignore empty patterns for ? */ ALLOC(3); INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, pattern_offset + 3); break; case Rstar: case Rplus: if (beginning_context) { if (regexp_context_indep_ops) goto op_error; else goto normal_char; } if (CURRENT_LEVEL_START == pattern_offset) break; /* ignore empty patterns for + and * */ ALLOC(9); INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, pattern_offset + 6); INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START); if (op == Rplus) /* jump over initial failure_jump */ INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump, CURRENT_LEVEL_START + 6); break; case Ror: ALLOC(6); INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, pattern_offset + 6); if (num_jumps >= MAX_NESTING) goto too_complex; STORE(Cjump); future_jumps[num_jumps++] = pattern_offset; STORE(0); STORE(0); SET_LEVEL_START; break; case Ropenpar: { SET_LEVEL_START; if (next_register < RE_NREGS) { bufp->uses_registers = 1; ALLOC(2); STORE(Cstart_memory); STORE(next_register); open_registers[num_open_registers++] = next_register; bufp->num_registers++; next_register++; } paren_depth++; PUSH_LEVEL_STARTS; current_level = 0; SET_LEVEL_START; break; } case Rclosepar: if (paren_depth <= 0) goto parenthesis_error; POP_LEVEL_STARTS; current_level = precedences[Ropenpar]; paren_depth--; if (paren_depth < num_open_registers) { bufp->uses_registers = 1; ALLOC(2); STORE(Cend_memory); num_open_registers--; STORE(open_registers[num_open_registers]); } break; case Rmemory: if (ch == '0') goto bad_match_register; if (!(ch >= '0' && ch <= '9')) { goto bad_match_register; } bufp->uses_registers = 1; opcode = Cmatch_memory; ch -= '0'; goto store_opcode_and_arg; case Rextended_memory: NEXTCHAR(ch); if (ch < '0' || ch > '9') goto bad_match_register; NEXTCHAR(a); if (a < '0' || a > '9') goto bad_match_register; ch = 10 * (a - '0') + ch - '0'; if (ch == 0 || ch >= RE_NREGS) goto bad_match_register; bufp->uses_registers = 1; opcode = Cmatch_memory; goto store_opcode_and_arg; case Ropenset: { int complement; int prev; int offset; int range; int firstchar; SET_LEVEL_START; ALLOC(1 + 256 / 8); STORE(Cset); offset = pattern_offset; for (a = 0; a < 256 / 8; a++) STORE(0); NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; if (ch == '\136') { complement = 1; NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; } else complement = 0; prev = -1; range = 0; firstchar = 1; while (ch != '\135' || firstchar) { firstchar = 0; if (regexp_ansi_sequences && ch == '\134') { NEXTCHAR(ch); ANSI_TRANSLATE(ch); } if (range) { for (a = prev; a <= (int)ch; a++) SETBIT(pattern, offset, a); prev = -1; range = 0; } else if (prev != -1 && ch == '-') range = 1; else { SETBIT(pattern, offset, ch); prev = ch; } NEXTCHAR(ch); if (translate) ch = translate[(unsigned char)ch]; } if (range) SETBIT(pattern, offset, '-'); if (complement) { for (a = 0; a < 256 / 8; a++) pattern[offset + a] ^= 0xff; } break; } case Rbegbuf: { opcode = Cbegbuf; goto store_opcode; } case Rendbuf: { opcode = Cendbuf; goto store_opcode; } case Rwordchar: { opcode = Csyntaxspec; ch = Sword; goto store_opcode_and_arg; } case Rnotwordchar: { opcode = Cnotsyntaxspec; ch = Sword; goto store_opcode_and_arg; } case Rwordbeg: { opcode = Cwordbeg; goto store_opcode; } case Rwordend: { opcode = Cwordend; goto store_opcode; } case Rwordbound: { opcode = Cwordbound; goto store_opcode; } case Rnotwordbound: { opcode = Cnotwordbound; goto store_opcode; } default: { abort(); } } beginning_context = (op == Ropenpar || op == Ror); } if (starts_base != 0) goto parenthesis_error; // assert(num_jumps == 0); ALLOC(1); STORE(Cend); SET_FIELDS; if (!re_optimize(bufp)) return "Optimization error"; return NULL; op_error: SET_FIELDS; return "Badly placed special character"; bad_match_register: SET_FIELDS; return "Bad match register number"; hex_error: SET_FIELDS; return "Bad hexadecimal number"; parenthesis_error: SET_FIELDS; return "Badly placed parenthesis"; out_of_memory: SET_FIELDS; return "Out of memory"; ends_prematurely: SET_FIELDS; return "Regular expression ends prematurely"; too_complex: SET_FIELDS; return "Regular expression too complex"; } #undef CHARAT #undef NEXTCHAR #undef GETHEX #undef ALLOC #undef STORE #undef CURRENT_LEVEL_START #undef SET_LEVEL_START #undef PUSH_LEVEL_STARTS #undef POP_LEVEL_STARTS #undef PUT_ADDR #undef INSERT_JUMP #undef SETBIT #undef SET_FIELDS #define PREFETCH if (text == textend) goto fail #define NEXTCHAR(var) \ PREFETCH; \ var = (unsigned char)*text++; \ if (translate) \ var = translate[var] int regcomp(regex_t * bufp, const char *regex, int cflags) { memset(bufp, 0, sizeof(regex_t)); bufp->cflags = cflags; if (bufp->cflags & REG_ICASE) { char *p, *lcase = bstrdup(regex); for( p = lcase; *p ; p++) { *p = tolower(*p); } re_compile_pattern(bufp, (unsigned char *)lcase); bfree(lcase); } else { re_compile_pattern(bufp, (unsigned char *)regex); } if (got_error) { return -1; } return 0; } void re_registers_to_regmatch(regexp_registers_t old_regs, regmatch_t pmatch[], size_t nmatch) { size_t i=0; /* We have to set the last entry to -1 */ nmatch = nmatch - 1; for (i=0; (i < nmatch) && (old_regs->start[i] > -1) ; i++) { pmatch[i].rm_so = old_regs->start[i]; pmatch[i].rm_eo = old_regs->end[i]; } pmatch[i].rm_eo = pmatch[i].rm_so = -1; } int regexec(regex_t * preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) { int stat; int len = strlen(string); struct re_registers regs; stat = re_search(preg, (unsigned char *)string, len, 0, len, ®s); if (stat >= 0 && nmatch > 0) { re_registers_to_regmatch(®s, pmatch, nmatch); } /* stat is the start position in the string base 0 where * the pattern was found or negative if not found. */ return stat < 0 ? -1 : 0; } size_t regerror(int errcode, regex_t * preg, char *errbuf, size_t errbuf_size) { bstrncpy(errbuf, preg->errmsg, errbuf_size); return 0; } void regfree(regex_t * preg) { if (preg->lcase) { free_pool_memory(preg->lcase); preg->lcase = NULL; } if (preg->buffer) { free(preg->buffer); preg->buffer = NULL; } } int re_match(regex_t * bufp, unsigned char *string, int size, int pos, regexp_registers_t old_regs) { unsigned char *code; unsigned char *translate; unsigned char *text; unsigned char *textstart; unsigned char *textend; int a; int b; int ch; int reg; int match_end; unsigned char *regstart; unsigned char *regend; int regsize; match_state state; // assert(pos >= 0 && size >= 0); // assert(pos <= size); text = string + pos; textstart = string; textend = string + size; code = bufp->buffer; translate = bufp->translate; NEW_STATE(state, bufp->num_registers); continue_matching: switch (*code++) { case Cend: { match_end = text - textstart; if (old_regs) { old_regs->start[0] = pos; old_regs->end[0] = match_end; if (!bufp->uses_registers) { for (a = 1; a < RE_NREGS; a++) { old_regs->start[a] = -1; old_regs->end[a] = -1; } } else { for (a = 1; a < bufp->num_registers; a++) { if ((GET_REG_START(state, a) == NULL) || (GET_REG_END(state, a) == NULL)) { old_regs->start[a] = -1; old_regs->end[a] = -1; continue; } old_regs->start[a] = GET_REG_START(state, a) - textstart; old_regs->end[a] = GET_REG_END(state, a) - textstart; } for (; a < RE_NREGS; a++) { old_regs->start[a] = -1; old_regs->end[a] = -1; } } } FREE_STATE(state); return match_end - pos; } case Cbol: { if (text == textstart || text[-1] == '\n') goto continue_matching; goto fail; } case Ceol: { if (text == textend || *text == '\n') goto continue_matching; goto fail; } case Cset: { NEXTCHAR(ch); if (code[ch / 8] & (1 << (ch & 7))) { code += 256 / 8; goto continue_matching; } goto fail; } case Cexact: { NEXTCHAR(ch); if (ch != (unsigned char)*code++) goto fail; goto continue_matching; } case Canychar: { NEXTCHAR(ch); if (ch == '\n') goto fail; goto continue_matching; } case Cstart_memory: { reg = *code++; SET_REG_START(state, reg, text, goto error); goto continue_matching; } case Cend_memory: { reg = *code++; SET_REG_END(state, reg, text, goto error); goto continue_matching; } case Cmatch_memory: { reg = *code++; regstart = GET_REG_START(state, reg); regend = GET_REG_END(state, reg); if ((regstart == NULL) || (regend == NULL)) goto fail; /* or should we just match nothing? */ regsize = regend - regstart; if (regsize > (textend - text)) goto fail; if (translate) { for (; regstart < regend; regstart++, text++) if (translate[*regstart] != translate[*text]) goto fail; } else for (; regstart < regend; regstart++, text++) if (*regstart != *text) goto fail; goto continue_matching; } case Cupdate_failure_jump: { UPDATE_FAILURE(state, text, goto error); /* fall to next case */ } /* treat Cstar_jump just like Cjump if it hasn't been optimized */ case Cstar_jump: case Cjump: { a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; code += (int)SHORT(a); if (code < bufp->buffer || bufp->buffer + bufp->used < code) { set_error("Regex VM jump out of bounds (Cjump)"); FREE_STATE(state); return -2; } goto continue_matching; } case Cdummy_failure_jump: { unsigned char *failuredest; a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; a = (int)SHORT(a); // assert(*code == Cfailure_jump); b = (unsigned char)code[1]; b |= (unsigned char)code[2] << 8; failuredest = code + (int)SHORT(b) + 3; if (failuredest < bufp->buffer || bufp->buffer + bufp->used < failuredest) { set_error ("Regex VM jump out of bounds (Cdummy_failure_jump failuredest)"); FREE_STATE(state); return -2; } PUSH_FAILURE(state, failuredest, NULL, goto error); code += a; if (code < bufp->buffer || bufp->buffer + bufp->used < code) { set_error("Regex VM jump out of bounds (Cdummy_failure_jump code)"); FREE_STATE(state); return -2; } goto continue_matching; } case Cfailure_jump: { a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; a = (int)SHORT(a); if (code + a < bufp->buffer || bufp->buffer + bufp->used < code + a) { set_error("Regex VM jump out of bounds (Cfailure_jump)"); FREE_STATE(state); return -2; } PUSH_FAILURE(state, code + a, text, goto error); goto continue_matching; } case Crepeat1: { unsigned char *pinst; a = (unsigned char)*code++; a |= (unsigned char)*code++ << 8; a = (int)SHORT(a); pinst = code + a; if (pinst < bufp->buffer || bufp->buffer + bufp->used < pinst) { set_error("Regex VM jump out of bounds (Crepeat1)"); FREE_STATE(state); return -2; } /* pinst is sole instruction in loop, and it matches a * single character. Since Crepeat1 was originally a * Cupdate_failure_jump, we also know that backtracking * is useless: so long as the single-character * expression matches, it must be used. Also, in the * case of +, we've already matched one character, so + * can't fail: nothing here can cause a failure. */ switch (*pinst++) { case Cset: { if (translate) { while (text < textend) { ch = translate[(unsigned char)*text]; if (pinst[ch / 8] & (1 << (ch & 7))) text++; else break; } } else { while (text < textend) { ch = (unsigned char)*text; if (pinst[ch / 8] & (1 << (ch & 7))) text++; else break; } } break; } case Cexact: { ch = (unsigned char)*pinst; if (translate) { while (text < textend && translate[(unsigned char)*text] == ch) text++; } else { while (text < textend && (unsigned char)*text == ch) text++; } break; } case Canychar: { while (text < textend && (unsigned char)*text != '\n') text++; break; } case Csyntaxspec: { a = (unsigned char)*pinst; if (translate) { while (text < textend && (SYNTAX(translate[*text]) & a)) text++; } else { while (text < textend && (SYNTAX(*text) & a)) text++; } break; } case Cnotsyntaxspec: { a = (unsigned char)*pinst; if (translate) { while (text < textend && !(SYNTAX(translate[*text]) & a)) text++; } else { while (text < textend && !(SYNTAX(*text) & a)) text++; } break; } default: { FREE_STATE(state); set_error("Unknown regex opcode: memory corrupted?"); return -2; /*NOTREACHED*/} } /* due to the funky way + and * are compiled, the top * failure- stack entry at this point is actually a * success entry -- update it & pop it */ UPDATE_FAILURE(state, text, goto error); goto fail; /* i.e., succeed */ } case Cbegbuf: { if (text == textstart) goto continue_matching; goto fail; } case Cendbuf: { if (text == textend) goto continue_matching; goto fail; } case Cwordbeg: { if (text == textend) goto fail; if (!(SYNTAX(*text) & Sword)) goto fail; if (text == textstart) goto continue_matching; if (!(SYNTAX(text[-1]) & Sword)) goto continue_matching; goto fail; } case Cwordend: { if (text == textstart) goto fail; if (!(SYNTAX(text[-1]) & Sword)) goto fail; if (text == textend) goto continue_matching; if (!(SYNTAX(*text) & Sword)) goto continue_matching; goto fail; } case Cwordbound: { /* Note: as in gnu regexp, this also matches at the * beginning and end of buffer. */ if (text == textstart || text == textend) goto continue_matching; if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)) goto continue_matching; goto fail; } case Cnotwordbound: { /* Note: as in gnu regexp, this never matches at the * beginning and end of buffer. */ if (text == textstart || text == textend) goto fail; if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))) goto continue_matching; goto fail; } case Csyntaxspec: { NEXTCHAR(ch); if (!(SYNTAX(ch) & (unsigned char)*code++)) goto fail; goto continue_matching; } case Cnotsyntaxspec: { NEXTCHAR(ch); if (SYNTAX(ch) & (unsigned char)*code++) goto fail; goto continue_matching; } default: { FREE_STATE(state); set_error("Unknown regex opcode: memory corrupted?"); return -2; /*NOTREACHED*/} } #if 0 /* This line is never reached --Guido */ abort(); #endif /* *NOTREACHED */ /* Using "break;" in the above switch statement is equivalent to "goto fail;" */ fail: POP_FAILURE(state, code, text, goto done_matching, goto error); goto continue_matching; done_matching: /* if(translated != NULL) */ /* free(translated); */ FREE_STATE(state); return -1; error: /* if (translated != NULL) */ /* free(translated); */ FREE_STATE(state); return -2; } #undef PREFETCH #undef NEXTCHAR int re_search(regex_t * bufp, unsigned char *str, int size, int pos, int range, regexp_registers_t regs) { unsigned char *fastmap; unsigned char *translate; unsigned char *text; unsigned char *partstart; unsigned char *partend; int dir; int ret; unsigned char anchor; unsigned char *string = str; if (bufp->cflags & REG_ICASE) { /* we must use string in lowercase */ int len = strlen((const char *)str); if (!bufp->lcase) { bufp->lcase = get_pool_memory(PM_FNAME); } bufp->lcase = check_pool_memory_size(bufp->lcase, len+1); unsigned char *dst = (unsigned char *)bufp->lcase; while (*string) { *dst++ = tolower(*string++); } *dst = '\0'; string = (unsigned char *)bufp->lcase; } // assert(size >= 0 && pos >= 0); // assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */ fastmap = bufp->fastmap; translate = bufp->translate; if (fastmap && !bufp->fastmap_accurate) { re_compile_fastmap(bufp); if (got_error) return -2; } anchor = bufp->anchor; if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */ fastmap = NULL; if (range < 0) { dir = -1; range = -range; } else dir = 1; if (anchor == 2) { if (pos != 0) return -1; else range = 0; } for (; range >= 0; range--, pos += dir) { if (fastmap) { if (dir == 1) { /* searching forwards */ text = string + pos; partend = string + size; partstart = text; if (translate) while (text != partend && !fastmap[(unsigned char)translate[(unsigned char)*text]]) text++; else while (text != partend && !fastmap[(unsigned char)*text]) text++; pos += text - partstart; range -= text - partstart; if (pos == size && bufp->can_be_null == 0) return -1; } else { /* searching backwards */ text = string + pos; partstart = string + pos - range; partend = text; if (translate) while (text != partstart && !fastmap[(unsigned char) translate[(unsigned char)*text]]) text--; else while (text != partstart && !fastmap[(unsigned char)*text]) text--; pos -= partend - text; range -= partend - text; } } if (anchor == 1) { /* anchored to begline */ if (pos > 0 && (string[pos - 1] != '\n')) continue; } // assert(pos >= 0 && pos <= size); ret = re_match(bufp, string, size, pos, regs); if (ret >= 0) return pos; if (ret == -2) return -2; } return -1; } /* ** Local Variables: ** mode: c ** c-file-style: "python" ** End: */