10b57cec5SDimitry Andric /*-
20b57cec5SDimitry Andric * This code is derived from OpenBSD's libc/regex, original license follows:
30b57cec5SDimitry Andric *
40b57cec5SDimitry Andric * Copyright (c) 1992, 1993, 1994 Henry Spencer.
50b57cec5SDimitry Andric * Copyright (c) 1992, 1993, 1994
60b57cec5SDimitry Andric * The Regents of the University of California. All rights reserved.
70b57cec5SDimitry Andric *
80b57cec5SDimitry Andric * This code is derived from software contributed to Berkeley by
90b57cec5SDimitry Andric * Henry Spencer.
100b57cec5SDimitry Andric *
110b57cec5SDimitry Andric * Redistribution and use in source and binary forms, with or without
120b57cec5SDimitry Andric * modification, are permitted provided that the following conditions
130b57cec5SDimitry Andric * are met:
140b57cec5SDimitry Andric * 1. Redistributions of source code must retain the above copyright
150b57cec5SDimitry Andric * notice, this list of conditions and the following disclaimer.
160b57cec5SDimitry Andric * 2. Redistributions in binary form must reproduce the above copyright
170b57cec5SDimitry Andric * notice, this list of conditions and the following disclaimer in the
180b57cec5SDimitry Andric * documentation and/or other materials provided with the distribution.
190b57cec5SDimitry Andric * 3. Neither the name of the University nor the names of its contributors
200b57cec5SDimitry Andric * may be used to endorse or promote products derived from this software
210b57cec5SDimitry Andric * without specific prior written permission.
220b57cec5SDimitry Andric *
230b57cec5SDimitry Andric * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
240b57cec5SDimitry Andric * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
250b57cec5SDimitry Andric * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
260b57cec5SDimitry Andric * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
270b57cec5SDimitry Andric * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
280b57cec5SDimitry Andric * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
290b57cec5SDimitry Andric * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
300b57cec5SDimitry Andric * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
310b57cec5SDimitry Andric * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
320b57cec5SDimitry Andric * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
330b57cec5SDimitry Andric * SUCH DAMAGE.
340b57cec5SDimitry Andric *
350b57cec5SDimitry Andric * @(#)regexec.c 8.3 (Berkeley) 3/20/94
360b57cec5SDimitry Andric */
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric /*
390b57cec5SDimitry Andric * the outer shell of llvm_regexec()
400b57cec5SDimitry Andric *
410b57cec5SDimitry Andric * This file includes engine.inc *twice*, after muchos fiddling with the
420b57cec5SDimitry Andric * macros that code uses. This lets the same code operate on two different
430b57cec5SDimitry Andric * representations for state sets.
440b57cec5SDimitry Andric */
450b57cec5SDimitry Andric #include <sys/types.h>
460b57cec5SDimitry Andric #include <stdio.h>
470b57cec5SDimitry Andric #include <stdlib.h>
480b57cec5SDimitry Andric #include <string.h>
490b57cec5SDimitry Andric #include <limits.h>
500b57cec5SDimitry Andric #include <ctype.h>
510b57cec5SDimitry Andric #include "regex_impl.h"
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric #include "regutils.h"
540b57cec5SDimitry Andric #include "regex2.h"
550b57cec5SDimitry Andric
560b57cec5SDimitry Andric /* macros for manipulating states, small version */
570b57cec5SDimitry Andric /* FIXME: 'states' is assumed as 'long' on small version. */
580b57cec5SDimitry Andric #define states1 long /* for later use in llvm_regexec() decision */
590b57cec5SDimitry Andric #define states states1
600b57cec5SDimitry Andric #define CLEAR(v) ((v) = 0)
610b57cec5SDimitry Andric #define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
620b57cec5SDimitry Andric #define SET1(v, n) ((v) |= (unsigned long)1 << (n))
630b57cec5SDimitry Andric #define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
640b57cec5SDimitry Andric #define ASSIGN(d, s) ((d) = (s))
650b57cec5SDimitry Andric #define EQ(a, b) ((a) == (b))
660b57cec5SDimitry Andric #define STATEVARS long dummy /* dummy version */
670b57cec5SDimitry Andric #define STATESETUP(m, n) /* nothing */
680b57cec5SDimitry Andric #define STATETEARDOWN(m) /* nothing */
690b57cec5SDimitry Andric #define SETUP(v) ((v) = 0)
700b57cec5SDimitry Andric #define onestate long
710b57cec5SDimitry Andric #define INIT(o, n) ((o) = (unsigned long)1 << (n))
720b57cec5SDimitry Andric #define INC(o) ((o) = (unsigned long)(o) << 1)
730b57cec5SDimitry Andric #define ISSTATEIN(v, o) (((v) & (o)) != 0)
740b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
750b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
760b57cec5SDimitry Andric #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n))
770b57cec5SDimitry Andric #define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n))
780b57cec5SDimitry Andric #define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0)
790b57cec5SDimitry Andric /* function names */
800b57cec5SDimitry Andric #define SNAMES /* engine.inc looks after details */
810b57cec5SDimitry Andric
820b57cec5SDimitry Andric #include "regengine.inc"
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric /* now undo things */
850b57cec5SDimitry Andric #undef states
860b57cec5SDimitry Andric #undef CLEAR
870b57cec5SDimitry Andric #undef SET0
880b57cec5SDimitry Andric #undef SET1
890b57cec5SDimitry Andric #undef ISSET
900b57cec5SDimitry Andric #undef ASSIGN
910b57cec5SDimitry Andric #undef EQ
920b57cec5SDimitry Andric #undef STATEVARS
930b57cec5SDimitry Andric #undef STATESETUP
940b57cec5SDimitry Andric #undef STATETEARDOWN
950b57cec5SDimitry Andric #undef SETUP
960b57cec5SDimitry Andric #undef onestate
970b57cec5SDimitry Andric #undef INIT
980b57cec5SDimitry Andric #undef INC
990b57cec5SDimitry Andric #undef ISSTATEIN
1000b57cec5SDimitry Andric #undef FWD
1010b57cec5SDimitry Andric #undef BACK
1020b57cec5SDimitry Andric #undef ISSETBACK
1030b57cec5SDimitry Andric #undef SNAMES
1040b57cec5SDimitry Andric
1050b57cec5SDimitry Andric /* macros for manipulating states, large version */
1060b57cec5SDimitry Andric #define states char *
1070b57cec5SDimitry Andric #define CLEAR(v) memset(v, 0, m->g->nstates)
1080b57cec5SDimitry Andric #define SET0(v, n) ((v)[n] = 0)
1090b57cec5SDimitry Andric #define SET1(v, n) ((v)[n] = 1)
1100b57cec5SDimitry Andric #define ISSET(v, n) ((v)[n])
1110b57cec5SDimitry Andric #define ASSIGN(d, s) memmove(d, s, m->g->nstates)
1120b57cec5SDimitry Andric #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
1130b57cec5SDimitry Andric #define STATEVARS long vn; char *space
1140b57cec5SDimitry Andric #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
1150b57cec5SDimitry Andric if ((m)->space == NULL) return(REG_ESPACE); \
1160b57cec5SDimitry Andric (m)->vn = 0; }
1170b57cec5SDimitry Andric #define STATETEARDOWN(m) { free((m)->space); }
1180b57cec5SDimitry Andric #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
1190b57cec5SDimitry Andric #define onestate long
1200b57cec5SDimitry Andric #define INIT(o, n) ((o) = (n))
1210b57cec5SDimitry Andric #define INC(o) ((o)++)
1220b57cec5SDimitry Andric #define ISSTATEIN(v, o) ((v)[o])
1230b57cec5SDimitry Andric /* some abbreviations; note that some of these know variable names! */
1240b57cec5SDimitry Andric /* do "if I'm here, I can also be there" etc without branches */
1250b57cec5SDimitry Andric #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
1260b57cec5SDimitry Andric #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
1270b57cec5SDimitry Andric #define ISSETBACK(v, n) ((v)[here - (n)])
1280b57cec5SDimitry Andric /* function names */
1290b57cec5SDimitry Andric #define LNAMES /* flag */
1300b57cec5SDimitry Andric
1310b57cec5SDimitry Andric #include "regengine.inc"
1320b57cec5SDimitry Andric
1330b57cec5SDimitry Andric /*
1340b57cec5SDimitry Andric - llvm_regexec - interface for matching
1350b57cec5SDimitry Andric *
1360b57cec5SDimitry Andric * We put this here so we can exploit knowledge of the state representation
1370b57cec5SDimitry Andric * when choosing which matcher to call. Also, by this point the matchers
1380b57cec5SDimitry Andric * have been prototyped.
1390b57cec5SDimitry Andric */
1400b57cec5SDimitry Andric int /* 0 success, REG_NOMATCH failure */
llvm_regexec(const llvm_regex_t * preg,const char * string,size_t nmatch,llvm_regmatch_t pmatch[],int eflags)1410b57cec5SDimitry Andric llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch,
1420b57cec5SDimitry Andric llvm_regmatch_t pmatch[], int eflags)
1430b57cec5SDimitry Andric {
1440b57cec5SDimitry Andric struct re_guts *g = preg->re_g;
1450b57cec5SDimitry Andric #ifdef REDEBUG
1460b57cec5SDimitry Andric # define GOODFLAGS(f) (f)
1470b57cec5SDimitry Andric #else
1480b57cec5SDimitry Andric # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
1490b57cec5SDimitry Andric #endif
1500b57cec5SDimitry Andric
1510b57cec5SDimitry Andric if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
1520b57cec5SDimitry Andric return(REG_BADPAT);
1530b57cec5SDimitry Andric assert(!(g->iflags®EX_BAD));
1540b57cec5SDimitry Andric if (g->iflags®EX_BAD) /* backstop for no-debug case */
1550b57cec5SDimitry Andric return(REG_BADPAT);
1560b57cec5SDimitry Andric eflags = GOODFLAGS(eflags);
1570b57cec5SDimitry Andric
1580b57cec5SDimitry Andric if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE))
1590b57cec5SDimitry Andric return(smatcher(g, string, nmatch, pmatch, eflags));
1600b57cec5SDimitry Andric else
1610b57cec5SDimitry Andric return(lmatcher(g, string, nmatch, pmatch, eflags));
1620b57cec5SDimitry Andric }
163