1 /*
2  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
3  *
4  * Development of this software was funded, in part, by Cray Research Inc.,
5  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
6  * Corporation, none of whom are responsible for the results.  The author
7  * thanks all of them.
8  *
9  * Redistribution and use in source and binary forms -- with or without
10  * modification -- are permitted for any purpose, provided that
11  * redistributions in source form retain this entire copyright notice and
12  * indicate the origin and nature of any modifications.
13  *
14  * I'd appreciate being given credit for this package in the documentation
15  * of software which uses it, but that is not a requirement.
16  *
17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
20  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
23  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
25  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
26  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * src/include/regex/regcustom.h
29  */
30 
31 /* headers if any */
32 
33 /*
34  * It's against Postgres coding conventions to include postgres.h in a
35  * header file, but we allow the violation here because the regexp library
36  * files specifically intend this file to supply application-dependent
37  * headers, and are careful to include this file before anything else.
38  */
39 #include "postgres.h"
40 
41 #include <ctype.h>
42 #include <limits.h>
43 
44 /*
45  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
46  * declare them in <wchar.h>.
47  */
48 #ifdef HAVE_WCHAR_H
49 #include <wchar.h>
50 #endif
51 #ifdef HAVE_WCTYPE_H
52 #include <wctype.h>
53 #endif
54 
55 #include "mb/pg_wchar.h"
56 
57 #include "miscadmin.h"			/* needed by rcancelrequested/rstacktoodeep */
58 
59 
60 /* overrides for regguts.h definitions, if any: function-pointer declarator, allocator hooks, assert */
61 #define FUNCPTR(name, args) (*name) args	/* expands to (*name) followed by args verbatim */
62 #define MALLOC(n)		malloc(n)	/* allocate n bytes with plain malloc() */
63 #define FREE(p)			free(VS(p))	/* VS() is not defined in this file; presumably from regguts.h */
64 #define REALLOC(p,n)	realloc(VS(p),n)	/* resize with plain realloc(); p goes through VS() as in FREE */
65 #define assert(x)		Assert(x)	/* map assert() onto Assert(), presumably the postgres.h one */
66 
67 /* internal character type and related limits; pg_wchar is declared elsewhere, presumably mb/pg_wchar.h */
68 typedef pg_wchar chr;			/* the type itself */
69 typedef unsigned uchr;			/* unsigned type that will hold a chr */
70 
71 #define CHR(c)	((unsigned char) (c))	/* turn char literal into chr literal; never negative */
72 #define DIGITVAL(c) ((c)-'0')	/* turn chr digit into its value; no range check is done */
73 #define CHRBITS 32				/* bits in a chr; must not use sizeof */
74 #define CHR_MIN 0x00000000		/* smallest chr value */
75 #define CHR_MAX 0x7ffffffe		/* largest chr value; CHR_MAX-CHR_MIN+1 must fit in an int, and
76 								 * CHR_MAX+1 must fit in a chr variable */
77 
78 /*
79  * Check if a chr value is in range.  Ideally we'd just write this as
80  *		((c) >= CHR_MIN && (c) <= CHR_MAX)
81  * However, if chr is unsigned and CHR_MIN is zero, the first part of that
82  * is a no-op, and certain overly-nannyish compilers give warnings about it.
83  * So only the upper bound is tested here.  If you make chr signed and/or
84  * CHR_MIN nonzero, redefine this macro as above.  Callers should assume the
85  * macro may evaluate its argument more than once, even though it does not today.
86  */
87 #define CHR_IS_IN_RANGE(c)	((c) <= CHR_MAX)	/* lower bound deliberately omitted */
88 
89 /*
90  * MAX_SIMPLE_CHR is the cutoff between "simple" and "complicated" processing
91  * in the color map logic.  It should usually be chosen high enough to ensure
92  * that all common characters are <= MAX_SIMPLE_CHR.  However, very large
93  * values are counterproductive, since they increase regex setup time.
94  * Conversely, small values can be helpful for testing the high-color-map
95  * logic with plain old ASCII input.
96  */
97 #define MAX_SIMPLE_CHR	0x7FF	/* 2047 decimal; suitable value for Unicode */
98 
99 /* functions operating on chr; each maps to a pg_wc_* routine that is not declared in this file */
100 #define iscalnum(x) pg_wc_isalnum(x)	/* alphanumeric test */
101 #define iscalpha(x) pg_wc_isalpha(x)	/* alphabetic test */
102 #define iscdigit(x) pg_wc_isdigit(x)	/* digit test */
103 #define iscspace(x) pg_wc_isspace(x)	/* whitespace test */
104 
105 /* and pick up the standard header */
106 #include "regex.h"
107