1 /*
2  * contrib/pg_trgm/trgm.h
3  */
4 #ifndef __TRGM_H__
5 #define __TRGM_H__
6 
7 #include "access/gist.h"
8 #include "access/itup.h"
9 #include "access/stratnum.h"
10 #include "storage/bufpage.h"
11 
/*
 * Compile-time options.
 *
 * LPADDING and RPADDING must not be changed casually: trgm_regexp.c
 * effectively assumes exactly these values.
 */
#define LPADDING		2
#define RPADDING		1

/*
 * KEEPONLYALNUM selects the alphanumeric-only variants of ISWORDCHR and
 * ISPRINTABLECHAR defined further down in this header.
 */
#define KEEPONLYALNUM

/*
 * Caution: IGNORECASE means that trigrams are case-insensitive.
 *
 * If this macro is disabled, the ~* and ~~* operators must be removed from
 * the operator classes, because a case-insensitive wildcard search cannot be
 * handled with case-sensitive trigrams.  Failing to do so results in
 * "cannot handle ~*(~~*) with case-sensitive trigrams" errors.
 */
#define IGNORECASE

/*
 * DIVUNION selects which similarity formula CALCSML expands to (see the
 * CALCSML definition in this header).
 */
#define DIVUNION
28 
/*
 * Operator strategy numbers.
 *
 * The values are fixed identifiers; only the grouping below is editorial.
 */

/* similarity / distance pairs */
#define SimilarityStrategyNumber			1
#define DistanceStrategyNumber				2
#define WordSimilarityStrategyNumber		7
#define WordDistanceStrategyNumber			8
#define StrictWordSimilarityStrategyNumber	9
#define StrictWordDistanceStrategyNumber	10

/* pattern-matching strategies, each with a case-insensitive sibling */
#define LikeStrategyNumber					3
#define ILikeStrategyNumber					4
#define RegExpStrategyNumber				5
#define RegExpICaseStrategyNumber			6
40 
/* A trigram occupies exactly three bytes. */
typedef char trgm[3];

/*
 * Three-way comparison helpers; each one yields -1, 0 or 1.
 *
 * CMPCHAR(a,b)		compares two values using == and <.
 * CMPPCHAR(a,b,i)	compares the bytes at offset i of buffers a and b.  The
 *					bytes are read as plain "char", so the ordering of bytes
 *					with the high bit set follows the platform's char
 *					signedness.
 * CMPTRGM(a,b)		compares two trigrams byte by byte; the first differing
 *					byte decides.
 *
 * The offset argument of CMPPCHAR is parenthesized so that any expression
 * (for instance a conditional) can be passed as the offset.
 *
 * Arguments may be evaluated several times, so they must not have side
 * effects.
 */
#define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
#define CMPPCHAR(a,b,i)  CMPCHAR( ((const char *) (a))[(i)], ((const char *) (b))[(i)] )
#define CMPTRGM(a,b) \
	( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : \
	  ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) )
46 
/*
 * CPTRGM(a,b): copy the three bytes of trigram b into trigram a.
 *
 * Each argument is evaluated exactly once, so expressions with side effects
 * (such as a post-incremented pointer) behave as a function call would.
 * Bytes are copied one at a time in ascending order.  The local names carry
 * a trailing underscore to keep them from colliding with caller variables.
 */
#define CPTRGM(a,b) do {								\
	char	   *cptrgm_dst_ = (char *) (a);				\
	const char *cptrgm_src_ = (const char *) (b);		\
	cptrgm_dst_[0] = cptrgm_src_[0];					\
	cptrgm_dst_[1] = cptrgm_src_[1];					\
	cptrgm_dst_[2] = cptrgm_src_[2];					\
} while(0)
52 
/*
 * Character classification.
 *
 * ISWORDCHR(c) forwards c to the t_is*() helpers.  ISPRINTABLECHAR(a) takes
 * a pointer and inspects the single byte it points at.
 *
 * With KEEPONLYALNUM: a word character is a letter or a digit, and a
 * printable character is an ASCII alphanumeric or a space.
 * Without it: a word character is anything that is not whitespace, and a
 * printable character is any ASCII printable.
 *
 * ISPRINTABLETRGM(t) is true when all three bytes of trigram t are printable.
 */
#ifdef KEEPONLYALNUM
#define ISWORDCHR(c)	( t_isalpha(c) || t_isdigit(c) )
#define ISPRINTABLECHAR(a)	\
	( isascii(((unsigned char *) (a))[0]) && \
	  ( isalnum(((unsigned char *) (a))[0]) || ((unsigned char *) (a))[0] == ' ' ) )
#else
#define ISWORDCHR(c)	( !t_isspace(c) )
#define ISPRINTABLECHAR(a)	\
	( isascii(((unsigned char *) (a))[0]) && isprint(((unsigned char *) (a))[0]) )
#endif
#define ISPRINTABLETRGM(t)	\
	( ISPRINTABLECHAR(((char *) (t)) + 0) && \
	  ISPRINTABLECHAR(((char *) (t)) + 1) && \
	  ISPRINTABLECHAR(((char *) (t)) + 2) )
61 
/* Wildcard escape character: true when x points at a backslash */
#define ISESCAPECHAR(x)		( (x)[0] == '\\' )
/* Wildcard meta-character: true when x points at '_' or '%' */
#define ISWILDCARDCHAR(x)	( (x)[0] == '_' || (x)[0] == '%' )
65 
66 typedef struct
67 {
68 	int32		vl_len_;		/* varlena header (do not touch directly!) */
69 	uint8		flag;
70 	char		data[FLEXIBLE_ARRAY_MEMBER];
71 } TRGM;
72 
73 #define TRGMHDRSIZE		  (VARHDRSZ + sizeof(uint8))
74 
/*
 * gist: signature geometry
 */
#define BITBYTE		8			/* bits per signature byte */
#define SIGLENINT	3			/* >122 => key will toast, so very slow!!! */
#define SIGLEN		( SIGLENINT * sizeof(int) )	/* signature size in bytes */

/* number of bit positions HASHVAL can produce; see makesign */
#define SIGLENBIT	( BITBYTE * SIGLEN - 1 )

/* a signature, and a pointer used to walk one byte by byte */
typedef char BITVEC[SIGLEN];
typedef char *BITVECP;
84 
/*
 * Loop header that steps over every byte index of a signature.  The caller
 * must have a variable named "i" in scope and supplies the loop body.
 */
#define LOOPBYTE	for (i = 0; i < SIGLEN; i++)
87 
/*
 * Bit access on a signature.
 *
 * GETBYTE(x,i)		lvalue for the byte of signature x that holds bit i
 * GETBITBYTE(x,i)	bit i (0..7) of the single byte value x
 * CLRBIT(x,i)		clear bit i of signature x
 * SETBIT(x,i)		set bit i of signature x
 * GETBIT(x,i)		value (0 or 1) of bit i of signature x
 *
 * HASHVAL(val)		map val onto a bit position in [0, SIGLENBIT)
 * HASH(sign,val)	set the bit that val hashes to in signature sign
 *
 * Every expansion is wrapped in parentheses, so the assignment-style macros
 * (CLRBIT, SETBIT, HASH) can be used as operands of ?: or other operators
 * without being split apart by precedence.  The index argument is evaluated
 * more than once and must not have side effects.
 */
#define GETBYTE(x,i)	( ((BITVECP) (x))[(int) ((i) / BITBYTE)] )
#define GETBITBYTE(x,i) ( (((char) (x)) >> (i)) & 0x01 )
#define CLRBIT(x,i)		( GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) )
#define SETBIT(x,i)		( GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) ) )
#define GETBIT(x,i)		( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )

#define HASHVAL(val)	( ((unsigned int) (val)) % SIGLENBIT )
#define HASH(sign, val) ( SETBIT((sign), HASHVAL(val)) )
96 
/* Bits of TRGM.flag */
#define ARRKEY			0x01
#define SIGNKEY			0x02
#define ALLISTRUE		0x04

/*
 * Flag tests.  Each yields the masked flag value (nonzero when the bit is
 * set), not a normalized 0/1.
 *
 * The argument is parenthesized before the cast, so an expression such as
 * "base + offset" is converted to TRGM * as a whole rather than having only
 * its first operand cast.
 */
#define ISARRKEY(x)		( ((TRGM *) (x))->flag & ARRKEY )
#define ISSIGNKEY(x)	( ((TRGM *) (x))->flag & SIGNKEY )
#define ISALLTRUE(x)	( ((TRGM *) (x))->flag & ALLISTRUE )
104 
/*
 * Size and payload access.
 *
 * CALCGTSIZE(flag, len)	total size of a value: the header, plus len
 *							trigrams for an ARRKEY value, nothing for an
 *							ALLISTRUE value, or one signature otherwise
 * GETSIGN(x)				payload of x viewed as a signature
 * GETARR(x)				payload of x viewed as an array of trigrams
 * ARRNELEM(x)				number of trigrams that fit in the payload of x
 *
 * GETSIGN and GETARR parenthesize their argument before casting, so that a
 * pointer expression such as "p + 1" is advanced in units of its own type
 * first and only then offset by TRGMHDRSIZE bytes.
 */
#define CALCGTSIZE(flag, len) \
	( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
#define GETSIGN(x)		( (BITVECP) ( (char *) (x) + TRGMHDRSIZE ) )
#define GETARR(x)		( (trgm *) ( (char *) (x) + TRGMHDRSIZE ) )
#define ARRNELEM(x)		( ( VARSIZE(x) - TRGMHDRSIZE ) / sizeof(trgm) )
109 
/*
 * If DIVUNION is defined then the similarity formula is:
 *		count / (len1 + len2 - count)
 * otherwise it is:
 *		count / max(len1, len2)
 *
 * The result is computed in float4.  The whole expansion is parenthesized so
 * that CALCSML() behaves as a single operand inside a larger expression
 * (for example "x / CALCSML(...)").  Arguments may be evaluated more than
 * once and must not have side effects.
 */
#ifdef DIVUNION
#define CALCSML(count, len1, len2) \
	( ((float4) (count)) / ((float4) ((len1) + (len2) - (count))) )
#else
#define CALCSML(count, len1, len2) \
	( ((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2))) )
#endif
121 
122 typedef struct TrgmPackedGraph TrgmPackedGraph;
123 
124 extern double similarity_threshold;
125 extern double word_similarity_threshold;
126 extern double strict_word_similarity_threshold;
127 
128 extern double index_strategy_get_limit(StrategyNumber strategy);
129 extern uint32 trgm2int(trgm *ptr);
130 extern void compact_trigram(trgm *tptr, char *str, int bytelen);
131 extern TRGM *generate_trgm(char *str, int slen);
132 extern TRGM *generate_wildcard_trgm(const char *str, int slen);
133 extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact);
134 extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
135 extern bool *trgm_presence_map(TRGM *query, TRGM *key);
136 extern TRGM *createTrgmNFA(text *text_re, Oid collation,
137 			  TrgmPackedGraph **graph, MemoryContext rcontext);
138 extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check);
139 
140 #endif							/* __TRGM_H__ */
141