1 /* 2 * contrib/pg_trgm/trgm.h 3 */ 4 #ifndef __TRGM_H__ 5 #define __TRGM_H__ 6 7 #include "access/gist.h" 8 #include "access/itup.h" 9 #include "storage/bufpage.h" 10 11 /* 12 * Options ... but note that trgm_regexp.c effectively assumes these values 13 * of LPADDING and RPADDING. 14 */ 15 #define LPADDING 2 16 #define RPADDING 1 17 #define KEEPONLYALNUM 18 /* 19 * Caution: IGNORECASE macro means that trigrams are case-insensitive. 20 * If this macro is disabled, the ~* and ~~* operators must be removed from 21 * the operator classes, because we can't handle case-insensitive wildcard 22 * search with case-sensitive trigrams. Failure to do this will result in 23 * "cannot handle ~*(~~*) with case-sensitive trigrams" errors. 24 */ 25 #define IGNORECASE 26 #define DIVUNION 27 28 /* operator strategy numbers */ 29 #define SimilarityStrategyNumber 1 30 #define DistanceStrategyNumber 2 31 #define LikeStrategyNumber 3 32 #define ILikeStrategyNumber 4 33 #define RegExpStrategyNumber 5 34 #define RegExpICaseStrategyNumber 6 35 #define WordSimilarityStrategyNumber 7 36 #define WordDistanceStrategyNumber 8 37 38 typedef char trgm[3]; 39 40 #define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) ) 41 #define CMPPCHAR(a,b,i) CMPCHAR( *(((const char*)(a))+i), *(((const char*)(b))+i) ) 42 #define CMPTRGM(a,b) ( CMPPCHAR(a,b,0) ? CMPPCHAR(a,b,0) : ( CMPPCHAR(a,b,1) ? CMPPCHAR(a,b,1) : CMPPCHAR(a,b,2) ) ) 43 44 #define CPTRGM(a,b) do { \ 45 *(((char*)(a))+0) = *(((char*)(b))+0); \ 46 *(((char*)(a))+1) = *(((char*)(b))+1); \ 47 *(((char*)(a))+2) = *(((char*)(b))+2); \ 48 } while(0) 49 50 #ifdef KEEPONLYALNUM 51 #define ISWORDCHR(c) (t_isalpha(c) || t_isdigit(c)) 52 #define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) 53 #else 54 #define ISWORDCHR(c) (!t_isspace(c)) 55 #define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) ) 56 #endif 57 #define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) 58 59 #define ISESCAPECHAR(x) (*(x) == '\\') /* Wildcard escape character */ 60 #define ISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%') /* Wildcard 61 * meta-character */ 62 63 typedef struct 64 { 65 int32 vl_len_; /* varlena header (do not touch directly!) */ 66 uint8 flag; 67 char data[FLEXIBLE_ARRAY_MEMBER]; 68 } TRGM; 69 70 #define TRGMHDRSIZE (VARHDRSZ + sizeof(uint8)) 71 72 /* gist */ 73 #define BITBYTE 8 74 #define SIGLENINT 3 /* >122 => key will toast, so very slow!!! */ 75 #define SIGLEN ( sizeof(int)*SIGLENINT ) 76 77 #define SIGLENBIT (SIGLEN*BITBYTE - 1) /* see makesign */ 78 79 typedef char BITVEC[SIGLEN]; 80 typedef char *BITVECP; 81 82 #define LOOPBYTE \ 83 for(i=0;i<SIGLEN;i++) 84 85 #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) ) 86 #define GETBITBYTE(x,i) ( (((char)(x)) >> (i)) & 0x01 ) 87 #define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) 88 #define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) 89 #define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 ) 90 91 #define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) 92 #define HASH(sign, val) SETBIT((sign), HASHVAL(val)) 93 94 #define ARRKEY 0x01 95 #define SIGNKEY 0x02 96 #define ALLISTRUE 0x04 97 98 #define ISARRKEY(x) ( ((TRGM*)x)->flag & ARRKEY ) 99 #define ISSIGNKEY(x) ( ((TRGM*)x)->flag & SIGNKEY ) 100 #define ISALLTRUE(x) ( ((TRGM*)x)->flag & ALLISTRUE ) 101 102 #define CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) 103 #define GETSIGN(x) ( (BITVECP)( (char*)x+TRGMHDRSIZE ) ) 104 #define GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) 105 #define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) 106 107 /* 108 * If DIVUNION is defined then similarity formula is: 109 * count / (len1 + len2 - count) 110 * else if DIVUNION is not defined then similarity formula is: 111 * count / max(len1, len2) 112 */ 113 #ifdef DIVUNION 114 #define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) ((len1) + (len2) - (count))) 115 #else 116 #define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2))) 117 #endif 118 119 typedef struct TrgmPackedGraph TrgmPackedGraph; 120 121 extern double similarity_threshold; 122 extern double word_similarity_threshold; 123 124 extern uint32 trgm2int(trgm *ptr); 125 extern void compact_trigram(trgm *tptr, char *str, int bytelen); 126 extern TRGM *generate_trgm(char *str, int slen); 127 extern TRGM *generate_wildcard_trgm(const char *str, int slen); 128 extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact); 129 extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2); 130 extern bool *trgm_presence_map(TRGM *query, TRGM *key); 131 extern TRGM *createTrgmNFA(text *text_re, Oid collation, 132 TrgmPackedGraph **graph, MemoryContext rcontext); 133 extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check); 134 135 #endif /* __TRGM_H__ */ 136