1 /*------------------------------------------------------------------------- 2 * 3 * ts_utils.h 4 * helper utilities for tsearch 5 * 6 * Copyright (c) 1998-2018, PostgreSQL Global Development Group 7 * 8 * src/include/tsearch/ts_utils.h 9 * 10 *------------------------------------------------------------------------- 11 */ 12 #ifndef _PG_TS_UTILS_H_ 13 #define _PG_TS_UTILS_H_ 14 15 #include "nodes/pg_list.h" 16 #include "tsearch/ts_public.h" 17 #include "tsearch/ts_type.h" 18 19 /* 20 * Common parse definitions for tsvector and tsquery 21 */ 22 23 /* tsvector parser support. */ 24 25 struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */ 26 typedef struct TSVectorParseStateData *TSVectorParseState; 27 28 #define P_TSV_OPR_IS_DELIM (1 << 0) 29 #define P_TSV_IS_TSQUERY (1 << 1) 30 #define P_TSV_IS_WEB (1 << 2) 31 32 extern TSVectorParseState init_tsvector_parser(char *input, int flags); 33 extern void reset_tsvector_parser(TSVectorParseState state, char *input); 34 extern bool gettoken_tsvector(TSVectorParseState state, 35 char **token, int *len, 36 WordEntryPos **pos, int *poslen, 37 char **endptr); 38 extern void close_tsvector_parser(TSVectorParseState state); 39 40 /* phrase operator begins with '<' */ 41 #define ISOPERATOR(x) \ 42 ( pg_mblen(x) == 1 && ( *(x) == '!' || \ 43 *(x) == '&' || \ 44 *(x) == '|' || \ 45 *(x) == '(' || \ 46 *(x) == ')' || \ 47 *(x) == '<' \ 48 ) ) 49 50 /* parse_tsquery */ 51 52 struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */ 53 typedef struct TSQueryParserStateData *TSQueryParserState; 54 55 typedef void (*PushFunction) (Datum opaque, TSQueryParserState state, 56 char *token, int tokenlen, 57 int16 tokenweights, /* bitmap as described in 58 * QueryOperand struct */ 59 bool prefix); 60 61 #define P_TSQ_PLAIN (1 << 0) 62 #define P_TSQ_WEB (1 << 1) 63 64 extern TSQuery parse_tsquery(char *buf, 65 PushFunction pushval, 66 Datum opaque, 67 int flags); 68 69 /* Functions for use by PushFunction implementations */ 70 extern void pushValue(TSQueryParserState state, 71 char *strval, int lenval, int16 weight, bool prefix); 72 extern void pushStop(TSQueryParserState state); 73 extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance); 74 75 /* 76 * parse plain text and lexize words 77 */ 78 typedef struct 79 { 80 uint16 len; 81 uint16 nvariant; 82 union 83 { 84 uint16 pos; 85 86 /* 87 * When apos array is used, apos[0] is the number of elements in the 88 * array (excluding apos[0]), and alen is the allocated size of the 89 * array. 90 */ 91 uint16 *apos; 92 } pos; 93 uint16 flags; /* currently, only TSL_PREFIX */ 94 char *word; 95 uint32 alen; 96 } ParsedWord; 97 98 typedef struct 99 { 100 ParsedWord *words; 101 int32 lenwords; 102 int32 curwords; 103 int32 pos; 104 } ParsedText; 105 106 extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen); 107 108 /* 109 * headline framework, flow in common to generate: 110 * 1 parse text with hlparsetext 111 * 2 parser-specific function to find part 112 * 3 generateHeadline to generate result text 113 */ 114 115 extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, 116 char *buf, int32 buflen); 117 extern text *generateHeadline(HeadlineParsedText *prs); 118 119 /* 120 * TSQuery execution support 121 * 122 * TS_execute() executes a tsquery against data that can be represented in 123 * various forms. The TSExecuteCallback callback function is called to check 124 * whether a given primitive tsquery value is matched in the data. 125 */ 126 127 /* 128 * struct ExecPhraseData is passed to a TSExecuteCallback function if we need 129 * lexeme position data (because of a phrase-match operator in the tsquery). 130 * The callback should fill in position data when it returns true (success). 131 * If it cannot return position data, it may leave "data" unchanged, but 132 * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag 133 * and must arrange for a later recheck with position data available. 134 * 135 * The reported lexeme positions must be sorted and unique. Callers must only 136 * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]). 137 * This allows the returned "pos" to point directly to the WordEntryPos 138 * portion of a tsvector value. If "allocated" is true then the pos array 139 * is palloc'd workspace and caller may free it when done. 140 * 141 * "negate" means that the pos array contains positions where the query does 142 * not match, rather than positions where it does. "width" is positive when 143 * the match is wider than one lexeme. Neither of these fields normally need 144 * to be touched by TSExecuteCallback functions; they are used for 145 * phrase-search processing within TS_execute. 146 * 147 * All fields of the ExecPhraseData struct are initially zeroed by caller. 148 */ 149 typedef struct ExecPhraseData 150 { 151 int npos; /* number of positions reported */ 152 bool allocated; /* pos points to palloc'd data? */ 153 bool negate; /* positions are where query is NOT matched */ 154 WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */ 155 int width; /* width of match in lexemes, less 1 */ 156 } ExecPhraseData; 157 158 /* 159 * Signature for TSQuery lexeme check functions 160 * 161 * arg: opaque value passed through from caller of TS_execute 162 * val: lexeme to test for presence of 163 * data: to be filled with lexeme positions; NULL if position data not needed 164 * 165 * Return true if lexeme is present in data, else false. If data is not 166 * NULL, it should be filled with lexeme positions, but function can leave 167 * it as zeroes if position data is not available. 168 */ 169 typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, 170 ExecPhraseData *data); 171 172 /* 173 * Flag bits for TS_execute 174 */ 175 #define TS_EXEC_EMPTY (0x00) 176 /* 177 * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically 178 * evaluated to be true. Useful in cases where NOT cannot be accurately 179 * computed (GiST) or it isn't important (ranking). From TS_execute's 180 * perspective, !CALC_NOT means that the TSExecuteCallback function might 181 * return false-positive indications of a lexeme's presence. 182 */ 183 #define TS_EXEC_CALC_NOT (0x01) 184 /* 185 * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily 186 * in the absence of position information: a true result indicates that the 187 * phrase might be present. Without this flag, OP_PHRASE always returns 188 * false if lexeme position information is not available. 189 */ 190 #define TS_EXEC_PHRASE_NO_POS (0x02) 191 192 extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, 193 TSExecuteCallback chkcond); 194 extern bool tsquery_requires_match(QueryItem *curitem); 195 196 /* 197 * to_ts* - text transformation to tsvector, tsquery 198 */ 199 extern TSVector make_tsvector(ParsedText *prs); 200 extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix); 201 202 /* 203 * Possible strategy numbers for indexes 204 * TSearchStrategyNumber - (tsvector|text) @@ tsquery 205 * TSearchWithClassStrategyNumber - tsvector @@@ tsquery 206 */ 207 #define TSearchStrategyNumber 1 208 #define TSearchWithClassStrategyNumber 2 209 210 /* 211 * TSQuery Utilities 212 */ 213 extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); 214 extern TSQuery cleanup_tsquery_stopwords(TSQuery in); 215 216 typedef struct QTNode 217 { 218 QueryItem *valnode; 219 uint32 flags; 220 int32 nchild; 221 char *word; 222 uint32 sign; 223 struct QTNode **child; 224 } QTNode; 225 226 /* bits in QTNode.flags */ 227 #define QTN_NEEDFREE 0x01 228 #define QTN_NOCHANGE 0x02 229 #define QTN_WORDFREE 0x04 230 231 typedef uint64 TSQuerySign; 232 233 #define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE) 234 235 #define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X)) 236 #define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X)) 237 #define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X) 238 #define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n)) 239 240 241 extern QTNode *QT2QTN(QueryItem *in, char *operand); 242 extern TSQuery QTN2QT(QTNode *in); 243 extern void QTNFree(QTNode *in); 244 extern void QTNSort(QTNode *in); 245 extern void QTNTernary(QTNode *in); 246 extern void QTNBinary(QTNode *in); 247 extern int QTNodeCompare(QTNode *an, QTNode *bn); 248 extern QTNode *QTNCopy(QTNode *in); 249 extern void QTNClearFlags(QTNode *in, uint32 flags); 250 extern bool QTNEq(QTNode *a, QTNode *b); 251 extern TSQuerySign makeTSQuerySign(TSQuery a); 252 extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs, 253 bool *isfind); 254 255 #endif /* _PG_TS_UTILS_H_ */ 256