1 /*------------------------------------------------------------------------- 2 * 3 * ts_utils.h 4 * helper utilities for tsearch 5 * 6 * Copyright (c) 1998-2016, PostgreSQL Global Development Group 7 * 8 * src/include/tsearch/ts_utils.h 9 * 10 *------------------------------------------------------------------------- 11 */ 12 #ifndef _PG_TS_UTILS_H_ 13 #define _PG_TS_UTILS_H_ 14 15 #include "nodes/pg_list.h" 16 #include "tsearch/ts_public.h" 17 #include "tsearch/ts_type.h" 18 19 /* 20 * Common parse definitions for tsvector and tsquery 21 */ 22 23 /* tsvector parser support. */ 24 25 struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */ 26 typedef struct TSVectorParseStateData *TSVectorParseState; 27 28 extern TSVectorParseState init_tsvector_parser(char *input, 29 bool oprisdelim, 30 bool is_tsquery); 31 extern void reset_tsvector_parser(TSVectorParseState state, char *input); 32 extern bool gettoken_tsvector(TSVectorParseState state, 33 char **token, int *len, 34 WordEntryPos **pos, int *poslen, 35 char **endptr); 36 extern void close_tsvector_parser(TSVectorParseState state); 37 38 /* parse_tsquery */ 39 40 struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */ 41 typedef struct TSQueryParserStateData *TSQueryParserState; 42 43 typedef void (*PushFunction) (Datum opaque, TSQueryParserState state, 44 char *token, int tokenlen, 45 int16 tokenweights, /* bitmap as described 46 * in QueryOperand 47 * struct */ 48 bool prefix); 49 50 extern TSQuery parse_tsquery(char *buf, 51 PushFunction pushval, 52 Datum opaque, bool isplain); 53 54 /* Functions for use by PushFunction implementations */ 55 extern void pushValue(TSQueryParserState state, 56 char *strval, int lenval, int16 weight, bool prefix); 57 extern void pushStop(TSQueryParserState state); 58 extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance); 59 60 /* 61 * parse plain text and lexize words 62 */ 63 typedef struct 64 { 65 uint16 len; 66 uint16 nvariant; 67 union 68 { 69 uint16 pos; 70 71 /* 72 * When apos array is used, apos[0] is the number of elements in the 73 * array (excluding apos[0]), and alen is the allocated size of the 74 * array. 75 */ 76 uint16 *apos; 77 } pos; 78 uint16 flags; /* currently, only TSL_PREFIX */ 79 char *word; 80 uint32 alen; 81 } ParsedWord; 82 83 typedef struct 84 { 85 ParsedWord *words; 86 int32 lenwords; 87 int32 curwords; 88 int32 pos; 89 } ParsedText; 90 91 extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen); 92 93 /* 94 * headline framework, flow in common to generate: 95 * 1 parse text with hlparsetext 96 * 2 parser-specific function to find part 97 * 3 generateHeadline to generate result text 98 */ 99 100 extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, 101 char *buf, int32 buflen); 102 extern text *generateHeadline(HeadlineParsedText *prs); 103 104 /* 105 * TSQuery execution support 106 * 107 * TS_execute() executes a tsquery against data that can be represented in 108 * various forms. The TSExecuteCallback callback function is called to check 109 * whether a given primitive tsquery value is matched in the data. 110 */ 111 112 /* 113 * struct ExecPhraseData is passed to a TSExecuteCallback function if we need 114 * lexeme position data (because of a phrase-match operator in the tsquery). 115 * The callback should fill in position data when it returns true (success). 116 * If it cannot return position data, it may leave "data" unchanged, but 117 * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag 118 * and must arrange for a later recheck with position data available. 119 * 120 * The reported lexeme positions must be sorted and unique. Callers must only 121 * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]). 122 * This allows the returned "pos" to point directly to the WordEntryPos 123 * portion of a tsvector value. If "allocated" is true then the pos array 124 * is palloc'd workspace and caller may free it when done. 125 * 126 * "negate" means that the pos array contains positions where the query does 127 * not match, rather than positions where it does. "width" is positive when 128 * the match is wider than one lexeme. Neither of these fields normally need 129 * to be touched by TSExecuteCallback functions; they are used for 130 * phrase-search processing within TS_execute. 131 * 132 * All fields of the ExecPhraseData struct are initially zeroed by caller. 133 */ 134 typedef struct ExecPhraseData 135 { 136 int npos; /* number of positions reported */ 137 bool allocated; /* pos points to palloc'd data? */ 138 bool negate; /* positions are where query is NOT matched */ 139 WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */ 140 int width; /* width of match in lexemes, less 1 */ 141 } ExecPhraseData; 142 143 /* 144 * Signature for TSQuery lexeme check functions 145 * 146 * arg: opaque value passed through from caller of TS_execute 147 * val: lexeme to test for presence of 148 * data: to be filled with lexeme positions; NULL if position data not needed 149 * 150 * Return TRUE if lexeme is present in data, else FALSE. If data is not 151 * NULL, it should be filled with lexeme positions, but function can leave 152 * it as zeroes if position data is not available. 153 */ 154 typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, 155 ExecPhraseData *data); 156 157 /* 158 * Flag bits for TS_execute 159 */ 160 #define TS_EXEC_EMPTY (0x00) 161 /* 162 * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically 163 * evaluated to be true. Useful in cases where NOT cannot be accurately 164 * computed (GiST) or it isn't important (ranking). From TS_execute's 165 * perspective, !CALC_NOT means that the TSExecuteCallback function might 166 * return false-positive indications of a lexeme's presence. 167 */ 168 #define TS_EXEC_CALC_NOT (0x01) 169 /* 170 * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily 171 * in the absence of position information: a TRUE result indicates that the 172 * phrase might be present. Without this flag, OP_PHRASE always returns 173 * false if lexeme position information is not available. 174 */ 175 #define TS_EXEC_PHRASE_NO_POS (0x02) 176 /* Obsolete spelling of TS_EXEC_PHRASE_NO_POS: */ 177 #define TS_EXEC_PHRASE_AS_AND TS_EXEC_PHRASE_NO_POS 178 179 extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, 180 TSExecuteCallback chkcond); 181 extern bool tsquery_requires_match(QueryItem *curitem); 182 183 /* 184 * to_ts* - text transformation to tsvector, tsquery 185 */ 186 extern TSVector make_tsvector(ParsedText *prs); 187 extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix); 188 189 extern Datum to_tsvector_byid(PG_FUNCTION_ARGS); 190 extern Datum to_tsvector(PG_FUNCTION_ARGS); 191 extern Datum to_tsquery_byid(PG_FUNCTION_ARGS); 192 extern Datum to_tsquery(PG_FUNCTION_ARGS); 193 extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS); 194 extern Datum plainto_tsquery(PG_FUNCTION_ARGS); 195 extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS); 196 extern Datum phraseto_tsquery(PG_FUNCTION_ARGS); 197 198 /* 199 * GiST support function 200 */ 201 202 extern Datum gtsvector_compress(PG_FUNCTION_ARGS); 203 extern Datum gtsvector_decompress(PG_FUNCTION_ARGS); 204 extern Datum gtsvector_consistent(PG_FUNCTION_ARGS); 205 extern Datum gtsvector_union(PG_FUNCTION_ARGS); 206 extern Datum gtsvector_same(PG_FUNCTION_ARGS); 207 extern Datum gtsvector_penalty(PG_FUNCTION_ARGS); 208 extern Datum gtsvector_picksplit(PG_FUNCTION_ARGS); 209 extern Datum gtsvector_consistent_oldsig(PG_FUNCTION_ARGS); 210 211 /* 212 * IO functions for pseudotype gtsvector 213 * used internally in tsvector GiST opclass 214 */ 215 extern Datum gtsvectorin(PG_FUNCTION_ARGS); 216 extern Datum gtsvectorout(PG_FUNCTION_ARGS); 217 218 /* 219 * GIN support function 220 */ 221 222 extern Datum gin_extract_tsvector(PG_FUNCTION_ARGS); 223 extern Datum gin_cmp_tslexeme(PG_FUNCTION_ARGS); 224 extern Datum gin_cmp_prefix(PG_FUNCTION_ARGS); 225 extern Datum gin_extract_tsquery(PG_FUNCTION_ARGS); 226 extern Datum gin_tsquery_consistent(PG_FUNCTION_ARGS); 227 extern Datum gin_tsquery_triconsistent(PG_FUNCTION_ARGS); 228 extern Datum gin_extract_tsvector_2args(PG_FUNCTION_ARGS); 229 extern Datum gin_extract_tsquery_5args(PG_FUNCTION_ARGS); 230 extern Datum gin_tsquery_consistent_6args(PG_FUNCTION_ARGS); 231 extern Datum gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS); 232 extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS); 233 234 /* 235 * Possible strategy numbers for indexes 236 * TSearchStrategyNumber - (tsvector|text) @@ tsquery 237 * TSearchWithClassStrategyNumber - tsvector @@@ tsquery 238 */ 239 #define TSearchStrategyNumber 1 240 #define TSearchWithClassStrategyNumber 2 241 242 /* 243 * TSQuery Utilities 244 */ 245 extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); 246 extern TSQuery cleanup_tsquery_stopwords(TSQuery in); 247 248 typedef struct QTNode 249 { 250 QueryItem *valnode; 251 uint32 flags; 252 int32 nchild; 253 char *word; 254 uint32 sign; 255 struct QTNode **child; 256 } QTNode; 257 258 /* bits in QTNode.flags */ 259 #define QTN_NEEDFREE 0x01 260 #define QTN_NOCHANGE 0x02 261 #define QTN_WORDFREE 0x04 262 263 typedef uint64 TSQuerySign; 264 265 #define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE) 266 267 #define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X)) 268 #define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X)) 269 #define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X) 270 #define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n)) 271 272 273 extern QTNode *QT2QTN(QueryItem *in, char *operand); 274 extern TSQuery QTN2QT(QTNode *in); 275 extern void QTNFree(QTNode *in); 276 extern void QTNSort(QTNode *in); 277 extern void QTNTernary(QTNode *in); 278 extern void QTNBinary(QTNode *in); 279 extern int QTNodeCompare(QTNode *an, QTNode *bn); 280 extern QTNode *QTNCopy(QTNode *in); 281 extern void QTNClearFlags(QTNode *in, uint32 flags); 282 extern bool QTNEq(QTNode *a, QTNode *b); 283 extern TSQuerySign makeTSQuerySign(TSQuery a); 284 extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs, 285 bool *isfind); 286 287 /* 288 * TSQuery GiST support 289 */ 290 extern Datum gtsquery_compress(PG_FUNCTION_ARGS); 291 extern Datum gtsquery_decompress(PG_FUNCTION_ARGS); 292 extern Datum gtsquery_consistent(PG_FUNCTION_ARGS); 293 extern Datum gtsquery_union(PG_FUNCTION_ARGS); 294 extern Datum gtsquery_same(PG_FUNCTION_ARGS); 295 extern Datum gtsquery_penalty(PG_FUNCTION_ARGS); 296 extern Datum gtsquery_picksplit(PG_FUNCTION_ARGS); 297 extern Datum gtsquery_consistent_oldsig(PG_FUNCTION_ARGS); 298 299 /* 300 * Parser interface to SQL 301 */ 302 extern Datum ts_token_type_byid(PG_FUNCTION_ARGS); 303 extern Datum ts_token_type_byname(PG_FUNCTION_ARGS); 304 extern Datum ts_parse_byid(PG_FUNCTION_ARGS); 305 extern Datum ts_parse_byname(PG_FUNCTION_ARGS); 306 307 /* 308 * Default word parser 309 */ 310 311 extern Datum prsd_start(PG_FUNCTION_ARGS); 312 extern Datum prsd_nexttoken(PG_FUNCTION_ARGS); 313 extern Datum prsd_end(PG_FUNCTION_ARGS); 314 extern Datum prsd_headline(PG_FUNCTION_ARGS); 315 extern Datum prsd_lextype(PG_FUNCTION_ARGS); 316 317 /* 318 * Dictionary interface to SQL 319 */ 320 extern Datum ts_lexize(PG_FUNCTION_ARGS); 321 322 /* 323 * Simple built-in dictionary 324 */ 325 extern Datum dsimple_init(PG_FUNCTION_ARGS); 326 extern Datum dsimple_lexize(PG_FUNCTION_ARGS); 327 328 /* 329 * Synonym built-in dictionary 330 */ 331 extern Datum dsynonym_init(PG_FUNCTION_ARGS); 332 extern Datum dsynonym_lexize(PG_FUNCTION_ARGS); 333 334 /* 335 * ISpell dictionary 336 */ 337 extern Datum dispell_init(PG_FUNCTION_ARGS); 338 extern Datum dispell_lexize(PG_FUNCTION_ARGS); 339 340 /* 341 * Thesaurus 342 */ 343 extern Datum thesaurus_init(PG_FUNCTION_ARGS); 344 extern Datum thesaurus_lexize(PG_FUNCTION_ARGS); 345 346 /* 347 * headline 348 */ 349 extern Datum ts_headline_byid_opt(PG_FUNCTION_ARGS); 350 extern Datum ts_headline_byid(PG_FUNCTION_ARGS); 351 extern Datum ts_headline(PG_FUNCTION_ARGS); 352 extern Datum ts_headline_opt(PG_FUNCTION_ARGS); 353 354 /* 355 * current cfg 356 */ 357 extern Datum get_current_ts_config(PG_FUNCTION_ARGS); 358 359 #endif /* _PG_TS_UTILS_H_ */ 360