1 /*------------------------------------------------------------------------- 2 * 3 * ts_utils.h 4 * helper utilities for tsearch 5 * 6 * Copyright (c) 1998-2017, PostgreSQL Global Development Group 7 * 8 * src/include/tsearch/ts_utils.h 9 * 10 *------------------------------------------------------------------------- 11 */ 12 #ifndef _PG_TS_UTILS_H_ 13 #define _PG_TS_UTILS_H_ 14 15 #include "nodes/pg_list.h" 16 #include "tsearch/ts_public.h" 17 #include "tsearch/ts_type.h" 18 19 /* 20 * Common parse definitions for tsvector and tsquery 21 */ 22 23 /* tsvector parser support. */ 24 25 struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */ 26 typedef struct TSVectorParseStateData *TSVectorParseState; 27 28 extern TSVectorParseState init_tsvector_parser(char *input, 29 bool oprisdelim, 30 bool is_tsquery); 31 extern void reset_tsvector_parser(TSVectorParseState state, char *input); 32 extern bool gettoken_tsvector(TSVectorParseState state, 33 char **token, int *len, 34 WordEntryPos **pos, int *poslen, 35 char **endptr); 36 extern void close_tsvector_parser(TSVectorParseState state); 37 38 /* parse_tsquery */ 39 40 struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */ 41 typedef struct TSQueryParserStateData *TSQueryParserState; 42 43 typedef void (*PushFunction) (Datum opaque, TSQueryParserState state, 44 char *token, int tokenlen, 45 int16 tokenweights, /* bitmap as described in 46 * QueryOperand struct */ 47 bool prefix); 48 49 extern TSQuery parse_tsquery(char *buf, 50 PushFunction pushval, 51 Datum opaque, bool isplain); 52 53 /* Functions for use by PushFunction implementations */ 54 extern void pushValue(TSQueryParserState state, 55 char *strval, int lenval, int16 weight, bool prefix); 56 extern void pushStop(TSQueryParserState state); 57 extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance); 58 59 /* 60 * parse plain text and lexize words 61 */ 62 typedef struct 63 { 64 uint16 len; 65 uint16 nvariant; 66 union 67 { 68 uint16 pos; 69 70 /* 71 * When apos array is used, apos[0] is the number of elements in the 72 * array (excluding apos[0]), and alen is the allocated size of the 73 * array. 74 */ 75 uint16 *apos; 76 } pos; 77 uint16 flags; /* currently, only TSL_PREFIX */ 78 char *word; 79 uint32 alen; 80 } ParsedWord; 81 82 typedef struct 83 { 84 ParsedWord *words; 85 int32 lenwords; 86 int32 curwords; 87 int32 pos; 88 } ParsedText; 89 90 extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen); 91 92 /* 93 * headline framework, flow in common to generate: 94 * 1 parse text with hlparsetext 95 * 2 parser-specific function to find part 96 * 3 generateHeadline to generate result text 97 */ 98 99 extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, 100 char *buf, int32 buflen); 101 extern text *generateHeadline(HeadlineParsedText *prs); 102 103 /* 104 * TSQuery execution support 105 * 106 * TS_execute() executes a tsquery against data that can be represented in 107 * various forms. The TSExecuteCallback callback function is called to check 108 * whether a given primitive tsquery value is matched in the data. 109 */ 110 111 /* 112 * struct ExecPhraseData is passed to a TSExecuteCallback function if we need 113 * lexeme position data (because of a phrase-match operator in the tsquery). 114 * The callback should fill in position data when it returns true (success). 115 * If it cannot return position data, it may leave "data" unchanged, but 116 * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag 117 * and must arrange for a later recheck with position data available. 118 * 119 * The reported lexeme positions must be sorted and unique. Callers must only 120 * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]). 121 * This allows the returned "pos" to point directly to the WordEntryPos 122 * portion of a tsvector value. If "allocated" is true then the pos array 123 * is palloc'd workspace and caller may free it when done. 124 * 125 * "negate" means that the pos array contains positions where the query does 126 * not match, rather than positions where it does. "width" is positive when 127 * the match is wider than one lexeme. Neither of these fields normally need 128 * to be touched by TSExecuteCallback functions; they are used for 129 * phrase-search processing within TS_execute. 130 * 131 * All fields of the ExecPhraseData struct are initially zeroed by caller. 132 */ 133 typedef struct ExecPhraseData 134 { 135 int npos; /* number of positions reported */ 136 bool allocated; /* pos points to palloc'd data? */ 137 bool negate; /* positions are where query is NOT matched */ 138 WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */ 139 int width; /* width of match in lexemes, less 1 */ 140 } ExecPhraseData; 141 142 /* 143 * Signature for TSQuery lexeme check functions 144 * 145 * arg: opaque value passed through from caller of TS_execute 146 * val: lexeme to test for presence of 147 * data: to be filled with lexeme positions; NULL if position data not needed 148 * 149 * Return TRUE if lexeme is present in data, else FALSE. If data is not 150 * NULL, it should be filled with lexeme positions, but function can leave 151 * it as zeroes if position data is not available. 152 */ 153 typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val, 154 ExecPhraseData *data); 155 156 /* 157 * Flag bits for TS_execute 158 */ 159 #define TS_EXEC_EMPTY (0x00) 160 /* 161 * If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically 162 * evaluated to be true. Useful in cases where NOT cannot be accurately 163 * computed (GiST) or it isn't important (ranking). From TS_execute's 164 * perspective, !CALC_NOT means that the TSExecuteCallback function might 165 * return false-positive indications of a lexeme's presence. 166 */ 167 #define TS_EXEC_CALC_NOT (0x01) 168 /* 169 * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily 170 * in the absence of position information: a TRUE result indicates that the 171 * phrase might be present. Without this flag, OP_PHRASE always returns 172 * false if lexeme position information is not available. 173 */ 174 #define TS_EXEC_PHRASE_NO_POS (0x02) 175 176 extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, 177 TSExecuteCallback chkcond); 178 extern bool tsquery_requires_match(QueryItem *curitem); 179 180 /* 181 * to_ts* - text transformation to tsvector, tsquery 182 */ 183 extern TSVector make_tsvector(ParsedText *prs); 184 extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix); 185 186 /* 187 * Possible strategy numbers for indexes 188 * TSearchStrategyNumber - (tsvector|text) @@ tsquery 189 * TSearchWithClassStrategyNumber - tsvector @@@ tsquery 190 */ 191 #define TSearchStrategyNumber 1 192 #define TSearchWithClassStrategyNumber 2 193 194 /* 195 * TSQuery Utilities 196 */ 197 extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); 198 extern TSQuery cleanup_tsquery_stopwords(TSQuery in); 199 200 typedef struct QTNode 201 { 202 QueryItem *valnode; 203 uint32 flags; 204 int32 nchild; 205 char *word; 206 uint32 sign; 207 struct QTNode **child; 208 } QTNode; 209 210 /* bits in QTNode.flags */ 211 #define QTN_NEEDFREE 0x01 212 #define QTN_NOCHANGE 0x02 213 #define QTN_WORDFREE 0x04 214 215 typedef uint64 TSQuerySign; 216 217 #define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE) 218 219 #define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X)) 220 #define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X)) 221 #define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X) 222 #define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n)) 223 224 225 extern QTNode *QT2QTN(QueryItem *in, char *operand); 226 extern TSQuery QTN2QT(QTNode *in); 227 extern void QTNFree(QTNode *in); 228 extern void QTNSort(QTNode *in); 229 extern void QTNTernary(QTNode *in); 230 extern void QTNBinary(QTNode *in); 231 extern int QTNodeCompare(QTNode *an, QTNode *bn); 232 extern QTNode *QTNCopy(QTNode *in); 233 extern void QTNClearFlags(QTNode *in, uint32 flags); 234 extern bool QTNEq(QTNode *a, QTNode *b); 235 extern TSQuerySign makeTSQuerySign(TSQuery a); 236 extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs, 237 bool *isfind); 238 239 #endif /* _PG_TS_UTILS_H_ */ 240