1 /*------------------------------------------------------------------------- 2 * 3 * ts_type.h 4 * Definitions for the tsvector and tsquery types 5 * 6 * Copyright (c) 1998-2016, PostgreSQL Global Development Group 7 * 8 * src/include/tsearch/ts_type.h 9 * 10 *------------------------------------------------------------------------- 11 */ 12 #ifndef _PG_TSTYPE_H_ 13 #define _PG_TSTYPE_H_ 14 15 #include "fmgr.h" 16 #include "utils/memutils.h" 17 18 19 /* 20 * TSVector type. 21 * 22 * Structure of tsvector datatype: 23 * 1) standard varlena header 24 * 2) int32 size - number of lexemes (WordEntry array entries) 25 * 3) Array of WordEntry - one per lexeme; must be sorted according to 26 * tsCompareString() (ie, memcmp of lexeme strings). 27 * WordEntry->pos gives the number of bytes from end of WordEntry 28 * array to start of lexeme's string, which is of length len. 29 * 4) Per-lexeme data storage: 30 * lexeme string (not null-terminated) 31 * if haspos is true: 32 * padding byte if necessary to make the position data 2-byte aligned 33 * uint16 number of positions that follow 34 * WordEntryPos[] positions 35 * 36 * The positions for each lexeme must be sorted. 37 * 38 * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4 39 */ 40 41 typedef struct 42 { 43 uint32 44 haspos:1, 45 len:11, /* MAX 2Kb */ 46 pos:20; /* MAX 1Mb */ 47 } WordEntry; 48 49 #define MAXSTRLEN ( (1<<11) - 1) 50 #define MAXSTRPOS ( (1<<20) - 1) 51 52 extern int compareWordEntryPos(const void *a, const void *b); 53 54 /* 55 * Equivalent to 56 * typedef struct { 57 * uint16 58 * weight:2, 59 * pos:14; 60 * } 61 */ 62 63 typedef uint16 WordEntryPos; 64 65 typedef struct 66 { 67 uint16 npos; 68 WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]; 69 } WordEntryPosVector; 70 71 /* WordEntryPosVector with exactly 1 entry */ 72 typedef struct 73 { 74 uint16 npos; 75 WordEntryPos pos[1]; 76 } WordEntryPosVector1; 77 78 79 #define WEP_GETWEIGHT(x) ( (x) >> 14 ) 80 #define WEP_GETPOS(x) ( (x) & 0x3fff ) 81 82 #define WEP_SETWEIGHT(x,v) ( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) ) 83 #define WEP_SETPOS(x,v) ( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) ) 84 85 #define MAXENTRYPOS (1<<14) 86 #define MAXNUMPOS (256) 87 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) ) 88 89 /* This struct represents a complete tsvector datum */ 90 typedef struct 91 { 92 int32 vl_len_; /* varlena header (do not touch directly!) */ 93 int32 size; 94 WordEntry entries[FLEXIBLE_ARRAY_MEMBER]; 95 /* lexemes follow the entries[] array */ 96 } TSVectorData; 97 98 typedef TSVectorData *TSVector; 99 100 #define DATAHDRSIZE (offsetof(TSVectorData, entries)) 101 #define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) ) 102 103 /* pointer to start of a tsvector's WordEntry array */ 104 #define ARRPTR(x) ( (x)->entries ) 105 106 /* pointer to start of a tsvector's lexeme storage */ 107 #define STRPTR(x) ( (char *) &(x)->entries[(x)->size] ) 108 109 #define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len))) 110 #define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 ) 111 #define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos) 112 113 /* 114 * fmgr interface macros 115 */ 116 117 #define DatumGetTSVector(X) ((TSVector) PG_DETOAST_DATUM(X)) 118 #define DatumGetTSVectorCopy(X) ((TSVector) PG_DETOAST_DATUM_COPY(X)) 119 #define TSVectorGetDatum(X) PointerGetDatum(X) 120 #define PG_GETARG_TSVECTOR(n) DatumGetTSVector(PG_GETARG_DATUM(n)) 121 #define PG_GETARG_TSVECTOR_COPY(n) DatumGetTSVectorCopy(PG_GETARG_DATUM(n)) 122 #define PG_RETURN_TSVECTOR(x) return TSVectorGetDatum(x) 123 124 /* 125 * I/O 126 */ 127 extern Datum tsvectorin(PG_FUNCTION_ARGS); 128 extern Datum tsvectorout(PG_FUNCTION_ARGS); 129 extern Datum tsvectorsend(PG_FUNCTION_ARGS); 130 extern Datum tsvectorrecv(PG_FUNCTION_ARGS); 131 132 /* 133 * operations with tsvector 134 */ 135 extern Datum tsvector_lt(PG_FUNCTION_ARGS); 136 extern Datum tsvector_le(PG_FUNCTION_ARGS); 137 extern Datum tsvector_eq(PG_FUNCTION_ARGS); 138 extern Datum tsvector_ne(PG_FUNCTION_ARGS); 139 extern Datum tsvector_ge(PG_FUNCTION_ARGS); 140 extern Datum tsvector_gt(PG_FUNCTION_ARGS); 141 extern Datum tsvector_cmp(PG_FUNCTION_ARGS); 142 143 extern Datum tsvector_length(PG_FUNCTION_ARGS); 144 extern Datum tsvector_strip(PG_FUNCTION_ARGS); 145 extern Datum tsvector_setweight(PG_FUNCTION_ARGS); 146 extern Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS); 147 extern Datum tsvector_concat(PG_FUNCTION_ARGS); 148 extern Datum tsvector_delete_str(PG_FUNCTION_ARGS); 149 extern Datum tsvector_delete_arr(PG_FUNCTION_ARGS); 150 extern Datum tsvector_unnest(PG_FUNCTION_ARGS); 151 extern Datum tsvector_to_array(PG_FUNCTION_ARGS); 152 extern Datum array_to_tsvector(PG_FUNCTION_ARGS); 153 extern Datum tsvector_filter(PG_FUNCTION_ARGS); 154 extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS); 155 extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS); 156 157 extern Datum ts_match_vq(PG_FUNCTION_ARGS); 158 extern Datum ts_match_qv(PG_FUNCTION_ARGS); 159 extern Datum ts_match_tt(PG_FUNCTION_ARGS); 160 extern Datum ts_match_tq(PG_FUNCTION_ARGS); 161 162 extern Datum ts_stat1(PG_FUNCTION_ARGS); 163 extern Datum ts_stat2(PG_FUNCTION_ARGS); 164 165 extern Datum ts_rank_tt(PG_FUNCTION_ARGS); 166 extern Datum ts_rank_wtt(PG_FUNCTION_ARGS); 167 extern Datum ts_rank_ttf(PG_FUNCTION_ARGS); 168 extern Datum ts_rank_wttf(PG_FUNCTION_ARGS); 169 extern Datum ts_rankcd_tt(PG_FUNCTION_ARGS); 170 extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS); 171 extern Datum ts_rankcd_ttf(PG_FUNCTION_ARGS); 172 extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS); 173 174 extern Datum tsmatchsel(PG_FUNCTION_ARGS); 175 extern Datum tsmatchjoinsel(PG_FUNCTION_ARGS); 176 177 extern Datum ts_typanalyze(PG_FUNCTION_ARGS); 178 179 180 /* 181 * TSQuery 182 * 183 * 184 */ 185 186 typedef int8 QueryItemType; 187 188 /* Valid values for QueryItemType: */ 189 #define QI_VAL 1 190 #define QI_OPR 2 191 #define QI_VALSTOP 3 /* This is only used in an intermediate stack 192 * representation in parse_tsquery. It's not a 193 * legal type elsewhere. */ 194 195 /* 196 * QueryItem is one node in tsquery - operator or operand. 197 */ 198 typedef struct 199 { 200 QueryItemType type; /* operand or kind of operator (ts_tokentype) */ 201 uint8 weight; /* weights of operand to search. It's a 202 * bitmask of allowed weights. if it =0 then 203 * any weight are allowed. Weights and bit 204 * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */ 205 bool prefix; /* true if it's a prefix search */ 206 int32 valcrc; /* XXX: pg_crc32 would be a more appropriate 207 * data type, but we use comparisons to signed 208 * integers in the code. They would need to be 209 * changed as well. */ 210 211 /* pointer to text value of operand, must correlate with WordEntry */ 212 uint32 213 length:12, 214 distance:20; 215 } QueryOperand; 216 217 218 /* 219 * Legal values for QueryOperator.operator. 220 */ 221 #define OP_NOT 1 222 #define OP_AND 2 223 #define OP_OR 3 224 #define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */ 225 #define OP_COUNT 4 226 227 extern const int tsearch_op_priority[OP_COUNT]; 228 229 /* get operation priority by its code*/ 230 #define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] ) 231 /* get QueryOperator priority */ 232 #define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper) 233 234 typedef struct 235 { 236 QueryItemType type; 237 int8 oper; /* see above */ 238 int16 distance; /* distance between agrs for OP_PHRASE */ 239 uint32 left; /* pointer to left operand. Right operand is 240 * item + 1, left operand is placed 241 * item+item->left */ 242 } QueryOperator; 243 244 /* 245 * Note: TSQuery is 4-bytes aligned, so make sure there's no fields 246 * inside QueryItem requiring 8-byte alignment, like int64. 247 */ 248 typedef union 249 { 250 QueryItemType type; 251 QueryOperator qoperator; 252 QueryOperand qoperand; 253 } QueryItem; 254 255 /* 256 * Storage: 257 * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings) 258 */ 259 260 typedef struct 261 { 262 int32 vl_len_; /* varlena header (do not touch directly!) */ 263 int32 size; /* number of QueryItems */ 264 char data[FLEXIBLE_ARRAY_MEMBER]; /* data starts here */ 265 } TSQueryData; 266 267 typedef TSQueryData *TSQuery; 268 269 #define HDRSIZETQ ( VARHDRSZ + sizeof(int32) ) 270 271 /* Computes the size of header and all QueryItems. size is the number of 272 * QueryItems, and lenofoperand is the total length of all operands 273 */ 274 #define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) ) 275 #define TSQUERY_TOO_BIG(size, lenofoperand) \ 276 ((size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem)) 277 278 /* Returns a pointer to the first QueryItem in a TSQuery */ 279 #define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ )) 280 281 /* Returns a pointer to the beginning of operands in a TSQuery */ 282 #define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) ) 283 284 /* 285 * fmgr interface macros 286 * Note, TSQuery type marked as plain storage, so it can't be toasted 287 * but PG_DETOAST_DATUM_COPY is used for simplicity 288 */ 289 290 #define DatumGetTSQuery(X) ((TSQuery) DatumGetPointer(X)) 291 #define DatumGetTSQueryCopy(X) ((TSQuery) PG_DETOAST_DATUM_COPY(X)) 292 #define TSQueryGetDatum(X) PointerGetDatum(X) 293 #define PG_GETARG_TSQUERY(n) DatumGetTSQuery(PG_GETARG_DATUM(n)) 294 #define PG_GETARG_TSQUERY_COPY(n) DatumGetTSQueryCopy(PG_GETARG_DATUM(n)) 295 #define PG_RETURN_TSQUERY(x) return TSQueryGetDatum(x) 296 297 /* 298 * I/O 299 */ 300 extern Datum tsqueryin(PG_FUNCTION_ARGS); 301 extern Datum tsqueryout(PG_FUNCTION_ARGS); 302 extern Datum tsquerysend(PG_FUNCTION_ARGS); 303 extern Datum tsqueryrecv(PG_FUNCTION_ARGS); 304 305 /* 306 * operations with tsquery 307 */ 308 extern Datum tsquery_lt(PG_FUNCTION_ARGS); 309 extern Datum tsquery_le(PG_FUNCTION_ARGS); 310 extern Datum tsquery_eq(PG_FUNCTION_ARGS); 311 extern Datum tsquery_ne(PG_FUNCTION_ARGS); 312 extern Datum tsquery_ge(PG_FUNCTION_ARGS); 313 extern Datum tsquery_gt(PG_FUNCTION_ARGS); 314 extern Datum tsquery_cmp(PG_FUNCTION_ARGS); 315 316 extern Datum tsquerytree(PG_FUNCTION_ARGS); 317 extern Datum tsquery_numnode(PG_FUNCTION_ARGS); 318 319 extern Datum tsquery_and(PG_FUNCTION_ARGS); 320 extern Datum tsquery_or(PG_FUNCTION_ARGS); 321 extern Datum tsquery_phrase(PG_FUNCTION_ARGS); 322 extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS); 323 extern Datum tsquery_not(PG_FUNCTION_ARGS); 324 325 extern Datum tsquery_rewrite(PG_FUNCTION_ARGS); 326 extern Datum tsquery_rewrite_query(PG_FUNCTION_ARGS); 327 328 extern Datum tsq_mcontains(PG_FUNCTION_ARGS); 329 extern Datum tsq_mcontained(PG_FUNCTION_ARGS); 330 331 #endif /* _PG_TSTYPE_H_ */ 332