/*-------------------------------------------------------------------------
 *
 * ts_type.h
 *	  Definitions for the tsvector and tsquery types
 *
 * Copyright (c) 1998-2016, PostgreSQL Global Development Group
 *
 * src/include/tsearch/ts_type.h
 *
 *-------------------------------------------------------------------------
 */
12 #ifndef _PG_TSTYPE_H_
13 #define _PG_TSTYPE_H_
14 
15 #include "fmgr.h"
16 #include "utils/memutils.h"
17 
18 
/*
 * TSVector type.
 *
 * Structure of tsvector datatype:
 * 1) standard varlena header
 * 2) int32		size - number of lexemes (WordEntry array entries)
 * 3) Array of WordEntry - one per lexeme; must be sorted according to
 *				tsCompareString() (i.e., memcmp of lexeme strings).
 *				WordEntry->pos gives the number of bytes from the end of the
 *				WordEntry array to the start of the lexeme's string, which
 *				is of length len.
 * 4) Per-lexeme data storage:
 *	  lexeme string (not null-terminated)
 *	  if haspos is true:
 *		padding byte if necessary to make the position data 2-byte aligned
 *		uint16			number of positions that follow
 *		WordEntryPos[]	positions
 *
 * The positions for each lexeme must be sorted.
 *
 * Note: tsvectorsend/recv assume that sizeof(WordEntry) == 4.
 */
40 
41 typedef struct
42 {
43 	uint32
44 				haspos:1,
45 				len:11,			/* MAX 2Kb */
46 				pos:20;			/* MAX 1Mb */
47 } WordEntry;
48 
/*
 * Largest values representable in an 11-bit and a 20-bit unsigned field,
 * matching the widths of WordEntry.len and WordEntry.pos respectively.
 */
#define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1)
51 
52 extern int	compareWordEntryPos(const void *a, const void *b);
53 
54 /*
55  * Equivalent to
56  * typedef struct {
57  *		uint16
58  *			weight:2,
59  *			pos:14;
60  * }
61  */
62 
63 typedef uint16 WordEntryPos;
64 
65 typedef struct
66 {
67 	uint16		npos;
68 	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
69 } WordEntryPosVector;
70 
71 /* WordEntryPosVector with exactly 1 entry */
72 typedef struct
73 {
74 	uint16		npos;
75 	WordEntryPos pos[1];
76 } WordEntryPosVector1;
77 
78 
/*
 * Accessors for a packed weight/position value: the weight occupies bits
 * 14-15 and the position bits 0-13.
 *
 * WEP_SETWEIGHT and WEP_SETPOS assign to (x), so x must be a modifiable
 * lvalue, and they evaluate x more than once -- do not pass an expression
 * with side effects.  WEP_SETPOS masks v to 14 bits; WEP_SETWEIGHT does not
 * mask v, so any excess high bits are only dropped by the assignment to x.
 */
#define WEP_GETWEIGHT(x)	( (x) >> 14 )
#define WEP_GETPOS(x)		( (x) & 0x3fff )

#define WEP_SETWEIGHT(x,v)	( (x) = ( (v) << 14 ) | ( (x) & 0x3fff ) )
#define WEP_SETPOS(x,v)		( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )
84 
/*
 * MAXENTRYPOS is 1<<14, i.e. one more than the largest value that fits in
 * 14 bits.  LIMITPOS(x) clamps x to MAXENTRYPOS-1 from above only (values
 * below the limit, including negative ones, are returned unchanged) and
 * evaluates x twice.
 *
 * NOTE(review): MAXNUMPOS is not used in this header; presumably it caps
 * the number of positions kept per lexeme -- confirm at its use sites.
 */
#define MAXENTRYPOS (1<<14)
#define MAXNUMPOS	(256)
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
88 
89 /* This struct represents a complete tsvector datum */
90 typedef struct
91 {
92 	int32		vl_len_;		/* varlena header (do not touch directly!) */
93 	int32		size;
94 	WordEntry	entries[FLEXIBLE_ARRAY_MEMBER];
95 	/* lexemes follow the entries[] array */
96 } TSVectorData;
97 
98 typedef TSVectorData *TSVector;
99 
/* Number of bytes in a tsvector that precede the entries[] array */
#define DATAHDRSIZE (offsetof(TSVectorData, entries))
/*
 * Total size in bytes of a tsvector with nentries WordEntry elements
 * followed by lenstr further bytes of storage.
 */
#define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
102 
/* pointer to start of a tsvector's WordEntry array */
#define ARRPTR(x)	( (x)->entries )

/*
 * pointer to start of a tsvector's lexeme storage, i.e. the address just
 * past the last of the (x)->size WordEntry elements.  Evaluates x twice.
 */
#define STRPTR(x)	( (char *) &(x)->entries[(x)->size] )
108 
/*
 * Access to the position data of WordEntry *e inside tsvector x.
 *
 * _POSVECPTR yields a WordEntryPosVector pointer at the SHORTALIGN'd offset
 * (e->pos + e->len) from the start of lexeme storage, i.e. just past the
 * lexeme string.  POSDATALEN returns the stored position count, or 0 when
 * e->haspos is not set.  POSDATAPTR returns the pos[] array and performs no
 * haspos check of its own, so it should only be used when haspos is set
 * (or POSDATALEN is nonzero).
 */
#define _POSVECPTR(x, e)	((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
112 
/*
 * fmgr interface macros
 *
 * DatumGetTSVector and DatumGetTSVectorCopy pass the datum through
 * PG_DETOAST_DATUM and PG_DETOAST_DATUM_COPY respectively before casting to
 * TSVector.  The PG_GETARG_* forms apply them to function argument n, and
 * PG_RETURN_TSVECTOR expands to a complete return statement.
 */

#define DatumGetTSVector(X)			((TSVector) PG_DETOAST_DATUM(X))
#define DatumGetTSVectorCopy(X)		((TSVector) PG_DETOAST_DATUM_COPY(X))
#define TSVectorGetDatum(X)			PointerGetDatum(X)
#define PG_GETARG_TSVECTOR(n)		DatumGetTSVector(PG_GETARG_DATUM(n))
#define PG_GETARG_TSVECTOR_COPY(n)	DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSVECTOR(x)		return TSVectorGetDatum((x))
123 
124 /*
125  * I/O
126  */
127 extern Datum tsvectorin(PG_FUNCTION_ARGS);
128 extern Datum tsvectorout(PG_FUNCTION_ARGS);
129 extern Datum tsvectorsend(PG_FUNCTION_ARGS);
130 extern Datum tsvectorrecv(PG_FUNCTION_ARGS);
131 
132 /*
133  * operations with tsvector
134  */
135 extern Datum tsvector_lt(PG_FUNCTION_ARGS);
136 extern Datum tsvector_le(PG_FUNCTION_ARGS);
137 extern Datum tsvector_eq(PG_FUNCTION_ARGS);
138 extern Datum tsvector_ne(PG_FUNCTION_ARGS);
139 extern Datum tsvector_ge(PG_FUNCTION_ARGS);
140 extern Datum tsvector_gt(PG_FUNCTION_ARGS);
141 extern Datum tsvector_cmp(PG_FUNCTION_ARGS);
142 
143 extern Datum tsvector_length(PG_FUNCTION_ARGS);
144 extern Datum tsvector_strip(PG_FUNCTION_ARGS);
145 extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
146 extern Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS);
147 extern Datum tsvector_concat(PG_FUNCTION_ARGS);
148 extern Datum tsvector_delete_str(PG_FUNCTION_ARGS);
149 extern Datum tsvector_delete_arr(PG_FUNCTION_ARGS);
150 extern Datum tsvector_unnest(PG_FUNCTION_ARGS);
151 extern Datum tsvector_to_array(PG_FUNCTION_ARGS);
152 extern Datum array_to_tsvector(PG_FUNCTION_ARGS);
153 extern Datum tsvector_filter(PG_FUNCTION_ARGS);
154 extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
155 extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
156 
157 extern Datum ts_match_vq(PG_FUNCTION_ARGS);
158 extern Datum ts_match_qv(PG_FUNCTION_ARGS);
159 extern Datum ts_match_tt(PG_FUNCTION_ARGS);
160 extern Datum ts_match_tq(PG_FUNCTION_ARGS);
161 
162 extern Datum ts_stat1(PG_FUNCTION_ARGS);
163 extern Datum ts_stat2(PG_FUNCTION_ARGS);
164 
165 extern Datum ts_rank_tt(PG_FUNCTION_ARGS);
166 extern Datum ts_rank_wtt(PG_FUNCTION_ARGS);
167 extern Datum ts_rank_ttf(PG_FUNCTION_ARGS);
168 extern Datum ts_rank_wttf(PG_FUNCTION_ARGS);
169 extern Datum ts_rankcd_tt(PG_FUNCTION_ARGS);
170 extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS);
171 extern Datum ts_rankcd_ttf(PG_FUNCTION_ARGS);
172 extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
173 
174 extern Datum tsmatchsel(PG_FUNCTION_ARGS);
175 extern Datum tsmatchjoinsel(PG_FUNCTION_ARGS);
176 
177 extern Datum ts_typanalyze(PG_FUNCTION_ARGS);
178 
179 
180 /*
181  * TSQuery
182  *
183  *
184  */
185 
186 typedef int8 QueryItemType;
187 
/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3			/* This is only used in an intermediate stack
								 * representation in parse_tsquery. It's not a
								 * legal type elsewhere. */
194 
195 /*
196  * QueryItem is one node in tsquery - operator or operand.
197  */
198 typedef struct
199 {
200 	QueryItemType type;			/* operand or kind of operator (ts_tokentype) */
201 	uint8		weight;			/* weights of operand to search. It's a
202 								 * bitmask of allowed weights. if it =0 then
203 								 * any weight are allowed. Weights and bit
204 								 * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
205 	bool		prefix;			/* true if it's a prefix search */
206 	int32		valcrc;			/* XXX: pg_crc32 would be a more appropriate
207 								 * data type, but we use comparisons to signed
208 								 * integers in the code. They would need to be
209 								 * changed as well. */
210 
211 	/* pointer to text value of operand, must correlate with WordEntry */
212 	uint32
213 				length:12,
214 				distance:20;
215 } QueryOperand;
216 
217 
/*
 * Legal values for QueryOperator.oper.
 *
 * Codes start at 1; OP_COUNT is the number of codes and therefore equals
 * the highest one.
 */
#define OP_NOT			1
#define OP_AND			2
#define OP_OR			3
#define OP_PHRASE		4		/* highest code, tsquery_cleanup.c */
#define OP_COUNT		4
226 
227 extern const int tsearch_op_priority[OP_COUNT];
228 
229 /* get operation priority  by its code*/
230 #define OP_PRIORITY(x)	( tsearch_op_priority[(x) - 1] )
231 /* get QueryOperator priority */
232 #define QO_PRIORITY(x)	OP_PRIORITY(((QueryOperator *) (x))->oper)
233 
234 typedef struct
235 {
236 	QueryItemType type;
237 	int8		oper;			/* see above */
238 	int16		distance;		/* distance between agrs for OP_PHRASE */
239 	uint32		left;			/* pointer to left operand. Right operand is
240 								 * item + 1, left operand is placed
241 								 * item+item->left */
242 } QueryOperator;
243 
244 /*
245  * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
246  * inside QueryItem requiring 8-byte alignment, like int64.
247  */
248 typedef union
249 {
250 	QueryItemType type;
251 	QueryOperator qoperator;
252 	QueryOperand qoperand;
253 } QueryItem;
254 
255 /*
256  * Storage:
257  *	(len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
258  */
259 
260 typedef struct
261 {
262 	int32		vl_len_;		/* varlena header (do not touch directly!) */
263 	int32		size;			/* number of QueryItems */
264 	char		data[FLEXIBLE_ARRAY_MEMBER];	/* data starts here */
265 } TSQueryData;
266 
267 typedef TSQueryData *TSQuery;
268 
/* Bytes preceding the QueryItem array: varlena header plus the int32 size */
#define HDRSIZETQ	( VARHDRSZ + sizeof(int32) )

/* Computes the size of header and all QueryItems. size is the number of
 * QueryItems, and lenofoperand is the total length of all operands
 */
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
/*
 * True when COMPUTESIZE(size, lenofoperand) would exceed MaxAllocSize.  The
 * test divides the remaining space by sizeof(QueryItem) instead of
 * multiplying size, so the multiplication cannot overflow.
 * NOTE(review): this relies on lenofoperand not exceeding
 * MaxAllocSize - HDRSIZETQ -- confirm at the call sites.
 */
#define TSQUERY_TOO_BIG(size, lenofoperand) \
	((size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem))
277 
/*
 * Returns a pointer to the first QueryItem in a TSQuery, i.e. the address
 * HDRSIZETQ bytes past the start of the datum.
 */
#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))

/*
 * Returns a pointer to the beginning of operands in a TSQuery, i.e. the
 * address just past the last of its size QueryItems.  Evaluates x twice.
 */
#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
283 
/*
 * fmgr interface macros
 *
 * Note: the TSQuery type is marked as plain storage, so it can't be
 * toasted.  DatumGetTSQuery therefore only casts the pointer, while
 * DatumGetTSQueryCopy uses PG_DETOAST_DATUM_COPY for simplicity.
 * PG_RETURN_TSQUERY expands to a complete return statement.
 */

#define DatumGetTSQuery(X)			((TSQuery) DatumGetPointer(X))
#define DatumGetTSQueryCopy(X)		((TSQuery) PG_DETOAST_DATUM_COPY(X))
#define TSQueryGetDatum(X)			PointerGetDatum(X)
#define PG_GETARG_TSQUERY(n)		DatumGetTSQuery(PG_GETARG_DATUM(n))
#define PG_GETARG_TSQUERY_COPY(n)	DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSQUERY(x)		return TSQueryGetDatum((x))
296 
297 /*
298  * I/O
299  */
300 extern Datum tsqueryin(PG_FUNCTION_ARGS);
301 extern Datum tsqueryout(PG_FUNCTION_ARGS);
302 extern Datum tsquerysend(PG_FUNCTION_ARGS);
303 extern Datum tsqueryrecv(PG_FUNCTION_ARGS);
304 
305 /*
306  * operations with tsquery
307  */
308 extern Datum tsquery_lt(PG_FUNCTION_ARGS);
309 extern Datum tsquery_le(PG_FUNCTION_ARGS);
310 extern Datum tsquery_eq(PG_FUNCTION_ARGS);
311 extern Datum tsquery_ne(PG_FUNCTION_ARGS);
312 extern Datum tsquery_ge(PG_FUNCTION_ARGS);
313 extern Datum tsquery_gt(PG_FUNCTION_ARGS);
314 extern Datum tsquery_cmp(PG_FUNCTION_ARGS);
315 
316 extern Datum tsquerytree(PG_FUNCTION_ARGS);
317 extern Datum tsquery_numnode(PG_FUNCTION_ARGS);
318 
319 extern Datum tsquery_and(PG_FUNCTION_ARGS);
320 extern Datum tsquery_or(PG_FUNCTION_ARGS);
321 extern Datum tsquery_phrase(PG_FUNCTION_ARGS);
322 extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS);
323 extern Datum tsquery_not(PG_FUNCTION_ARGS);
324 
325 extern Datum tsquery_rewrite(PG_FUNCTION_ARGS);
326 extern Datum tsquery_rewrite_query(PG_FUNCTION_ARGS);
327 
328 extern Datum tsq_mcontains(PG_FUNCTION_ARGS);
329 extern Datum tsq_mcontained(PG_FUNCTION_ARGS);
330 
331 #endif   /* _PG_TSTYPE_H_ */
332