1 /*-------------------------------------------------------------------------
2  *
3  * ts_type.h
4  *	  Definitions for the tsvector and tsquery types
5  *
6  * Copyright (c) 1998-2017, PostgreSQL Global Development Group
7  *
8  * src/include/tsearch/ts_type.h
9  *
10  *-------------------------------------------------------------------------
11  */
12 #ifndef _PG_TSTYPE_H_
13 #define _PG_TSTYPE_H_
14 
15 #include "fmgr.h"
16 #include "utils/memutils.h"
17 
18 
19 /*
20  * TSVector type.
21  *
22  * Structure of tsvector datatype:
23  * 1) standard varlena header
24  * 2) int32		size - number of lexemes (WordEntry array entries)
25  * 3) Array of WordEntry - one per lexeme; must be sorted according to
26  *				tsCompareString() (ie, memcmp of lexeme strings).
27  *				WordEntry->pos gives the number of bytes from end of WordEntry
28  *				array to start of lexeme's string, which is of length len.
29  * 4) Per-lexeme data storage:
30  *	  lexeme string (not null-terminated)
31  *	  if haspos is true:
32  *		padding byte if necessary to make the position data 2-byte aligned
33  *		uint16			number of positions that follow
34  *		WordEntryPos[]	positions
35  *
36  * The positions for each lexeme must be sorted.
37  *
38  * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
39  */
40 
41 typedef struct
42 {
43 	uint32
44 				haspos:1,
45 				len:11,			/* MAX 2Kb */
46 				pos:20;			/* MAX 1Mb */
47 } WordEntry;
48 
/* Largest values that fit in an 11-bit length and a 20-bit offset field */
#define MAXSTRLEN	((1 << 11) - 1)
#define MAXSTRPOS	((1 << 20) - 1)

/*
 * Comparison function defined elsewhere.  Going by its name it orders
 * WordEntryPos values, and its signature has the shape qsort() expects --
 * NOTE(review): confirm against the definition.
 */
extern int	compareWordEntryPos(const void *a, const void *b);
53 
54 /*
55  * Equivalent to
56  * typedef struct {
57  *		uint16
58  *			weight:2,
59  *			pos:14;
60  * }
61  */
62 
63 typedef uint16 WordEntryPos;
64 
65 typedef struct
66 {
67 	uint16		npos;
68 	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
69 } WordEntryPosVector;
70 
71 /* WordEntryPosVector with exactly 1 entry */
72 typedef struct
73 {
74 	uint16		npos;
75 	WordEntryPos pos[1];
76 } WordEntryPosVector1;
77 
78 
/*
 * Accessors for the two parts of a WordEntryPos value: the weight occupies
 * the top 2 bits and the position the low 14 bits of the 16-bit value.
 */
#define WEP_GETWEIGHT(x)	( (x) >> 14 )
#define WEP_GETPOS(x)		( (x) & 0x3fff )

/*
 * Setters.  x must be a modifiable lvalue and is evaluated more than once,
 * so do not pass an expression with side effects.  Both setters mask the new
 * value v to the width of its field, so an out-of-range v can neither
 * disturb the other field nor set bits above bit 15 when x is wider than
 * 16 bits, and a negative v is never left-shifted.
 */
#define WEP_SETWEIGHT(x,v)	( (x) = ( ( (v) & 0x3 ) << 14 ) | ( (x) & 0x3fff ) )
#define WEP_SETPOS(x,v)		( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )
84 
/* Positions must stay below MAXENTRYPOS; LIMITPOS clamps to MAXENTRYPOS-1 */
#define MAXENTRYPOS (1 << 14)
#define LIMITPOS(x) ( ( (x) < MAXENTRYPOS ) ? (x) : (MAXENTRYPOS - 1) )

/* NOTE(review): presumably the cap on positions kept per lexeme -- confirm */
#define MAXNUMPOS	(256)
88 
89 /* This struct represents a complete tsvector datum */
90 typedef struct
91 {
92 	int32		vl_len_;		/* varlena header (do not touch directly!) */
93 	int32		size;
94 	WordEntry	entries[FLEXIBLE_ARRAY_MEMBER];
95 	/* lexemes follow the entries[] array */
96 } TSVectorData;
97 
98 typedef TSVectorData *TSVector;
99 
/* size of the fixed part of a tsvector, i.e. everything before entries[] */
#define DATAHDRSIZE (offsetof(TSVectorData, entries))

/* total size of a tsvector holding nentries lexemes and lenstr bytes of lexeme storage */
#define CALCDATASIZE(nentries, lenstr) \
	( DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )

/* pointer to start of a tsvector's WordEntry array */
#define ARRPTR(x)	( (x)->entries )

/* pointer to start of a tsvector's lexeme storage (just past the array) */
#define STRPTR(x)	( (char *) (ARRPTR(x) + (x)->size) )

/*
 * Position vector of entry e in tsvector x: it starts at the first
 * SHORTALIGN'ed offset at or after the end of the lexeme string.
 */
#define _POSVECPTR(x, e) \
	( (WordEntryPosVector *) (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)) )
/* number of positions stored for entry e, or 0 if it has no position data */
#define POSDATALEN(x,e) ( (e)->haspos ? _POSVECPTR(x,e)->npos : 0 )
/* positions array of entry e; unlike POSDATALEN this does not test haspos */
#define POSDATAPTR(x,e) ( _POSVECPTR(x,e)->pos )
112 
/*
 * fmgr interface macros for tsvector
 *
 * The Datum-to-TSVector conversions go through PG_DETOAST_DATUM (or
 * PG_DETOAST_DATUM_COPY for the _COPY variants); the PG_GETARG_ forms apply
 * them to the n'th function argument.
 */

#define DatumGetTSVector(X)			( (TSVector) PG_DETOAST_DATUM(X) )
#define DatumGetTSVectorCopy(X)		( (TSVector) PG_DETOAST_DATUM_COPY(X) )
#define TSVectorGetDatum(X)			PointerGetDatum(X)

#define PG_GETARG_TSVECTOR(n)		DatumGetTSVector(PG_GETARG_DATUM(n))
#define PG_GETARG_TSVECTOR_COPY(n)	DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSVECTOR(x)		return TSVectorGetDatum(x)
123 
124 
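/*
 * TSQuery type.
 *
 * A tsquery is stored as a varlena header, the number of QueryItems, the
 * array of QueryItems (operators and operands), and finally the operand
 * strings; see TSQueryData.
 */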
130 
131 typedef int8 QueryItemType;
132 
/*
 * Valid values for QueryItemType.
 *
 * QI_VALSTOP is only used in an intermediate stack representation in
 * parse_tsquery.  It's not a legal type elsewhere.
 */
#define QI_VAL		1
#define QI_OPR		2
#define QI_VALSTOP	3
139 
140 /*
141  * QueryItem is one node in tsquery - operator or operand.
142  */
143 typedef struct
144 {
145 	QueryItemType type;			/* operand or kind of operator (ts_tokentype) */
146 	uint8		weight;			/* weights of operand to search. It's a
147 								 * bitmask of allowed weights. if it =0 then
148 								 * any weight are allowed. Weights and bit
149 								 * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
150 	bool		prefix;			/* true if it's a prefix search */
151 	int32		valcrc;			/* XXX: pg_crc32 would be a more appropriate
152 								 * data type, but we use comparisons to signed
153 								 * integers in the code. They would need to be
154 								 * changed as well. */
155 
156 	/* pointer to text value of operand, must correlate with WordEntry */
157 	uint32
158 				length:12,
159 				distance:20;
160 } QueryOperand;
161 
162 
/*
 * Legal values for QueryOperator.oper.
 */
#define OP_NOT			1
#define OP_AND			2
#define OP_OR			3
#define OP_PHRASE		4		/* highest code, tsquery_cleanup.c */
#define OP_COUNT		4		/* number of operator codes */

/* one priority per operator code, indexed by code - 1; defined elsewhere */
extern const int tsearch_op_priority[OP_COUNT];

/* get operation priority by its code (no range check is done on x) */
#define OP_PRIORITY(x)	( tsearch_op_priority[(x) - 1] )
/* get QueryOperator priority; x is cast to (QueryOperator *) */
#define QO_PRIORITY(x)	OP_PRIORITY(((QueryOperator *) (x))->oper)
178 
179 typedef struct
180 {
181 	QueryItemType type;
182 	int8		oper;			/* see above */
183 	int16		distance;		/* distance between agrs for OP_PHRASE */
184 	uint32		left;			/* pointer to left operand. Right operand is
185 								 * item + 1, left operand is placed
186 								 * item+item->left */
187 } QueryOperator;
188 
189 /*
190  * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
191  * inside QueryItem requiring 8-byte alignment, like int64.
192  */
193 typedef union
194 {
195 	QueryItemType type;
196 	QueryOperator qoperator;
197 	QueryOperand qoperand;
198 } QueryItem;
199 
200 /*
201  * Storage:
202  *	(len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
203  */
204 
205 typedef struct
206 {
207 	int32		vl_len_;		/* varlena header (do not touch directly!) */
208 	int32		size;			/* number of QueryItems */
209 	char		data[FLEXIBLE_ARRAY_MEMBER];	/* data starts here */
210 } TSQueryData;
211 
212 typedef TSQueryData *TSQuery;
213 
/* size of the fixed tsquery header: varlena header plus the int32 item count */
#define HDRSIZETQ	( VARHDRSZ + sizeof(int32) )

/*
 * Computes the size of header and all QueryItems. size is the number of
 * QueryItems, and lenofoperand is the total length of all operands
 */
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )

/*
 * True if a tsquery with the given number of QueryItems and total operand
 * length would not fit in MaxAllocSize.
 *
 * The first test guards the subtraction in the second one: the arithmetic
 * is unsigned, so without it a lenofoperand larger than
 * MaxAllocSize - HDRSIZETQ would wrap around to a huge limit and the
 * oversized query would be reported as acceptable.  Note that lenofoperand
 * is evaluated twice.
 */
#define TSQUERY_TOO_BIG(size, lenofoperand) \
	((lenofoperand) > MaxAllocSize - HDRSIZETQ || \
	 (size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem))
222 
/* Returns a pointer to the first QueryItem in a TSQuery (just past the header) */
#define GETQUERY(x)  ( (QueryItem *) ((char *) (x) + HDRSIZETQ) )

/*
 * Returns a pointer to the beginning of operands in a TSQuery, i.e. the
 * first byte after the last QueryItem
 */
#define GETOPERAND(x) \
	( (char *) GETQUERY(x) + ((TSQuery) (x))->size * sizeof(QueryItem) )
228 
/*
 * fmgr interface macros for tsquery
 *
 * Note: the TSQuery type is marked as plain storage, so it can't be
 * toasted; that is why DatumGetTSQuery is a bare pointer conversion.  The
 * _COPY variant nevertheless uses PG_DETOAST_DATUM_COPY, for simplicity.
 */

#define DatumGetTSQuery(X)			( (TSQuery) DatumGetPointer(X) )
#define DatumGetTSQueryCopy(X)		( (TSQuery) PG_DETOAST_DATUM_COPY(X) )
#define TSQueryGetDatum(X)			PointerGetDatum(X)

#define PG_GETARG_TSQUERY(n)		DatumGetTSQuery(PG_GETARG_DATUM(n))
#define PG_GETARG_TSQUERY_COPY(n)	DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSQUERY(x)		return TSQueryGetDatum(x)
241 
242 #endif							/* _PG_TSTYPE_H_ */
243