1 /*-------------------------------------------------------------------------
2  *
3  * like.c
4  *	  like expression handling code.
5  *
6  *	 NOTES
7  *		A big hack of the regexp.c code!! Contributed by
8  *		Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9  *
10  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  *	src/backend/utils/adt/like.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <ctype.h>
21 
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
27 
28 
29 #define LIKE_TRUE						1
30 #define LIKE_FALSE						0
31 #define LIKE_ABORT						(-1)
32 
33 
34 static int SB_MatchText(char *t, int tlen, char *p, int plen,
35 			 pg_locale_t locale, bool locale_is_c);
36 static text *SB_do_like_escape(text *, text *);
37 
38 static int MB_MatchText(char *t, int tlen, char *p, int plen,
39 			 pg_locale_t locale, bool locale_is_c);
40 static text *MB_do_like_escape(text *, text *);
41 
42 static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
43 			   pg_locale_t locale, bool locale_is_c);
44 
45 static int SB_IMatchText(char *t, int tlen, char *p, int plen,
46 			  pg_locale_t locale, bool locale_is_c);
47 
48 static int	GenericMatchText(char *s, int slen, char *p, int plen);
49 static int	Generic_Text_IC_like(text *str, text *pat, Oid collation);
50 
/*--------------------
 * wchareq
 *
 * Helper for the multibyte MatchText: decides whether the multibyte
 * characters starting at p1 and at p2 are the same character, by comparing
 * their byte lengths (per pg_mblen) and then their bytes.
 *
 * Returns 1 if the two characters are identical, 0 otherwise.
 *--------------------
 */
static inline int
wchareq(char *p1, char *p2)
{
	int			nbytes;
	int			remaining;

	/* Fast path: characters whose first bytes differ cannot be equal. */
	if (*p1 != *p2)
		return 0;

	/* Characters of different byte length cannot be equal either. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Equal lengths, so walk both characters byte by byte. */
	for (remaining = nbytes; remaining != 0; remaining--)
	{
		if (*p1++ != *p2++)
			return 0;
	}

	return 1;
}
77 
78 /*
79  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
80  * comparison of multibyte characters.  It did not work at all, however,
81  * because it relied on tolower() which has a single-byte API ... and
82  * towlower() wouldn't be much better since we have no suitably cheap way
83  * of getting a single character transformed to the system's wchar_t format.
84  * So now, we just downcase the strings using lower() and apply regular LIKE
85  * comparison.  This should be revisited when we install better locale support.
86  */
87 
88 /*
89  * We do handle case-insensitive matching for single-byte encodings using
90  * fold-on-the-fly processing, however.
91  */
92 static char
SB_lower_char(unsigned char c,pg_locale_t locale,bool locale_is_c)93 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
94 {
95 	if (locale_is_c)
96 		return pg_ascii_tolower(c);
97 #ifdef HAVE_LOCALE_T
98 	else if (locale)
99 		return tolower_l(c, locale->info.lt);
100 #endif
101 	else
102 		return pg_tolower(c);
103 }
104 
105 
106 #define NextByte(p, plen)	((p)++, (plen)--)
107 
108 /* Set up to compile like_match.c for multibyte characters */
109 #define CHAREQ(p1, p2) wchareq((p1), (p2))
110 #define NextChar(p, plen) \
111 	do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
112 #define CopyAdvChar(dst, src, srclen) \
113 	do { int __l = pg_mblen(src); \
114 		 (srclen) -= __l; \
115 		 while (__l-- > 0) \
116 			 *(dst)++ = *(src)++; \
117 	   } while (0)
118 
119 #define MatchText	MB_MatchText
120 #define do_like_escape	MB_do_like_escape
121 
122 #include "like_match.c"
123 
124 /* Set up to compile like_match.c for single-byte characters */
125 #define CHAREQ(p1, p2) (*(p1) == *(p2))
126 #define NextChar(p, plen) NextByte((p), (plen))
127 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
128 
129 #define MatchText	SB_MatchText
130 #define do_like_escape	SB_do_like_escape
131 
132 #include "like_match.c"
133 
134 /* setup to compile like_match.c for single byte case insensitive matches */
135 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
136 #define NextChar(p, plen) NextByte((p), (plen))
137 #define MatchText SB_IMatchText
138 
139 #include "like_match.c"
140 
141 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
142 
143 #define NextChar(p, plen) \
144 	do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
145 #define MatchText	UTF8_MatchText
146 
147 #include "like_match.c"
148 
149 /* Generic for all cases not requiring inline case-folding */
150 static inline int
GenericMatchText(char * s,int slen,char * p,int plen)151 GenericMatchText(char *s, int slen, char *p, int plen)
152 {
153 	if (pg_database_encoding_max_length() == 1)
154 		return SB_MatchText(s, slen, p, plen, 0, true);
155 	else if (GetDatabaseEncoding() == PG_UTF8)
156 		return UTF8_MatchText(s, slen, p, plen, 0, true);
157 	else
158 		return MB_MatchText(s, slen, p, plen, 0, true);
159 }
160 
161 static inline int
Generic_Text_IC_like(text * str,text * pat,Oid collation)162 Generic_Text_IC_like(text *str, text *pat, Oid collation)
163 {
164 	char	   *s,
165 			   *p;
166 	int			slen,
167 				plen;
168 	pg_locale_t locale = 0;
169 	bool		locale_is_c = false;
170 
171 	if (lc_ctype_is_c(collation))
172 		locale_is_c = true;
173 	else if (collation != DEFAULT_COLLATION_OID)
174 	{
175 		if (!OidIsValid(collation))
176 		{
177 			/*
178 			 * This typically means that the parser could not resolve a
179 			 * conflict of implicit collations, so report it that way.
180 			 */
181 			ereport(ERROR,
182 					(errcode(ERRCODE_INDETERMINATE_COLLATION),
183 					 errmsg("could not determine which collation to use for ILIKE"),
184 					 errhint("Use the COLLATE clause to set the collation explicitly.")));
185 		}
186 		locale = pg_newlocale_from_collation(collation);
187 	}
188 
189 	/*
190 	 * For efficiency reasons, in the single byte case we don't call lower()
191 	 * on the pattern and text, but instead call SB_lower_char on each
192 	 * character.  In the multi-byte case we don't have much choice :-(. Also,
193 	 * ICU does not support single-character case folding, so we go the long
194 	 * way.
195 	 */
196 
197 	if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
198 	{
199 		/* lower's result is never packed, so OK to use old macros here */
200 		pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
201 													 PointerGetDatum(pat)));
202 		p = VARDATA_ANY(pat);
203 		plen = VARSIZE_ANY_EXHDR(pat);
204 		str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
205 													 PointerGetDatum(str)));
206 		s = VARDATA_ANY(str);
207 		slen = VARSIZE_ANY_EXHDR(str);
208 		if (GetDatabaseEncoding() == PG_UTF8)
209 			return UTF8_MatchText(s, slen, p, plen, 0, true);
210 		else
211 			return MB_MatchText(s, slen, p, plen, 0, true);
212 	}
213 	else
214 	{
215 		p = VARDATA_ANY(pat);
216 		plen = VARSIZE_ANY_EXHDR(pat);
217 		s = VARDATA_ANY(str);
218 		slen = VARSIZE_ANY_EXHDR(str);
219 		return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
220 	}
221 }
222 
223 /*
224  *	interface routines called by the function manager
225  */
226 
227 Datum
namelike(PG_FUNCTION_ARGS)228 namelike(PG_FUNCTION_ARGS)
229 {
230 	Name		str = PG_GETARG_NAME(0);
231 	text	   *pat = PG_GETARG_TEXT_PP(1);
232 	bool		result;
233 	char	   *s,
234 			   *p;
235 	int			slen,
236 				plen;
237 
238 	s = NameStr(*str);
239 	slen = strlen(s);
240 	p = VARDATA_ANY(pat);
241 	plen = VARSIZE_ANY_EXHDR(pat);
242 
243 	result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
244 
245 	PG_RETURN_BOOL(result);
246 }
247 
248 Datum
namenlike(PG_FUNCTION_ARGS)249 namenlike(PG_FUNCTION_ARGS)
250 {
251 	Name		str = PG_GETARG_NAME(0);
252 	text	   *pat = PG_GETARG_TEXT_PP(1);
253 	bool		result;
254 	char	   *s,
255 			   *p;
256 	int			slen,
257 				plen;
258 
259 	s = NameStr(*str);
260 	slen = strlen(s);
261 	p = VARDATA_ANY(pat);
262 	plen = VARSIZE_ANY_EXHDR(pat);
263 
264 	result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
265 
266 	PG_RETURN_BOOL(result);
267 }
268 
269 Datum
textlike(PG_FUNCTION_ARGS)270 textlike(PG_FUNCTION_ARGS)
271 {
272 	text	   *str = PG_GETARG_TEXT_PP(0);
273 	text	   *pat = PG_GETARG_TEXT_PP(1);
274 	bool		result;
275 	char	   *s,
276 			   *p;
277 	int			slen,
278 				plen;
279 
280 	s = VARDATA_ANY(str);
281 	slen = VARSIZE_ANY_EXHDR(str);
282 	p = VARDATA_ANY(pat);
283 	plen = VARSIZE_ANY_EXHDR(pat);
284 
285 	result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
286 
287 	PG_RETURN_BOOL(result);
288 }
289 
290 Datum
textnlike(PG_FUNCTION_ARGS)291 textnlike(PG_FUNCTION_ARGS)
292 {
293 	text	   *str = PG_GETARG_TEXT_PP(0);
294 	text	   *pat = PG_GETARG_TEXT_PP(1);
295 	bool		result;
296 	char	   *s,
297 			   *p;
298 	int			slen,
299 				plen;
300 
301 	s = VARDATA_ANY(str);
302 	slen = VARSIZE_ANY_EXHDR(str);
303 	p = VARDATA_ANY(pat);
304 	plen = VARSIZE_ANY_EXHDR(pat);
305 
306 	result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
307 
308 	PG_RETURN_BOOL(result);
309 }
310 
311 Datum
bytealike(PG_FUNCTION_ARGS)312 bytealike(PG_FUNCTION_ARGS)
313 {
314 	bytea	   *str = PG_GETARG_BYTEA_PP(0);
315 	bytea	   *pat = PG_GETARG_BYTEA_PP(1);
316 	bool		result;
317 	char	   *s,
318 			   *p;
319 	int			slen,
320 				plen;
321 
322 	s = VARDATA_ANY(str);
323 	slen = VARSIZE_ANY_EXHDR(str);
324 	p = VARDATA_ANY(pat);
325 	plen = VARSIZE_ANY_EXHDR(pat);
326 
327 	result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
328 
329 	PG_RETURN_BOOL(result);
330 }
331 
332 Datum
byteanlike(PG_FUNCTION_ARGS)333 byteanlike(PG_FUNCTION_ARGS)
334 {
335 	bytea	   *str = PG_GETARG_BYTEA_PP(0);
336 	bytea	   *pat = PG_GETARG_BYTEA_PP(1);
337 	bool		result;
338 	char	   *s,
339 			   *p;
340 	int			slen,
341 				plen;
342 
343 	s = VARDATA_ANY(str);
344 	slen = VARSIZE_ANY_EXHDR(str);
345 	p = VARDATA_ANY(pat);
346 	plen = VARSIZE_ANY_EXHDR(pat);
347 
348 	result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
349 
350 	PG_RETURN_BOOL(result);
351 }
352 
353 /*
354  * Case-insensitive versions
355  */
356 
357 Datum
nameiclike(PG_FUNCTION_ARGS)358 nameiclike(PG_FUNCTION_ARGS)
359 {
360 	Name		str = PG_GETARG_NAME(0);
361 	text	   *pat = PG_GETARG_TEXT_PP(1);
362 	bool		result;
363 	text	   *strtext;
364 
365 	strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
366 												 NameGetDatum(str)));
367 	result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
368 
369 	PG_RETURN_BOOL(result);
370 }
371 
372 Datum
nameicnlike(PG_FUNCTION_ARGS)373 nameicnlike(PG_FUNCTION_ARGS)
374 {
375 	Name		str = PG_GETARG_NAME(0);
376 	text	   *pat = PG_GETARG_TEXT_PP(1);
377 	bool		result;
378 	text	   *strtext;
379 
380 	strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
381 												 NameGetDatum(str)));
382 	result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
383 
384 	PG_RETURN_BOOL(result);
385 }
386 
387 Datum
texticlike(PG_FUNCTION_ARGS)388 texticlike(PG_FUNCTION_ARGS)
389 {
390 	text	   *str = PG_GETARG_TEXT_PP(0);
391 	text	   *pat = PG_GETARG_TEXT_PP(1);
392 	bool		result;
393 
394 	result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
395 
396 	PG_RETURN_BOOL(result);
397 }
398 
399 Datum
texticnlike(PG_FUNCTION_ARGS)400 texticnlike(PG_FUNCTION_ARGS)
401 {
402 	text	   *str = PG_GETARG_TEXT_PP(0);
403 	text	   *pat = PG_GETARG_TEXT_PP(1);
404 	bool		result;
405 
406 	result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
407 
408 	PG_RETURN_BOOL(result);
409 }
410 
411 /*
412  * like_escape() --- given a pattern and an ESCAPE string,
413  * convert the pattern to use Postgres' standard backslash escape convention.
414  */
415 Datum
like_escape(PG_FUNCTION_ARGS)416 like_escape(PG_FUNCTION_ARGS)
417 {
418 	text	   *pat = PG_GETARG_TEXT_PP(0);
419 	text	   *esc = PG_GETARG_TEXT_PP(1);
420 	text	   *result;
421 
422 	if (pg_database_encoding_max_length() == 1)
423 		result = SB_do_like_escape(pat, esc);
424 	else
425 		result = MB_do_like_escape(pat, esc);
426 
427 	PG_RETURN_TEXT_P(result);
428 }
429 
430 /*
431  * like_escape_bytea() --- given a pattern and an ESCAPE string,
432  * convert the pattern to use Postgres' standard backslash escape convention.
433  */
434 Datum
like_escape_bytea(PG_FUNCTION_ARGS)435 like_escape_bytea(PG_FUNCTION_ARGS)
436 {
437 	bytea	   *pat = PG_GETARG_BYTEA_PP(0);
438 	bytea	   *esc = PG_GETARG_BYTEA_PP(1);
439 	bytea	   *result = SB_do_like_escape((text *) pat, (text *) esc);
440 
441 	PG_RETURN_BYTEA_P((bytea *) result);
442 }
443