1 /* 2 * contrib/pg_trgm/trgm_gin.c 3 */ 4 #include "postgres.h" 5 6 #include "trgm.h" 7 8 #include "access/gin.h" 9 #include "access/stratnum.h" 10 #include "fmgr.h" 11 12 13 PG_FUNCTION_INFO_V1(gin_extract_trgm); 14 PG_FUNCTION_INFO_V1(gin_extract_value_trgm); 15 PG_FUNCTION_INFO_V1(gin_extract_query_trgm); 16 PG_FUNCTION_INFO_V1(gin_trgm_consistent); 17 PG_FUNCTION_INFO_V1(gin_trgm_triconsistent); 18 19 /* 20 * This function can only be called if a pre-9.1 version of the GIN operator 21 * class definition is present in the catalogs (probably as a consequence 22 * of upgrade-in-place). Cope. 23 */ 24 Datum 25 gin_extract_trgm(PG_FUNCTION_ARGS) 26 { 27 if (PG_NARGS() == 3) 28 return gin_extract_value_trgm(fcinfo); 29 if (PG_NARGS() == 7) 30 return gin_extract_query_trgm(fcinfo); 31 elog(ERROR, "unexpected number of arguments to gin_extract_trgm"); 32 PG_RETURN_NULL(); 33 } 34 35 Datum 36 gin_extract_value_trgm(PG_FUNCTION_ARGS) 37 { 38 text *val = (text *) PG_GETARG_TEXT_PP(0); 39 int32 *nentries = (int32 *) PG_GETARG_POINTER(1); 40 Datum *entries = NULL; 41 TRGM *trg; 42 int32 trglen; 43 44 *nentries = 0; 45 46 trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); 47 trglen = ARRNELEM(trg); 48 49 if (trglen > 0) 50 { 51 trgm *ptr; 52 int32 i; 53 54 *nentries = trglen; 55 entries = (Datum *) palloc(sizeof(Datum) * trglen); 56 57 ptr = GETARR(trg); 58 for (i = 0; i < trglen; i++) 59 { 60 int32 item = trgm2int(ptr); 61 62 entries[i] = Int32GetDatum(item); 63 ptr++; 64 } 65 } 66 67 PG_RETURN_POINTER(entries); 68 } 69 70 Datum 71 gin_extract_query_trgm(PG_FUNCTION_ARGS) 72 { 73 text *val = (text *) PG_GETARG_TEXT_PP(0); 74 int32 *nentries = (int32 *) PG_GETARG_POINTER(1); 75 StrategyNumber strategy = PG_GETARG_UINT16(2); 76 77 /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */ 78 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); 79 80 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ 81 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); 82 Datum *entries = NULL; 83 TRGM *trg; 84 int32 trglen; 85 trgm *ptr; 86 TrgmPackedGraph *graph; 87 int32 i; 88 89 switch (strategy) 90 { 91 case SimilarityStrategyNumber: 92 case WordSimilarityStrategyNumber: 93 case StrictWordSimilarityStrategyNumber: 94 trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); 95 break; 96 case ILikeStrategyNumber: 97 #ifndef IGNORECASE 98 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 99 #endif 100 /* FALL THRU */ 101 case LikeStrategyNumber: 102 103 /* 104 * For wildcard search we extract all the trigrams that every 105 * potentially-matching string must include. 106 */ 107 trg = generate_wildcard_trgm(VARDATA_ANY(val), 108 VARSIZE_ANY_EXHDR(val)); 109 break; 110 case RegExpICaseStrategyNumber: 111 #ifndef IGNORECASE 112 elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 113 #endif 114 /* FALL THRU */ 115 case RegExpStrategyNumber: 116 trg = createTrgmNFA(val, PG_GET_COLLATION(), 117 &graph, CurrentMemoryContext); 118 if (trg && ARRNELEM(trg) > 0) 119 { 120 /* 121 * Successful regex processing: store NFA-like graph as 122 * extra_data. GIN API requires an array of nentries 123 * Pointers, but we just put the same value in each element. 124 */ 125 trglen = ARRNELEM(trg); 126 *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen); 127 for (i = 0; i < trglen; i++) 128 (*extra_data)[i] = (Pointer) graph; 129 } 130 else 131 { 132 /* No result: have to do full index scan. */ 133 *nentries = 0; 134 *searchMode = GIN_SEARCH_MODE_ALL; 135 PG_RETURN_POINTER(entries); 136 } 137 break; 138 default: 139 elog(ERROR, "unrecognized strategy number: %d", strategy); 140 trg = NULL; /* keep compiler quiet */ 141 break; 142 } 143 144 trglen = ARRNELEM(trg); 145 *nentries = trglen; 146 147 if (trglen > 0) 148 { 149 entries = (Datum *) palloc(sizeof(Datum) * trglen); 150 ptr = GETARR(trg); 151 for (i = 0; i < trglen; i++) 152 { 153 int32 item = trgm2int(ptr); 154 155 entries[i] = Int32GetDatum(item); 156 ptr++; 157 } 158 } 159 160 /* 161 * If no trigram was extracted then we have to scan all the index. 162 */ 163 if (trglen == 0) 164 *searchMode = GIN_SEARCH_MODE_ALL; 165 166 PG_RETURN_POINTER(entries); 167 } 168 169 Datum 170 gin_trgm_consistent(PG_FUNCTION_ARGS) 171 { 172 bool *check = (bool *) PG_GETARG_POINTER(0); 173 StrategyNumber strategy = PG_GETARG_UINT16(1); 174 175 /* text *query = PG_GETARG_TEXT_PP(2); */ 176 int32 nkeys = PG_GETARG_INT32(3); 177 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); 178 bool *recheck = (bool *) PG_GETARG_POINTER(5); 179 bool res; 180 int32 i, 181 ntrue; 182 double nlimit; 183 184 /* All cases served by this function are inexact */ 185 *recheck = true; 186 187 switch (strategy) 188 { 189 case SimilarityStrategyNumber: 190 case WordSimilarityStrategyNumber: 191 case StrictWordSimilarityStrategyNumber: 192 nlimit = index_strategy_get_limit(strategy); 193 194 /* Count the matches */ 195 ntrue = 0; 196 for (i = 0; i < nkeys; i++) 197 { 198 if (check[i]) 199 ntrue++; 200 } 201 202 /*-------------------- 203 * If DIVUNION is defined then similarity formula is: 204 * c / (len1 + len2 - c) 205 * where c is number of common trigrams and it stands as ntrue in 206 * this code. Here we don't know value of len2 but we can assume 207 * that c (ntrue) is a lower bound of len2, so upper bound of 208 * similarity is: 209 * c / (len1 + c - c) => c / len1 210 * If DIVUNION is not defined then similarity formula is: 211 * c / max(len1, len2) 212 * And again, c (ntrue) is a lower bound of len2, but c <= len1 213 * just by definition and, consequently, upper bound of 214 * similarity is just c / len1. 215 * So, independently on DIVUNION the upper bound formula is the same. 216 */ 217 res = (nkeys == 0) ? false : 218 (((((float4) ntrue) / ((float4) nkeys))) >= nlimit); 219 break; 220 case ILikeStrategyNumber: 221 #ifndef IGNORECASE 222 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 223 #endif 224 /* FALL THRU */ 225 case LikeStrategyNumber: 226 /* Check if all extracted trigrams are presented. */ 227 res = true; 228 for (i = 0; i < nkeys; i++) 229 { 230 if (!check[i]) 231 { 232 res = false; 233 break; 234 } 235 } 236 break; 237 case RegExpICaseStrategyNumber: 238 #ifndef IGNORECASE 239 elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 240 #endif 241 /* FALL THRU */ 242 case RegExpStrategyNumber: 243 if (nkeys < 1) 244 { 245 /* Regex processing gave no result: do full index scan */ 246 res = true; 247 } 248 else 249 res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0], 250 check); 251 break; 252 default: 253 elog(ERROR, "unrecognized strategy number: %d", strategy); 254 res = false; /* keep compiler quiet */ 255 break; 256 } 257 258 PG_RETURN_BOOL(res); 259 } 260 261 /* 262 * In all cases, GIN_TRUE is at least as favorable to inclusion as 263 * GIN_MAYBE. If no better option is available, simply treat 264 * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary 265 * consistent function. 266 */ 267 Datum 268 gin_trgm_triconsistent(PG_FUNCTION_ARGS) 269 { 270 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); 271 StrategyNumber strategy = PG_GETARG_UINT16(1); 272 273 /* text *query = PG_GETARG_TEXT_PP(2); */ 274 int32 nkeys = PG_GETARG_INT32(3); 275 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); 276 GinTernaryValue res = GIN_MAYBE; 277 int32 i, 278 ntrue; 279 bool *boolcheck; 280 double nlimit; 281 282 switch (strategy) 283 { 284 case SimilarityStrategyNumber: 285 case WordSimilarityStrategyNumber: 286 case StrictWordSimilarityStrategyNumber: 287 nlimit = index_strategy_get_limit(strategy); 288 289 /* Count the matches */ 290 ntrue = 0; 291 for (i = 0; i < nkeys; i++) 292 { 293 if (check[i] != GIN_FALSE) 294 ntrue++; 295 } 296 297 /* 298 * See comment in gin_trgm_consistent() about * upper bound 299 * formula 300 */ 301 res = (nkeys == 0) 302 ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit) 303 ? GIN_MAYBE : GIN_FALSE); 304 break; 305 case ILikeStrategyNumber: 306 #ifndef IGNORECASE 307 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams"); 308 #endif 309 /* FALL THRU */ 310 case LikeStrategyNumber: 311 /* Check if all extracted trigrams are presented. */ 312 res = GIN_MAYBE; 313 for (i = 0; i < nkeys; i++) 314 { 315 if (check[i] == GIN_FALSE) 316 { 317 res = GIN_FALSE; 318 break; 319 } 320 } 321 break; 322 case RegExpICaseStrategyNumber: 323 #ifndef IGNORECASE 324 elog(ERROR, "cannot handle ~* with case-sensitive trigrams"); 325 #endif 326 /* FALL THRU */ 327 case RegExpStrategyNumber: 328 if (nkeys < 1) 329 { 330 /* Regex processing gave no result: do full index scan */ 331 res = GIN_MAYBE; 332 } 333 else 334 { 335 /* 336 * As trigramsMatchGraph implements a monotonic boolean 337 * function, promoting all GIN_MAYBE keys to GIN_TRUE will 338 * give a conservative result. 339 */ 340 boolcheck = (bool *) palloc(sizeof(bool) * nkeys); 341 for (i = 0; i < nkeys; i++) 342 boolcheck[i] = (check[i] != GIN_FALSE); 343 if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0], 344 boolcheck)) 345 res = GIN_FALSE; 346 pfree(boolcheck); 347 } 348 break; 349 default: 350 elog(ERROR, "unrecognized strategy number: %d", strategy); 351 res = GIN_FALSE; /* keep compiler quiet */ 352 break; 353 } 354 355 /* All cases served by this function are inexact */ 356 Assert(res != GIN_TRUE); 357 PG_RETURN_GIN_TERNARY_VALUE(res); 358 } 359