1 //
2 // Copyright (c) 2016, Riccardo Vianello
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of the authors nor the names of their contributors
16 // may be used to endorse or promote products derived from this software
17 // without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31
32 #include <postgres.h>
33
34 #include <access/gin.h>
35 #if PG_VERSION_NUM >= 90500
36 #include <access/stratnum.h>
37 #else
38 #include <access/skey.h>
39 #endif
40 #include <fmgr.h>
41
42 #include "rdkit.h"
43 #include "bitstring.h"
44 #include "guc.h"
45
gin_bfp_extract(Bfp * bfp,int32 * nkeys)46 static Datum *gin_bfp_extract(Bfp *bfp, int32 *nkeys) {
47 Datum *keys = NULL;
48
49 int32 weight, siglen = BFP_SIGLEN(bfp);
50 uint8 *fp = (uint8 *)VARDATA(bfp);
51
52 *nkeys = weight = bitstringWeight(siglen, fp);
53
54 if (weight != 0) {
55 int32 i, j, keycount;
56
57 keys = palloc(sizeof(Datum) * weight);
58
59 for (keycount = 0, i = 0; i < siglen; ++i) {
60 uint8 byte = fp[i];
61 for (j = 0; j < 8; ++j) {
62 if (byte & 0x01) {
63 int32 key = 8 * i + j;
64 keys[keycount++] = Int32GetDatum(key);
65 }
66 byte >>= 1;
67 }
68 }
69 }
70 return keys;
71 }
72
73 PGDLLEXPORT Datum gin_bfp_extract_value(PG_FUNCTION_ARGS);
74 PG_FUNCTION_INFO_V1(gin_bfp_extract_value);
gin_bfp_extract_value(PG_FUNCTION_ARGS)75 Datum gin_bfp_extract_value(PG_FUNCTION_ARGS) {
76 Bfp *bfp = PG_GETARG_BFP_P(0);
77 int32 *nkeys = (int32 *)PG_GETARG_POINTER(1);
78
79 PG_RETURN_POINTER(gin_bfp_extract(bfp, nkeys));
80 }
81
82 PGDLLEXPORT Datum gin_bfp_extract_query(PG_FUNCTION_ARGS);
83 PG_FUNCTION_INFO_V1(gin_bfp_extract_query);
gin_bfp_extract_query(PG_FUNCTION_ARGS)84 Datum gin_bfp_extract_query(PG_FUNCTION_ARGS) {
85 Bfp *bfp = PG_GETARG_BFP_P(0);
86 int32 *nkeys = (int32 *)PG_GETARG_POINTER(1);
87 /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
88 /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
89 /* Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); */
90 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
91 int32 *searchMode = (int32 *)PG_GETARG_POINTER(6);
92
93 Datum *keys = gin_bfp_extract(bfp, nkeys);
94
95 if (*nkeys == 0) {
96 *searchMode = GIN_SEARCH_MODE_ALL;
97 }
98
99 PG_RETURN_POINTER(keys);
100 }
101
102 PGDLLEXPORT Datum gin_bfp_consistent(PG_FUNCTION_ARGS);
103 PG_FUNCTION_INFO_V1(gin_bfp_consistent);
gin_bfp_consistent(PG_FUNCTION_ARGS)104 Datum gin_bfp_consistent(PG_FUNCTION_ARGS) {
105 bool *check = (bool *)PG_GETARG_POINTER(0);
106 StrategyNumber strategy = PG_GETARG_UINT16(1);
107 /* Bfp *query = PG_GETARG_BFP_P(2); */
108 int32 nkeys = PG_GETARG_INT32(3);
109 /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
110 bool *recheck = (bool *)PG_GETARG_POINTER(5);
111 /* Datum * queryKeys = PG_GETARG_POINTER(6); */
112 /* bool *nullFlags = (bool *) PG_GETARG_POINTER(7); */
113
114 double threshold;
115 bool result;
116
117 int32 i, nCommon = 0;
118 for (i = 0; i < nkeys; ++i) {
119 if (check[i] == true) {
120 ++nCommon;
121 }
122 }
123
124 switch (strategy) {
125 case RDKitTanimotoStrategy:
126 /*
127 * Nsame / (Na + Nb - Nsame)
128 */
129 threshold = getTanimotoLimit();
130 result = nCommon >= threshold * nkeys;
131 break;
132 case RDKitDiceStrategy:
133 /*
134 * 2 * Nsame / (Na + Nb)
135 */
136 threshold = getDiceLimit();
137 result = 2.0 * nCommon >= threshold * (nCommon + nkeys);
138 break;
139 default:
140 elog(ERROR, "Unknown strategy: %d", strategy);
141 }
142
143 *recheck = result;
144
145 PG_RETURN_BOOL(result);
146 }
147
148 PGDLLEXPORT Datum gin_bfp_triconsistent(PG_FUNCTION_ARGS);
149 PG_FUNCTION_INFO_V1(gin_bfp_triconsistent);
gin_bfp_triconsistent(PG_FUNCTION_ARGS)150 Datum gin_bfp_triconsistent(PG_FUNCTION_ARGS) {
151 #if PG_VERSION_NUM >= 90300
152 /*
153
154 */
155 GinTernaryValue *check = (GinTernaryValue *)PG_GETARG_POINTER(0);
156 StrategyNumber strategy = PG_GETARG_UINT16(1);
157 /* Bfp *query = PG_GETARG_BFP_P(2); */
158 int32 nkeys = PG_GETARG_INT32(3);
159 /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
160 /* Datum * queryKeys = PG_GETARG_POINTER(5); */
161 /* bool *nullFlags = (bool *) PG_GETARG_POINTER(6); */
162
163 double threshold;
164 GinTernaryValue result = GIN_MAYBE;
165
166 int32 i, nCommon = 0, nCommonMaybe = 0;
167 for (i = 0; i < nkeys; ++i) {
168 if (check[i] == GIN_TRUE) {
169 ++nCommon;
170 ++nCommonMaybe;
171 } else if (check[i] == GIN_MAYBE) {
172 ++nCommonMaybe;
173 }
174 }
175
176 switch (strategy) {
177 case RDKitTanimotoStrategy:
178 /*
179 * Nsame / (Na + Nb - Nsame)
180 */
181 threshold = getTanimotoLimit();
182 if (nCommonMaybe < threshold * nkeys) {
183 result = GIN_FALSE;
184 }
185 break;
186 case RDKitDiceStrategy:
187 /*
188 * 2 * Nsame / (Na + Nb)
189 */
190 threshold = getDiceLimit();
191 if (2.0 * nCommonMaybe < threshold * (nCommonMaybe + nkeys)) {
192 result = GIN_FALSE;
193 }
194 break;
195 default:
196 elog(ERROR, "Unknown strategy: %d", strategy);
197 }
198
199 PG_RETURN_GIN_TERNARY_VALUE(result);
200 #endif
201 }
202