1 //
2 //  Copyright (c) 2016, Riccardo Vianello
3 //  All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 //       notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 //       copyright notice, this list of conditions and the following
13 //       disclaimer in the documentation and/or other materials provided
14 //       with the distribution.
15 //     * Neither the name of the authors nor the names of their contributors
16 //       may be used to endorse or promote products derived from this software
17 //       without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 
32 #include <postgres.h>
33 
34 #include <access/gin.h>
35 #if PG_VERSION_NUM >= 90500
36 #include <access/stratnum.h>
37 #else
38 #include <access/skey.h>
39 #endif
40 #include <fmgr.h>
41 
42 #include "rdkit.h"
43 #include "bitstring.h"
44 #include "guc.h"
45 
gin_bfp_extract(Bfp * bfp,int32 * nkeys)46 static Datum *gin_bfp_extract(Bfp *bfp, int32 *nkeys) {
47   Datum *keys = NULL;
48 
49   int32 weight, siglen = BFP_SIGLEN(bfp);
50   uint8 *fp = (uint8 *)VARDATA(bfp);
51 
52   *nkeys = weight = bitstringWeight(siglen, fp);
53 
54   if (weight != 0) {
55     int32 i, j, keycount;
56 
57     keys = palloc(sizeof(Datum) * weight);
58 
59     for (keycount = 0, i = 0; i < siglen; ++i) {
60       uint8 byte = fp[i];
61       for (j = 0; j < 8; ++j) {
62         if (byte & 0x01) {
63           int32 key = 8 * i + j;
64           keys[keycount++] = Int32GetDatum(key);
65         }
66         byte >>= 1;
67       }
68     }
69   }
70   return keys;
71 }
72 
73 PGDLLEXPORT Datum gin_bfp_extract_value(PG_FUNCTION_ARGS);
74 PG_FUNCTION_INFO_V1(gin_bfp_extract_value);
gin_bfp_extract_value(PG_FUNCTION_ARGS)75 Datum gin_bfp_extract_value(PG_FUNCTION_ARGS) {
76   Bfp *bfp = PG_GETARG_BFP_P(0);
77   int32 *nkeys = (int32 *)PG_GETARG_POINTER(1);
78 
79   PG_RETURN_POINTER(gin_bfp_extract(bfp, nkeys));
80 }
81 
82 PGDLLEXPORT Datum gin_bfp_extract_query(PG_FUNCTION_ARGS);
83 PG_FUNCTION_INFO_V1(gin_bfp_extract_query);
gin_bfp_extract_query(PG_FUNCTION_ARGS)84 Datum gin_bfp_extract_query(PG_FUNCTION_ARGS) {
85   Bfp *bfp = PG_GETARG_BFP_P(0);
86   int32 *nkeys = (int32 *)PG_GETARG_POINTER(1);
87   /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
88   /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
89   /* Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); */
90   /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
91   int32 *searchMode = (int32 *)PG_GETARG_POINTER(6);
92 
93   Datum *keys = gin_bfp_extract(bfp, nkeys);
94 
95   if (*nkeys == 0) {
96     *searchMode = GIN_SEARCH_MODE_ALL;
97   }
98 
99   PG_RETURN_POINTER(keys);
100 }
101 
102 PGDLLEXPORT Datum gin_bfp_consistent(PG_FUNCTION_ARGS);
103 PG_FUNCTION_INFO_V1(gin_bfp_consistent);
gin_bfp_consistent(PG_FUNCTION_ARGS)104 Datum gin_bfp_consistent(PG_FUNCTION_ARGS) {
105   bool *check = (bool *)PG_GETARG_POINTER(0);
106   StrategyNumber strategy = PG_GETARG_UINT16(1);
107   /* Bfp *query = PG_GETARG_BFP_P(2); */
108   int32 nkeys = PG_GETARG_INT32(3);
109   /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
110   bool *recheck = (bool *)PG_GETARG_POINTER(5);
111   /* Datum * queryKeys = PG_GETARG_POINTER(6); */
112   /* bool *nullFlags = (bool *) PG_GETARG_POINTER(7); */
113 
114   double threshold;
115   bool result;
116 
117   int32 i, nCommon = 0;
118   for (i = 0; i < nkeys; ++i) {
119     if (check[i] == true) {
120       ++nCommon;
121     }
122   }
123 
124   switch (strategy) {
125     case RDKitTanimotoStrategy:
126       /*
127        * Nsame / (Na + Nb - Nsame)
128        */
129       threshold = getTanimotoLimit();
130       result = nCommon >= threshold * nkeys;
131       break;
132     case RDKitDiceStrategy:
133       /*
134        * 2 * Nsame / (Na + Nb)
135        */
136       threshold = getDiceLimit();
137       result = 2.0 * nCommon >= threshold * (nCommon + nkeys);
138       break;
139     default:
140       elog(ERROR, "Unknown strategy: %d", strategy);
141   }
142 
143   *recheck = result;
144 
145   PG_RETURN_BOOL(result);
146 }
147 
148 PGDLLEXPORT Datum gin_bfp_triconsistent(PG_FUNCTION_ARGS);
149 PG_FUNCTION_INFO_V1(gin_bfp_triconsistent);
gin_bfp_triconsistent(PG_FUNCTION_ARGS)150 Datum gin_bfp_triconsistent(PG_FUNCTION_ARGS) {
151 #if PG_VERSION_NUM >= 90300
152   /*
153 
154    */
155   GinTernaryValue *check = (GinTernaryValue *)PG_GETARG_POINTER(0);
156   StrategyNumber strategy = PG_GETARG_UINT16(1);
157   /* Bfp *query = PG_GETARG_BFP_P(2); */
158   int32 nkeys = PG_GETARG_INT32(3);
159   /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
160   /* Datum * queryKeys = PG_GETARG_POINTER(5); */
161   /* bool *nullFlags = (bool *) PG_GETARG_POINTER(6); */
162 
163   double threshold;
164   GinTernaryValue result = GIN_MAYBE;
165 
166   int32 i, nCommon = 0, nCommonMaybe = 0;
167   for (i = 0; i < nkeys; ++i) {
168     if (check[i] == GIN_TRUE) {
169       ++nCommon;
170       ++nCommonMaybe;
171     } else if (check[i] == GIN_MAYBE) {
172       ++nCommonMaybe;
173     }
174   }
175 
176   switch (strategy) {
177     case RDKitTanimotoStrategy:
178       /*
179        * Nsame / (Na + Nb - Nsame)
180        */
181       threshold = getTanimotoLimit();
182       if (nCommonMaybe < threshold * nkeys) {
183         result = GIN_FALSE;
184       }
185       break;
186     case RDKitDiceStrategy:
187       /*
188        * 2 * Nsame / (Na + Nb)
189        */
190       threshold = getDiceLimit();
191       if (2.0 * nCommonMaybe < threshold * (nCommonMaybe + nkeys)) {
192         result = GIN_FALSE;
193       }
194       break;
195     default:
196       elog(ERROR, "Unknown strategy: %d", strategy);
197   }
198 
199   PG_RETURN_GIN_TERNARY_VALUE(result);
200 #endif
201 }
202