1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <vdb/extern.h>
28
29 #include "blob.h"
30 #include "blob-headers.h"
31 #include "page-map.h"
32 #include "blob-priv.h"
33 #include "xform-priv.h"
34
35 #include <vdb/xform.h>
36 #include <vdb/table.h>
37 #include <vdb/vdb.h>
38 #include <kdb/index.h>
39 #include <klib/rc.h>
40 #include <klib/log.h>
41 #include <sysalloc.h>
42 #include <atomic32.h>
43
44 #include <stdint.h>
45 #include <stdlib.h>
46 #include <string.h>
47
48 #include <assert.h>
49
50 typedef struct tag_self_t {
51 const KIndex *ndx;
52 uint32_t elem_bits;
53 uint8_t case_sensitivity;
54 } self_t;
55
self_whack(void * Self)56 static void CC self_whack( void *Self )
57 {
58 self_t *self = Self;
59
60 KIndexRelease( self->ndx );
61 free( self );
62 }
63
64 #include <stdio.h>
65
66 static
index_project_impl(void * Self,const VXformInfo * info,int64_t row_id,VBlob ** rslt,uint32_t argc,const VBlob * argv[])67 rc_t CC index_project_impl(
68 void *Self,
69 const VXformInfo *info,
70 int64_t row_id,
71 VBlob **rslt,
72 uint32_t argc, const VBlob *argv[]
73 ) {
74 rc_t rc;
75 const self_t *self = Self;
76 KDataBuffer temp_buff;
77 uint64_t id_count;
78 int64_t start_id;
79 int64_t empty_row_id_start = -1;
80 int64_t empty_row_id_count = -1;
81 size_t sz = 1023;
82 bool attached_to_col = argc > 0 && argv[0] != NULL;
83
84 /* first try to load value from the column. if returned blob is empty or row is not found, go to index */
85 if ( attached_to_col ) {
86 /*** this types of blobs may have holes in them ***/
87 rc = VBlobSubblob(argv[0],rslt,row_id );
88 if (rc != 0) {
89 if (GetRCState(rc) == rcEmpty && GetRCObject(rc) == rcRow) {
90 empty_row_id_start = row_id;
91 empty_row_id_count = 1;
92 }
93 else {
94 return rc;
95 }
96 }
97 else if ((*rslt)->data.elem_count > 0) {
98 return rc;
99 }
100 else {
101 empty_row_id_start = (*rslt)->start_id;
102 empty_row_id_count = (*rslt)->stop_id - (*rslt)->start_id + 1;
103
104 TRACK_BLOB( VBlobRelease, *rslt );
105 (void)VBlobRelease( *rslt );
106 }
107
108 assert(empty_row_id_count >= 1);
109 }
110
111 rc = KDataBufferMakeBytes( &temp_buff, sz + 1 );
112 if ( rc != 0 )
113 return rc;
114
115 for ( ; ; ) {
116 rc = KIndexProjectText(self->ndx, row_id, &start_id, &id_count, temp_buff.base, temp_buff.elem_count, &sz);
117 if ((GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcId) || sz==0 ){
118 if ( !attached_to_col )
119 rc = RC(rcVDB, rcFunction, rcExecuting, rcRow, rcNotFound);
120 else
121 {
122 // return an empty row, but we don't know how many empty rows
123 // are there, since even row_id+1 may have a key stored in index
124 rc = 0;
125 sz = 0;
126 start_id = row_id;
127 id_count = 1;
128 }
129
130 break;
131 }
132 if ( GetRCState( rc ) == rcInsufficient && GetRCObject( rc ) == (enum RCObject)rcBuffer )
133 {
134 rc = KDataBufferResize ( &temp_buff, (uint32_t)( sz + 1 ) );
135 if (rc == 0) {
136 continue;
137 }
138 }
139
140 // When in case_sensitivity mode is case insensitive, index does not accurately represent actual values,
141 // as we still store key in a column when it differs from what we inserted into index
142 if (self->case_sensitivity != CASE_SENSITIVE && attached_to_col)
143 {
144 if ( start_id < empty_row_id_start )
145 {
146 id_count -= empty_row_id_start - start_id;
147 start_id = empty_row_id_start;
148 }
149
150 if ( start_id + id_count > empty_row_id_start + empty_row_id_count )
151 {
152 id_count = empty_row_id_start + empty_row_id_count - start_id;
153 }
154 }
155 break;
156 }
157
158 if ( rc == 0 )
159 {
160 /* it seems old index returns length including \0 so we have to adjust */
161 while (sz > 0 && ((char *)temp_buff.base)[sz - 1] == '\0')
162 --sz;
163
164 // now we know real size of the data, lets set in data buffer too
165 assert ( temp_buff.elem_count >= sz );
166 if ( temp_buff.elem_count != sz )
167 rc = KDataBufferResize ( &temp_buff, (uint32_t)( sz ) );
168 }
169
170 if (rc == 0)
171 {
172 rc = VBlobCreateFromSingleRow ( rslt, start_id, start_id + id_count - 1, &temp_buff, vboNative );
173 }
174
175 KDataBufferWhack(&temp_buff);
176 return rc;
177 }
178
179 VTRANSFACT_BUILTIN_IMPL(idx_text_project, 1, 1, 1) (
180 const void *Self,
181 const VXfactInfo *info,
182 VFuncDesc *rslt,
183 const VFactoryParams *cp,
184 const VFunctionParams *dp
185 ) {
186 rc_t rc;
187 const KIndex *ndx;
188 KIdxType type;
189
190 rc = VTableOpenIndexRead(info->tbl, &ndx, "%.*s", (int)cp->argv[0].count, cp->argv[0].data.ascii);
191 if ( rc != 0 )
192 {
193 if ( GetRCState ( rc ) != rcNotFound )
194 PLOGERR (klogErr, (klogErr, rc, "Failed to open index '$(index)'", "index=%.*s", (int)cp->argv[0].count, cp->argv[0].data.ascii));
195 return rc;
196 }
197
198 rc = KIndexType(ndx, &type);
199 if (rc == 0) {
200 if (type == kitProj + kitText) {
201 self_t *self;
202
203 self = malloc(sizeof(*self));
204 if (self) {
205 self->ndx = ndx;
206 self->elem_bits = VTypedescSizeof(&info->fdesc.desc);
207 self->case_sensitivity = cp->argc >= 2 ? *cp->argv[1].data.u8 : CASE_SENSITIVE;
208 rslt->self = self;
209 rslt->whack = self_whack;
210 rslt->variant = vftBlobN;
211 VFUNCDESC_INTERNAL_FUNCS(rslt)->bfN = index_project_impl;
212 return 0;
213 }
214 rc = RC(rcVDB, rcFunction, rcConstructing, rcMemory, rcExhausted);
215 }
216 else
217 rc = RC(rcVDB, rcFunction, rcConstructing, rcIndex, rcIncorrect);
218 }
219 KIndexRelease(ndx);
220 return rc;
221 }
222