1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <vdb/extern.h>
28 
29 #include "blob.h"
30 #include "blob-headers.h"
31 #include "page-map.h"
32 #include "blob-priv.h"
33 #include "xform-priv.h"
34 
35 #include <vdb/xform.h>
36 #include <vdb/table.h>
37 #include <vdb/vdb.h>
38 #include <kdb/index.h>
39 #include <klib/rc.h>
40 #include <klib/log.h>
41 #include <sysalloc.h>
42 #include <atomic32.h>
43 
44 #include <stdint.h>
45 #include <stdlib.h>
46 #include <string.h>
47 
48 #include <assert.h>
49 
50 typedef struct tag_self_t {
51     const KIndex *ndx;
52     uint32_t elem_bits;
53     uint8_t case_sensitivity;
54 } self_t;
55 
self_whack(void * Self)56 static void CC self_whack( void *Self )
57 {
58     self_t *self = Self;
59 
60     KIndexRelease( self->ndx );
61     free( self );
62 }
63 
64 #include <stdio.h>
65 
66 static
index_project_impl(void * Self,const VXformInfo * info,int64_t row_id,VBlob ** rslt,uint32_t argc,const VBlob * argv[])67 rc_t CC index_project_impl(
68                             void *Self,
69                             const VXformInfo *info,
70                             int64_t row_id,
71                             VBlob **rslt,
72                             uint32_t argc, const VBlob *argv[]
73 ) {
74     rc_t rc;
75     const self_t *self = Self;
76     KDataBuffer temp_buff;
77     uint64_t id_count;
78     int64_t start_id;
79     int64_t empty_row_id_start = -1;
80     int64_t empty_row_id_count = -1;
81     size_t sz = 1023;
82     bool attached_to_col = argc > 0 && argv[0] != NULL;
83 
84     /* first try to load value from the column. if returned blob is empty or row is not found, go to index */
85     if ( attached_to_col ) {
86         /*** this types of blobs may have holes in them ***/
87         rc = VBlobSubblob(argv[0],rslt,row_id );
88         if (rc != 0) {
89             if (GetRCState(rc) == rcEmpty && GetRCObject(rc) == rcRow) {
90                 empty_row_id_start = row_id;
91                 empty_row_id_count = 1;
92             }
93             else {
94                 return rc;
95             }
96         }
97         else if ((*rslt)->data.elem_count > 0) {
98             return rc;
99         }
100         else {
101             empty_row_id_start = (*rslt)->start_id;
102             empty_row_id_count = (*rslt)->stop_id - (*rslt)->start_id + 1;
103 
104             TRACK_BLOB( VBlobRelease, *rslt );
105             (void)VBlobRelease( *rslt );
106         }
107 
108         assert(empty_row_id_count >= 1);
109     }
110 
111     rc = KDataBufferMakeBytes( &temp_buff, sz + 1 );
112     if ( rc != 0 )
113         return rc;
114 
115     for ( ; ; ) {
116         rc = KIndexProjectText(self->ndx, row_id, &start_id, &id_count, temp_buff.base, temp_buff.elem_count, &sz);
117         if ((GetRCState(rc) == rcNotFound && GetRCObject(rc) == rcId) || sz==0 ){
118             if ( !attached_to_col )
119                 rc = RC(rcVDB, rcFunction, rcExecuting, rcRow, rcNotFound);
120             else
121             {
122                 // return an empty row, but we don't know how many empty rows
123                 // are there, since even row_id+1 may have a key stored in index
124                 rc = 0;
125                 sz = 0;
126                 start_id = row_id;
127                 id_count = 1;
128             }
129 
130             break;
131         }
132         if ( GetRCState( rc ) == rcInsufficient && GetRCObject( rc ) == (enum RCObject)rcBuffer )
133         {
134             rc = KDataBufferResize ( &temp_buff, (uint32_t)( sz + 1 ) );
135             if (rc == 0) {
136                 continue;
137             }
138         }
139 
140         // When in case_sensitivity mode is case insensitive, index does not accurately represent actual values,
141         // as we still store key in a column when it differs from what we inserted into index
142         if (self->case_sensitivity != CASE_SENSITIVE && attached_to_col)
143         {
144             if ( start_id < empty_row_id_start )
145             {
146                 id_count -= empty_row_id_start - start_id;
147                 start_id = empty_row_id_start;
148             }
149 
150             if ( start_id + id_count > empty_row_id_start + empty_row_id_count )
151             {
152                 id_count = empty_row_id_start + empty_row_id_count - start_id;
153             }
154         }
155         break;
156     }
157 
158     if ( rc == 0 )
159     {
160         /* it seems old index returns length including \0 so we have to adjust */
161         while (sz > 0 && ((char *)temp_buff.base)[sz - 1] == '\0')
162             --sz;
163 
164         // now we know real size of the data, lets set in data buffer too
165         assert ( temp_buff.elem_count >= sz );
166         if ( temp_buff.elem_count != sz )
167             rc = KDataBufferResize ( &temp_buff, (uint32_t)( sz ) );
168     }
169 
170     if (rc == 0)
171     {
172         rc = VBlobCreateFromSingleRow ( rslt, start_id, start_id + id_count - 1, &temp_buff, vboNative );
173     }
174 
175     KDataBufferWhack(&temp_buff);
176     return rc;
177 }
178 
179 VTRANSFACT_BUILTIN_IMPL(idx_text_project, 1, 1, 1) (
180                                            const void *Self,
181                                            const VXfactInfo *info,
182                                            VFuncDesc *rslt,
183                                            const VFactoryParams *cp,
184                                            const VFunctionParams *dp
185 ) {
186     rc_t rc;
187     const KIndex *ndx;
188     KIdxType type;
189 
190     rc = VTableOpenIndexRead(info->tbl, &ndx, "%.*s", (int)cp->argv[0].count, cp->argv[0].data.ascii);
191     if ( rc != 0 )
192     {
193         if ( GetRCState ( rc ) != rcNotFound )
194             PLOGERR (klogErr, (klogErr, rc, "Failed to open index '$(index)'", "index=%.*s", (int)cp->argv[0].count, cp->argv[0].data.ascii));
195         return rc;
196     }
197 
198     rc = KIndexType(ndx, &type);
199     if (rc == 0) {
200         if (type == kitProj + kitText) {
201             self_t *self;
202 
203             self = malloc(sizeof(*self));
204             if (self) {
205                 self->ndx = ndx;
206                 self->elem_bits = VTypedescSizeof(&info->fdesc.desc);
207                 self->case_sensitivity = cp->argc >= 2 ? *cp->argv[1].data.u8 : CASE_SENSITIVE;
208                 rslt->self = self;
209                 rslt->whack = self_whack;
210                 rslt->variant = vftBlobN;
211                 VFUNCDESC_INTERNAL_FUNCS(rslt)->bfN = index_project_impl;
212                 return 0;
213             }
214             rc = RC(rcVDB, rcFunction, rcConstructing, rcMemory, rcExhausted);
215         }
216         else
217             rc = RC(rcVDB, rcFunction, rcConstructing, rcIndex, rcIncorrect);
218     }
219     KIndexRelease(ndx);
220     return rc;
221 }
222