1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #ifndef _h_column_priv_
28 #define _h_column_priv_
29 
30 #ifndef _h_vdb_extern_
31 #include <vdb/extern.h>
32 #endif
33 
34 #ifndef _h_vdb_xform_
35 #include <vdb/xform.h>
36 #endif
37 
38 #ifndef _h_klib_container_
39 #include <klib/container.h>
40 #endif
41 
42 #ifndef _h_klib_data_buffer_
43 #include <klib/data-buffer.h>
44 #endif
45 
46 #include <os-native.h>
47 
48 #ifndef KONST
49 #define KONST
50 #endif
51 
52 #ifdef __cplusplus
53 extern "C" {
54 #endif
55 
56 
57 /*--------------------------------------------------------------------------
58  * forwards
59  */
60 struct Vector;
61 struct KColumn;
62 struct VDBManager;
63 struct VSchema;
64 struct SColumn;
65 struct SExpression;
66 struct VProduction;
67 struct VBlob;
68 struct VBlobMRUCacheCursorContext;
69 
70 
71 /*--------------------------------------------------------------------------
72  * VColumn
73  *  externally visible column object
74  */
75 typedef struct VColumn VColumn;
76 struct VColumn
77 {
78     /* for type queries */
79     struct VSchema const *schema;
80 
81     /* typed column */
82     struct SColumn const *scol;
83 
84     /* read production */
85     struct VProduction *in;
86 
87     /* cached output */
88     struct VBlob *cache;
89 
90     /* type information */
91     VTypedecl td;
92     VTypedesc desc;
93 
94     /* vector ids */
95     uint32_t ord;
96 
97     bool read_only;
98     uint8_t align [ 3 ];
99 };
100 
101 void CC VColumnWhack ( void *item, void *curs );
102 void VColumnDestroy ( VColumn *self );
103 rc_t VColumnInit ( VColumn *self,
104     struct VSchema const *schema, struct SColumn const *scol );
105 
106 int CC VColumnCmp ( const void *item, const void *n );
107 int CC VColumnSort ( const void *item, const void *n );
108 
109 rc_t VColumnMake ( VColumn **col,
110     struct VSchema const *schema, struct SColumn const *scol );
111 
112 rc_t VColumnIdRange ( const VColumn *self,
113     int64_t *first, int64_t *last );
114 rc_t VColumnIdRangeRead ( const VColumn *self,
115     int64_t *first, int64_t *last );
116 rc_t VColumnPageIdRange ( const VColumn *self,
117     int64_t id, int64_t *first, int64_t *last );
118 
119 rc_t VColumnDatatype ( const VColumn *self,
120     struct VTypedecl *type, struct VTypedesc *desc );
121 
122 rc_t VColumnRead ( const VColumn *self, int64_t row_id,
123    uint32_t *elem_bits, const void **base, uint32_t *boff, uint32_t *row_len,
124    struct VBlob **vblob );
125 
126 rc_t VColumnReadBlob ( const VColumn *self, struct VBlob const **blob, int64_t row_id,
127    uint32_t *elem_bits, const void **base, uint32_t *boff, uint32_t *row_len, uint32_t *repeat_count,
128    struct VBlobMRUCacheCursorContext *cctx);
129 
130 rc_t VColumnReadCachedBlob ( const VColumn *self, struct VBlob const *blob, int64_t row_id,
131    uint32_t *elem_bits, const void **base, uint32_t *boff, uint32_t *row_len, uint32_t *repeat_count );
132 
133 rc_t VColumnIsStatic ( const VColumn *self, bool *is_static );
134 
135 rc_t VColumnGetKColumn ( const VColumn * self, struct KColumn ** kcol, bool * is_static );
136 
137 /*--------------------------------------------------------------------------
138  * WColumn
139  *  column with input buffer
140  */
141 typedef struct WColumn WColumn;
142 struct WColumn
143 {
144     VColumn dad;
145 
146     /* half-closed range of buffered rows */
147     int64_t start_id, end_id;
148 
149     /* half-closed id for page cutoff */
150     int64_t cutoff_id;
151 
152     /* alternate read production */
153     struct VProduction *alt;
154 
155     /* validate production */
156     struct VProduction *val;
157 
158     /* write production */
159     struct VProduction *out;
160 
161     /* output page */
162     struct VBlob *page;
163 
164     /* default row data */
165     KDataBuffer dflt;
166 
167     /* accumulators */
168     KDataBuffer data, rowmap;
169 
170     /* peak byte size history of data accumulator */
171     size_t data_peak_hist [ 16 ];
172     size_t data_peak;
173 
174     /* total committed bits in buffer */
175     bitsz_t bits_in_buffer;
176 
177     /* number of uncommitted bits in buffer */
178     bitsz_t row_len;
179 
180     /* size ( in bytes ) to trigger page commit */
181     size_t trigger;
182 
183     /* number of committed rows in buffer */
184     size_t num_rows;
185 
186     /* peak history index */
187     uint8_t peak_hist_idx;
188 
189     /* true if there is a default value */
190     bool have_dflt;
191 
192     /* set upon any successful write */
193     bool row_written;
194 
195     /* set if the last row written was default */
196     bool dflt_last;
197 
198     /* set upon row commit */
199     bool row_committed;
200 };
201 
202 /* WColumnRowMap
203  */
204 typedef struct WColumnRowMap WColumnRowMap;
205 struct WColumnRowMap
206 {
207     int64_t start_id;
208     uint64_t len, cnt;
209 };
210 
211 rc_t WColumnMake ( VColumn **col, struct VSchema const *schema,
212     struct SColumn const *scol, struct SExpression const *dflt_limit,
213     struct VDBManager *mgr, struct Vector *cx_bind );
214 
215 rc_t WColumnSetDefault ( VColumn *self,
216     bitsz_t elem_bits, const void *buffer, bitsz_t boff, uint64_t len );
217 rc_t WColumnWrite ( VColumn *self,
218     bitsz_t elem_bits, const void *buffer, bitsz_t boff, uint64_t len );
219 
220 /* OpenRow
221  *  update state
222  *
223  *  "const_row_id" [ IN, CONST ] - id of row being opened. useful
224  *  only on initial open when no other rows are buffered.
225  */
226 void CC WColumnOpenRow ( void *self, void *const_row_id );
227 
228 /* RowDefaults
229  *  if a row has not been written but has a default value,
230  *  that value is written to the row. if no default exists,
231  *  an error is generated.
232  *
233  *  "rc" [ OUT, DEFAULT ZERO ] - preset to 0
234  *
235  *  returns true if any error occurs ( i.e. "*rc != 0" )
236  */
237 bool CC WColumnRowDefaults ( void *self, void *rc );
238 
239 /* CommitRow
240  *  closes the row to further writes and accepts
241  *  all data written so far as complete. if the accumulated
242  *  page data trigger a flush, the flush parameter is set.
243  *
244  *  "end_id" [ IN/OUT ] - used to calculate the minimum
245  *  end_id for pages. if the column decides that it has too
246  *  much data in its buffer and wants a cutoff < current
247  *  value, it can lower the id.
248  *
249  *  returns true if there was a memory error.
250  */
251 bool CC WColumnCommitRow ( void *self, void *end_id );
252 
253 /* RepeatRow
254  *  go into the last row entry
255  *  extend the count by count
256  *  data points to this structure
257  */
258 typedef struct WColumnRepeatRowData WColumnRepeatRowData;
259 struct WColumnRepeatRowData
260 {
261     uint64_t count;
262     int64_t row_id;
263     int64_t end_id;
264 };
265 void CC WColumnRepeatRow ( void *self, void *data );
266 
267 /* CloseRow
268  *  discards uncommitted data
269  *  update state
270  */
271 void CC WColumnCloseRow ( void *self, void *ignore );
272 
273 /* BufferPage
274  *  captures page range
275  *
276  *  "end_id" [ IN, CONST ] - half-closed id of buffered range end
277  *  column should capture this information for creating page
278  *  id range either on demand, or pre-prepared.
279  *
280  *  returns true if there was a memory error.
281  */
282 bool CC WColumnBufferPage ( void *self, void *const_end_id );
283 
284 /* DropPage
285  *  drops all rows associated with committed page
286  */
287 void CC WColumnDropPage ( void *self, void *ignore );
288 
289 /* ReadBlob
290  *  reads an input blob
291  *  returns a blob with all rows in commit range
292  *
293  *  "vblob" [ OUT ] - page to return
294  *
295  *  "id" [ IN ] - an id that must be within page range
296  *  returns rcNotFound and NULL blob if id is not within range
297  */
298 rc_t WColumnReadBlob ( WColumn *self, struct VBlob **vblob, int64_t id );
299 
300 #ifdef __cplusplus
301 }
302 #endif
303 
304 #endif /* _h_column_priv_ */
305