1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #ifndef _h_vdb_cursor_
28 #define _h_vdb_cursor_
29 
30 #ifndef _h_vdb_extern_
31 #include <vdb/extern.h>
32 #endif
33 
34 #ifndef _h_klib_defs_
35 #include <klib/defs.h>
36 #endif
37 
38 #ifndef _h_klib_namelist_
39 #include <klib/namelist.h>
40 #endif
41 
42 #include <stdarg.h>
43 
44 #ifdef __cplusplus
45 extern "C" {
46 #endif
47 
48 
49 /*--------------------------------------------------------------------------
50  * forwards - struct types referenced only through pointers in this header
51  */
52 struct VBlob;
53 struct VTable;
54 struct VTypedesc;
55 struct VTypedecl;
56 struct VView;
57 
58 
59 /*--------------------------------------------------------------------------
60  * KCreateMode
61  *  mode definitions for cursor creation
62  *  ( passed as the "mode" argument of VTableCreateCursorWrite )
63  *  NB - typedef'd as uint32_t in <klib/defs.h> to ensure uniformly
64  *  predictable binary representation.
65  */
66 enum
67 {
68     kcmUpdate,          /* kcmOpen   : allow inserts or updates */
69     kcmReplace,         /* kcmInit   : replace all existing rows with newly written rows */
70     kcmInsert           /* kcmCreate : allow only inserts, i.e. new rows */
71 };
72 
73 /*--------------------------------------------------------------------------
74  * VCursor
75  *  an opaque, reference counted row cursor onto a VTable or a VView
76  */
77 typedef struct VCursor VCursor;
78 
79 
80 /* AddRef
81  * Release
82  *  all objects are reference counted
83  *  NULL references are ignored; "Release" also stands in for "Close"
84  */
85 VDB_EXTERN rc_t CC VCursorAddRef ( const VCursor *self );
86 VDB_EXTERN rc_t CC VCursorRelease ( const VCursor *self );
87 
88 
89 /* CreateCursorRead, CreateCursorWrite
90  *  creates a cursor object onto table
91  *  multiple read cursors are allowed
92  *  only a single write cursor is allowed
93  *
94  *  "curs" [ OUT ] - return parameter for newly created cursor
95  *
96  *  "mode" [ IN ] - describes update behavior ( CreateCursorWrite only )
97  *    kcmUpdate   : allow inserts or updates
98  *    kcmReplace  : replace all existing rows with newly written rows
99  *    kcmInsert   : allow only inserts, i.e. new rows
100  *
101  *  NB - CreateCursorRead will be deprecated in future releases
102  *  use CreateCachedCursorRead instead.
103  */
104 VDB_EXTERN rc_t CC VTableCreateCursorRead ( struct VTable const *self, const VCursor **curs );
105 VDB_EXTERN rc_t CC VTableCreateCursorWrite ( struct VTable *self, VCursor **curs, KCreateMode mode );
106 
107 /* ViewCreateCursor
108  *  creates a read ( const ) cursor object onto view
109  *
110  *  "curs" [ OUT ] - return parameter for newly created read cursor
111  */
112 VDB_EXTERN rc_t CC VViewCreateCursor ( struct VView const *self, const VCursor **curs );
113 
114 /* CreateCachedCursorRead
115  *  creates a read cursor object onto table with a cache limit in bytes
116  *  ( the replacement named for CreateCursorRead, which is to be deprecated )
117  *  AVAILABILITY: version 2.1
118  *
119  *  "curs" [ OUT ] - return parameter for newly created cursor
120  *
121  *  "capacity" [ IN ] - the maximum bytes to cache on the cursor before
122  *  dropping least recently used blobs
123  */
124 VDB_EXTERN rc_t CC VTableCreateCachedCursorRead ( struct VTable const *self,
125     const VCursor **curs, size_t capacity );
126 
127 
128 /* AddColumn, VAddColumn
129  *  add a column to an unopened cursor
130  *  ( VAddColumn is the va_list form of the variadic AddColumn )
131  *  "idx" [ OUT ] - return parameter for column index
132  *
133  *  "name" [ IN ] - NUL terminated column name spec.
134  *  to identify a column by name, provide the column name
135  *  by itself. if there are multiple types available under
136  *  that name, the default type for that column will be
137  *  selected. to select a specific type, the name may
138  *  be cast to that type using a cast expression, e.g.
139  *    "( type ) name"
140  *  NB(review) - "name" precedes varargs; presumably a printf-style format - confirm
141  * NB - may return a non-zero status code of rcColumn, rcExists
142  *  if the column was not added. the return "idx" will still
143  *  be set properly and this does NOT indicate an error.
144  */
145 VDB_EXTERN rc_t CC VCursorAddColumn ( const VCursor *self,
146     uint32_t *idx, const char *name, ... );
147 VDB_EXTERN rc_t CC VCursorVAddColumn ( const VCursor *self,
148     uint32_t *idx, const char *name, va_list args );
149 
150 
151 /* GetColumnIdx, VGetColumnIdx
152  *  retrieve column index by name spec
153  *  ( VGetColumnIdx is the va_list form of the variadic GetColumnIdx )
154  *  "idx" [ OUT ] - return parameter for column index
155  *
156  *  "name" [ IN ] - NUL terminated column name spec.
157  */
158 VDB_EXTERN rc_t CC VCursorGetColumnIdx ( const VCursor *self,
159     uint32_t *idx, const char *name, ... );
160 VDB_EXTERN rc_t CC VCursorVGetColumnIdx ( const VCursor *self,
161     uint32_t *idx, const char *name, va_list args );
162 
163 
164 /* Datatype
165  *  returns typedecl and/or typedesc for column data
166  *
167  *  "idx" [ IN ] - column index
168  *
169  *  "type" [ OUT, NULL OKAY ] - returns the column type declaration
170  *
171  *  "desc" [ OUT, NULL OKAY ] - returns the definition ( VTypedesc ) of
172  *  the type returned in "type"
173  *
174  * NB - one of "type" and "desc" must be non-NULL
175  */
176 VDB_EXTERN rc_t CC VCursorDatatype ( const VCursor *self, uint32_t idx,
177     struct VTypedecl *type, struct VTypedesc *desc );
178 
179 
180 /* IdRange
181  *  returns id range for column
182  *
183  *  "idx" [ IN, DEFAULT ZERO ] - single column index or
184  *  zero to indicate the range for all columns in cursor
185  *
186  *  NB - earlier text described an "id" [ IN ] row id; the prototype takes none
187  *
188  *  "first" [ OUT, NULL OKAY ] and "count" [ OUT, NULL OKAY ] -
189  *  id range is returned in these output parameters, where
190  *  at least ONE must be NOT-NULL
191  */
192 VDB_EXTERN rc_t CC VCursorIdRange ( const VCursor *self, uint32_t idx,
193     int64_t *first, uint64_t *count );
194 
195 
196 /* Open
197  *  open cursor, resolving schema for the set of opened columns
198  *
199  *  when cursor is created for read, its initial row id
200  *  is set to first row available in any contained column.
201  *
202  *  when cursor is created for write, its initial row id
203  *  is set for inserts ( appending ). when empty, initial
204  *  row id is set to 1. otherwise, it is set to 1 beyond
205  *  the last row available in any contained column.
206  *  columns are added beforehand with AddColumn, which acts on an unopened cursor.
207  *  NB - there is no corresponding "Close"
208  *  use "Release" instead.
209  */
210 VDB_EXTERN rc_t CC VCursorOpen ( const VCursor *self );
211 
212 
213 /* RowId
214  *  report current row id in "row_id" [ OUT ]
215  * SetRowId
216  *  seek to given "row_id" [ IN ]
217  */
218 VDB_EXTERN rc_t CC VCursorRowId ( const VCursor *self, int64_t *row_id );
219 VDB_EXTERN rc_t CC VCursorSetRowId ( const VCursor *self, int64_t row_id );
220 
221 
222 /* FindNextRowId
223  * FindNextRowIdDirect
224  *  returns next non-empty row given either the cursor's current row-id + 1,
225  *  or a direct "start_id" provided as a parameter.
226  *
227  *  if the starting row-id has a non-null cell, that row-id will be returned.
228  *  otherwise, the first row-id following the starting id that has a non-null cell
229  *  will be returned. in the event that no non-null cells can be found, the returned
230  *  rc_t will have RCState of rcNotFound.
231  *
232  *  "idx" [ IN, ZERO OKAY ] - when non-zero, represents the one-based index of a
233  *  particular column. when zero, represents all columns simultaneously.
234  *
235  *  "start_id" [ IN ] - when specified directly, gives a starting row id to
236  *  use when starting the search for non-null cells. if the row "start_id"
237  *  contains non-null cells, it will be returned immediately.
238  *
239  *  when "start_id" is not used ( VCursorFindNextRowId ), the cursor's current
240  *  row-id + 1 will be substituted. the meaning is that if the last accessed
241  *  row was valid, this will find the next valid row. if the last accessed row
242  *  was not valid ( null cell ), then it is known to be invalid and the search
243  *  starts with the following row.
244  *
245  *  "next" [ OUT ] - return parameter for found row-id ( presumably valid only when "rc_t" is 0 - confirm )
246  */
247 VDB_EXTERN rc_t CC VCursorFindNextRowId ( const VCursor *self,
248     uint32_t idx, int64_t * next );
249 VDB_EXTERN rc_t CC VCursorFindNextRowIdDirect ( const VCursor *self,
250     uint32_t idx, int64_t start_id, int64_t * next );
251 
252 
253 /* OpenRow
254  *  open currently closed row indicated by row id; balanced by CloseRow
255  */
256 VDB_EXTERN rc_t CC VCursorOpenRow ( const VCursor *self );
257 
258 /* CommitRow
259  *  commit row after writing
260  *  prevents further writes ( RepeatRow requires a committed row )
261  */
262 VDB_EXTERN rc_t CC VCursorCommitRow ( VCursor *self );
263 
264 /* RepeatRow
265  *  repeats the current row by the count provided
266  *  row must have been committed ( see CommitRow )
267  *
268  *  AVAILABILITY: version 2.6
269  *
270  *  "count" [ IN ] - the number of times to repeat
271  *  the current row.
272  */
273 VDB_EXTERN rc_t CC VCursorRepeatRow ( VCursor *self, uint64_t count );
274 
275 /* CloseRow
276  *  balances a prior OpenRow call
277  *  if there are uncommitted modifications,
278  *  discard all changes. otherwise,
279  *  advance to next row
280  */
281 VDB_EXTERN rc_t CC VCursorCloseRow ( const VCursor *self );
282 
283 
284 /* FlushPage
285  *  forces flush of all buffered page data
286  *  fails if row is open
287  *
288  *  without this call, pages are normally auto-committed
289  *  based upon size and column affinity
290  */
291 VDB_EXTERN rc_t CC VCursorFlushPage ( VCursor *self );
292 
293 
294 /* GetBlob
295  *  retrieve a blob of data containing the current row id
296  * GetBlobDirect
297  *  retrieve a blob of data containing the requested row id
298  *
299  *  "blob" [ OUT ] - return parameter for a new reference
300  *  to VBlob containing requested cell. NB - must be released
301  *  via VBlobRelease when no longer needed.
302  *
303  *  "row_id" [ IN ] - allows GetBlobDirect random access to any cell
304  *  in column
305  *
306  *  "col_idx" [ IN ] - index of column to be read, returned by "AddColumn"
307  */
308 VDB_EXTERN rc_t CC VCursorGetBlob ( const VCursor *self,
309     struct VBlob const **blob, uint32_t col_idx );
310 VDB_EXTERN rc_t CC VCursorGetBlobDirect ( const VCursor *self,
311     struct VBlob const **blob, int64_t row_id, uint32_t col_idx );
312 
313 
314 /* Read
315  *  read entire single row of byte-aligned data into a buffer
316  * ReadDirect
317  *  bypass the need to use SetRowId/OpenRow/CloseRow for addressing
318  *
319  *  "row_id" [ IN ] - ( ReadDirect only ) allows random access to any cell
320  *  in column
321  *
322  *  "col_idx" [ IN ] - index of column to be read, returned by "AddColumn"
323  *
324  *  "elem_bits" [ IN ] - expected element size in bits, required
325  *  to be compatible with the actual element size, and be a multiple
326  *  of 8 ( byte-aligned ). for non-byte-aligned data, see ReadBits
327  *
328  *  "buffer" [ OUT ] and "blen" [ IN ] - return buffer for row data
329  *  where "blen" gives buffer capacity in elements. the total buffer
330  *  size in bytes == ( "elem_bits" * "blen" + 7 ) / 8.
331  *
332  *  "row_len" [ OUT ] - return parameter for the number of elements
333  *  in the requested row.
334  *
335  *  when the return code is 0, "row_len" will contain the number of
336  *  elements read into buffer. if the return code indicates that the
337  *  buffer is too small, "row_len" will give the required buffer length.
338  */
339 VDB_EXTERN rc_t CC VCursorRead ( const VCursor *self, uint32_t col_idx,
340     uint32_t elem_bits, void *buffer, uint32_t blen, uint32_t *row_len );
341 VDB_EXTERN rc_t CC VCursorReadDirect ( const VCursor *self, int64_t row_id, uint32_t col_idx,
342     uint32_t elem_bits, void *buffer, uint32_t blen, uint32_t *row_len );
343 
344 
345 /* ReadBits
346  *  read single row of potentially bit-aligned column data into a buffer
347  * ReadBitsDirect
348  *  bypass the need to use SetRowId/OpenRow/CloseRow for addressing
349  *  "row_id" [ IN ] - ( ReadBitsDirect only ) id of the row to be read
350  *  "col_idx" [ IN ] - index of column to be read, returned by "AddColumn"
351  *
352  *  "elem_bits" [ IN ] - expected element size in bits, required to be
353  *  compatible with the actual element size, and may ( or may not ) be
354  *  a multiple of 8 ( byte aligned ).
355  *
356  *  "start" [ IN ] - zero-based starting index to first element,
357  *  valid from 0 .. row_len - 1
358  *
359  *  "buffer" [ OUT ], "boff" [ IN ] and "blen" [ IN ] -
360  *  return buffer for row data, where "boff" is in BITS
361  *  and "blen" is in ELEMENTS.
362  *
363  *  "num_read" [ OUT ] - return parameter for the number of elements
364  *  read, which is <= "blen"
365  *
366  *  "remaining" [ OUT, NULL OKAY ] - optional return parameter for
367  *  the number of elements remaining to be read. specifically,
368  *  "start" + "num_read" + "remaining" == row length, assuming that
369  *  "start" <= row length.
370  */
371 VDB_EXTERN rc_t CC VCursorReadBits ( const VCursor *self, uint32_t col_idx,
372     uint32_t elem_bits, uint32_t start, void *buffer, uint32_t boff,
373     uint32_t blen, uint32_t *num_read, uint32_t *remaining );
374 VDB_EXTERN rc_t CC VCursorReadBitsDirect ( const VCursor *self, int64_t row_id, uint32_t col_idx,
375     uint32_t elem_bits, uint32_t start, void *buffer, uint32_t boff,
376     uint32_t blen, uint32_t *num_read, uint32_t *remaining );
377 
378 
379 /* CellData
380  *  access pointer to single cell of potentially bit-aligned column data
381  * CellDataDirect
382  *  bypass the need to use SetRowId/OpenRow/CloseRow for addressing
383  *  "row_id" [ IN ] - ( CellDataDirect only ) id of the row to be accessed
384  *  "col_idx" [ IN ] - index of column to be read, returned by "AddColumn"
385  *
386  *  "elem_bits" [ OUT, NULL OKAY ] - optional return parameter for
387  *  element size in bits
388  *
389  *  "base" [ OUT ] and "boff" [ OUT, NULL OKAY ] -
390  *  compound return parameter for pointer to row starting bit
391  *  where "boff" is in BITS
392  *
393  *  "row_len" [ OUT, NULL OKAY ] - the number of elements in cell
394  */
395 VDB_EXTERN rc_t CC VCursorCellData ( const VCursor *self, uint32_t col_idx,
396     uint32_t *elem_bits, const void **base, uint32_t *boff,
397     uint32_t *row_len );
398 VDB_EXTERN rc_t CC VCursorCellDataDirect ( const VCursor *self, int64_t row_id,
399     uint32_t col_idx, uint32_t *elem_bits, const void **base,
400     uint32_t *boff, uint32_t *row_len );
401 
402 
403 /* VCursorDataPrefetch
404  * -- will prefetch rows into CursorCache (if it exists)
405  * -- no OUT parameters - just primes the cache
406  * -- will cache every produced blob (even a small one)
407  * -- will suspend flushing the cache after inserting first row
408  * -- conducts sort-unique on row_ids to linearize data access
409  *
410  * "row_ids" [ IN ] - rows to be prefetched
411  *
412  * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn"
413  *
414  * "num_rows" [ IN ] - number of rows in row_ids
415  *
416  * "min/max_valid_row_id" [ IN ] - ignore all row_ids[i] that fall outside this range
417  *
418  * "continue_on_error" [ IN ] - whether to continue after a failure to prefetch a row
419  */
420 
421 VDB_EXTERN rc_t CC VCursorDataPrefetch ( const VCursor * self,
422     const int64_t * row_ids, uint32_t col_idx, uint32_t num_rows,
423     int64_t min_valid_row_id, int64_t max_valid_row_id, bool continue_on_error );
424 
425 
426 /* Default
427  *  give a default row value for cell
428  *  TBD - document full cell data, not append
429  *
430  *  "col_idx" [ IN ] - index of column to receive the default, returned by "AddColumn"
431  *
432  *  "elem_bits" [ IN ] - stated element size in bits, required
433  *  to be compatible with the actual element size
434  *
435  *  "buffer" [ IN ] and "boff" [ IN ] - compound pointer and offset
436  *  to start of default row data where "boff" is in BITS
437  *
438  *  "row_len" [ IN ] - the number of elements in default row
439  */
440 VDB_EXTERN rc_t CC VCursorDefault ( VCursor *self, uint32_t col_idx,
441     bitsz_t elem_bits, const void *buffer, bitsz_t boff,
442     uint64_t row_len );
443 
444 
445 /* Write
446  *  append bit-aligned column data to cell
447  *
448  *  "col_idx" [ IN ] - index of column to be written, returned by "AddColumn"
449  *
450  *  "elem_bits" [ IN ] - stated element size in bits, required
451  *  to be compatible with the actual element size
452  *
453  *  "buffer" [ IN ] and "boff" [ IN ] - compound pointer and offset
454  *  to start of the data to append where "boff" is in BITS
455  *
456  *  "count" [ IN ] - the number of elements to append
457  */
458 VDB_EXTERN rc_t CC VCursorWrite ( VCursor *self, uint32_t col_idx,
459     bitsz_t elem_bits, const void *buffer, bitsz_t boff,
460     uint64_t count );
461 
462 
463 /* Commit
464  *  commit changes made to cursor ( cf. CommitRow, which commits one row )
465  *  fails if row is open
466  */
467 VDB_EXTERN rc_t CC VCursorCommit ( VCursor *self );
468 
469 
470 /* OpenParentRead, OpenParentUpdate
471  *  duplicate reference to parent table, returned in "tbl" [ OUT ]
472  *  NB - returned reference must be released
473  */
474 VDB_EXTERN rc_t CC VCursorOpenParentRead ( const VCursor *self, struct VTable const **tbl );
475 VDB_EXTERN rc_t CC VCursorOpenParentUpdate ( VCursor *self, struct VTable **tbl );
476 
477 
478 /* GetUserData
479  * SetUserData
480  *  store/retrieve an opaque pointer to user data
481  *
482  *  "data" [ OUT ] - return parameter for getting data ( GetUserData )
483  *  "data" [ IN ] - parameter for setting data ( SetUserData )
484  *
485  *  "destroy" [ IN, NULL OKAY ] - optional destructor param
486  *  invoked from destructor of "self"; signature is void ( void *data )
487  */
488 VDB_EXTERN rc_t CC VCursorGetUserData ( const VCursor *self, void **data );
489 VDB_EXTERN rc_t CC VCursorSetUserData ( const VCursor *self,
490     void *data, void ( CC * destroy ) ( void *data ) );
491 
492 
493 #ifdef __cplusplus
494 }
495 #endif
496 
497 #endif /*  _h_vdb_cursor_ */
498