1 /*=========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * =========================================================================== 24 * 25 */ 26 27 #ifndef _h_vdb_cursor_ 28 #define _h_vdb_cursor_ 29 30 #ifndef _h_vdb_extern_ 31 #include <vdb/extern.h> 32 #endif 33 34 #ifndef _h_klib_defs_ 35 #include <klib/defs.h> 36 #endif 37 38 #ifndef _h_klib_namelist_ 39 #include <klib/namelist.h> 40 #endif 41 42 #include <stdarg.h> 43 44 #ifdef __cplusplus 45 extern "C" { 46 #endif 47 48 49 /*-------------------------------------------------------------------------- 50 * forwards 51 */ 52 struct VBlob; 53 struct VTable; 54 struct VTypedesc; 55 struct VTypedecl; 56 struct VView; 57 58 59 /*-------------------------------------------------------------------------- 60 * KCreateMode 61 * mode definitions for cursor creation 62 * 63 * NB - typedef'd as uint32_t in <klib/defs.h> to ensure uniformly 64 * predictable binary representation. 65 */ 66 enum 67 { 68 kcmUpdate, /* kcmOpen */ 69 kcmReplace, /* kcmInit */ 70 kcmInsert /* kcmCreate */ 71 }; 72 73 /*-------------------------------------------------------------------------- 74 * VCursor 75 * a row cursor onto a VTable 76 */ 77 typedef struct VCursor VCursor; 78 79 80 /* AddRef 81 * Release 82 * all objects are reference counted 83 * NULL references are ignored 84 */ 85 VDB_EXTERN rc_t CC VCursorAddRef ( const VCursor *self ); 86 VDB_EXTERN rc_t CC VCursorRelease ( const VCursor *self ); 87 88 89 /* CreateCursor 90 * creates a cursor object onto table 91 * multiple read cursors are allowed 92 * only a single write cursor is allowed 93 * 94 * "curs" [ OUT ] - return parameter for newly created cursor 95 * 96 * "mode" [ IN ] - describes update behavior 97 * kcmUpdate : allow inserts or updates 98 * kcmReplace : replace all existing rows with newly written rows 99 * kcmInsert : allow only inserts, i.e. new rows 100 * 101 * NB - CreateCursorRead will be deprecated in future releases 102 * use CreateCachedCursorRead instead. 103 */ 104 VDB_EXTERN rc_t CC VTableCreateCursorRead ( struct VTable const *self, const VCursor **curs ); 105 VDB_EXTERN rc_t CC VTableCreateCursorWrite ( struct VTable *self, VCursor **curs, KCreateMode mode ); 106 107 /* ViewCreateCursor 108 * creates a read cursor object onto view 109 * 110 * "curs" [ OUT ] - return parameter for newly created cursor 111 */ 112 VDB_EXTERN rc_t CC VViewCreateCursor ( struct VView const *self, const VCursor **curs ); 113 114 /* CreateCachedCursorRead 115 * creates a read cursor object onto table with a cache limit in bytes 116 * 117 * AVAILABILITY: version 2.1 118 * 119 * "curs" [ OUT ] - return parameter for newly created cursor 120 * 121 * "capacity" [ IN ] - the maximum bytes to cache on the cursor before 122 * dropping least recently used blobs 123 */ 124 VDB_EXTERN rc_t CC VTableCreateCachedCursorRead ( struct VTable const *self, 125 const VCursor **curs, size_t capacity ); 126 127 128 /* AddColumn 129 * add a column to an unopened cursor 130 * 131 * "idx" [ OUT ] - return parameter for column index 132 * 133 * "name" [ IN ] - NUL terminated column name spec. 134 * to identify a column by name, provide the column name 135 * by itself. if there are multiple types available under 136 * that name, the default type for that column will be 137 * selected. to select a specific type, the name may 138 * be cast to that type using a cast expression, e.g. 139 * "( type ) name" 140 * 141 * NB - may return a non-zero status code of rcColumn, rcExists 142 * if the column was not added. the return "idx" will still 143 * be set properly and this does NOT indicate an error. 144 */ 145 VDB_EXTERN rc_t CC VCursorAddColumn ( const VCursor *self, 146 uint32_t *idx, const char *name, ... ); 147 VDB_EXTERN rc_t CC VCursorVAddColumn ( const VCursor *self, 148 uint32_t *idx, const char *name, va_list args ); 149 150 151 /* GetColumnIdx 152 * retrieve column index by name spec 153 * 154 * "idx" [ OUT ] - return parameter for column index 155 * 156 * "name" [ IN ] - NUL terminated column name spec. 157 */ 158 VDB_EXTERN rc_t CC VCursorGetColumnIdx ( const VCursor *self, 159 uint32_t *idx, const char *name, ... ); 160 VDB_EXTERN rc_t CC VCursorVGetColumnIdx ( const VCursor *self, 161 uint32_t *idx, const char *name, va_list args ); 162 163 164 /* Datatype 165 * returns typedecl and/or typedef for column data 166 * 167 * "idx" [ IN ] - column index 168 * 169 * "type" [ OUT, NULL OKAY ] - returns the column type declaration 170 * 171 * "def" [ OUT, NULL OKAY ] - returns the definition of the type 172 * returned in "type_decl" 173 * 174 * NB - one of "type" and "def" must be non-NULL 175 */ 176 VDB_EXTERN rc_t CC VCursorDatatype ( const VCursor *self, uint32_t idx, 177 struct VTypedecl *type, struct VTypedesc *desc ); 178 179 180 /* IdRange 181 * returns id range for column 182 * 183 * "idx" [ IN, DEFAULT ZERO ] - single column index or 184 * zero to indicate the range for all columns in cursor 185 * 186 * "id" [ IN ] - page containing this row id is target 187 * 188 * "first" [ OUT, NULL OKAY ] and "count" [ OUT, NULL OKAY ] - 189 * id range is returned in these output parameters, where 190 * at least ONE must be NOT-NULL 191 */ 192 VDB_EXTERN rc_t CC VCursorIdRange ( const VCursor *self, uint32_t idx, 193 int64_t *first, uint64_t *count ); 194 195 196 /* Open 197 * open cursor, resolving schema for the set of opened columns 198 * 199 * when cursor is created for read, its initial row id 200 * is set to first row available in any contained column. 201 * 202 * when cursor is created for write, its initial row id 203 * is set for inserts ( appending ). when empty, initial 204 * row id is set to 1. otherwise, it is set to 1 beyond 205 * the last row available in any contained column. 206 * 207 * NB - there is no corresponding "Close" 208 * use "Release" instead. 209 */ 210 VDB_EXTERN rc_t CC VCursorOpen ( const VCursor *self ); 211 212 213 /* RowId 214 * report current row id 215 * SetRowId 216 * seek to given row id 217 */ 218 VDB_EXTERN rc_t CC VCursorRowId ( const VCursor *self, int64_t *row_id ); 219 VDB_EXTERN rc_t CC VCursorSetRowId ( const VCursor *self, int64_t row_id ); 220 221 222 /* FindNextRowId 223 * FindNextRowIdDirect 224 * returns next non-empty row given either the cursor's current row-id + 1, 225 * or a direct "start_id" provided as a parameter. 226 * 227 * if the starting row-id has a non-null cell, that row-id will be returned. 228 * otherwise, the first row-id following the starting id that has a non-null cell 229 * will be returned. in the event that no non-null cells can be found, the returned 230 * rc_t will have RCState of rcNotFound. 231 * 232 * "idx" [ IN, ZERO OKAY ] - when non-zero, represents the one-based index of a 233 * particular column. when zero, represents all columns simultaneously. 234 * 235 * "start_id" [ IN ] - when specified directly, gives a starting row id to 236 * use when starting the search for non-null cells. if the row "start_id" 237 * contains non-null cells, it will be returned immediately. 238 * 239 * when "start_id" is not used ( VCursorFindNextRowId ), the cursor's current 240 * row-id + 1 will be substituted. the meaning is that if the last accessed 241 * row was valid, this will find the next valid row. if the last accessed row 242 * was not valid ( null cell ), then it is known to be invalid and the search 243 * starts with the following row. 244 * 245 * "next" [ OUT ] - return parameter for found row-id. when the "rc_t" is 0 246 */ 247 VDB_EXTERN rc_t CC VCursorFindNextRowId ( const VCursor *self, 248 uint32_t idx, int64_t * next ); 249 VDB_EXTERN rc_t CC VCursorFindNextRowIdDirect ( const VCursor *self, 250 uint32_t idx, int64_t start_id, int64_t * next ); 251 252 253 /* OpenRow 254 * open currently closed row indicated by row id 255 */ 256 VDB_EXTERN rc_t CC VCursorOpenRow ( const VCursor *self ); 257 258 /* CommitRow 259 * commit row after writing 260 * prevents further writes 261 */ 262 VDB_EXTERN rc_t CC VCursorCommitRow ( VCursor *self ); 263 264 /* RepeatRow 265 * repeats the current row by the count provided 266 * row must have been committed 267 * 268 * AVAILABILITY: version 2.6 269 * 270 * "count" [ IN ] - the number of times to repeat 271 * the current row. 272 */ 273 VDB_EXTERN rc_t CC VCursorRepeatRow ( VCursor *self, uint64_t count ); 274 275 /* CloseRow 276 * balances OpenRow message 277 * if there are uncommitted modifications, 278 * discard all changes. otherwise, 279 * advance to next row 280 */ 281 VDB_EXTERN rc_t CC VCursorCloseRow ( const VCursor *self ); 282 283 284 /* FlushPage 285 * forces flush of all buffered page data 286 * fails if row is open 287 * 288 * pages are normally auto-committed based upon 289 * size and column affinity 290 */ 291 VDB_EXTERN rc_t CC VCursorFlushPage ( VCursor *self ); 292 293 294 /* GetBlob 295 * retrieve a blob of data containing the current row id 296 * GetBlobDirect 297 * retrieve a blob of data containing the requested row id 298 * 299 * "blob" [ OUT ] - return parameter for a new reference 300 * to VBlob containing requested cell. NB - must be released 301 * via VBlobRelease when no longer needed. 302 * 303 * "row_id" [ IN ] - allows ReadDirect random access to any cell 304 * in column 305 * 306 * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn" 307 */ 308 VDB_EXTERN rc_t CC VCursorGetBlob ( const VCursor *self, 309 struct VBlob const **blob, uint32_t col_idx ); 310 VDB_EXTERN rc_t CC VCursorGetBlobDirect ( const VCursor *self, 311 struct VBlob const **blob, int64_t row_id, uint32_t col_idx ); 312 313 314 /* Read 315 * read entire single row of byte-aligned data into a buffer 316 * ReadDirect 317 * bypass the need to use SetRowId/OpenRow/CloseRow for addressing 318 * 319 * "row_id" [ IN ] - allows ReadDirect random access to any cell 320 * in column 321 * 322 * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn" 323 * 324 * "elem_bits" [ IN ] - expected element size in bits, required 325 * to be compatible with the actual element size, and be a multiple 326 * of 8 ( byte-aligned ). for non-byte-aligned data, see ReadBits 327 * 328 * "buffer" [ OUT ] and "blen" [ IN ] - return buffer for row data 329 * where "blen" gives buffer capacity in elements. the total buffer 330 * size in bytes == ( "elem_bits" * "blen" + 7 ) / 8. 331 * 332 * "row_len" [ OUT ] - return parameter for the number of elements 333 * in the requested row. 334 * 335 * when the return code is 0, "row_len" will contain the number of 336 * elements read into buffer. if the return code indicates that the 337 * buffer is too small, "row_len" will give the required buffer length. 338 */ 339 VDB_EXTERN rc_t CC VCursorRead ( const VCursor *self, uint32_t col_idx, 340 uint32_t elem_bits, void *buffer, uint32_t blen, uint32_t *row_len ); 341 VDB_EXTERN rc_t CC VCursorReadDirect ( const VCursor *self, int64_t row_id, uint32_t col_idx, 342 uint32_t elem_bits, void *buffer, uint32_t blen, uint32_t *row_len ); 343 344 345 /* ReadBits 346 * read single row of potentially bit-aligned column data into a buffer 347 * ReadBitsDirect 348 * bypass the need to use SetRowId/OpenRow/CloseRow for addressing 349 * 350 * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn" 351 * 352 * "elem_bits" [ IN ] - expected element size in bits, required to be 353 * compatible with the actual element size, and may ( or may not ) be 354 * a multiple of 8 ( byte aligned ). 355 * 356 * "start" [ IN ] - zero-based starting index to first element, 357 * valid from 0 .. row_len - 1 358 * 359 * "buffer" [ IN ], "boff" [ IN ] and "blen" [ IN ] - 360 * return buffer for row data, where "boff" is in BITS 361 * and "blen" is in ELEMENTS. 362 * 363 * "num_read" [ OUT ] - return parameter for the number of elements 364 * read, which is <= "blen" 365 * 366 * "remaining" [ OUT, NULL OKAY ] - optional return parameter for 367 * the number of elements remaining to be read. specifically, 368 * "start" + "num_read" + "remaining" == row length, assuming that 369 * "start" <= row length. 370 */ 371 VDB_EXTERN rc_t CC VCursorReadBits ( const VCursor *self, uint32_t col_idx, 372 uint32_t elem_bits, uint32_t start, void *buffer, uint32_t boff, 373 uint32_t blen, uint32_t *num_read, uint32_t *remaining ); 374 VDB_EXTERN rc_t CC VCursorReadBitsDirect ( const VCursor *self, int64_t row_id, uint32_t col_idx, 375 uint32_t elem_bits, uint32_t start, void *buffer, uint32_t boff, 376 uint32_t blen, uint32_t *num_read, uint32_t *remaining ); 377 378 379 /* CellData 380 * access pointer to single cell of potentially bit-aligned column data 381 * CellDataDirect 382 * bypass the need to use SetRowId/OpenRow/CloseRow for addressing 383 * 384 * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn" 385 * 386 * "elem_bits" [ OUT, NULL OKAY ] - optional return parameter for 387 * element size in bits 388 * 389 * "base" [ OUT ] and "boff" [ OUT, NULL OKAY ] - 390 * compound return parameter for pointer to row starting bit 391 * where "boff" is in BITS 392 * 393 * "row_len" [ OUT, NULL OKAY ] - the number of elements in cell 394 */ 395 VDB_EXTERN rc_t CC VCursorCellData ( const VCursor *self, uint32_t col_idx, 396 uint32_t *elem_bits, const void **base, uint32_t *boff, 397 uint32_t *row_len ); 398 VDB_EXTERN rc_t CC VCursorCellDataDirect ( const VCursor *self, int64_t row_id, 399 uint32_t col_idx, uint32_t *elem_bits, const void **base, 400 uint32_t *boff, uint32_t *row_len ); 401 402 403 /* VCursorDataPrefetch 404 * -- will prefecth rows into CursorCache (if it exists) 405 * -- no OUT parameters - just primes the cache 406 * -- will cache every produced blob (even a small one) 407 * -- will suspend flushing the cache after inserting first row 408 * -- conducts sort-unique on row_ids to linearize data access 409 * 410 * "row_ids" [ IN ] - rows to be prefetched 411 * 412 * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn" 413 * 414 * "num_rows" [ IN ] - number of rows in row_ids 415 * 416 * "min/max_valid_row_id [IN] - ignor all row_ids[i] which will not hit this range 417 * 418 * "continue_on_error" [ IN ] - whether to continue on a failure to prefetch a rows 419 */ 420 421 LIB_EXPORT rc_t CC VCursorDataPrefetch ( const VCursor * self, 422 const int64_t * row_ids, uint32_t col_idx, uint32_t num_rows, 423 int64_t min_valid_row_id, int64_t max_valid_row_id, bool continue_on_error ); 424 425 426 /* Default 427 * give a default row value for cell 428 * TBD - document full cell data, not append 429 * 430 * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn" 431 * 432 * "elem_bits" [ IN ] - stated element size in bits, required 433 * to be compatible with the actual element size 434 * 435 * "buffer" [ IN ] and "boff" [ IN ] - compound pointer and offset 436 * to start of default row data where "boff" is in BITS 437 * 438 * "row_len" [ IN ] - the number of elements in default row 439 */ 440 VDB_EXTERN rc_t CC VCursorDefault ( VCursor *self, uint32_t col_idx, 441 bitsz_t elem_bits, const void *buffer, bitsz_t boff, 442 uint64_t row_len ); 443 444 445 /* Write 446 * append bit-aligned column data to cell 447 * 448 * "col_idx" [ IN ] - index of column to be read, returned by "AddColumn" 449 * 450 * "elem_bits" [ IN ] - stated element size in bits, required 451 * to be compatible with the actual element size 452 * 453 * "buffer" [ IN ] and "boff" [ IN ] - compound pointer and offset 454 * to start of default row data where "boff" is in BITS 455 * 456 * "count" [ IN ] - the number of elements to append 457 */ 458 VDB_EXTERN rc_t CC VCursorWrite ( VCursor *self, uint32_t col_idx, 459 bitsz_t elem_bits, const void *buffer, bitsz_t boff, 460 uint64_t count ); 461 462 463 /* Commit 464 * commit changes made to cursor 465 * fails if row is open 466 */ 467 VDB_EXTERN rc_t CC VCursorCommit ( VCursor *self ); 468 469 470 /* OpenParent 471 * duplicate reference to parent table 472 * NB - returned reference must be released 473 */ 474 VDB_EXTERN rc_t CC VCursorOpenParentRead ( const VCursor *self, struct VTable const **tbl ); 475 VDB_EXTERN rc_t CC VCursorOpenParentUpdate ( VCursor *self, struct VTable **tbl ); 476 477 478 /* GetUserData 479 * SetUserData 480 * store/retrieve an opaque pointer to user data 481 * 482 * "data" [ OUT ] - return parameter for getting data 483 * "data" [ IN ] - parameter for setting data 484 * 485 * "destroy" [ IN, NULL OKAY ] - optional destructor param 486 * invoked from destructor of "self" 487 */ 488 VDB_EXTERN rc_t CC VCursorGetUserData ( const VCursor *self, void **data ); 489 VDB_EXTERN rc_t CC VCursorSetUserData ( const VCursor *self, 490 void *data, void ( CC * destroy ) ( void *data ) ); 491 492 493 #ifdef __cplusplus 494 } 495 #endif 496 497 #endif /* _h_vdb_cursor_ */ 498