1 /*=========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * =========================================================================== 24 * 25 */ 26 27 #ifndef _h_align_iterator_ 28 #define _h_align_iterator_ 29 30 #ifndef _h_align_extern_ 31 #include <align/extern.h> 32 #endif 33 34 #ifndef _h_klib_container_ 35 #include <klib/container.h> 36 #endif 37 38 #ifndef _h_insdc_insdc_ 39 #include <insdc/insdc.h> 40 #endif 41 42 #ifndef _h_vdb_database_ 43 #include <vdb/database.h> 44 #endif 45 46 #ifdef __cplusplus 47 extern "C" { 48 #endif 49 50 51 /*-------------------------------------------------------------------------- 52 * forwards 53 */ 54 struct VCursor; 55 struct AlignMgr; 56 struct ReferenceObj; 57 58 59 /*-------------------------------------------------------------------------- 60 * AlignmentIterator 61 * walk across a single alignment in reference space 62 */ 63 typedef struct AlignmentIterator AlignmentIterator; 64 65 66 /* Make 67 * create an encapsulation of alignment 68 * 69 * iter [ OUT ] - return parameter for new iterator 70 * 71 * copy [ IN ] - when "true" the data will be copied. 72 * otherwise, pointers will be expected to refer to data 73 * with a lifetime >= that of the iterator being created. 74 * 75 * "ref_pos" [ IN ] and "ref_len" [ IN ] - projection onto reference 76 * 77 * "read" [ IN ] and "read_len" [ IN ] - full sequence of alignment 78 * 79 * "has_mismatch" [ IN ] - describes comparison result of each base 80 * in "read" against the alignment. 81 * 82 * "has_ref_offset" [ IN ] - describes positions of reference offsets 83 * 84 * "ref_offset" [ IN ] and "ref_offset_len" [ IN ] - packed array of 85 * offsets of position against reference. 86 */ 87 ALIGN_EXTERN rc_t CC AlignMgrMakeAlignmentIterator ( struct AlignMgr const *self, 88 AlignmentIterator **iter, 89 bool copy, 90 INSDC_coord_zero ref_pos, 91 INSDC_coord_len ref_len, 92 const INSDC_4na_bin *read, 93 INSDC_coord_len read_len, 94 const bool *has_mismatch, 95 const bool *has_ref_offset, 96 const int32_t *ref_offset, 97 uint32_t ref_offset_len, 98 INSDC_coord_zero ref_window_start, 99 INSDC_coord_len ref_window_len ); 100 101 102 /* AddRef 103 * Release 104 */ 105 ALIGN_EXTERN rc_t CC AlignmentIteratorAddRef ( const AlignmentIterator *self ); 106 ALIGN_EXTERN rc_t CC AlignmentIteratorRelease ( const AlignmentIterator *self ); 107 108 109 /* Next 110 * advance position by 1 in reference space 111 * must be called initially to advance to first element 112 * 113 * returns RCState = rcDone when done 114 */ 115 ALIGN_EXTERN rc_t CC AlignmentIteratorNext ( AlignmentIterator *self ); 116 117 118 /* State 119 * returns bitmap of state bits and event code at the current position 120 * will return invalid before initial Next message or after final 121 * 122 * bits [ 0..7 ] : 123 * { 0..15 } = 4na mismatch (NACMGRSVTWYHKDBN) 124 * 64 = match 125 * 128 = skip 126 * bit [ 8 ] = have insert if ! 0 127 * bit [ 9 ] = have delete if ! 0 128 * bit [ 10 ] = first base if ! 0 129 * bit [ 11 ] = last base if ! 0 130 * bit [ 31 ] = iterator position is invalid if ! 0 131 * NB - converts state word to negative 132 */ 133 enum 134 { 135 align_iter_match = ( 1 << 8 ), 136 align_iter_skip = ( 1 << 9 ), 137 138 align_iter_insert = ( 1 << 10 ), 139 align_iter_delete = ( 1 << 11 ), 140 align_iter_first = ( 1 << 12 ), 141 align_iter_last = ( 1 << 13 ), 142 143 align_iter_invalid = ( int ) ( 1U << 31 ) 144 }; 145 146 ALIGN_EXTERN int32_t CC AlignmentIteratorState ( const AlignmentIterator *self, 147 INSDC_coord_zero *seq_pos ); 148 149 150 /* Position 151 * return current position of iterator relative to reference 152 */ 153 ALIGN_EXTERN rc_t CC AlignmentIteratorPosition ( const AlignmentIterator *self, 154 INSDC_coord_zero *pos ); 155 156 157 /* BasesInserted 158 * return the number of inserted bases and a pointer to their values 159 * 160 * "bases" [ OUT, NULL OKAY ] - optional output parameter to inserted bases 161 * 162 * returns count of bases inserted at current position 163 */ 164 ALIGN_EXTERN uint32_t CC AlignmentIteratorBasesInserted 165 ( const AlignmentIterator *self, const INSDC_4na_bin **bases ); 166 167 168 /* BasesDeleted 169 * return the number of bases deleted at the current position 170 * also returns the location on the reference where the delete starts 171 * 172 * "pos" [ OUT ] - return parameter for location on the reference 173 * where delete starts, and continues for the number of bases given by function return 174 * 175 * returns count of bases deleted at current position 176 */ 177 ALIGN_EXTERN uint32_t CC AlignmentIteratorBasesDeleted 178 ( const AlignmentIterator *self, INSDC_coord_zero *pos ); 179 180 181 /*-------------------------------------------------------------------------- 182 * PlacementRecord 183 * record describing a placement 184 */ 185 typedef struct PlacementRecord PlacementRecord; 186 struct PlacementRecord 187 { 188 DLNode n; 189 190 /* row id of alignment record */ 191 int64_t id; 192 193 /* object representing reference sequence */ 194 struct ReferenceObj const *ref; 195 196 /* placement position and length on reference */ 197 INSDC_coord_zero pos; 198 INSDC_coord_len len; 199 200 /* mapping quality of alignment */ 201 int32_t mapq; 202 203 /* spotgroup is now in here too */ 204 uint32_t spot_group_len; 205 char * spot_group; 206 }; 207 208 209 /* Cast 210 * cast to an extended object 211 * 212 * "ext" [ IN ] - selects the extended object level 213 * can be placementRecordExtension0 or placementRecordExtension1 214 */ 215 216 enum { placementRecordExtension0, placementRecordExtension1 }; 217 218 ALIGN_EXTERN void* CC PlacementRecordCast ( const PlacementRecord *self, uint32_t ext ); 219 220 221 ALIGN_EXTERN void* CC PlacementRecord_get_ext_data_ptr ( const PlacementRecord *self, uint32_t ext ); 222 223 224 /* Whack 225 * destroys PlacementRecord and any associated extensions 226 */ 227 ALIGN_EXTERN void CC PlacementRecordWhack ( const PlacementRecord *self ); 228 229 230 /* structure of function pointers for creating extensions 231 all function pointers are optional ( NULL OKAY ) */ 232 typedef struct PlacementRecordExtendFuncs PlacementRecordExtendFuncs; 233 struct PlacementRecordExtendFuncs 234 { 235 /* opaque pointer to data passed to each function */ 236 void *data; 237 238 /* destructor */ 239 void ( CC * destroy ) ( void *obj, void *data ); 240 241 /* constructor */ 242 rc_t ( CC * populate ) ( void *obj, const PlacementRecord *placement, 243 struct VCursor const *curs, INSDC_coord_zero ref_window_start, 244 INSDC_coord_len ref_window_len, void *data, void * placement_ctx ); 245 246 /* variable allocation size calculation 247 when non-NULL, takes precedence over "fixed_size" */ 248 bool ( CC * filter ) ( struct VCursor const *curs, int64_t row_id, 249 const PlacementRecord *placement, INSDC_coord_zero ref_window_start, 250 INSDC_coord_len ref_window_len, void *data, void * placement_ctx ); 251 252 /* variable allocation size calculation 253 when non-NULL, takes precedence over "fixed_size" */ 254 rc_t ( CC * alloc_size ) ( struct VCursor const *curs, int64_t row_id, size_t * size, void *data, void * placement_ctx ); 255 256 /* fixed allocation size 257 ignored if "alloc_size" is non-NULL, 258 must be non-zero otherwise */ 259 size_t fixed_size; 260 }; 261 262 263 /* external functions for extension of a placement record 264 to include ( construct ) an AlignmentIterator */ 265 ALIGN_EXTERN void CC AlignIteratorRecordDestroy ( void *obj, void *data ); 266 ALIGN_EXTERN rc_t CC AlignIteratorRecordPopulate ( void *obj, 267 const PlacementRecord *placement, struct VCursor const *curs, 268 INSDC_coord_zero ref_window_start, INSDC_coord_len ref_window_len, void *data ); 269 ALIGN_EXTERN rc_t CC AlignIteratorRecordSize ( struct VCursor const *curs, int64_t row_id, size_t * size, void *data ); 270 271 272 /*-------------------------------------------------------------------------- 273 * PlacementIterator 274 * walk across placements from an alignment db within a reference window 275 */ 276 typedef struct PlacementIterator PlacementIterator; 277 278 279 /* Make 280 * create a placement iterator 281 * 282 * "iter" [ OUT ] - return parameter for iterator 283 * 284 * "ref_obj" [ IN, NULL OKAY ] - optional parameter giving an object 285 * representing the reference sequence for this iterator. it will be 286 * inserted into each PlacementRecord (see above) and made available to 287 * outer code. 288 * 289 * "ref_pos" [ IN ] and "ref_len" [ IN ] - window onto reference 290 * 291 * "min_mapq" [ IN ] - minimum map quality value 292 * 293 * "ref_cur" [ IN ] - read-only cursor on REFERENCE table 294 * will be modified as necessary to contain requisite columns 295 * will be opened by iterator. 296 * 297 * "align_cur" [ IN ] - read-only cursor on PRIMARY_ALIGNMENT or SECONDARY_ALIGNMENT 298 * table ( see "secondary" ). will be modified as necessary to contain 299 * requisite columns. will be opened by iterator. 300 * 301 * "ids" [ IN ] - an enum describing which column of alignment ids should 302 * be used when reading "ref" 303 * 304 * "ext_0" [ IN, NULL OKAY ] and "ext_1" [ IN, NULL OKAY ] - optional pointers 305 * to blocks describing how to extend the basic placement record 306 * 307 * rd_group [ IN, NULL OKAY ] 308 * != NULL, non empty string ... produce all alignments with this string as 309 * spot-group ( no matter what the "real" spot-group of the 310 * alignment is ) 311 * 312 * != NULL, empty string ... produce all alignments with the "real" spot-group 313 * read from the column "SPOT_GROUP" 314 * 315 * == NULL, ... produce all alignments with no spot-group assigned ( the user 316 * does not wish the data to be read, the alignment to be bined ) 317 */ 318 319 typedef uint8_t align_id_src; 320 enum { primary_align_ids, secondary_align_ids, evidence_align_ids }; 321 322 ALIGN_EXTERN rc_t CC AlignMgrMakePlacementIterator ( struct AlignMgr const *self, 323 PlacementIterator **iter, struct ReferenceObj const *ref_obj, 324 INSDC_coord_zero ref_pos, INSDC_coord_len ref_len, int32_t min_mapq, 325 struct VCursor const *ref_cur, struct VCursor const *align_cur, align_id_src ids, 326 const PlacementRecordExtendFuncs *ext_0, const PlacementRecordExtendFuncs *ext_1, 327 const char * spot_group ); 328 329 330 /* AddRef 331 * Release 332 */ 333 ALIGN_EXTERN rc_t CC PlacementIteratorAddRef ( const PlacementIterator *self ); 334 ALIGN_EXTERN rc_t CC PlacementIteratorRelease ( const PlacementIterator *self ); 335 336 337 /* RefWindow 338 * returns the reference identification string and iteration window 339 */ 340 ALIGN_EXTERN rc_t CC PlacementIteratorRefWindow ( const PlacementIterator *self, 341 const char **idstr, INSDC_coord_zero *pos, INSDC_coord_len *len ); 342 343 344 /* RefObj 345 * returns the Ref-obj, that was used to create this placement-iterator 346 */ 347 ALIGN_EXTERN rc_t CC PlacementIteratorRefObj( const PlacementIterator * self, 348 struct ReferenceObj const ** refobj ); 349 350 351 /* NextAvailPos 352 * check the next available position having one or more placements 353 * 354 * "pos" [ OUT ] - next position on reference having one or more placements 355 * may return negative position, indicating an alignment that wraps around 356 * a circular reference, and starts in negative space after linearization. 357 * 358 * "len" [ OUT, NULL OKAY ] - optional return parameter for length of 359 * placement at that position 360 * 361 * returns non-zero rc when no window is done 362 * (rcRange, rcDone) 363 */ 364 ALIGN_EXTERN rc_t CC PlacementIteratorNextAvailPos ( const PlacementIterator *self, 365 INSDC_coord_zero *pos, INSDC_coord_len *len ); 366 367 368 /* NextRecordAt 369 * retrieve a placement at the requested position 370 * 371 * "pos" [ IN ] - required position of the placement 372 * obtained from "NextAvailPos" 373 * 374 * "rec" [ OUT ] - returned record 375 * must be freed via PlacementRecordWhack 376 * 377 * returns non-zero rc when no more placements are available 378 * (rcOffset, rcDone) 379 */ 380 ALIGN_EXTERN rc_t CC PlacementIteratorNextRecordAt ( PlacementIterator *self, 381 INSDC_coord_zero pos, const PlacementRecord **rec ); 382 383 384 /* NextIdAt 385 * retrieve a row id at the requested position 386 * 387 * "pos" [ IN ] - required position of the placement 388 * obtained from "NextAvailPos" 389 * 390 * "id" [ OUT ] - returned row-id, within domain of align cursor 391 * 392 * "len" [ OUT, NULL OKAY ] - optional return parameter for length of 393 * placement on reference 394 * 395 * returns non-zero rc when no more placements are available 396 * (rcOffset, rcDone) 397 */ 398 ALIGN_EXTERN rc_t CC PlacementIteratorNextIdAt ( PlacementIterator *self, 399 INSDC_coord_zero pos, int64_t *row_id, INSDC_coord_len *len ); 400 401 402 /*-------------------------------------------------------------------------- 403 * PlacementSetIterator 404 * walk across placements from an alignment db within a reference window 405 */ 406 typedef struct PlacementSetIterator PlacementSetIterator; 407 408 409 /* Make 410 * create a placement set iterator 411 * 412 * "iter" [ OUT ] - return parameter for iterator 413 * 414 * "ref_pos" [ IN ] and "ref_len" [ IN ] - window onto reference 415 */ 416 ALIGN_EXTERN rc_t CC AlignMgrMakePlacementSetIterator ( struct AlignMgr const *self, 417 PlacementSetIterator **iter ); 418 419 420 /* AddPlacementIterator 421 * adds a placement iterator 422 * used to provide ordered placements within window 423 */ 424 ALIGN_EXTERN rc_t CC PlacementSetIteratorAddPlacementIterator 425 ( PlacementSetIterator *self, PlacementIterator *pi ); 426 427 428 /* AddRef 429 * Release 430 */ 431 ALIGN_EXTERN rc_t CC PlacementSetIteratorAddRef ( const PlacementSetIterator *self ); 432 ALIGN_EXTERN rc_t CC PlacementSetIteratorRelease ( const PlacementSetIterator *self ); 433 434 435 ALIGN_EXTERN rc_t CC PlacementSetIteratorNextReference ( PlacementSetIterator *self, 436 INSDC_coord_zero *first_pos, INSDC_coord_len *len, struct ReferenceObj const ** refobj ); 437 438 ALIGN_EXTERN rc_t CC PlacementSetIteratorNextWindow ( PlacementSetIterator *self, 439 INSDC_coord_zero *first_pos, INSDC_coord_len *len ); 440 441 /* NextAvailPos 442 * check the next available position having one or more placements 443 * 444 * "pos" [ OUT ] - next position on reference having one or more placements 445 * may return negative position, indicating an alignment that wraps around 446 * a circular reference, and starts in negative space after linearization. 447 * 448 * "len" [ OUT, NULL OKAY ] - optional return parameter for length of 449 * placement at that position 450 * 451 * returns non-zero rc when no more placements are available 452 * TBD - define a proper value 453 */ 454 ALIGN_EXTERN rc_t CC PlacementSetIteratorNextAvailPos ( const PlacementSetIterator *self, 455 INSDC_coord_zero *pos, INSDC_coord_len *len ); 456 457 458 /* NextRecordAt 459 * retrieve a placement at the requested position 460 * 461 * "pos" [ IN ] - required position of the placement 462 * obtained from "NextAvailPos" 463 * 464 * "rec" [ OUT ] - returned record 465 * must be freed via PlacementRecordWhack 466 */ 467 ALIGN_EXTERN rc_t CC PlacementSetIteratorNextRecordAt ( PlacementSetIterator *self, 468 INSDC_coord_zero pos, const PlacementRecord **rec ); 469 470 471 /* NextIdAt 472 * retrieve a row id at the requested position 473 * 474 * "pos" [ IN ] - required position of the placement 475 * obtained from "NextAvailPos" 476 * 477 * "id" [ OUT ] - returned row-id, within domain of align cursor 478 * 479 * "len" [ OUT, NULL OKAY ] - optional return parameter for length of 480 * placement on reference 481 */ 482 ALIGN_EXTERN rc_t CC PlacementSetIteratorNextIdAt ( PlacementSetIterator *self, 483 INSDC_coord_zero pos, int64_t *row_id, INSDC_coord_len *len ); 484 485 486 /*-------------------------------------------------------------------------- 487 * ReferenceIterator 488 * walk across placements from an alignment db within a reference window 489 */ 490 typedef struct ReferenceIterator ReferenceIterator; 491 492 493 /* Make 494 * create a reference iterator 495 * 496 * "iter" [ OUT ] - return parameter for iterator 497 * 498 * "ext_1" [ IN, NULL OKAY ] - optional pointer to a block describing how 499 * to extend the align-iterator record 500 * 501 * "min_mapq" [ IN ] - minimum map quality value 502 */ 503 ALIGN_EXTERN rc_t CC AlignMgrMakeReferenceIterator ( struct AlignMgr const *self, 504 ReferenceIterator **iter, const PlacementRecordExtendFuncs *ext_1, int32_t min_mapq ); 505 506 507 /* AddRef 508 * Release 509 */ 510 ALIGN_EXTERN rc_t CC ReferenceIteratorAddRef ( const ReferenceIterator *self ); 511 ALIGN_EXTERN rc_t CC ReferenceIteratorRelease ( const ReferenceIterator *self ); 512 513 514 /* AddPlacementIterator 515 * adds a placement iterator 516 * used to provide ordered placements within window 517 */ 518 #if 0 519 ALIGN_EXTERN rc_t CC ReferenceIteratorAddPlacementIterator 520 ( ReferenceIterator *self, PlacementIterator *pi ); 521 #endif 522 523 /* AddPlacements 524 * adds a source for placements (file/table) 525 * used to provide ordered placements within window 526 */ 527 ALIGN_EXTERN rc_t CC ReferenceIteratorAddPlacements ( ReferenceIterator *self, 528 struct ReferenceObj const *ref_obj, INSDC_coord_zero ref_pos, INSDC_coord_len ref_len, 529 struct VCursor const *ref, struct VCursor const *align, align_id_src ids, 530 const char * spot_group, void * placement_ctx ); 531 532 533 /* NextReference 534 * advance to the next reference 535 */ 536 ALIGN_EXTERN rc_t CC ReferenceIteratorNextReference ( ReferenceIterator *self, 537 INSDC_coord_zero *first_pos, INSDC_coord_len *len, struct ReferenceObj const ** refobj ); 538 539 /* NextWindow 540 * advance to the next window on the reference 541 */ 542 ALIGN_EXTERN rc_t CC ReferenceIteratorNextWindow ( ReferenceIterator *self, 543 INSDC_coord_zero *first_pos, INSDC_coord_len *len ); 544 545 /* NextSpotGroup 546 * advance to the next spot_group on the reference 547 */ 548 ALIGN_EXTERN rc_t CC ReferenceIteratorNextSpotGroup ( ReferenceIterator *self, 549 const char ** name, size_t * len ); 550 551 552 /* NextPos 553 * advance to the next position on current reference 554 * resets internal iterator on placements at that position 555 */ 556 ALIGN_EXTERN rc_t CC ReferenceIteratorNextPos ( ReferenceIterator *self, bool skip_empty ); 557 558 559 /* Position 560 * return current position on the reference 561 */ 562 ALIGN_EXTERN rc_t CC ReferenceIteratorPosition ( const ReferenceIterator *self, 563 INSDC_coord_zero *pos, uint32_t * depth, INSDC_4na_bin * base ); 564 565 566 /* NextPlacement 567 * advance internal iterator to next placement in list 568 * returns a pointer to the next placement object at current position 569 */ 570 ALIGN_EXTERN rc_t CC ReferenceIteratorNextPlacement ( ReferenceIterator *self, 571 const PlacementRecord **rec ); 572 573 574 /* State 575 * return state of current placement at current position 576 */ 577 ALIGN_EXTERN int32_t CC ReferenceIteratorState ( const ReferenceIterator *self, INSDC_coord_zero *seq_pos ); 578 579 580 /* BasesInserted 581 * return the number of inserted bases and a pointer to their values 582 * 583 * "bases" [ OUT, NULL OKAY ] - optional output parameter to inserted bases 584 * 585 * returns count of bases inserted at current position 586 */ 587 ALIGN_EXTERN uint32_t CC ReferenceIteratorBasesInserted ( const ReferenceIterator *self, 588 const INSDC_4na_bin **bases ); 589 590 591 /* BasesDeleted 592 * return the number of bases deleted at the current position 593 * also returns the location on the reference where the delete starts 594 * 595 * "pos" [ OUT ] - return parameter for location on the reference 596 * where delete starts, and continues for the number of bases given by function return 597 * 598 * "bases" [ OUT, NULL OKAY ] - optional output parameter to deleted bases 599 * 600 * returns count of bases deleted at current position 601 */ 602 ALIGN_EXTERN uint32_t CC ReferenceIteratorBasesDeleted ( const ReferenceIterator *self, 603 INSDC_coord_zero *pos, const INSDC_4na_bin **bases ); 604 605 606 #ifdef __cplusplus 607 } 608 #endif 609 610 #endif /* _h_align_iterator_ */ 611