1 /*************************************************************** 2 3 The Subread software package is free software package: 4 you can redistribute it and/or modify it under the terms 5 of the GNU General Public License as published by the 6 Free Software Foundation, either version 3 of the License, 7 or (at your option) any later version. 8 9 Subread is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty 11 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 13 See the GNU General Public License for more details. 14 15 Authors: Drs Yang Liao and Wei Shi 16 17 ***************************************************************/ 18 19 20 //Core.c is the totally refactored body of the current subread, subjunc, subfusion and subindel programs. 21 //This program runs in this way: 22 // 1, it loads reads from FASTQ, FASTA, PLAIN or SAM files 23 // 2, it change the read and the quality strings according to FF/FR/RR/RF orders, such that every read is in the "F" manner. 24 // Namely, the second read in a pair is reversed. 25 // 3, it builds voting table for each read. 26 // 4, it performs the first iteration. 27 // 5, it performs the second iteration. 28 // 6, it reports results regarding the reads. 29 // 7, if the input files contain too many reads (e.g., greater than 14M in total), step 1 ~ 6 are repeated until all reads are finalised. 30 // 8, it reports global results. 31 32 //Core.c maintains the following context: 33 // 1, the global context. 34 // 2, the overall context for each module. 35 // 3, the local context for voting each read for each module. 36 // The structure of each context is named as 37 // Context_ModuleName_Scope 38 39 //Thread context is maintained for each thread. 40 41 // There should be a function for each module to store/load its global context. 42 43 #ifndef _SUBREAD_CORE_H_ 44 #define _SUBREAD_CORE_H_ 45 #include <pthread.h> 46 #include "subread.h" 47 #include "sambam-file.h" 48 49 //#define is_target(s) (memcmp((s),"chr901_242237_242737_0:0:0_0:0:0_3761", 22)==0) 50 51 #define is_target(s) 0 52 53 //#define _global_retrieve_voting_context(global_context, pair_number) (global_context->chunk_vote_records[pair_number]) 54 55 //#define _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id) ((global_context->input_reads.is_paired_end_reads?(global_context -> chunk_alignment_records + (2*pair_number+is_second_read)* global_context->config.multi_best_reads + best_read_id):(global_context -> chunk_alignment_records+pair_number*global_context->config.multi_best_reads + best_read_id))) 56 57 //#define _global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, best_read_id) ((global_context->input_reads.is_paired_end_reads?(global_context -> chunk_subjunc_records + (2*pair_number+is_second_read)* global_context->config.multi_best_reads + best_read_id):(global_context -> chunk_subjunc_records+pair_number*global_context->config.multi_best_reads + best_read_id))) 58 59 //#define _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read) (is_second_read?(global_context->big_margin_record + (2*pair_number+is_second_read)* global_context->config.big_margin_record_size):(global_context->big_margin_record + pair_number * global_context->config.big_margin_record_size)) 60 61 #define mark_gapped_read(res) (res)-> result_flags|= CORE_IS_GAPPED_READ; 62 63 #define CORE_MAX_CIGAR_STR_LEN 110 64 #define CORE_ADDITIONAL_INFO_LENGTH 400 65 #define READPAIRS_FOR_CALC_EXPT_TLEN 1000 66 67 typedef struct{ 68 subread_read_number_t fragments; 69 unsigned int maximum_interval_length; 70 unsigned int expected_items; 71 HashTable * entry_table; 72 } bucketed_table_t; 73 74 typedef struct{ 75 int capacity; 76 int items; 77 unsigned int keyed_bucket; 78 unsigned int maximum_interval_length; 79 80 unsigned int * positions; 81 void ** details; 82 } bucketed_table_bucket_t; 83 84 85 typedef struct{ 86 unsigned long long capacity; 87 subread_read_number_t fragments; 88 subread_read_number_t * fragment_numbers; 89 } fragment_list_t; 90 91 92 93 94 typedef struct{ 95 int is_paired_end_reads; 96 int is_internal_error; 97 gene_input_t first_read_file; 98 gene_input_t second_read_file; 99 unsigned long long first_read_file_size; 100 subread_lock_t input_lock; 101 double avg_read_length; 102 } read_input_t; 103 104 typedef struct{ 105 char read_name[MAX_READ_NAME_LEN]; 106 unsigned short flags; 107 char chro_name[MAX_CHROMOSOME_NAME_LEN]; 108 unsigned int location; 109 unsigned short map_quality; 110 char cigar[CORE_MAX_CIGAR_STR_LEN]; 111 char other_chro_name[MAX_CHROMOSOME_NAME_LEN]; 112 unsigned int other_location; 113 long long int tlen; 114 int rlen; 115 char read_text[MAX_READ_LENGTH]; 116 char qual_text[MAX_READ_LENGTH]; 117 char additional_columns[CORE_ADDITIONAL_INFO_LENGTH]; 118 } output_read_buffer_t; 119 120 typedef struct{ 121 int fragment_number_in_chunk; 122 int multi_mapping_locations; 123 int this_mapping_location; 124 output_read_buffer_t r1; 125 output_read_buffer_t r2; 126 } output_fragment_buffer_t; 127 128 typedef struct{ 129 // running_scheme 130 int all_threads; 131 int is_first_iteration_running; 132 int is_second_iteration_running; 133 int is_third_iteration_running; 134 int use_memory_buffer; 135 float memory_use_multiplex; 136 char temp_file_prefix[MAX_FILE_NAME_LENGTH]; 137 subread_read_number_t reads_per_chunk; 138 int fast_run; 139 140 // input_scheme 141 char first_read_file[MAX_FILE_NAME_LENGTH * MAX_SCRNA_FASTQ_FILES * 3]; 142 char second_read_file[MAX_FILE_NAME_LENGTH]; 143 char exon_annotation_file[MAX_FILE_NAME_LENGTH]; 144 char exon_annotation_file_screen_out[MAX_FILE_NAME_LENGTH]; 145 char exon_annotation_alias_file[MAX_FILE_NAME_LENGTH]; 146 int exon_annotation_file_type; 147 char exon_annotation_gene_id_column[MAX_READ_NAME_LEN]; 148 char exon_annotation_feature_name_column[MAX_READ_NAME_LEN]; 149 150 151 int do_remove_neighbour_for_scRNA; 152 int scRNA_input_mode; 153 short read_trim_5; 154 short read_trim_3; 155 int is_first_read_reversed; 156 int is_second_read_reversed; 157 int space_type; 158 int is_methylation_reads; 159 int phred_score_format; 160 int is_SAM_file_input; 161 int is_gzip_fastq; 162 163 164 // reporting scheme 165 char read_group_id[MAX_FILE_NAME_LENGTH]; 166 char read_group_txt[MAX_FILE_NAME_LENGTH]; 167 char output_prefix[MAX_FILE_NAME_LENGTH]; 168 int report_sam_file; 169 int report_no_unpaired_reads; 170 int min_mapped_fraction; 171 int max_mismatch_exonic_reads; 172 int max_mismatch_junction_reads; 173 int ignore_unmapped_reads; 174 int report_unmapped_using_mate_pos; 175 int report_multi_mapping_reads; 176 int downscale_mapping_quality; 177 int is_BAM_input; 178 int is_BAM_output; 179 int is_input_read_order_required; 180 int sort_reads_by_coordinates; 181 int convert_color_to_base; 182 int SAM_extra_columns; 183 int report_multiple_best_in_pairs; 184 unsigned int multi_best_reads; 185 unsigned int reported_multi_best_reads; 186 187 // basic voting 188 char index_prefix[MAX_FILE_NAME_LENGTH]; 189 int top_scores; 190 int max_vote_combinations; 191 int max_vote_simples; 192 int max_vote_number_cutoff; 193 int total_subreads; 194 int minimum_subread_for_first_read; 195 int minimum_subread_for_second_read; 196 float minimum_exonic_subread_fraction; 197 int minimum_pair_distance; 198 int maximum_pair_distance; 199 int restrected_read_order; 200 int show_soft_cliping; 201 int max_indel_length; 202 int expected_pair_distance; 203 int ambiguous_mapping_tolerance; 204 int use_hamming_distance_break_ties; 205 int use_quality_score_break_ties; 206 int big_margin_record_size; 207 int PE_predominant_weight; 208 int no_TLEN_preference; 209 210 // subjunc 211 int entry_program_name; 212 int experiment_type; 213 int do_breakpoint_detection; 214 int do_big_margin_filtering_for_junctions; 215 int do_big_margin_filtering_for_reads; 216 int limited_tree_scan; 217 int use_hamming_distance_in_exon; 218 unsigned int maximum_intron_length; 219 int high_quality_base_threshold; 220 int max_insertion_at_junctions; 221 int check_donor_at_junctions; 222 223 // subfusion 224 int do_fusion_detection; 225 int do_long_del_detection; 226 int do_structural_variance_detection; 227 int prefer_donor_receptor_junctions; 228 int more_accurate_fusions; 229 int maximum_translocation_length; 230 int maximum_colocating_distance; 231 232 // indel 233 char do_superlong_indel_detection; 234 char extending_search_indels; 235 int k_mer_length; 236 int realignment_minimum_variant_distance; 237 int reassembly_start_read_number; 238 int reassembly_key_length; 239 int reassembly_subread_length; 240 int reassembly_window_multiplex; 241 int reassembly_tolerable_voting; 242 int reassembly_window_alleles; 243 int init_max_event_number; 244 int use_dynamic_programming_indel; 245 int use_bitmap_event_table; 246 int maximise_sensitivity_indel; 247 int flanking_subread_indel_mismatch; 248 int DP_penalty_create_gap; 249 int DP_penalty_extend_gap; 250 int DP_match_score; 251 int DP_mismatch_penalty; 252 253 } configuration_t; 254 255 #define CORE_IS_GT_AG_DONORS 1 256 #define CORE_NOTFOUND_DONORS 2 257 #define CORE_IS_STRAND_JUMPED 4 258 #define CORE_IS_NEGATIVE_STRAND 8 259 #define CORE_IS_FULLY_EXPLAINED 16 260 #define CORE_IS_BREAKEVEN 32 261 #define CORE_IS_GAPPED_READ 64 262 #define CORE_IS_PAIRED_END 128 263 #define CORE_TOO_MANY_MISMATCHES 256 264 265 #define CORE_CIGAR_OPT_M 0 266 #define CORE_CIGAR_OPT_S 1 267 #define CORE_CIGAR_OPT_D 2 268 #define CORE_CIGAR_OPT_I 3 269 #define CORE_CIGAR_OPT_B 4 270 #define CORE_CIGAR_OPT_N 5 271 #define CORE_CIGAR_OPT_BB 6 272 #define CORE_CIGAR_OPT_NN 7 273 274 #define CORE_PROGRAM_SUBREAD 100 275 #define CORE_PROGRAM_SUBJUNC 200 276 #define CORE_PROGRAM_SUBINDEL 1000 277 278 #define CORE_EXPERIMENT_DNASEQ 1000 279 #define CORE_EXPERIMENT_RNASEQ 2000 280 281 #define PRINT_BOX_WRAPPED 4 282 #define PRINT_BOX_NOCOLOR_FOR_COLON 2 283 #define PRINT_BOX_CENTER 1 284 285 typedef struct{ 286 unsigned int event_small_side; // the last base before the event 287 unsigned int event_large_side; // the first base after the event 288 // for exon-exon junctions, these two numbers are the last and first base in the exons. (ZERO-BASED INDEX) 289 // for indels, the smaller side is the base before in inserted/deleted bases; the larger side is the base after them. 290 // for a fusion, the two sides are the base before the fusion point (the fusion point is not a base, but a vertical bar between two bases -- two sides.) 291 // 292 // 293 // The 'V' marked bases are the two sides. 294 // V V 295 // Exon-exon junction: 100 .......s|gt........ag|s....... 300 chrX 296 // ^ ^ These two bars are the junction point. 297 // 298 // 299 // 300 // DNA-fusions: 301 // 302 // small_side ===> .|. <=== large_side normal arrangement (N/Y) 303 // 304 // small_side ===> .| 305 // large_side ===> .| strand jumpped (N/N) 306 // 307 // |. <=== small_side strand jumpped (Y/Y) 308 // |. <=== large_side 309 // 310 // ^ This is fusion point. It is not a base, but a vertical bar between two sides. 311 // 312 // 313 // 314 // note: the fusion point is larger than event_small_side if small_side_increasing_coordinate == N; it is smaller than event_small_side if small_side_increasing_coordinate == Y; 315 // the fusion point is smaller than event_large_side if large_side_increasing_coordinate == Y; it is larger than event_large_side if large_side_increasing_coordinate == N. 316 317 short indel_length; 318 short junction_flanking_left; 319 short junction_flanking_right; 320 321 char indel_at_junction; 322 char is_negative_strand; // this only works to junction detection, according to 'GT/AG' or 'CT/AC' donors. This only applys to junctions. 323 char is_strand_jumped; // "strand jumped" means that the left and right sides are on different strands. This only applys to fusions. 324 char is_donor_found_or_annotation; // only for junctions: GT/AG is found at the location. 1: found, 0:not found: 64: from annotation (thus unknown) 325 // Also, if "is_strand_jumped" is true, all coordinates (e.g., splicing points, cover_start, cover_end, etc) are on "reversed read" view. 326 327 char small_side_increasing_coordinate; 328 char large_side_increasing_coordinate; // normal exon-exon junctions must have N and Y on small/large increasing coordinates. 329 330 //char is_ambiguous; 331 char connected_next_event_distance; // the distance (negative or positive) to the next event in the table. For example, if the cigar string is 10M3I1M1I10M, event "3I" will have 1 here . 332 char connected_previous_event_distance; // the distance (negative or positive) to the next event in the table. For example, if the cigar string is 10M3I1M1I10M, event "1I" will have 1 here. 333 334 //char inserted_bases[(1+MAX_INSERTION_LENGTH) / 4 + 1]; 335 char * inserted_bases; 336 unsigned short supporting_reads; 337 unsigned short anti_supporting_reads; 338 unsigned short final_counted_reads; 339 unsigned short final_reads_mismatches; 340 unsigned char event_type; 341 unsigned int global_event_id; 342 float event_quality; 343 344 unsigned long long critical_read_id; 345 int critical_supporting_reads; 346 } chromosome_event_t; 347 348 349 350 typedef struct 351 { 352 353 unsigned int selected_position; 354 short result_flags; 355 short read_length; 356 // 4 bytes 357 gene_vote_number_t selected_votes; 358 gene_vote_number_t used_subreads_in_vote; 359 unsigned char noninformative_subreads_in_vote; 360 // this coverage is the range on reads, in point of view of "main piece" strand (i.e., "is_negative_strand") 361 char indels_in_confident_coverage; 362 char is_fully_covered; 363 364 gene_vote_number_t selected_indel_record [MAX_INDEL_SECTIONS*3 + 1]; 365 unsigned short confident_coverage_start; 366 unsigned short confident_coverage_end; 367 368 short subread_quality; 369 370 } mapping_result_t; 371 372 typedef struct{ 373 mapping_result_t * mapping_result; 374 unsigned int first_base_position; 375 char cigar_string[CORE_MAX_CIGAR_STR_LEN]; 376 chromosome_event_t * supporting_chromosome_events[MAX_EVENTS_IN_READ]; 377 short flanking_size_left[MAX_EVENTS_IN_READ]; 378 short flanking_size_right[MAX_EVENTS_IN_READ]; 379 char crirical_support[MAX_EVENTS_IN_READ]; 380 381 char first_base_is_jumpped; 382 short final_mismatched_bases; 383 short final_matched_bases; 384 short best_second_diff_bases; 385 short realign_flags; 386 short final_quality; 387 short chromosomal_length; 388 short MAPQ_adjustment; 389 int known_junction_supp; 390 int final_penalty; 391 } realignment_result_t; 392 393 #define BUCKETED_TABLE_INIT_ITEMS 3 394 #define FRAGMENT_LIST_INIT_ITEMS 3 395 396 397 typedef struct 398 { 399 short split_point; 400 gene_vote_number_t minor_votes; 401 char double_indel_offset; 402 char indel_at_junction; 403 char small_side_increasing_coordinate; 404 char large_side_increasing_coordinate; 405 unsigned int minor_position; 406 // this coverage is the range on reads, in point of view of "main piece" strand 407 unsigned short minor_coverage_start; 408 unsigned short minor_coverage_end; 409 410 } subjunc_result_t; 411 412 // THREE_TOP_UPDATE dictates the number of top-votes. 413 414 typedef struct { 415 int is_vote_t_item; 416 int item_index_i; 417 int item_index_j; 418 unsigned int mapping_position; 419 int major_half_votes; 420 unsigned short read_start_base; 421 }simple_mapping_t; 422 423 typedef struct{ 424 simple_mapping_t * r1_loc; 425 simple_mapping_t * r2_loc; 426 427 unsigned long long score_adj; 428 } vote_combination_t; 429 430 typedef struct{ 431 unsigned long long maxinum_read_number; 432 int result_chunk_fd; 433 int junction_chunk_fd; 434 int big_margin_chunk_fd; 435 436 mapping_result_t * result_chunk_addr; 437 subjunc_result_t * junction_chunk_addr; 438 unsigned short * big_margin_chunk_addr; 439 440 subread_lock_t resize_lock; 441 } bigtable_t; 442 443 444 // this cached item is for an entire read (not read pair, not one best result) 445 typedef struct { 446 int status; 447 subread_read_number_t pair_number; 448 int is_second_read; 449 450 unsigned short big_margin_data[9*3]; 451 mapping_result_t * alignment_res; 452 subjunc_result_t * subjunc_res; 453 454 } bigtable_cached_result_t; 455 456 457 typedef struct{ 458 void * junction_tmp_r1; 459 void * junction_tmp_r2; 460 461 void * alignment_tmp_r1; 462 void * alignment_tmp_r2; 463 464 void * vote_simple_1_buffer; 465 void * vote_simple_2_buffer; 466 467 void * comb_buffer; 468 } topK_buffer_t; 469 470 typedef struct{ 471 unsigned long long all_correct_PE_reads; 472 int thread_id; 473 pthread_t thread; 474 475 // module_thread_context is different to module_context, though some contents in module_thread_context could be also in module context (e.g., junction tables) 476 // modules functions may deside to use objects in which context. 477 void * module_thread_contexts[5]; 478 gene_value_index_t * current_value_index; 479 480 subread_lock_t output_lock; 481 unsigned int all_mapped_reads; 482 unsigned int not_properly_pairs_wrong_arrangement; 483 unsigned int not_properly_pairs_different_chro; 484 unsigned int not_properly_different_strands; 485 unsigned int not_properly_pairs_TLEN_wrong; 486 unsigned int all_unmapped_reads; 487 unsigned int not_properly_pairs_only_one_end_mapped; 488 unsigned int all_multimapping_reads; 489 unsigned int all_uniquely_mapped_reads; 490 491 topK_buffer_t topKbuff; 492 } thread_context_t; 493 494 495 typedef struct{ 496 // basic running configuration 497 configuration_t config; 498 499 // for the index 500 gehash_t * current_index; 501 gene_value_index_t * current_value_index; 502 gene_value_index_t all_value_indexes[100]; 503 int index_block_number; 504 int current_index_block_number; 505 int will_remove_input_file; 506 int is_phred_warning; 507 int is_final_voting_run; 508 509 // global locks 510 subread_lock_t thread_initial_lock; 511 512 // global FILE* 513 SamBam_Writer * output_bam_writer; 514 FILE * output_sam_fp; 515 FILE * long_insertion_FASTA_fp; 516 char * output_sam_inner_buffer; 517 int output_sam_is_full; 518 519 // running contexts 520 void * module_contexts[5]; 521 thread_context_t * all_thread_contexts; 522 subread_read_number_t last_written_fragment_number; 523 int need_merge_buffer_now; 524 read_input_t input_reads; 525 bigtable_t bigtable; 526 int rebuilt_command_line_size; 527 char * rebuilt_command_line; 528 529 subread_lock_t bigtable_lock; 530 subread_lock_t output_lock; 531 int bigtable_cache_size; 532 FILE * bigtable_cache_file_fp; 533 long long bigtable_cache_file_loaded_fragments_begin; 534 long long bigtable_cache_file_fragments; 535 bigtable_cached_result_t * bigtable_cache; 536 char *bigtable_cache_malloc_ptr, *bigtable_cache_malloc_ptr_junc; 537 unsigned int bigtable_chunked_fragments; 538 int bigtable_dirty_data; 539 540 gene_offset_t chromosome_table; 541 double start_time; 542 double align_start_time; 543 double timecost_load_index; 544 double timecost_voting; 545 double timecost_before_realign; 546 double timecost_for_realign; 547 548 unsigned long long expected_TLEN_sum; 549 unsigned int expected_TLEN_read_numbers; 550 unsigned long long all_processed_reads; 551 unsigned long long all_correct_PE_reads; 552 unsigned int all_junctions; 553 unsigned int all_fusions; 554 unsigned int all_indels; 555 556 unsigned int all_mapped_reads; 557 unsigned int not_properly_pairs_wrong_arrangement; 558 unsigned int not_properly_pairs_different_chro; 559 unsigned int not_properly_different_strands; 560 unsigned int not_properly_pairs_TLEN_wrong; 561 unsigned int all_unmapped_reads; 562 unsigned int not_properly_pairs_only_one_end_mapped; 563 unsigned int all_multimapping_reads; 564 unsigned int all_uniquely_mapped_reads; 565 566 unsigned long long current_circle_start_abs_offset_file1; 567 gene_inputfile_position_t current_circle_start_position_file1; 568 gene_inputfile_position_t current_circle_start_position_file2; 569 gene_inputfile_position_t current_circle_end_position_file1; 570 gene_inputfile_position_t current_circle_end_position_file2; 571 subread_read_number_t processed_reads_in_chunk; 572 subread_read_number_t running_processed_reads_in_chunk; 573 574 // sunfusion structural variance 575 bucketed_table_t funky_table_BC; 576 bucketed_table_t funky_table_DE; 577 fragment_list_t funky_list_A; 578 fragment_list_t funky_list_DE; 579 580 bucketed_table_t breakpoint_table_P; 581 bucketed_table_t breakpoint_table_QR; 582 bucketed_table_t breakpoint_table_YZ; 583 584 bucketed_table_t translocation_result_table; 585 bucketed_table_t inversion_result_table; 586 topK_buffer_t topKbuff; 587 588 // per chunk parameters 589 subread_read_number_t read_block_start; 590 char * exonic_region_bitmap; 591 HashTable * sam_chro_to_anno_chr_alias; 592 HashTable * annotation_chro_table; 593 } global_context_t; 594 595 596 #define MODULE_INDEL_ID 0 597 #define MODULE_JUNCTION_ID 1 598 599 #define STEP_VOTING 10 600 #define STEP_ITERATION_ONE 20 601 #define STEP_ITERATION_TWO 30 602 #define STEP_ITERATION_THREE 40 603 #define STEP_WRITE_CHUNK_RESULTS 90 604 605 #define MEMORY_OPTIMISATION_LAPTOP 10 606 #define MEMORY_OPTIMISATION_DESKTOP 20 607 #define MEMORY_OPTIMISATION_SUPERCOMPUTER 100 608 609 // This function initialise the module contexts that are needed according to the running configuration. 610 int init_modules(global_context_t * context); 611 612 // This function undo any memory allocation and close opened files in init_modules. 613 int destroy_modules(global_context_t * context); 614 615 // This function shows welcome messages and shows the current configuration to stdout. 616 // This function also checks the legality of the parameters in context->config 617 int print_configuration(global_context_t * context); 618 619 // This function opens all related files except the index. It however counts the piece number of the index. 620 int load_global_context(global_context_t * context); 621 622 // This function deallocates memory and closes files that were allowcated and opened in load_global_context. 623 int destroy_global_context(global_context_t * context); 624 625 626 // This function processes the read chunks (24M reads each), including three steps: 1, voting; 2, iteration one; 3, iteration two. 627 int read_chunk_circles(global_context_t * context); 628 629 630 // write the final results in each module. Note that the SAM file is writen in read_chunk_circles, after each read chunk is processed. 631 int write_final_results(global_context_t * context); 632 633 // this function is called by an interface to start the main process with a customized opt parser. 634 int core_main(int argc , char ** argv, int (parse_opts (int , char **, global_context_t * ))); 635 636 // decompress a cigar string to cigar 637 // cigar_len is the maximum length of decompressed cigar 638 // bincigar is the maximum length of compressed cigar 639 // it returns the length of decompressed cigar, or -1 if buffer is too short. 640 int bincigar2cigar(char * cigar, int cigar_len, char * bincigar, int bincigar_max_len, int read_len); 641 642 // compress a cigar string to bincigar 643 // bincigar_len is the maximum length of compressed cigar 644 // it returns the length of compressed cigar, or -1 if buffer is too short. 645 int cigar2bincigar(char *cigar, char *bincigar, int bincigar_len); 646 647 // print the logo of our program 648 void print_subread_logo(); 649 650 // print a line in the box 651 void print_in_box(int line_width, int is_boundary, int options, char * pattern,...); 652 653 // find the value index covering this read 654 // it returns NULL if no index is found. 655 gene_value_index_t * find_current_value_index(global_context_t * global_context, unsigned int pos, int len); 656 657 // generate the time string 658 void char_strftime(char * tbuf); 659 660 int term_strncpy(char * dst, char * src, int max_dst_memory); 661 662 int is_result_in_PE(mapping_result_t * aln); 663 664 void core_version_number(char * program); 665 666 mapping_result_t * _global_retrieve_alignment_ptr(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int best_read_id); 667 subjunc_result_t * _global_retrieve_subjunc_ptr(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int best_read_id); 668 unsigned short * _global_retrieve_big_margin_ptr(global_context_t * global_context, subread_read_number_t pair_number, subread_read_number_t is_second_read); 669 670 // This assumes the first part of Cigar has differet strandness to the main part of the cigar. 671 // Pos is the LAST WANTED BASE location before the first strand jump (split by 'b' or 'n'). 672 // The first base in the read actually has a larger coordinate than Pos. 673 unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar); 674 675 int chimeric_cigar_parts(global_context_t * global_context , unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * out_read_lens, char * read_name); 676 677 void warning_file_limit(); 678 void quick_sort(void * arr,int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r)); 679 void basic_sort(void * arr,int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r)); 680 681 // L_Minus_R should return -1, 0 or 1 when L<R, L==R or L>R. 682 // The result is from Small to Large. 683 void merge_sort(void * arr, int arr_size, int L_Minus_R (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge_SmallFirst(void * arr, int start, int items, int items2)); 684 685 void absoffset_to_posstr(global_context_t * global_context, unsigned int pos, char * res); 686 687 void test_PE_and_same_chro(global_context_t * global_context , unsigned int pos1, unsigned int pos2, int * is_PE_distance, int * is_same_chromosome , int rlen1, int rlen2); 688 689 int FIXLENstrcmp(char * fixed_len, char * rname); 690 691 int is_valid_digit(char * optarg, char * optname); 692 int is_valid_digit_range(char * optarg, char * optname, int min, int max_inc); 693 int is_valid_float(char * optarg, char * optname); 694 int exec_cmd(char * cmd, char * outstr, int out_limit); 695 int is_pos_in_annotated_exon_regions(global_context_t * global_context, unsigned int pos); 696 char * get_sam_chro_name_from_alias(HashTable * tab, char * anno_chro); 697 void subread_rebuild_cmd(int argc, char ** argv, global_context_t * global_context); 698 #endif 699