1 /*************************************************************** 2 3 The Subread software package is free software package: 4 you can redistribute it and/or modify it under the terms 5 of the GNU General Public License as published by the 6 Free Software Foundation, either version 3 of the License, 7 or (at your option) any later version. 8 9 Subread is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty 11 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 13 See the GNU General Public License for more details. 14 15 Authors: Drs Yang Liao and Wei Shi 16 17 ***************************************************************/ 18 19 20 #ifndef SUBREAD_CORE_JUNCTION_H_ 21 #define SUBREAD_CORE_JUNCTION_H_ 22 #include "subread.h" 23 #include "hashtable.h" 24 #include "core.h" 25 26 //#warning "======== REMOVE *2000 =============" 27 #define REALIGN_TOTAL_TRIES (50) 28 29 #define FUNKY_FRAGMENT_A 1 // same strand and gapped (0<gap<tra_len) 30 #define FUNKY_FRAGMENT_BC 2 // very far far away (>=tra_len) or chimeric. 31 #define FUNKY_FRAGMENT_DE 4 // tlen < tra_len and strand jumpped 32 #define NOT_FUNKY 0 // normal fragment 33 #define FUNKY_COLOCATION_TOLERANCE 500 34 #define BREAK_POINT_MAXIMUM_TOLERANCE 80 35 #define S12_LIST_CAPACITY 100 36 37 38 // as the python sub-string rule: start is the first wanted base and end is the first unwanted base. 39 typedef struct{ 40 short read_pos_start; 41 short read_pos_end; 42 unsigned int abs_offset_for_start; 43 // jumped from the "main piece" view 44 char is_strand_jumped; 45 char is_connected_to_large_side; 46 47 chromosome_event_t * event_after_section; 48 } perfect_section_in_read_t; 49 50 typedef struct{ 51 // result context 52 53 //unsigned char back_search_confirmed_sections; 54 //unsigned char front_search_confirmed_sections; 55 // NOTE THAT EVERYTHING IN back_search_junctions IS BACKWARD. 56 // 1, ORDER OF EXONS ARE BACKWARD 57 // 2, "ABS_OFFSET_FOR_START" ARE ACTUALLY AT END OF SECTIONS 58 //perfect_section_in_read_t back_search_junctions[MAX_EVENTS_IN_READ]; 59 //perfect_section_in_read_t front_search_junctions[MAX_EVENTS_IN_READ]; 60 61 // middle result context 62 unsigned char tmp_search_sections; 63 perfect_section_in_read_t tmp_search_junctions [MAX_EVENTS_IN_READ]; 64 char current_is_strand_jumped; 65 66 perfect_section_in_read_t result_back_junctions [MAX_ALIGNMENT_PER_ANCHOR][MAX_EVENTS_IN_READ]; 67 perfect_section_in_read_t result_front_junctions [MAX_ALIGNMENT_PER_ANCHOR][MAX_EVENTS_IN_READ]; 68 int result_back_junction_numbers[MAX_ALIGNMENT_PER_ANCHOR]; 69 int result_front_junction_numbers[MAX_ALIGNMENT_PER_ANCHOR]; 70 int all_back_alignments; 71 int all_front_alignments; 72 int known_junctions; 73 unsigned int total_tries; 74 75 // unsigned int tmp_jump_length; 76 // unsigned int best_jump_length; 77 78 // for the BEST record 79 // they are not restored 80 int best_matching_bases; 81 int best_second_match_diff; 82 int second_best_matching_bases; 83 int best_indel_penalty; 84 int tmp_total_matched_bases; 85 int tmp_indel_penalty; 86 int is_currently_tie; 87 int best_is_complex; 88 int best_support_as_simple; 89 int best_min_unsupport_as_simple; 90 int best_min_support_as_complex; 91 int best_is_pure_donor_found_explain; 92 93 // for the "current" stack (they are restored after poping from the stack) 94 int tmp_support_as_simple; 95 int tmp_min_unsupport; 96 int tmp_min_support_as_complex; 97 int tmp_is_pure_donor_found_explain; 98 99 // input context 100 int full_read_len; 101 int is_fully_covered; 102 char * full_read_text; 103 char * full_qual_text; 104 char * read_name; 105 int is_confirmed_section_negative_strand; 106 subread_read_number_t pair_number; 107 int is_second_read; 108 int best_read_id; 109 }explain_context_t; 110 111 typedef struct{ 112 } junction_context_t; 113 114 typedef struct 115 { 116 int read_len_1; 117 int read_len_2; 118 char * read_text_1; 119 char * read_text_2; 120 char is_negative_strand; 121 } new_junction_context_t; 122 123 void new_explain_try_replace(global_context_t* global_context, thread_context_t * thread_context, explain_context_t * explain_context, int remainder_len, int search_to_back); 124 125 int init_junction_tables(global_context_t * context); 126 int destroy_junction_tables(global_context_t * context); 127 int process_voting_junction(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, int read_len_1, int read_len_2, int is_negative_strand, gene_vote_number_t v1_all_subreads, gene_vote_number_t v2_all_subreads); 128 int init_junction_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task); 129 int finalise_junction_thread(global_context_t * global_context, thread_context_t * thread_context, int task); 130 unsigned int explain_read(global_context_t * global_context, thread_context_t * thread_context, realignment_result_t * realigns, subread_read_number_t pair_number,int read_len, char * read_name , char *read_text, char *qual, int is_second_read, int best_read_id, int is_negative_strand); 131 int write_junction_final_results(global_context_t * global_context); 132 133 // back_search_read_tail IS THE EXACT VERY SURE POSITION IN THE READ 134 // back_search_tail_position IS THE EXACT VERY SURE POSITION ON CHROMOSOME CORRESPONDING TO back_search_read_tail 135 int do_explain_back_search(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text, char * read_qual, int back_search_read_tail, unsigned int back_search_tail_position); 136 int do_explain_front_search(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text, char * read_qual, int front_search_read_head, unsigned int front_search_head_position); 137 138 unsigned int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, realignment_result_t * realigns); 139 140 void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements, int do_not_jump); 141 142 void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements, int do_not_jump); 143 144 void find_new_junctions(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id); 145 146 int donor_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int left_indels, int right_indels, int normally_arranged, int guess_start, int guess_end, char * read_text, int read_len, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * inserted_bases, int * small_side_inc_coor, int * large_side_inc_coor, char *read_name); 147 148 int donor_jumped_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int guess_start, int guess_end, char * read_text, int read_len, int is_left_half_negative, int is_right_half_negative, int is_left_part_on_left_as_reversed, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * small_side_inc_coor, int * large_side_inc_coor); 149 150 int write_fusion_final_results(global_context_t * global_context); 151 152 153 int is_ambiguous_voting(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int max_vote, int max_start,int max_end, int read_len, int is_negative); 154 void core_search_short_exons(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qualityb0, int rl, unsigned int P1_Pos, unsigned int P2_Pos, short read_coverage_start, short read_coverage_end); 155 156 157 void core_fragile_junction_voting(global_context_t * global_context, thread_context_t * thread_context, char * rname, char * read, char * qual, unsigned int full_rl, int negative_strand, int color_space, unsigned int low_border, unsigned int high_border, gene_vote_t *vote_p1); 158 159 int is_funky_fragment(global_context_t * global_context, char * rname1, char * chr1, unsigned int pos1, int rlen1, int is_1_negative, char * cigar1, char * seq1, char * rname2, char * chr2, unsigned int pos2, int rlen2, int is_2_negative, char * cigar2, char * seq2, int tlen_removed_intron); 160 161 void finalise_structural_variances(global_context_t * global_context); 162 163 void debug_show_event(global_context_t* global_context, chromosome_event_t * event); 164 void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, int * small_pos, unsigned int * small_abs, char ** large_chro, int * large_pos, unsigned int * large_abs); 165 int get_offset_maximum_chro_pos(global_context_t * global_context, thread_context_t * thread_context, unsigned int linear); 166 #endif 167