1 /***************************************************************
2 
3    The Subread software package is free software package:
4    you can redistribute it and/or modify it under the terms
5    of the GNU General Public License as published by the
6    Free Software Foundation, either version 3 of the License,
7    or (at your option) any later version.
8 
9    Subread is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty
11    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13    See the GNU General Public License for more details.
14 
15    Authors: Drs Yang Liao and Wei Shi
16 
17   ***************************************************************/
18 
19 
20 #ifndef SUBREAD_CORE_JUNCTION_H_
21 #define SUBREAD_CORE_JUNCTION_H_
22 #include "subread.h"
23 #include "hashtable.h"
24 #include "core.h"
25 
26 //#warning "======== REMOVE *2000 ============="
// Limit on realignment attempts.
// NOTE(review): meaning inferred from the macro name; the consumer is outside this header.
#define REALIGN_TOTAL_TRIES (50)

// Fragment classification codes. NOT_FUNKY is 0; the three FUNKY_FRAGMENT_*
// codes are the distinct single-bit values 1, 2 and 4.
#define NOT_FUNKY		0	// normal fragment
#define FUNKY_FRAGMENT_A	1	// same strand and gapped (0 < gap < tra_len)
#define FUNKY_FRAGMENT_BC	2	// very far apart (>= tra_len) or chimeric
#define FUNKY_FRAGMENT_DE	4	// tlen < tra_len and strand jumped

// Tolerances and capacities; units are not stated in this header.
#define FUNKY_COLOCATION_TOLERANCE	500
#define BREAK_POINT_MAXIMUM_TOLERANCE	80
#define S12_LIST_CAPACITY		100
36 
37 
38 // as the python sub-string rule: start is the first wanted base and end is the first unwanted base.
39 typedef struct{
40 	short read_pos_start;
41 	short read_pos_end;
42 	unsigned int abs_offset_for_start;
43 	// jumped from the "main piece" view
44 	char is_strand_jumped;
45 	char is_connected_to_large_side;
46 
47 	chromosome_event_t * event_after_section;
48 } perfect_section_in_read_t;
49 
50 typedef struct{
51 	// result context
52 
53 	//unsigned char  back_search_confirmed_sections;
54 	//unsigned char front_search_confirmed_sections;
55 	// NOTE THAT EVERYTHING IN back_search_junctions IS BACKWARD.
56 	// 1, ORDER OF EXONS ARE BACKWARD
57 	// 2, "ABS_OFFSET_FOR_START" ARE ACTUALLY AT END OF SECTIONS
58 	//perfect_section_in_read_t  back_search_junctions[MAX_EVENTS_IN_READ];
59 	//perfect_section_in_read_t front_search_junctions[MAX_EVENTS_IN_READ];
60 
61 	// middle result context
62 	unsigned char tmp_search_sections;
63 	perfect_section_in_read_t  tmp_search_junctions [MAX_EVENTS_IN_READ];
64 	char current_is_strand_jumped;
65 
66 	perfect_section_in_read_t result_back_junctions [MAX_ALIGNMENT_PER_ANCHOR][MAX_EVENTS_IN_READ];
67 	perfect_section_in_read_t result_front_junctions [MAX_ALIGNMENT_PER_ANCHOR][MAX_EVENTS_IN_READ];
68 	int result_back_junction_numbers[MAX_ALIGNMENT_PER_ANCHOR];
69 	int result_front_junction_numbers[MAX_ALIGNMENT_PER_ANCHOR];
70 	int all_back_alignments;
71 	int all_front_alignments;
72 	int known_junctions;
73 	unsigned int total_tries;
74 
75 //	unsigned int tmp_jump_length;
76 //	unsigned int best_jump_length;
77 
78 	// for the BEST record
79 	// they are not restored
80 	int best_matching_bases;
81 	int best_second_match_diff;
82 	int second_best_matching_bases;
83 	int best_indel_penalty;
84 	int tmp_total_matched_bases;
85 	int tmp_indel_penalty;
86 	int is_currently_tie;
87 	int best_is_complex;
88 	int best_support_as_simple;
89 	int best_min_unsupport_as_simple;
90 	int best_min_support_as_complex;
91 	int best_is_pure_donor_found_explain;
92 
93 	// for the "current" stack (they are restored after poping from the stack)
94 	int tmp_support_as_simple;
95 	int tmp_min_unsupport;
96 	int tmp_min_support_as_complex;
97 	int tmp_is_pure_donor_found_explain;
98 
99 	// input context
100 	int full_read_len;
101 	int is_fully_covered;
102 	char * full_read_text;
103 	char * full_qual_text;
104 	char * read_name;
105 	int is_confirmed_section_negative_strand;
106 	subread_read_number_t pair_number;
107 	int is_second_read;
108 	int best_read_id;
109 }explain_context_t;
110 
// Placeholder context type that carries no data.
// ISO C requires a struct to declare at least one member; an empty struct is
// only accepted as a compiler extension, and its size then differs between
// C (0 with GCC) and C++ (1). A single unused byte keeps the type portable.
typedef struct {
	char unused_placeholder;	// present only so the struct is non-empty
} junction_context_t;
113 
// Lengths and text buffers for two reads, plus a strand flag.
// NOTE(review): the struct only stores the pointers; who owns the text
// buffers is not visible in this header.
typedef struct {
	int read_len_1;			// length paired with read_text_1 (by name)
	int read_len_2;			// length paired with read_text_2 (by name)
	char *read_text_1;
	char *read_text_2;
	char is_negative_strand;
} new_junction_context_t;
122 
123 void new_explain_try_replace(global_context_t* global_context, thread_context_t * thread_context, explain_context_t * explain_context, int remainder_len, int search_to_back);
124 
125 int init_junction_tables(global_context_t * context);
126 int destroy_junction_tables(global_context_t * context);
127 int process_voting_junction(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2,  int read_len_1, int read_len_2, int is_negative_strand,  gene_vote_number_t v1_all_subreads, gene_vote_number_t v2_all_subreads);
128 int init_junction_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task);
129 int finalise_junction_thread(global_context_t * global_context, thread_context_t * thread_context, int task);
130 unsigned int explain_read(global_context_t * global_context, thread_context_t * thread_context, realignment_result_t * realigns, subread_read_number_t pair_number,int read_len, char * read_name , char *read_text, char *qual, int is_second_read, int best_read_id, int is_negative_strand);
131 int write_junction_final_results(global_context_t * global_context);
132 
133 // back_search_read_tail IS THE EXACT VERY SURE POSITION IN THE READ
134 // back_search_tail_position IS THE EXACT VERY SURE POSITION ON CHROMOSOME CORRESPONDING TO back_search_read_tail
135 int do_explain_back_search(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text, char * read_qual, int back_search_read_tail, unsigned int back_search_tail_position);
136 int do_explain_front_search(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text, char * read_qual, int front_search_read_head, unsigned int front_search_head_position);
137 
138 unsigned int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, realignment_result_t * realigns);
139 
140 void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements, int do_not_jump);
141 
142 void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements, int do_not_jump);
143 
144 void find_new_junctions(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id);
145 
146 int donor_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int left_indels, int right_indels, int normally_arranged, int guess_start, int guess_end,  char * read_text, int read_len, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * inserted_bases, int * small_side_inc_coor, int * large_side_inc_coor, char *read_name);
147 
148 int donor_jumped_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int guess_start, int guess_end,  char * read_text, int read_len, int is_left_half_negative, int is_right_half_negative, int is_left_part_on_left_as_reversed, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * small_side_inc_coor, int * large_side_inc_coor);
149 
150 int write_fusion_final_results(global_context_t * global_context);
151 
152 
153 int is_ambiguous_voting(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int max_vote, int max_start,int max_end, int read_len, int is_negative);
154 void core_search_short_exons(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qualityb0, int rl, unsigned int P1_Pos, unsigned int P2_Pos, short read_coverage_start, short read_coverage_end);
155 
156 
157 void core_fragile_junction_voting(global_context_t * global_context, thread_context_t * thread_context, char * rname, char * read, char * qual, unsigned int full_rl, int negative_strand, int color_space, unsigned int low_border, unsigned int high_border, gene_vote_t *vote_p1);
158 
159 int is_funky_fragment(global_context_t * global_context, char * rname1, char * chr1, unsigned int pos1, int rlen1, int is_1_negative, char * cigar1, char * seq1, char * rname2, char * chr2, unsigned int pos2, int rlen2, int is_2_negative, char * cigar2, char * seq2, int tlen_removed_intron);
160 
161 void finalise_structural_variances(global_context_t * global_context);
162 
163 void debug_show_event(global_context_t* global_context, chromosome_event_t * event);
164 void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, int * small_pos, unsigned int * small_abs, char ** large_chro,  int * large_pos, unsigned int * large_abs);
165 int get_offset_maximum_chro_pos(global_context_t * global_context, thread_context_t * thread_context, unsigned int linear);
166 #endif
167