1 /***************************************************************
2 
3    The Subread software package is free software package:
4    you can redistribute it and/or modify it under the terms
5    of the GNU General Public License as published by the
6    Free Software Foundation, either version 3 of the License,
7    or (at your option) any later version.
8 
9    Subread is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty
11    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13    See the GNU General Public License for more details.
14 
15    Authors: Drs Yang Liao and Wei Shi
16 
17   ***************************************************************/
18 
19 
20 #ifndef SUBREAD_CORE_INDEL_H_
21 #define SUBREAD_CORE_INDEL_H_
22 
23 #include "subread.h"
24 #include "hashtable.h"
25 #include "core.h"
26 
27 // chromosome events can be indels, junctions or fusions.
28 // if it is an insertion event, event_large_site = event_small_site+1.
29 
30 //#define MAX_EVENT_ENTRIES_PER_SITE 5
31 //#define MAX_EVENT_ENTRIES_PER_SITE 12
32 //
33 #define EVENT_ENTRIES_INIT_SIZE (9)
34 #define MAX_EVENT_ENTRIES_PER_SITE 9
35 #define CHRO_EVENT_TYPE_REMOVED 0
36 #define CHRO_EVENT_TYPE_INDEL 8
37 #define CHRO_EVENT_TYPE_LONG_INDEL 16
38 #define CHRO_EVENT_TYPE_POTENTIAL_INDEL 32
39 #define CHRO_EVENT_TYPE_JUNCTION 64
40 #define CHRO_EVENT_TYPE_FUSION 128
41 #define CHRO_EVENT_TYPE_SNP 256
42 
43 #define EVENT_SEARCH_BY_SMALL_SIDE 10
44 #define EVENT_SEARCH_BY_LARGE_SIDE 20
45 #define EVENT_SEARCH_BY_BOTH_SIDES 30
46 
47 
48 #define REASSEMBLY_WINDOW_LENGTH 350
49 
50 //#define is_target_window_X(x) ((x + 1) * REASSEMBLY_WINDOW_LENGTH / 2 >= (10734463 % BASE_BLOCK_LENGTH) && (x- 1) * REASSEMBLY_WINDOW_LENGTH /2-1 <= (10734463%BASE_BLOCK_LENGTH) )
51 #define is_target_window_X(x) 0
52 //#define MAXIMUM_EVENT_NUMBER 300000
53 
54 
55 typedef struct{
56 	int is_precisely_called;
57 	unsigned int source_left_side;	// the base BEFORE the translocated sequence.
58 	unsigned int target_left_side;  // tge base BEFORE the inserted translocated sequence.
59 	unsigned int length;
60 
61 	unsigned int event_P_number;
62 	unsigned int event_Q_number;
63 	unsigned int event_R_number;
64 
65 	int is_inv;
66 	unsigned int all_sup_P;
67 	unsigned int max_sup_QR;
68 } translocation_result_t;
69 
70 typedef struct{
71 	int is_precisely_called;
72 
73 	unsigned int event_Y_rough_small_abs;
74 	unsigned int event_Z_rough_large_abs;
75 
76 	unsigned int small_side;	// the base BEFORE the reversed sequence
77 	unsigned int length;
78 
79 	unsigned int event_Y_number;	// event_no in the event space.
80 	unsigned int event_Z_number;
81 
82 	unsigned int all_sup_D;
83 	unsigned int max_sup_E;
84 } inversion_result_t;
85 
86 struct reassmebly_window_allele
87 {
88 	char rebuilt_window[8000];
89 	float allele_quality;
90 	int rebuilt_size;
91 };
92 
93 typedef struct{
94 	gehash_t * voting_indexes;
95 	char * chro_name;
96 	unsigned long long int * start_keys;
97 	short * start_offsets;
98 
99 	unsigned int * read_no_counter;
100 	unsigned int block_start_linear_pos;
101 	HashTable * read_sequence_table;
102 	HashTable * read_position_table;
103 	HashTable * read_quality_table;
104 	gene_vote_t * vote_list;
105 	gene_vote_t * vote_list_rectify;
106 	short * read_rectify_space;
107 
108 	char rebuilt_window[2500];
109 	int rebuilt_window_size;
110 
111 
112 	struct reassmebly_window_allele * final_alleles;
113 
114 	unsigned int used_read_ids[2000];
115 	int used_read_number;
116 
117 
118 	int search_cost;
119 	int total_matched_bases;
120 	int max_matched_bases;
121 	unsigned int window_start_pos;
122 } reassembly_by_voting_block_context_t;
123 
124 
125 
126 typedef struct{
127 	HashTable ** de_bruijn_graphs;
128 	char * chro_name;
129 	unsigned long long int * start_keys;
130 	short * start_offsets;
131 
132 	unsigned int block_start_linear_pos;
133 } reassembly_block_context_t;
134 
135 #define EVENT_BODY_LOCK_BUCKETS 14929
136 
137 
138 typedef struct{
139 	HashTable * event_entry_table;
140 	unsigned int total_events;
141 	unsigned int current_max_event_number;
142 	chromosome_event_t * event_space_dynamic;
143 	HashTable * local_reassembly_pileup_files;
144 	subread_lock_t event_body_locks[EVENT_BODY_LOCK_BUCKETS];
145 
146 	short ** dynamic_align_table;
147 	char ** dynamic_align_table_mask;
148 } indel_context_t;
149 
150 typedef struct{
151 	HashTable * event_entry_table;
152 	unsigned int total_events;
153 	unsigned int current_max_event_number;
154 	chromosome_event_t * event_space_dynamic;
155 	unsigned short * final_counted_reads_array;
156 	unsigned short * final_reads_mismatches_array;
157 
158 	short ** dynamic_align_table;
159 	char ** dynamic_align_table_mask;
160 } indel_thread_context_t;
161 
162 int init_indel_tables(global_context_t * context);
163 int destroy_indel_module(global_context_t * context);
164 int init_indel_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task);
165 int sort_global_event_table(global_context_t * global_context);
166 int load_known_junctions(global_context_t * global_context);
167 int finalise_indel_and_junction_thread(global_context_t * global_context, thread_context_t * thread_contexts, int task);
168 int find_new_indels(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id);
169 int write_indel_final_results(global_context_t * context);
170 int search_event(global_context_t * global_context,HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, unsigned char event_type, chromosome_event_t ** return_buffer);
171 
172 void set_alignment_result(global_context_t * global_context, int pair_number, int is_second_read, int best_read_id, unsigned int position, int votes, gene_vote_number_t * indel_record, short best_cover_start, short best_cover_end, int is_negative_strand, int is_PE, unsigned int minor_position, unsigned int minor_votes, unsigned int minor_coverage_start, unsigned int minor_coverage_end, unsigned int split_point, int inserted_bases, int is_strand_jumped, int is_GT_AG_donors, int used_subreads_in_vote, int noninformative_subreads_in_vote, int major_indel_offset, int minor_indel_offset, int main_hamming, int minor_hamming, int main_quality, int minor_quality);
173 
174 void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int event_no);
175 void remove_neighbour(global_context_t * global_context);
176 int build_local_reassembly(global_context_t *global_context , thread_context_t *thread_context , int pair_number, char * read_name_1 , char * read_text_1 ,char * qual_text_1 , int read_len_1, int read_len_2, int is_second_read, int best_read_id, int is_paired_unmapped, mapping_result_t * current_res, mapping_result_t * mate_res);
177 int finalise_long_insertions(global_context_t * global_context);
178 
179 // This function sets the global context with default values.
180 void init_global_context(global_context_t * context);
181 
182 int write_local_reassembly(global_context_t *global_context, HashTable *pileup_fp_table, unsigned int anchor_pos, char * read_name , char * read_text ,char * qual_text , int read_len, int is_anchor_certain);
183 
184 int finalise_long_insertions_by_hashtable(global_context_t * global_context);
185 
186 void destroy_pileup_table(HashTable* local_reassembly_pileup_files);
187 
188 chromosome_event_t * reallocate_event_space(global_context_t* global_context,thread_context_t* thread_context,int event_no);
189 
190 int there_are_events_in_range(char * bitmap, unsigned int pos, int sec_len);
191 
192 int anti_supporting_read_scan(global_context_t * global_context);
193 
194 int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset, char * read_name);
195 
196 void init_core_temp_path(global_context_t * context);
197 
198 chromosome_event_t * local_add_indel_event(global_context_t * global_context, thread_context_t * thread_context, HashTable * event_table, char * read_text, unsigned int left_edge, int indels, int score_supporting_read_added, int is_ambiguous, int mismatched_bases,int * old_event_id);
199 
200 void print_indel_table(global_context_t * global_context);
201 int sort_junction_entry_table(global_context_t * global_context);
202 void mark_event_bitmap(unsigned char * bitmap, unsigned int pos);
203 int check_event_bitmap(unsigned char * bitmap, unsigned int pos);
204 #endif
205