1 /*************************************************************** 2 3 The Subread software package is free software package: 4 you can redistribute it and/or modify it under the terms 5 of the GNU General Public License as published by the 6 Free Software Foundation, either version 3 of the License, 7 or (at your option) any later version. 8 9 Subread is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty 11 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 13 See the GNU General Public License for more details. 14 15 Authors: Drs Yang Liao and Wei Shi 16 17 ***************************************************************/ 18 19 20 #ifndef SUBREAD_CORE_INDEL_H_ 21 #define SUBREAD_CORE_INDEL_H_ 22 23 #include "subread.h" 24 #include "hashtable.h" 25 #include "core.h" 26 27 // chromosome events can be indels, junctions or fusions. 28 // if it is an insertion event, event_large_site = event_small_site+1. 29 30 //#define MAX_EVENT_ENTRIES_PER_SITE 5 31 //#define MAX_EVENT_ENTRIES_PER_SITE 12 32 // 33 #define EVENT_ENTRIES_INIT_SIZE (9) 34 #define MAX_EVENT_ENTRIES_PER_SITE 9 35 #define CHRO_EVENT_TYPE_REMOVED 0 36 #define CHRO_EVENT_TYPE_INDEL 8 37 #define CHRO_EVENT_TYPE_LONG_INDEL 16 38 #define CHRO_EVENT_TYPE_POTENTIAL_INDEL 32 39 #define CHRO_EVENT_TYPE_JUNCTION 64 40 #define CHRO_EVENT_TYPE_FUSION 128 41 #define CHRO_EVENT_TYPE_SNP 256 42 43 #define EVENT_SEARCH_BY_SMALL_SIDE 10 44 #define EVENT_SEARCH_BY_LARGE_SIDE 20 45 #define EVENT_SEARCH_BY_BOTH_SIDES 30 46 47 48 #define REASSEMBLY_WINDOW_LENGTH 350 49 50 //#define is_target_window_X(x) ((x + 1) * REASSEMBLY_WINDOW_LENGTH / 2 >= (10734463 % BASE_BLOCK_LENGTH) && (x- 1) * REASSEMBLY_WINDOW_LENGTH /2-1 <= (10734463%BASE_BLOCK_LENGTH) ) 51 #define is_target_window_X(x) 0 52 //#define MAXIMUM_EVENT_NUMBER 300000 53 54 55 typedef struct{ 56 int is_precisely_called; 57 unsigned int source_left_side; // the base BEFORE the translocated sequence. 58 unsigned int target_left_side; // tge base BEFORE the inserted translocated sequence. 59 unsigned int length; 60 61 unsigned int event_P_number; 62 unsigned int event_Q_number; 63 unsigned int event_R_number; 64 65 int is_inv; 66 unsigned int all_sup_P; 67 unsigned int max_sup_QR; 68 } translocation_result_t; 69 70 typedef struct{ 71 int is_precisely_called; 72 73 unsigned int event_Y_rough_small_abs; 74 unsigned int event_Z_rough_large_abs; 75 76 unsigned int small_side; // the base BEFORE the reversed sequence 77 unsigned int length; 78 79 unsigned int event_Y_number; // event_no in the event space. 80 unsigned int event_Z_number; 81 82 unsigned int all_sup_D; 83 unsigned int max_sup_E; 84 } inversion_result_t; 85 86 struct reassmebly_window_allele 87 { 88 char rebuilt_window[8000]; 89 float allele_quality; 90 int rebuilt_size; 91 }; 92 93 typedef struct{ 94 gehash_t * voting_indexes; 95 char * chro_name; 96 unsigned long long int * start_keys; 97 short * start_offsets; 98 99 unsigned int * read_no_counter; 100 unsigned int block_start_linear_pos; 101 HashTable * read_sequence_table; 102 HashTable * read_position_table; 103 HashTable * read_quality_table; 104 gene_vote_t * vote_list; 105 gene_vote_t * vote_list_rectify; 106 short * read_rectify_space; 107 108 char rebuilt_window[2500]; 109 int rebuilt_window_size; 110 111 112 struct reassmebly_window_allele * final_alleles; 113 114 unsigned int used_read_ids[2000]; 115 int used_read_number; 116 117 118 int search_cost; 119 int total_matched_bases; 120 int max_matched_bases; 121 unsigned int window_start_pos; 122 } reassembly_by_voting_block_context_t; 123 124 125 126 typedef struct{ 127 HashTable ** de_bruijn_graphs; 128 char * chro_name; 129 unsigned long long int * start_keys; 130 short * start_offsets; 131 132 unsigned int block_start_linear_pos; 133 } reassembly_block_context_t; 134 135 #define EVENT_BODY_LOCK_BUCKETS 14929 136 137 138 typedef struct{ 139 HashTable * event_entry_table; 140 unsigned int total_events; 141 unsigned int current_max_event_number; 142 chromosome_event_t * event_space_dynamic; 143 HashTable * local_reassembly_pileup_files; 144 subread_lock_t event_body_locks[EVENT_BODY_LOCK_BUCKETS]; 145 146 short ** dynamic_align_table; 147 char ** dynamic_align_table_mask; 148 } indel_context_t; 149 150 typedef struct{ 151 HashTable * event_entry_table; 152 unsigned int total_events; 153 unsigned int current_max_event_number; 154 chromosome_event_t * event_space_dynamic; 155 unsigned short * final_counted_reads_array; 156 unsigned short * final_reads_mismatches_array; 157 158 short ** dynamic_align_table; 159 char ** dynamic_align_table_mask; 160 } indel_thread_context_t; 161 162 int init_indel_tables(global_context_t * context); 163 int destroy_indel_module(global_context_t * context); 164 int init_indel_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task); 165 int sort_global_event_table(global_context_t * global_context); 166 int load_known_junctions(global_context_t * global_context); 167 int finalise_indel_and_junction_thread(global_context_t * global_context, thread_context_t * thread_contexts, int task); 168 int find_new_indels(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id); 169 int write_indel_final_results(global_context_t * context); 170 int search_event(global_context_t * global_context,HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, unsigned char event_type, chromosome_event_t ** return_buffer); 171 172 void set_alignment_result(global_context_t * global_context, int pair_number, int is_second_read, int best_read_id, unsigned int position, int votes, gene_vote_number_t * indel_record, short best_cover_start, short best_cover_end, int is_negative_strand, int is_PE, unsigned int minor_position, unsigned int minor_votes, unsigned int minor_coverage_start, unsigned int minor_coverage_end, unsigned int split_point, int inserted_bases, int is_strand_jumped, int is_GT_AG_donors, int used_subreads_in_vote, int noninformative_subreads_in_vote, int major_indel_offset, int minor_indel_offset, int main_hamming, int minor_hamming, int main_quality, int minor_quality); 173 174 void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int event_no); 175 void remove_neighbour(global_context_t * global_context); 176 int build_local_reassembly(global_context_t *global_context , thread_context_t *thread_context , int pair_number, char * read_name_1 , char * read_text_1 ,char * qual_text_1 , int read_len_1, int read_len_2, int is_second_read, int best_read_id, int is_paired_unmapped, mapping_result_t * current_res, mapping_result_t * mate_res); 177 int finalise_long_insertions(global_context_t * global_context); 178 179 // This function sets the global context with default values. 180 void init_global_context(global_context_t * context); 181 182 int write_local_reassembly(global_context_t *global_context, HashTable *pileup_fp_table, unsigned int anchor_pos, char * read_name , char * read_text ,char * qual_text , int read_len, int is_anchor_certain); 183 184 int finalise_long_insertions_by_hashtable(global_context_t * global_context); 185 186 void destroy_pileup_table(HashTable* local_reassembly_pileup_files); 187 188 chromosome_event_t * reallocate_event_space(global_context_t* global_context,thread_context_t* thread_context,int event_no); 189 190 int there_are_events_in_range(char * bitmap, unsigned int pos, int sec_len); 191 192 int anti_supporting_read_scan(global_context_t * global_context); 193 194 int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset, char * read_name); 195 196 void init_core_temp_path(global_context_t * context); 197 198 chromosome_event_t * local_add_indel_event(global_context_t * global_context, thread_context_t * thread_context, HashTable * event_table, char * read_text, unsigned int left_edge, int indels, int score_supporting_read_added, int is_ambiguous, int mismatched_bases,int * old_event_id); 199 200 void print_indel_table(global_context_t * global_context); 201 int sort_junction_entry_table(global_context_t * global_context); 202 void mark_event_bitmap(unsigned char * bitmap, unsigned int pos); 203 int check_event_bitmap(unsigned char * bitmap, unsigned int pos); 204 #endif 205