1 /***************************************************************
2 
3    The Subread software package is free software package:
4    you can redistribute it and/or modify it under the terms
5    of the GNU General Public License as published by the
6    Free Software Foundation, either version 3 of the License,
7    or (at your option) any later version.
8 
9    Subread is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty
11    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13    See the GNU General Public License for more details.
14 
15    Authors: Drs Yang Liao and Wei Shi
16 
17   ***************************************************************/
18 
19 /***************************************************************
20 
21    The ASCII Art used in this file was generated using FIGlet and
22    the big font, contributed by Glenn Chappell to FIGlet.
23 
24   ***************************************************************/
25 
26 #include <stdio.h>
27 #include <assert.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdarg.h>
31 #include <time.h>
32 #include <sys/time.h>
33 #include <getopt.h>
34 #include <sys/types.h>
35 #ifndef __MINGW32__
36 #include <sys/resource.h>
37 #endif
38 #include <unistd.h>
39 #include <sys/stat.h>
40 #include <locale.h>
41 #include <ctype.h>
42 
43 
44 
45 #include "subread.h"
46 #include "sublog.h"
47 #include "core.h"
48 #include "input-files.h"
49 #include "input-blc.h"
50 #include "sorted-hashtable.h"
51 #include "HelperFunctions.h"
52 
53 #include "core-bigtable.h"
54 #include "core-indel.h"
55 #include "core-junction.h"
56 
57 static struct option long_options[] =
58 {
59 	{"memory-optimisation",  required_argument, 0, 0},
60 	{0, 0, 0, 0}
61 };
62 
63 int (*progress_report_callback)(int, int, int);
64 
core_version_number(char * program)65 void core_version_number(char * program)
66 {
67 	SUBREADprintf("\n%s v%s\n\n" , program, SUBREAD_VERSION);
68 }
69 
is_valid_float(char * optarg,char * optname)70 int is_valid_float(char * optarg, char * optname){
71 	int xk1=0;
72 	while(1){
73 		int nch = optarg[xk1++];
74 		if(!nch){
75 			if(xk1 == 1){
76 				SUBREADprintf("Value for argumant %s-%s is missing.\n", optname[1]?"-":"", optname);
77 				return 0;
78 			}
79 			break;
80 		}
81 		if(( nch!='-' || xk1 > 1 ) && nch != '.' && !isdigit(nch)){
82 			SUBREADprintf("Value for argumant %s-%s is not a valid number: '%s'\n", optname[1]?"-":"", optname, optarg);
83 			return 0;
84 		}
85 	}
86 	return 1;
87 }
88 
is_valid_digit(char * optarg,char * optname)89 int is_valid_digit(char * optarg, char * optname){
90 	int xk1=0;
91 	while(1){
92 		int nch = optarg[xk1++];
93 		if(!nch){
94 			if(xk1 == 1){
95 				SUBREADprintf("Value for argumant %s-%s is missing.\n", optname[1]?"-":"", optname);
96 				return 0;
97 			}
98 			break;
99 		}
100 		if(( nch!='-' || xk1 > 1 ) && !isdigit(nch)){
101 			SUBREADprintf("Value for argumant %s-%s is not a valid number: '%s'\n", optname[1]?"-":"", optname, optarg);
102 			return 0;
103 		}
104 	}
105 	return 1;
106 }
107 
is_valid_digit_range(char * optarg,char * optname,int min,int max_inc)108 int is_valid_digit_range(char * optarg, char * optname, int min, int max_inc){
109 	if(!is_valid_digit( optarg, optname )) return 0;
110 
111 	int tv = atoi(optarg);
112 	if(tv < min || tv > max_inc){
113 		SUBREADprintf("Value for argumant %s-%s is out of range: %d to %d\n", optname[1]?"-":"", optname, min, max_inc);
114 		return 0;
115 	}
116 	return 1;
117 }
118 
warning_file_limit()119 void warning_file_limit()
120 {
121 	#ifndef __MINGW32__
122 	struct rlimit limit_st;
123 	getrlimit(RLIMIT_NOFILE, & limit_st);
124 
125 	{
126 		if(min(limit_st.rlim_cur , limit_st.rlim_max) < 400)
127 		{
128 			print_in_box(80,0,0,"WARNING This operation needs to open many files at same time,");
129 			print_in_box(80,0,0,"	but your OS only allows to open %d files.", min(limit_st.rlim_cur , limit_st.rlim_max));
130 			print_in_box(80,0,0,"	You can use command 'ulimit -n 500' to raise this limit");
131 			print_in_box(80,0,0,"	to 500, or the program may become very slow.");
132 			print_in_box(80,0,0,"");
133 		}
134 	}
135 	#endif
136 }
137 
exec_cmd(char * cmd,char * outstr,int out_limit)138 int exec_cmd(char * cmd, char * outstr, int out_limit){
139 	FILE* pipe = popen(cmd, "r");
140 	if (!pipe) return -1;
141 
142 	outstr[0]=0;
143 	char * linebuf = malloc(3000);
144 	int out_ptr = 0;
145 
146 	while (!feof(pipe)) {
147 		if (fgets(linebuf, 128, pipe) != NULL){
148 			if(out_ptr + strlen(linebuf) < out_limit){
149 				strcat(outstr+out_ptr, linebuf);
150 				out_ptr += strlen(linebuf);
151 			}
152 		}
153 	}
154 	pclose(pipe);
155 	free(linebuf);
156 	return out_ptr;
157 }
158 
print_in_box(int line_width,int is_boundary,int options,char * pattern,...)159 void print_in_box(int line_width, int is_boundary, int options, char * pattern,...)
160 {
161 	int put_color_for_colon, is_center, is_wrapped;
162 	va_list args;
163 	va_start(args , pattern);
164 	char * content, *out_line_buff;
165 
166 	put_color_for_colon = (options & PRINT_BOX_NOCOLOR_FOR_COLON)?0:1;
167 	is_center = (options & PRINT_BOX_CENTER)?1:0;
168 	is_wrapped = (options & PRINT_BOX_WRAPPED)?1:0;
169 
170 	content= malloc(1200);
171 	int content_len = vsprintf(content, pattern, args);
172 	out_line_buff= malloc(1200);
173 	out_line_buff[0]=0;;
174 
175 	if(is_wrapped){
176 		int seg_i = 0;
177 		for(seg_i=0; seg_i < content_len; seg_i += line_width-7){
178 			strcpy(out_line_buff, content + seg_i);
179 			out_line_buff[line_width-7] = 0;
180 
181 			print_in_box(line_width, is_boundary, options & (~PRINT_BOX_WRAPPED), out_line_buff);
182 		}
183 	}else{
184 		int is_R_code,x1,content_len = strlen(content), state, txt_len, is_cut = 0, real_lenwidth;
185 
186 		is_R_code = 1;
187 		#ifdef MAKE_STANDALONE
188 			is_R_code = 0;
189 		#endif
190 
191 		if(content_len>0&&content[content_len-1]=='\r'){
192 			content_len--;
193 			content[content_len] = 0;
194 		}
195 
196 		if(content_len>0&&content[content_len-1]=='\n'){
197 			content_len--;
198 			content[content_len] = 0;
199 		}
200 
201 		state = 0;
202 		txt_len = 0;
203 		real_lenwidth = line_width;
204 		for(x1 = 0; content [x1]; x1++)
205 		{
206 			char nch = content [x1];
207 			if(nch == CHAR_ESC)
208 				state = 1;
209 			if(state){
210 				real_lenwidth --;
211 			}else{
212 				txt_len++;
213 
214 				if(txt_len == 80 - 6)
215 				{
216 					is_cut = 1;
217 				}
218 			}
219 
220 			if(nch == 'm' && state)
221 				state = 0;
222 		}
223 
224 		if(is_cut)
225 		{
226 			state = 0;
227 			txt_len = 0;
228 			for(x1 = 0; content [x1]; x1++)
229 			{
230 				char nch = content [x1];
231 				if(nch == CHAR_ESC)
232 					state = 1;
233 				if(!state){
234 					txt_len++;
235 					if(txt_len == 80 - 9)
236 					{
237 						strcpy(content+x1, "\x1b[0m ...");
238 						content_len = line_width - 4;
239 						content_len = 80 - 4;
240 						line_width = 80;
241 						break;
242 					}
243 				}
244 				if(nch == 'm' && state)
245 					state = 0;
246 			}
247 		}
248 
249 		if(content_len==0 && is_boundary)
250 		{
251 			strcat(out_line_buff,is_boundary==1?"//":"\\\\");
252 			for(x1=0;x1<line_width-4;x1++)
253 				strcat(out_line_buff,"=");
254 			strcat(out_line_buff,is_boundary==1?"\\\\":"//");
255 			sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "%s", out_line_buff);
256 
257 			free(content);
258 			free(out_line_buff);
259 			return;
260 		}
261 		else if(is_boundary)
262 		{
263 			int left_stars = (line_width - content_len)/2 - 1;
264 			int right_stars = line_width - content_len - 2 - left_stars;
265 			strcat(out_line_buff,is_boundary==1?"//":"\\\\");
266 			for(x1=0;x1<left_stars-2;x1++) strcat(out_line_buff,"=");
267 			sprintf(out_line_buff+strlen(out_line_buff),"%c[36m", CHAR_ESC);
268 			sprintf(out_line_buff+strlen(out_line_buff)," %s ", content);
269 			sprintf(out_line_buff+strlen(out_line_buff),"%c[0m", CHAR_ESC);
270 			for(x1=0;x1<right_stars-2;x1++) strcat(out_line_buff,"=");
271 			strcat(out_line_buff,is_boundary==1?"\\\\":"//");
272 			sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "%s", out_line_buff);
273 
274 			free(content);
275 			free(out_line_buff);
276 			return;
277 		}
278 
279 		int right_spaces, left_spaces;
280 		if(is_center)
281 			left_spaces = (line_width - content_len)/2-2;
282 		else
283 			left_spaces = 1;
284 
285 		right_spaces = line_width - 4 - content_len- left_spaces;
286 
287 		char spaces[81];
288 		memset(spaces , ' ', 80);
289 		spaces[0]='|';
290 		spaces[1]='|';
291 		spaces[80]=0;
292 
293 		//sublog_fwrite(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO,"||");
294 
295 		//for(x1=0;x1<left_spaces;x1++) sublog_fwrite(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO," ");
296 
297 		spaces[left_spaces+2] = 0;
298 		strcat(out_line_buff,spaces);
299 
300 		if(is_R_code)
301 		{
302 			strcat(out_line_buff,content);
303 		}
304 		else
305 		{
306 			int col1w=-1;
307 			for(x1=0; content[x1]; x1++)
308 			{
309 				if(content[x1]==':')
310 				{
311 					col1w=x1;
312 					break;
313 				}
314 			}
315 			if(col1w>0 && col1w < content_len-1 && put_color_for_colon)
316 			{
317 				content[col1w+1]=0;
318 				strcat(out_line_buff,content);
319 				strcat(out_line_buff," ");
320 				sprintf(out_line_buff+strlen(out_line_buff),"%c[36m", CHAR_ESC);
321 				strcat(out_line_buff,content+col1w+2);
322 				sprintf(out_line_buff+strlen(out_line_buff),"%c[0m", CHAR_ESC);
323 			}
324 			else
325 				strcat(out_line_buff,content);
326 		}
327 	//	for(x1=0;x1<right_spaces - 1;x1++) sublog_fwrite(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO," ");
328 
329 		memset(spaces , ' ', 80);
330 		spaces[79]='|';
331 		spaces[78]='|';
332 
333 		right_spaces = max(1,right_spaces);
334 		sprintf(out_line_buff+strlen(out_line_buff)," %c[0m%s", CHAR_ESC , spaces + (78 - right_spaces + 1));
335 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, out_line_buff);
336 	}
337 	free(out_line_buff);
338 	free(content);
339 }
340 
341 
342 
343 
show_summary(global_context_t * global_context)344 int show_summary(global_context_t * global_context)
345 {
346 
347 #ifndef MAKE_STANDALONE
348 	// SAVE summary into files. This is only for Rsubread currently.
349 	char sumname[MAX_FILE_NAME_LENGTH+30];
350 	sprintf(sumname, "%s.summary", global_context->config.output_prefix);
351 	FILE * sumfp = fopen(sumname,"w");
352 	#ifdef __MINGW32__
353 	fprintf(sumfp, "Total_%s\t%I64u\n", global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_processed_reads);
354 	#else
355 	fprintf(sumfp, "Total_%s\t%llu\n", global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_processed_reads);
356 	#endif
357 	fprintf(sumfp, "Mapped_%s\t%u\n", global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_mapped_reads);
358 	fprintf(sumfp, "Uniquely_mapped_%s\t%u\n", global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_uniquely_mapped_reads);
359 	fprintf(sumfp, "Multi_mapping_%s\t%u\n", global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_multimapping_reads);
360 	fprintf(sumfp, "Unmapped_%s\t%u\n", global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_unmapped_reads);
361 
362 	if(global_context->input_reads.is_paired_end_reads){
363 		#ifdef __MINGW32__
364 		fprintf(sumfp, "Properly_paired_fragments\t%I64u\n",global_context -> all_correct_PE_reads);
365 		#else
366 		fprintf(sumfp, "Properly_paired_fragments\t%llu\n",global_context -> all_correct_PE_reads);
367 		#endif
368 		fprintf(sumfp, "Singleton_fragments\t%u\n", global_context -> not_properly_pairs_only_one_end_mapped);
369 		fprintf(sumfp, "More_than_one_chr_fragments\t%u\n", global_context -> not_properly_pairs_different_chro);
370 		fprintf(sumfp, "Unexpected_strandness_fragments\t%u\n", global_context -> not_properly_different_strands);
371 		fprintf(sumfp, "Unexpected_template_length\t%u\n", global_context -> not_properly_pairs_TLEN_wrong);
372 		fprintf(sumfp, "Inversed_mapping\t%u\n", global_context -> not_properly_pairs_wrong_arrangement);
373 	}
374 
375 	if(global_context->config.entry_program_name == CORE_PROGRAM_SUBJUNC && ( global_context -> config.prefer_donor_receptor_junctions || !(global_context ->  config.do_fusion_detection || global_context ->  config.do_long_del_detection)))
376 		fprintf(sumfp, "Junctions\t%u\n", global_context -> all_junctions);
377 	fprintf(sumfp, "Indels\t%u\n", global_context -> all_indels);
378 
379 
380 	fclose(sumfp);
381 #endif
382 
383 	if(progress_report_callback)
384 	{
385 	        long long int all_reads_K = global_context -> all_processed_reads / 1000;
386 	        float mapped_reads_percentage = global_context -> all_mapped_reads * 1./global_context -> all_processed_reads;
387 	        if(global_context->input_reads.is_paired_end_reads) mapped_reads_percentage/=2;
388 	        progress_report_callback(10, 900000, (int) (miltime()-global_context->start_time));
389 	        progress_report_callback(10, 900010, (int) all_reads_K);
390 	        progress_report_callback(10, 900011, (int) (10000.*mapped_reads_percentage));
391 	}
392 
393 	print_in_box(80,0,1,"  ");
394 	print_in_box(89,0,1,"  %c[36mCompleted successfully.%c[0m", CHAR_ESC, CHAR_ESC);
395 	print_in_box(80,0,1,"  ");
396 	print_in_box(80,2,1,"  ");
397 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "");
398 	print_in_box(80, 1,1,"  Summary");
399 	print_in_box(80, 0,1,"  ");
400 
401 	#ifdef __MINGW32__
402 	if(global_context->input_reads.is_paired_end_reads)
403 		print_in_box(80, 0,0,"            Total fragments : %I64d" , global_context -> all_processed_reads);
404 	else
405 		print_in_box(80, 0,0,"                Total reads : %I64d" , global_context -> all_processed_reads);
406 
407 	print_in_box(81, 0,0,"                     Mapped : %u (%.1f%%%%)", global_context -> all_mapped_reads,  global_context -> all_mapped_reads*100.0 / global_context -> all_processed_reads);
408 	print_in_box(80, 0,0,"            Uniquely mapped : %u",  global_context -> all_uniquely_mapped_reads);
409 	print_in_box(80, 0,0,"              Multi-mapping : %u",  global_context -> all_multimapping_reads);
410 	print_in_box(80, 0,1,"      ");
411 	print_in_box(80, 0,0,"                   Unmapped : %u",  global_context -> all_unmapped_reads);
412 	if(global_context->input_reads.is_paired_end_reads){
413 		print_in_box(80, 0,1,"      ");
414 		print_in_box(80, 0,0,"            Properly paired : %I64d", global_context -> all_correct_PE_reads);
415 		print_in_box(80, 0,0,"        Not properly paired : %I64d", global_context -> all_mapped_reads -  global_context -> all_correct_PE_reads);
416 		print_in_box(80, 0,0,"                  Singleton : %u", global_context -> not_properly_pairs_only_one_end_mapped);
417 		print_in_box(80, 0,0,"                   Chimeric : %u", global_context -> not_properly_pairs_different_chro);
418 		print_in_box(80, 0,0,"      Unexpected strandness : %u", global_context -> not_properly_different_strands);
419 	 	print_in_box(80, 0,0," Unexpected fragment length : %u", global_context -> not_properly_pairs_TLEN_wrong);
420 	 	print_in_box(80, 0,0,"      Unexpected read order : %u", global_context -> not_properly_pairs_wrong_arrangement);
421 	}
422 
423 	print_in_box(80, 0,1,"      ");
424 
425 	if(global_context->config.output_prefix[0])
426 	{
427 	        if(global_context->config.entry_program_name == CORE_PROGRAM_SUBJUNC && ( global_context -> config.prefer_donor_receptor_junctions || !(global_context ->  config.do_fusion_detection || global_context ->  config.do_long_del_detection)))
428 	                print_in_box(80, 0,0,"                  Junctions : %u", global_context -> all_junctions);
429 	        if((global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection))
430 	                print_in_box(80, 0,0,"                    Fusions : %u", global_context -> all_fusions);
431 	        print_in_box(80, 0,0,"                     Indels : %u", global_context -> all_indels);
432 	}
433 	#else
434 	if(global_context->input_reads.is_paired_end_reads)
435 		print_in_box(80, 0,0,"            Total fragments : %'llu" , global_context -> all_processed_reads);
436 	else
437 		print_in_box(80, 0,0,"                Total reads : %'llu" , global_context -> all_processed_reads);
438 
439 	print_in_box(81, 0,0,"                     Mapped : %'u (%.1f%%%%)", global_context -> all_mapped_reads,  global_context -> all_mapped_reads*100.0 / global_context -> all_processed_reads);
440 	print_in_box(80, 0,0,"            Uniquely mapped : %'u",  global_context -> all_uniquely_mapped_reads);
441 	print_in_box(80, 0,0,"              Multi-mapping : %'u",  global_context -> all_multimapping_reads);
442 	print_in_box(80, 0,1,"      ");
443 	print_in_box(80, 0,0,"                   Unmapped : %'u",  global_context -> all_unmapped_reads);
444 	if(global_context->input_reads.is_paired_end_reads){
445 		print_in_box(80, 0,1,"      ");
446 		print_in_box(80, 0,0,"            Properly paired : %'llu", global_context -> all_correct_PE_reads);
447 		print_in_box(80, 0,0,"        Not properly paired : %'llu", global_context -> all_mapped_reads -  global_context -> all_correct_PE_reads);
448 		print_in_box(80, 0,0,"                  Singleton : %'u", global_context -> not_properly_pairs_only_one_end_mapped);
449 		print_in_box(80, 0,0,"                   Chimeric : %'u", global_context -> not_properly_pairs_different_chro);
450 		print_in_box(80, 0,0,"      Unexpected strandness : %'u", global_context -> not_properly_different_strands);
451 	 	print_in_box(80, 0,0," Unexpected fragment length : %'u", global_context -> not_properly_pairs_TLEN_wrong);
452 	 	print_in_box(80, 0,0,"      Unexpected read order : %'u", global_context -> not_properly_pairs_wrong_arrangement);
453 	}
454 
455 	print_in_box(80, 0,1,"      ");
456 
457 	if(global_context->config.output_prefix[0])
458 	{
459 	        if(global_context->config.entry_program_name == CORE_PROGRAM_SUBJUNC && ( global_context -> config.prefer_donor_receptor_junctions || !(global_context ->  config.do_fusion_detection || global_context ->  config.do_long_del_detection)))
460 	                print_in_box(80, 0,0,"                  Junctions : %'u", global_context -> all_junctions);
461 	        if((global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection))
462 	                print_in_box(80, 0,0,"                    Fusions : %'u", global_context -> all_fusions);
463 	        print_in_box(80, 0,0,"                     Indels : %'u", global_context -> all_indels);
464 	}
465 
466 	#endif
467 
468 	if(global_context -> is_phred_warning)
469 	{
470 		print_in_box(80, 0,1,"      ");
471 		print_in_box(80,0,0,"                    WARNING : Phred offset (%d) incorrect?", global_context->config.phred_score_format == FASTQ_PHRED33?33:64);
472 	}
473 	print_in_box(80, 0,1,"      ");
474 	print_in_box(80, 0,0,"               Running time : %.1f minutes", (miltime()-global_context->start_time)*1./60);
475 
476 	if( global_context->input_reads.is_paired_end_reads && global_context -> config.reported_multi_best_reads<2 && global_context -> expected_TLEN_read_numbers < READPAIRS_FOR_CALC_EXPT_TLEN){
477 	    print_in_box(80, 0,1,"      ");
478 		print_in_box(80,0,0,"  NOTE : No enough read-pairs to derive expected fragment length.");
479 	}
480 /*
481 	print_in_box(80, 0,0,"    Running time 0 : %.2f minutes", global_context->timecost_load_index/60);
482 	print_in_box(80, 0,0,"    Running time 1 : %.2f minutes", global_context->timecost_voting/60);
483 	print_in_box(80, 0,0,"    Running time 2 : %.2f minutes", global_context->timecost_before_realign/60);
484 	print_in_box(80, 0,0,"    Running time 3 : %.2f minutes", global_context->timecost_for_realign/60);
485 */
486 	print_in_box(80, 0,1,"");
487 	print_in_box(80, 2,1,"");
488 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "");
489 
490 	return 0;
491 }
492 
493 
494 
show_progress(global_context_t * global_context,thread_context_t * thread_context,unsigned int current_read_no,int task)495 void show_progress(global_context_t * global_context, thread_context_t * thread_context, unsigned int current_read_no, int task)
496 {
497 
498 	// Read_chunk_start is the file_offset of the very first read in the entire file.
499 	// current_circle_start_position_file1 is the file_offset of the first read in this 5-million read chunk (or whatever the chunk size is)
500 
501 
502 	if(global_context->config.scRNA_input_mode){
503 		if(task == 10){
504 			char minchr[10];
505 			float min_value = (miltime() - global_context -> start_time)*1./60;
506 			if(min_value < 9.91)
507 				sprintf(minchr, "%.01f", min_value);
508 			else sprintf(minchr, "% 3d", (int)min_value);
509 			print_in_box(80,0,0,"   processed % 3d million input reads in %s minutes", current_read_no/1000000, minchr);
510 		}
511 		//SUBREADprintf("TASK=%d, RNO=%u\n", task, current_read_no);
512 		return;
513 	}
514 
515 	if(thread_context&&thread_context->thread_id)
516 	{
517 		SUBREADputs("show_progress can only be called by thread#0\n");
518 		return;
519 	}
520 
521 	gene_input_t * ginp1 = &global_context->input_reads.first_read_file;
522 
523 	unsigned long long ginp1_file_pos = geinput_file_offset(ginp1);
524 
525 	if(task == STEP_VOTING)
526 	{
527 		unsigned long long real_read_number = global_context -> all_processed_reads + current_read_no;
528 		if(real_read_number>1000)
529 			global_context -> input_reads . avg_read_length = (ginp1_file_pos - ginp1 -> read_chunk_start) * 1./real_read_number ;
530 	}
531 
532 	unsigned long long total_file_size = global_context -> input_reads.first_read_file_size;
533 	unsigned long long guessed_all_reads = total_file_size / global_context -> input_reads . avg_read_length;
534 
535 	unsigned long long current_block_start_file_offset = global_context -> current_circle_start_abs_offset_file1;
536 
537 	unsigned long long guessed_this_chunk_reads = (total_file_size - current_block_start_file_offset) / global_context -> input_reads . avg_read_length ;
538 	if(guessed_this_chunk_reads > global_context ->config.reads_per_chunk) guessed_this_chunk_reads = global_context ->config.reads_per_chunk;
539 
540 	unsigned long long guessed_all_reads_before_this_chunk = current_block_start_file_offset / global_context -> input_reads . avg_read_length ;
541 	unsigned long long reads_finished_in_this_chunk = (ginp1_file_pos - current_block_start_file_offset) / global_context -> input_reads . avg_read_length;
542 
543 	int is_thred_step_running = global_context->config.is_third_iteration_running ? 1:0;
544 
545 	if(task != STEP_VOTING)
546 		reads_finished_in_this_chunk = (ginp1_file_pos - current_block_start_file_offset) / global_context -> input_reads . avg_read_length;
547 
548 	unsigned long long finished_steps = guessed_all_reads_before_this_chunk * (global_context -> index_block_number * 2 + 1 + is_thred_step_running);
549 
550 
551 	// add steps for voting
552 	if(task == STEP_VOTING)
553 		finished_steps += guessed_this_chunk_reads * global_context -> current_index_block_number * 2;
554 	else if(task == STEP_ITERATION_TWO)
555 		finished_steps += guessed_this_chunk_reads * global_context -> index_block_number * 2;
556 	else if(task > STEP_ITERATION_TWO)
557 		finished_steps += guessed_this_chunk_reads *(global_context -> index_block_number * 2+1);
558 
559 	if(task == STEP_VOTING)
560 		finished_steps += reads_finished_in_this_chunk*2;
561 	else	finished_steps += reads_finished_in_this_chunk;
562 
563 	unsigned long long guessed_all_steps = guessed_all_reads * (global_context -> index_block_number *2 + 1 + is_thred_step_running);
564 
565 	float finished_rate = finished_steps*1./guessed_all_steps;
566 	float reads_per_second = 0;
567 
568 	if(task == STEP_VOTING)
569 		reads_per_second = finished_rate *1.*guessed_all_reads / (miltime() - global_context -> align_start_time);
570 	else
571 		reads_per_second = finished_rate *1.*guessed_all_reads / (miltime() - global_context -> start_time);
572 
573 	if(current_read_no>1000 && !progress_report_callback)
574 	{
575 		char minchr[10];
576 		float min_value = (miltime() - global_context -> start_time)*1./60;
577 		if(min_value < 9.91)
578 			sprintf(minchr, "%.01f", min_value);
579 		else sprintf(minchr, "% 3d", (int)min_value);
580 
581 	//	print_in_box(81,0,0, "%4d%%%% completed, %s mins elapsed, total=%dk %s, rate=%2.1fk/s\r", (int)(finished_rate*100), minchr,(int)(guessed_all_reads*1./1000), global_context -> input_reads.is_paired_end_reads?"frags":"reads", reads_per_second/1000);
582 		print_in_box(81,0,0, "%4d%%%% completed, %s mins elapsed, rate=%2.1fk %s per second\r", (int)(finished_rate*100), minchr, reads_per_second/1000, global_context -> input_reads.is_paired_end_reads?"fragments":"reads");
583 	}
584 
585 	if(progress_report_callback)
586 	{
587 		progress_report_callback(10 ,task,(int)(10000*finished_rate));
588 		progress_report_callback(20 ,task,(int)(guessed_all_reads/1000));
589 	}
590 }
591 
592 
593 /*
594 int Xmain(int argc , char ** argv);
595 
596 int main(int argc, char ** argv)
597 {
598 	int xk1=0;
599 	for(xk1=0; xk1<4; xk1++)
600 		Xmain(argc ,  argv);
601 	return 0;
602 }
603 */
604 
605 
parse_opts_core(int argc,char ** argv,global_context_t * global_context)606 int parse_opts_core(int argc , char ** argv, global_context_t * global_context)
607 {
608 	int c;
609 	int option_index = 0;
610 
611 	optind = 1;
612 	opterr = 1;
613 	optopt = 63;
614 
615 	while ((c = getopt_long (argc, argv, "ExsS:L:AHd:D:n:m:p:P:R:r:i:l:o:T:Q:I:t:B:b:Q:FcuUfM?", long_options, &option_index)) != -1)
616 	{
617 		switch(c)
618 		{
619 			case 'Q':
620 				global_context->config.multi_best_reads = atoi(optarg);
621 				break;
622 			case 'H':
623 				global_context->config.use_hamming_distance_break_ties = 1;
624 				break;
625 			case 's':
626 				global_context->config.downscale_mapping_quality = 1;
627 				break;
628 			case 'M':
629 				global_context->config.do_big_margin_filtering_for_reads = 1;
630 				global_context->config.report_multi_mapping_reads = 0;
631 				break;
632 
633 			case 'A':
634 				global_context->config.report_sam_file = 0;
635 				break;
636 			case 'E':
637 				global_context->config.max_mismatch_exonic_reads = 200;
638 				global_context->config.max_mismatch_junction_reads = 200;
639 
640 				break;
641 			case 'f':
642 				global_context->config.max_mismatch_exonic_reads = 200;
643 				global_context->config.max_mismatch_junction_reads = 200;
644 				global_context->config.do_fusion_detection = 1;
645 				global_context->config.minimum_subread_for_first_read = 1;
646 				global_context->config.minimum_subread_for_second_read = 1;
647 				global_context->config.total_subreads = 28;
648 				global_context->config.report_no_unpaired_reads = 0;
649 				global_context->config.limited_tree_scan = 0;
650 				global_context->config.use_hamming_distance_in_exon = 1;
651 				break;
652 			case 'x':
653 				global_context->config.max_mismatch_exonic_reads = 10;
654 				global_context->config.max_mismatch_junction_reads = 1;
655 				global_context->config.ambiguous_mapping_tolerance = 39;
656 				global_context->config.extending_search_indels = 0;
657 
658 				global_context->config.do_breakpoint_detection = 1;
659 				global_context->config.total_subreads = 14;
660 				global_context->config.minimum_subread_for_first_read = 3;
661 				global_context->config.minimum_subread_for_second_read = 1;
662 				global_context->config.high_quality_base_threshold = 990000;
663 				global_context->config.do_big_margin_filtering_for_junctions = 1;
664 				global_context->config.report_no_unpaired_reads = 0;
665 				global_context->config.limited_tree_scan = 1;
666 				global_context->config.use_hamming_distance_in_exon = 0;
667 				break;
668 			case 'S':
669 				global_context->config.is_first_read_reversed = optarg[0]=='r'?1:0;
670 				global_context->config.is_second_read_reversed = optarg[0]=='f'?0:1;
671 				break;
672 			case 'U':
673 				global_context->config.report_no_unpaired_reads = 1;
674 				break;
675 			case 'u':
676 				global_context->config.report_multi_mapping_reads = 0;
677 				break;
678 			case 'b':
679 				global_context->config.is_methylation_reads = 1;
680 				break;
681 			case 'D':
682 				global_context->config.maximum_pair_distance = atoi(optarg);
683 				break;
684 			case 'd':
685 				global_context->config.minimum_pair_distance = atoi(optarg);
686 				break;
687 			case 'n':
688 				global_context->config.total_subreads = atoi(optarg);
689 				break;
690 			case 'm':
691 				global_context->config.minimum_subread_for_first_read = atoi(optarg);
692 				break;
693 			case 'T':
694 				global_context->config.all_threads = atoi(optarg);
695 				if(global_context->config.all_threads <1) global_context->config.all_threads = 1;
696 				if(global_context->config.all_threads >32) global_context->config.all_threads = 32;
697 
698 				break;
699 			case 'r':
700 				strncpy(global_context->config.first_read_file, optarg, MAX_FILE_NAME_LENGTH-1);
701 				break;
702 			case 'R':
703 				global_context->input_reads.is_paired_end_reads = 1;
704 				strncpy(global_context->config.second_read_file, optarg, MAX_FILE_NAME_LENGTH-1);
705 				break;
706 			case 'i':
707 				strncpy(global_context->config.index_prefix, optarg, MAX_FILE_NAME_LENGTH-1);
708 				break;
709 			case 'o':
710 				strncpy(global_context->config.output_prefix, optarg, MAX_FILE_NAME_LENGTH-1);
711 				break;
712 			case 'I':
713 				global_context->config.max_indel_length = atoi(optarg);
714 
715 				if(global_context->config.max_indel_length <0)global_context->config.max_indel_length =0;
716 				if(global_context->config.max_indel_length > MAX_INSERTION_LENGTH)global_context->config.max_indel_length = MAX_INSERTION_LENGTH;
717 				if(global_context->config.max_indel_length > 16)
718 				{
719 					global_context->config.reassembly_subread_length = 12;
720 					global_context->config.reassembly_window_multiplex = 3;
721 					global_context->config.reassembly_start_read_number = 5;
722 					global_context->config.reassembly_tolerable_voting = 0;
723 					global_context->config.reassembly_window_alleles = 2;
724 					global_context->config.reassembly_key_length = 28;
725 
726 					global_context->config.is_third_iteration_running = 1;
727 					global_context->config.max_mismatch_exonic_reads = 2;
728 					global_context->config.max_mismatch_junction_reads = 2;
729 					global_context->config.total_subreads = 28;
730 					global_context->config.do_big_margin_filtering_for_reads = 1;
731 
732 					global_context->config.do_superlong_indel_detection = 0;
733 				}
734 				break;
735 			case 'P':
736 				if (optarg[0]=='3')
737 					global_context->config.phred_score_format = FASTQ_PHRED33;
738 				else
739 					global_context->config.phred_score_format = FASTQ_PHRED64;
740 				break;
741 			case 'p':
742 				global_context->config.minimum_subread_for_second_read = atoi(optarg);
743 				break;
744 			case 't':
745 				sprintf(global_context->config.temp_file_prefix, "%s/core-temp-sum-%06u-%05u", optarg, getpid(), myrand_rand()
746 				);
747 				break;
748 			case 'F':
749 				global_context->config.is_second_iteration_running = 0;
750 				global_context->config.report_sam_file = 0;
751 				break;
752 			case 'B':
753 				strcpy(global_context->config.exon_annotation_file, optarg);
754 				break;
755 			case 'c':
756 				global_context->config.space_type = GENE_SPACE_COLOR;
757 				break;
758 
759 			case 0:
760 				//if(strcmp("memory-optimisation",long_options[option_index].name)==0)
761 				//	memory_optimisation = atoi(optarg);
762 				break;
763 			case '?':
764 			default:
765 				return -1 ;
766 		}
767 	}
768 
769 
770 	return 0;
771 }
772 
773 #ifdef MAKE_STANDALONE
main_back(int argc,char ** argv)774 int main_back(int argc , char ** argv)
775 {
776 	progress_report_callback = NULL;
777 #else
778 int subread_core_main(int argc , char ** argv)
779 {
780 	if(progress_report_callback) progress_report_callback(0,0,1);
781 #endif
782 
783 	return core_main(argc, argv, parse_opts_core);
784 }
785 
786 
787 int check_configuration(global_context_t * global_context)
788 {
789 	int expected_type = FILE_TYPE_FAST_;
790 	if(global_context -> config.is_SAM_file_input && global_context -> config.is_BAM_input)
791 		expected_type = FILE_TYPE_BAM;
792 	else if(global_context -> config.is_SAM_file_input && !global_context -> config.is_BAM_input)
793 		expected_type = FILE_TYPE_SAM;
794 	else if(global_context -> config.is_gzip_fastq)
795 		expected_type = FILE_TYPE_GZIP_FAST_;
796 
797 	if(global_context -> config.max_indel_length > 16)
798 		warning_file_limit();
799 
800 	int wret = 0, wret2 = 0;
801 	if( global_context ->config.scRNA_input_mode == 0 )  warning_file_type(global_context -> config.first_read_file, expected_type);
802 	if(global_context -> config.second_read_file[0])
803 	{
804 		if(expected_type==FILE_TYPE_FAST_ || expected_type==FILE_TYPE_GZIP_FAST_)
805 			wret2 = warning_file_type(global_context -> config.second_read_file, expected_type);
806 		else
807 			print_in_box(80,0,0,"Only one input SAM or BAM file is needed. The second input file is ignored.");
808 	}
809 
810 	if(wret == -1 || wret2 == -1){
811 		return -1;
812 	}
813 
814 	if(4 == sizeof(void *) && global_context->config.sort_reads_by_coordinates){
815 		SUBREADputs("ERROR: the sort by coordinate function only works on 64-bit computers.");
816 		return -1;
817 	}
818 
819 	if(global_context->config.is_input_read_order_required && global_context->config.sort_reads_by_coordinates){
820 		SUBREADputs("ERROR: you shouldn't specify keep input order and sort by coordinate at same time.");
821 		return -1;
822 	}
823 
824 	if((global_context -> config.is_BAM_output == 0 || global_context->config.output_prefix[0]==0) && global_context->config.sort_reads_by_coordinates){
825 		if(global_context -> config.is_BAM_output==0)SUBREADputs("ERROR: SAM output doesn't support read sorting by coordinates.");
826 		else SUBREADputs("ERROR: STDOUT output doesn't support read sorting by coordinates.");
827 		return -1;
828 	}
829 
830 	return 0;
831 
832 }
833 
834 unsigned long long myrand_seed = 0;
835 unsigned long long myrand_seed2 = 0;
836 
837 void myrand_srand(unsigned long long seed){
838 
839 	// when it is in R, the seed is NOT used but the R random numbers are used as the seed.
840 	myrand_seed = 0;
841 	myrand_seed2 = 0;
842 
843 	#ifndef MAKE_STANDALONE
844 	GetRNGstate();
845 	seed = 0;
846 	seed = (seed << 16)+ (int)(unif_rand()*65521);
847 	seed = (seed << 16)+ (int)(unif_rand()*65521);
848 	seed = (seed << 16)+ (int)(unif_rand()*65521);
849 	seed = (seed << 16)+ (int)(unif_rand()*65521);
850 
851 	PutRNGstate();
852 	#endif
853 	myrand_seed = seed;
854 	myrand_rand();
855 	myrand_rand();
856 	myrand_rand();
857 	myrand_rand();
858 	myrand_rand();
859 	myrand_rand();
860 	myrand_rand();
861 	myrand_rand();
862 	myrand_rand();
863 }
864 
865 #define THE_966666773323RD_PRIME 28962406029617llu
866 int myrand_rand(){
867 	//if(myrand_seed % 3133LLU == 0) myrand_srand(0);
868 
869 	myrand_seed ^= (myrand_seed % 858173 + myrand_seed % 104729);
870 	myrand_seed2 += myrand_seed;
871 	myrand_seed ^= myrand_seed2 << 13;
872 	return (int)(((myrand_seed^myrand_seed2) %THE_966666773323RD_PRIME) % (1LLU+RAND_MAX));
873 }
874 
875 
876 int core_main(int argc , char ** argv, int (parse_opts (int , char **, global_context_t * )))
877 {
878 	struct timeval xtime;
879 	gettimeofday(&xtime,NULL);
880 	myrand_srand(time(NULL)^xtime.tv_usec);
881 
882 	global_context_t * global_context;
883 	global_context = (global_context_t*)malloc(sizeof(global_context_t));
884 	memset(global_context, 0, sizeof(global_context_t));
885 	init_global_context(global_context);
886 
887 
888 	int ret = parse_opts(argc , argv, global_context);
889 	init_core_temp_path(global_context);
890 	//global_context->config.reads_per_chunk = 200*1024;
891 
892 	if(global_context->config.max_indel_length > 20 && !global_context->input_reads.is_paired_end_reads)
893 	{
894 		global_context->config.total_subreads = 28;
895 		global_context->config.reassembly_start_read_number = 3;
896 		global_context->config.do_superlong_indel_detection = 1;
897 	}
898 
899 	if(global_context->config.fast_run){
900 		global_context -> config.top_scores = 1;
901 		global_context -> config.max_vote_combinations = 1;
902 		global_context -> config.max_vote_simples = 1;
903 		global_context -> config.multi_best_reads = 1;
904 	}
905 
906 	ret = ret || print_configuration(global_context);
907 	ret = ret || check_configuration(global_context);
908 	ret = ret || load_global_context(global_context);
909 	ret = ret || init_modules(global_context);
910 	ret = ret || read_chunk_circles(global_context);
911 	ret = ret || write_final_results(global_context);
912 	ret = ret || destroy_modules(global_context);
913 	ret = ret || destroy_global_context(global_context);
914 	ret = ret || show_summary(global_context);
915 
916 	free(global_context);
917 
918 	return ret;
919 }
920 
921 // the new file name is written into fname then.
922 
923 int convert_BAM_to_SAM(global_context_t * global_context, char * fname, int is_bam)
924 {
925 	char temp_file_name[MAX_FILE_NAME_LENGTH+80], *fline=malloc(3000), tmp_readname[MAX_READ_NAME_LEN];
926 	short tmp_flags;
927 	SamBam_FILE * sambam_reader;
928 
929 	char * env_no_sort = getenv("SUBREAD_DO_NOT_CHECK_INPUT");
930 	if(env_no_sort){
931 		global_context->input_reads.is_paired_end_reads = 1;
932 		SUBREADprintf("\nWARNING: The SAM input file was assumed to be sorted by name.\nENV: SUBREAD_DO_NOT_CHECK_INPUT\n");
933 		return 0;
934 	}
935 
936 	int is_file_sorted = 1;
937 	unsigned long long int read_no = 0;
938 	sambam_reader = SamBam_fopen(fname, is_bam?SAMBAM_FILE_BAM:SAMBAM_FILE_SAM);
939 	if(!sambam_reader)
940 	{
941 		SUBREADprintf("ERROR: Unable to open file '%s'. The file is not in the specified format.\n", fname);
942 		free(fline);
943 		return -1;
944 	}
945 	while(1)
946 	{
947 		char * is_ret = SamBam_fgets(sambam_reader, fline, 2999, 1);
948 		if(!is_ret) break;
949 		if(sambam_reader ->is_bam_broken ) {
950 			SUBREADputs("ERROR: the BAM format is broken.");
951 			return -1;
952 		}
953 		if(fline[0]=='@')continue;
954 		if(is_SAM_unsorted(fline, tmp_readname, &tmp_flags , read_no)){
955 			if(tmp_flags & 1) global_context->input_reads.is_paired_end_reads = 1;
956 			is_file_sorted = 0;
957 			read_no++;
958 			break;
959 		}
960 		read_no++;
961 		if(tmp_flags & 1) global_context->input_reads.is_paired_end_reads = 1;
962 		else global_context->input_reads.is_paired_end_reads = 0;
963 
964 		if(! global_context->input_reads.is_paired_end_reads) break;
965 	}
966 
967 	if(read_no<1)
968 	{
969 		SUBREADprintf("ERROR: unable to find any reads from file '%s'. The file is not in the specified format or is empty.\n", fname);
970 		return -1;
971 	}
972 
973 	SamBam_fclose(sambam_reader);
974 	print_in_box(80,0,0,"The input %s file contains %s-end reads.", is_bam?"BAM":"SAM",  global_context->input_reads.is_paired_end_reads?"paired":"single");
975 	if(!is_file_sorted)
976 		print_in_box(80,0,0,"The input %s file is unsorted. Reorder it...", is_bam?"BAM":"SAM");
977 	else if(is_bam)
978 		print_in_box(80,0,0,"Convert the input BAM file...");
979 
980 	int disk_is_full = 0;
981 	if(is_bam || (global_context->input_reads.is_paired_end_reads && !is_file_sorted))
982 	{
983 		sprintf(temp_file_name, "%s.sam", global_context->config.temp_file_prefix);
984 		sambam_reader = SamBam_fopen(fname, is_bam?SAMBAM_FILE_BAM: SAMBAM_FILE_SAM);
985 		if(!sambam_reader){
986 			SUBREADprintf("Unable to open %s.\n", fname);
987 			return -1;
988 		}
989 		FILE * sam_fp = NULL;
990 		SAM_sort_writer writer;
991 		int writer_opened = 0;
992 
993 		if(is_file_sorted) sam_fp = f_subr_open(temp_file_name,"w");
994 		else writer_opened = sort_SAM_create(&writer, temp_file_name, NULL);
995 
996 		if((is_file_sorted && !sam_fp) || (writer_opened && !is_file_sorted)){
997 			SUBREADprintf("Failed to write to the directory. You may not have permission to write to this directory or the disk is full.\n");
998 			return -1;
999 		}
1000 
1001 		while(1)
1002 		{
1003 			char * is_ret = SamBam_fgets(sambam_reader, fline, 2999, 1);
1004 			if(!is_ret) break;
1005 			if(is_file_sorted){
1006 				int wlen = fputs(fline, sam_fp);
1007 				if(wlen <0){
1008 					SUBREADprintf("ERROR: unable to write into the temporary SAM file. Please check the disk space in the output directory.\n");
1009 					disk_is_full = 1;
1010 					break;
1011 				}
1012 			}else{
1013 				int ret = sort_SAM_add_line(&writer, fline, strlen(fline));
1014 				if(ret== -1) {
1015 					print_in_box(80,0,0,"ERROR: read name is too long; check input format.");
1016 					break;
1017 				}
1018 				if(ret== -2) {
1019 					SUBREADprintf("ERROR: unable to write into the temporary SAM file. Please check the disk space in the output directory.\n");
1020 					disk_is_full = 1;
1021 					break;
1022 				}
1023 			}
1024 		}
1025 
1026 		if(is_file_sorted)
1027 			fclose(sam_fp);
1028 		else{
1029 			int ret = sort_SAM_finalise(&writer);
1030 			if(writer.unpaired_reads)
1031 				#ifdef __MINGW32__
1032 				print_in_box(80,0,0,"%I64d single-end mapped reads in reordering.", writer.unpaired_reads);
1033 				#else
1034 				print_in_box(80,0,0,"%lld single-end mapped reads in reordering.", writer.unpaired_reads);
1035 				#endif
1036 			if(ret) {
1037 				disk_is_full = 1;
1038 				SUBREADprintf("ERROR: unable to create the temporary file. Please check the disk space in the output directory.\n");
1039 			}
1040 		}
1041 
1042 		SamBam_fclose(sambam_reader);
1043 		strcpy(fname, temp_file_name);
1044 		global_context -> will_remove_input_file = 1;
1045 	}
1046 
1047 	free(fline);
1048 	return disk_is_full;
1049 }
1050 
1051 int convert_GZ_to_FQ(global_context_t * global_context, char * fname, int half_n)
1052 {
1053 	int is_OK = 0;
1054 	char temp_file_name[MAX_FILE_NAME_LENGTH+30];
1055 	char * linebuff=malloc(3001);
1056 	gzFile rawfp = gzopen(fname, "r");
1057 
1058 	if(rawfp)
1059 	{
1060 		print_in_box(80,0,0,"Decompress %s...", fname);
1061 		sprintf(temp_file_name, "%s-%d.fq", global_context->config.temp_file_prefix, half_n);
1062 		FILE * outfp = fopen(temp_file_name, "w");
1063 		if(outfp)
1064 		{
1065 			while(1)
1066 			{
1067 				char * bufr =gzgets(rawfp, linebuff, 3000);
1068 				if(!bufr) break;
1069 				fputs(bufr, outfp);
1070 			}
1071 			is_OK = 1;
1072 			fclose(outfp);
1073 		}
1074 		else{
1075 			SUBREADprintf("Unable to create temporary file '%s'\nPlease run the program in a directory where you have the privilege to create files.\n", temp_file_name);
1076 		}
1077 
1078 		gzclose(rawfp);
1079 	}
1080 
1081 	strcpy(fname, temp_file_name);
1082 	global_context -> will_remove_input_file |= (1<< (half_n-1));
1083 
1084 	return !is_OK;
1085 }
1086 
1087 int core_geinput_open(global_context_t * global_context, gene_input_t * fp, int half_number)
1088 {
1089 	char *fname;
1090 	if(global_context->config.is_SAM_file_input) {
1091 		fname = global_context ->config.first_read_file;
1092 		if(half_number == 1)
1093 			if(convert_BAM_to_SAM(global_context, global_context ->config.first_read_file, global_context ->config.is_BAM_input)) return -1;
1094 		if(!global_context->input_reads.is_paired_end_reads) half_number=0;
1095 		return geinput_open_sam(fname, fp, half_number);
1096 	} else {
1097 		int rv = -1;
1098 		//SUBREADprintf("SCRNA_MODE=%d\n", global_context->config.scRNA_input_mode );
1099 		if(global_context -> config.is_gzip_fastq)
1100 			if(convert_GZ_to_FQ(global_context, (half_number==2)? global_context ->config.second_read_file : global_context ->config.first_read_file, half_number)) return -1;
1101 		fname = (half_number == 2)?global_context -> config.second_read_file:global_context -> config.first_read_file;
1102 		if(global_context->config.scRNA_input_mode == GENE_INPUT_BCL)
1103 			rv = geinput_open_bcl(fname , fp, global_context -> config.reads_per_chunk, global_context -> config.all_threads );
1104 		else if(global_context->config.scRNA_input_mode == GENE_INPUT_SCRNA_FASTQ)
1105 			rv = geinput_open_scRNA_fqs(fname , fp, global_context -> config.reads_per_chunk, global_context -> config.all_threads );
1106 		else if(global_context->config.scRNA_input_mode == GENE_INPUT_SCRNA_BAM)
1107 			rv = geinput_open_scRNA_BAM(fname , fp, global_context -> config.reads_per_chunk, global_context -> config.all_threads );
1108 		else
1109 			rv = geinput_open(fname, fp);
1110 
1111 		if(global_context->input_reads.is_paired_end_reads && global_context->config. scRNA_input_mode){
1112 			SUBREADprintf("ERROR: No paired-end input is allowed on scRNA mode.\n");
1113 			return -1;
1114 		}
1115 		return rv;
1116 	}
1117 }
1118 
1119 
1120 int fetch_next_read_pair(global_context_t * global_context, thread_context_t * thread_context, gene_input_t* ginp1, gene_input_t* ginp2, int *read_len_1, int *read_len_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, char * qual_text_1, char *qual_text_2, int remove_color_head, subread_read_number_t * read_no_in_chunk)
1121 {
1122 	int rl1=0, rl2=0;
1123 	int is_second_R1, is_second_R2;
1124 	subread_read_number_t this_number = -1;
1125 
1126 	geinput_preload_buffer(ginp1, &global_context -> input_reads.input_lock);
1127 	if(ginp2) geinput_preload_buffer(ginp2, &global_context -> input_reads.input_lock);
1128 
1129 	subread_lock_occupy(&global_context -> input_reads.input_lock);
1130 	if(global_context -> running_processed_reads_in_chunk < global_context -> config.reads_per_chunk)
1131 	{
1132 		do
1133 		{
1134 			is_second_R1 = 0; is_second_R2 = 0;
1135 			rl1 = geinput_next_read_trim(ginp1, read_name_1, read_text_1 , qual_text_1, global_context->config.read_trim_5, global_context->config.read_trim_3, &is_second_R1);
1136 			//if(global_context -> running_processed_reads_in_chunk < 100)SUBREADprintf("GETRNAMES %s LEN=%d\n", read_name_1, rl1);
1137 			if(global_context->config.space_type == GENE_SPACE_COLOR && remove_color_head)
1138 			{
1139 				if(isalpha(read_text_1[0]))
1140 				{
1141 					int xk1;
1142 					for(xk1=2; read_text_1[xk1]; xk1++)
1143 						read_text_1[xk1-2]=read_text_1[xk1];
1144 					read_text_1[xk1-2]=0;
1145 				}
1146 			}
1147 
1148 			if(ginp2)
1149 			{
1150 				rl2 = geinput_next_read_trim(ginp2, read_name_2, read_text_2 , qual_text_2, global_context->config.read_trim_5, global_context->config.read_trim_3, &is_second_R2);
1151 				if(global_context->config.space_type == GENE_SPACE_COLOR && remove_color_head)
1152 				{
1153 					if(isalpha(read_text_2[0]))
1154 					{
1155 						int xk1;
1156 						for(xk1=2; read_text_2[xk1]; xk1++)
1157 							read_text_2[xk1-2]=read_text_2[xk1];
1158 						read_text_2[xk1-2]=0;
1159 					}
1160 				}
1161 			}
1162 			if(rl1 <= 0 || (rl2 <= 0 && ginp2)) break;
1163 		} while(is_second_R1||is_second_R2) ;
1164 
1165 
1166 		//SUBREADprintf("IMBRL1=%d\tRL2=%d\n", rl1, rl2);
1167 		//
1168 		if(rl1 >0 || (rl2 > 0 && ginp2)){
1169 			this_number = global_context -> running_processed_reads_in_chunk;
1170 			global_context -> running_processed_reads_in_chunk ++;
1171 		}
1172 	}
1173 	subread_lock_release(&global_context -> input_reads.input_lock);
1174 
1175 	if( global_context->config.space_type == GENE_SPACE_COLOR) {
1176 		rl1-=1;rl2-=1;
1177 	}
1178 
1179 	if(ginp2 && rl1 * rl2 <=0 && (rl1>0 || rl2>0)){
1180 		if(!global_context-> input_reads.is_internal_error)
1181 			SUBREADprintf("\nERROR: two input files have different amounts of reads.\n\n");
1182 		global_context-> input_reads.is_internal_error = 1;
1183 		*read_no_in_chunk = -1;
1184 		return 1;
1185 	} else if(rl1>0 && (rl2>0 || !ginp2) && this_number>=0) {
1186 		if(global_context->config.is_first_read_reversed)
1187 		{
1188 			reverse_read(read_text_1, rl1, global_context->config.space_type);
1189 			if(qual_text_1)
1190 				reverse_quality(qual_text_1, rl1);
1191 		}
1192 
1193 		if(ginp2 && global_context->config.is_second_read_reversed)
1194 		{
1195 			reverse_read(read_text_2, rl2, global_context->config.space_type);
1196 			if(qual_text_2)
1197 				reverse_quality(qual_text_2, rl2);
1198 		}
1199 
1200 		*read_no_in_chunk = this_number;
1201 		*read_len_1 = rl1;
1202 		if(ginp2)
1203 			*read_len_2 = rl2;
1204 		return 0;
1205 	} else {
1206 		// normally finished
1207 		*read_no_in_chunk = -1;
1208 		return 1;
1209 	}
1210 }
1211 
1212 int write_final_results(global_context_t * context)
1213 {
1214 
1215 	if((context ->  config.do_fusion_detection || context ->  config.do_long_del_detection) && context -> config.do_structural_variance_detection)
1216 		finalise_structural_variances(context);
1217 
1218 	if(context -> config.output_prefix[0] && 0==context -> input_reads.is_internal_error && !(context -> config.is_BAM_output && context -> output_bam_writer -> is_internal_error) ) {
1219 		write_indel_final_results(context);
1220 
1221 		if(context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && (context -> config.prefer_donor_receptor_junctions||!(context ->  config.do_fusion_detection || context ->  config.do_long_del_detection)))
1222 			write_junction_final_results(context);
1223 
1224 		if((context ->  config.do_fusion_detection || context ->  config.do_long_del_detection))
1225 			write_fusion_final_results(context);
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 int get_soft_clipping_length(char* CIGAR)
1232 {
1233 	int nch;
1234 	int cigar_cursor, tmp_int = 0;
1235 
1236 	for(cigar_cursor = 0; (nch = CIGAR[cigar_cursor]) > 0;cigar_cursor++)
1237 	{
1238 		if(isdigit(nch)) tmp_int = tmp_int*10+(nch-'0');
1239 		else
1240 		{
1241 			if(nch=='S') return tmp_int;
1242 			return 0;
1243 		}
1244 	}
1245 	return 0;
1246 }
1247 
1248 
1249 #define CIGAR_PERFECT_SECTIONS 12
1250 
1251 typedef struct{
1252 	char current_cigar_decompress[CORE_MAX_CIGAR_STR_LEN + 1];
1253 	char cigar [CORE_MAX_CIGAR_STR_LEN];
1254 
1255 	unsigned short chimeric_sections;
1256 	unsigned int out_poses[CIGAR_PERFECT_SECTIONS];
1257 	short out_lens[CIGAR_PERFECT_SECTIONS];
1258 	char out_cigars[CIGAR_PERFECT_SECTIONS][60];
1259 	char out_strands[CIGAR_PERFECT_SECTIONS];
1260 
1261 	char additional_information[CORE_ADDITIONAL_INFO_LENGTH + 1];
1262 	mapping_result_t * raw_result;
1263 
1264 	unsigned int linear_position;
1265 	short soft_clipping_movements;
1266 	char * chro;
1267 	int offset;
1268 	int strand;
1269 	int is_first_section_jumpped;
1270 
1271 	int mapping_quality;
1272 	int is_NM_appied;
1273 }subread_output_tmp_t;
1274 
1275 typedef struct{
1276 	long long int *PE_distance;
1277 	char * out_cigar_buffer[CIGAR_PERFECT_SECTIONS];
1278 	subread_output_tmp_t *r1;
1279 	subread_output_tmp_t *r2;
1280 	subread_output_tmp_t ** out_pairs;
1281 	mapping_result_t ** out_raws;
1282 
1283 } subread_output_context_t;
1284 
1285 void init_output_context(global_context_t * global_context, subread_output_context_t * out_context)
1286 {
1287 	int xk1;
1288 	memset(out_context, 0, sizeof(subread_output_context_t));
1289 
1290 	out_context -> r1 = malloc(sizeof(subread_output_tmp_t));
1291 	for(xk1=0;xk1<CIGAR_PERFECT_SECTIONS;xk1++)
1292 		out_context -> out_cigar_buffer[xk1] = malloc(60);
1293 
1294 	out_context -> out_pairs = malloc(sizeof( subread_output_context_t *) * global_context->config.multi_best_reads * 2);
1295 	out_context -> out_raws = malloc(sizeof( mapping_result_t * *) * global_context->config.multi_best_reads * 2);
1296 
1297 	if(global_context -> input_reads.is_paired_end_reads)
1298 	{
1299 		out_context -> PE_distance = malloc(sizeof(long long) * global_context->config.multi_best_reads);
1300 		out_context -> r2 = malloc(sizeof(subread_output_tmp_t));
1301 	} else {
1302 		out_context -> PE_distance = NULL;
1303 		out_context -> r2 = NULL;
1304 	}
1305 }
1306 
1307 void destroy_output_context(global_context_t * global_context , subread_output_context_t * out_context)
1308 {
1309 	int xk1;
1310 	for(xk1=0;xk1<CIGAR_PERFECT_SECTIONS;xk1++)
1311 		free(out_context -> out_cigar_buffer[xk1]);
1312 
1313 	free(out_context -> out_raws);
1314 	free(out_context -> out_pairs);
1315 	free(out_context -> r1 );
1316 	if(global_context -> input_reads.is_paired_end_reads)
1317 	{
1318 		free(out_context -> r2);
1319 		free(out_context -> PE_distance);
1320 	}
1321 }
1322 
1323 int locate_current_value_index(global_context_t * global_context, thread_context_t * thread_context, mapping_result_t * result, int rlen);
1324 int calc_edit_dist(global_context_t * global_context, mapping_result_t * current_result, char * cigar , unsigned int pos ,  char * read_text, int all_mm)
1325 {
1326 	unsigned int cigar_cursor = 0, tmpi=0;
1327 
1328 	while(1)
1329 	{
1330 		char nch = cigar[cigar_cursor++];
1331 		if(!nch)break;
1332 
1333 		if('0'<=nch && '9' >=nch)
1334 		{
1335 			tmpi = tmpi*10+nch-'0';
1336 		}
1337 		else{
1338 			if(nch == 'I' || nch == 'D')
1339 			{
1340 				all_mm+=tmpi;
1341 			}
1342 
1343 			tmpi = 0;
1344 		}
1345 	}
1346 	return all_mm;
1347 }
1348 
1349 unsigned int move_to_read_head(unsigned int tailpos, char * cigar){
1350 	int cigar_i = 0, nch;
1351 	unsigned int tmpi = 0;
1352 	while(0<(nch = cigar[cigar_i++])){
1353 		if(isdigit(nch)){
1354 			tmpi = tmpi * 10 + nch - '0';
1355 		}else{
1356 			if(nch == 'N' || nch == 'M' || nch == 'D') tailpos -= tmpi;
1357 			tmpi = 0;
1358 		}
1359 	}
1360 	return tailpos;
1361 }
1362 
1363 
1364 // This function returns 1 if the cut was added.
1365 // It returns 0 if the head or tail cut is not able to be added (e.g., the cigar ends up like "50M4I10S" or "10S30N90M")
1366 int add_head_tail_cut_softclipping(global_context_t * global_context, unsigned int linear, char * cigar, int rlen, int head_cut, int tail_cut){
1367 	//SUBREADprintf("ADD_SOFT: %s , %d, %d\n", cigar,head_cut,tail_cut);
1368 
1369 	char cigar_added [CORE_MAX_CIGAR_STR_LEN];
1370 	int cigar_cursor = 0, read_cursor = 0, next_read_cursor = 0;
1371 	int tmpi = 0, nch, has_M = 0;
1372 	unsigned int linear_cursor = linear;
1373 
1374 	cigar_added[0]=0;
1375 
1376 	while(1){
1377 		nch = cigar[cigar_cursor++];
1378 		if(nch == 0) break;
1379 
1380 		if(isdigit(nch)){
1381 			tmpi = 10 * tmpi + (nch - '0');
1382 		}else{
1383 			if('M' == nch || 'S' == nch || 'I' == nch)
1384 				next_read_cursor = read_cursor + tmpi;
1385 
1386 			if('M' == nch || 'D' == nch || 'S' == nch || 'N' == nch){
1387 				linear_cursor += tmpi;
1388 			}
1389 
1390 			int head_S = 0, tail_S =0, remainder_tmpi = tmpi, is_skip = 0;
1391 
1392 			if(next_read_cursor <= head_cut) is_skip = 1;
1393 			if(read_cursor >= rlen - tail_cut) is_skip = 1;
1394 			if(!is_skip){
1395 				if(nch != 'S'){
1396 					if(read_cursor <= head_cut)head_S = head_cut;
1397 					if(next_read_cursor >= rlen - tail_cut) tail_S = tail_cut;
1398 					remainder_tmpi = tmpi;
1399 					if(head_S) remainder_tmpi-= (head_S - read_cursor);
1400 					if(tail_S) remainder_tmpi-= next_read_cursor - (rlen - tail_S);
1401 				}
1402 				//SUBREADprintf("OPTR=%c, THREE-LENS=%d,%d,%d\n", nch, head_S, remainder_tmpi, tail_S);
1403 				if(head_S >0 || tail_S > 0) if(nch !='M') return 0;
1404 
1405 				if(head_S > 0) sprintf(cigar_added + strlen(cigar_added), "%dS", head_S );
1406 				if(remainder_tmpi > 0){
1407 					sprintf(cigar_added + strlen(cigar_added), "%d%c", remainder_tmpi , nch );
1408 					if( nch == 'M' ) has_M = 1;
1409 				}
1410 				if(tail_S > 0) sprintf(cigar_added + strlen(cigar_added), "%dS", tail_S );
1411 			}
1412 
1413 			read_cursor = next_read_cursor;
1414 			tmpi = 0;
1415 		}
1416 	}
1417 	//SUBREADprintf("RPCIGAR: %s => %s\n", cigar, cigar_added);
1418 	strcpy(cigar, cigar_added);
1419 	return has_M;
1420 }
1421 
1422 
1423 int convert_read_to_tmp(global_context_t * global_context , subread_output_context_t * output_context, int read_number, int is_second_read, int read_len, char * read_text, char * qual_text, realignment_result_t * current_result, subread_output_tmp_t * r, char * read_name)
1424 {
1425 
1426 	int is_r_OK;
1427 	r -> raw_result = current_result -> mapping_result;
1428 	r -> additional_information[0]=0;
1429 
1430 	is_r_OK = (current_result -> mapping_result -> result_flags & CORE_IS_FULLY_EXPLAINED) > 0;
1431 
1432 	if(0 && FIXLENstrcmp("V0112_0155:7:1101:5279:29143#ATCACG", read_name) == 0)
1433 		SUBREADprintf("%s  R_%d CPOINT1 : is_OK=%d\n", read_name , is_second_read + 1 , is_r_OK);
1434 
1435 	if(is_r_OK) {
1436 
1437 		int current_strand = (current_result -> mapping_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
1438 		int is_first_section_jumped = current_result -> first_base_is_jumpped;
1439 		if(is_first_section_jumped) assert((global_context ->  config.do_fusion_detection || global_context ->  config.do_long_del_detection));
1440 		strcpy(r-> current_cigar_decompress, current_result -> cigar_string);
1441 
1442 		int chimeric_sections = 0;
1443 
1444 		r->is_first_section_jumpped = is_first_section_jumped;
1445 		r->linear_position = current_result -> first_base_position;
1446 		//r->mapping_quality = current_result -> final_quality;
1447 		r->mapping_quality = 40;	// changed on 10-JUN-2016
1448 		if(current_result -> realign_flags & CORE_IS_BREAKEVEN)
1449 			r -> mapping_quality = 0;
1450 		else
1451 			r -> mapping_quality /= current_result->MAPQ_adjustment;
1452 		//SUBREADprintf("REP=%d\n", current_result->MAPQ_adjustment);
1453 
1454 		strcpy(r->cigar, r -> current_cigar_decompress);
1455 		r->strand = (current_result -> mapping_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
1456 
1457 		//sprintf(r->additional_information, "\tSM:i:%d",current_result -> final_mismatched_bases);
1458 
1459 
1460 		//printf("SM='%s'\n", r->additional_information);
1461 
1462 		if((global_context ->  config.do_fusion_detection || global_context ->  config.do_long_del_detection))
1463 		{
1464 			chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens, read_name);
1465 
1466 			if(chimeric_sections > 0){
1467 				int xk1;
1468 				r->chimeric_sections = chimeric_sections;
1469 				strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
1470 				for(xk1=1; xk1<chimeric_sections; xk1++)
1471 				{
1472 					int chimeric_pos;
1473 					char * chimaric_chr;
1474 					strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
1475 					unsigned int vitual_head_pos = r->out_poses[xk1];
1476 					char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
1477 
1478 					//if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
1479 
1480 					if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, NULL, NULL, 0+r->out_lens[xk1]))
1481 					{
1482 						int soft_clipping_movement = 0;
1483 						soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
1484 						assert(chimaric_chr);
1485 						sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , max(1,chimeric_pos + soft_clipping_movement + 1), strand_xor?'-':'+' , chimaric_chr );
1486 					}
1487 					else is_r_OK = 0;
1488 				}
1489 				r->linear_position = r->out_poses[0];
1490 				r->strand = r->out_strands[0]=='-';
1491 
1492 				strcpy(r->cigar , r->out_cigars[0]);
1493 			}else is_r_OK = 0;
1494 		}
1495 		if(is_r_OK)
1496 			r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
1497 	}
1498 
1499 	if(is_r_OK)
1500 	{
1501 		int head_cut = 0 , tail_cut = 0;
1502 
1503 		if(locate_gene_position_max(r->linear_position + r->soft_clipping_movements,& global_context -> chromosome_table, &r-> chro , &r -> offset, &head_cut, &tail_cut,(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)?read_len:(current_result->chromosomal_length - r->soft_clipping_movements))){
1504 			is_r_OK = 0;
1505 		} else {
1506 			int is_added_OK = 1;
1507 			if(head_cut!=0 || tail_cut!=0)
1508 				is_added_OK = add_head_tail_cut_softclipping(global_context, r->linear_position, r->cigar , read_len, head_cut, tail_cut);
1509 
1510 			if(is_added_OK){
1511 				r -> offset++;
1512 				assert(r-> chro);
1513 			}else{
1514 				is_r_OK = 0;
1515 			}
1516 		}
1517 
1518 		if(global_context -> config.do_breakpoint_detection && !(current_result -> realign_flags & CORE_NOTFOUND_DONORS))
1519 		{
1520 			sprintf(r->additional_information + strlen(r->additional_information), "\tXS:A:%c", (current_result -> realign_flags & CORE_IS_GT_AG_DONORS)?'+':'-');
1521 		}
1522 
1523 		/*
1524 		if(global_context -> config.more_accurate_fusions)
1525 		{
1526 
1527 			sprintf(r->additional_information + strlen(r->additional_information), "\tDF:i:%d", current_result -> best_second_diff_bases >0?current_result -> best_second_diff_bases:9999 );
1528 		}*/
1529 
1530 
1531 	}
1532 
1533 	return is_r_OK;
1534 }
1535 
1536 
1537 int add_event_detected_from_cigar(global_context_t * global_context, unsigned int left_last_base, unsigned int right_first_base, int is_indel, int indel_len, int left_extend, int right_extend, int read_length, char * read_name)
1538 {
1539 	HashTable * event_table = NULL;
1540 	chromosome_event_t * event_space = NULL;
1541 	event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
1542 	event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
1543 
1544 	chromosome_event_t *site_events[MAX_EVENT_ENTRIES_PER_SITE];
1545 	int search_types ;
1546 	if(is_indel)	search_types = CHRO_EVENT_TYPE_INDEL;
1547 	else		search_types = CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION;
1548 
1549 	if(left_last_base > right_first_base) {
1550 		unsigned int ttt;
1551 		ttt = right_first_base;
1552 		right_first_base = left_last_base;
1553 		left_last_base = ttt;
1554 	}
1555 
1556 	int xk1, site_events_no = search_event(global_context, event_table , event_space , left_last_base, EVENT_SEARCH_BY_BOTH_SIDES , search_types , site_events);
1557 
1558 	int is_found = 0;
1559 	for(xk1 = 0; xk1 < site_events_no; xk1++){
1560 		chromosome_event_t * this_event = site_events[xk1];
1561 		if(is_indel && this_event -> event_type != CHRO_EVENT_TYPE_INDEL) continue;
1562 		if((!is_indel) && this_event -> event_type == CHRO_EVENT_TYPE_INDEL) continue;
1563 
1564 		#ifndef __MINGW32__
1565 		if(0 && ( memcmp(read_name, "V0112_0155:7:1104:17648:117432",28)==0 || ( this_event -> event_small_side  > 2613701363 - 200 && this_event -> event_small_side  < 2613701363 + 100) ))
1566 			SUBREADprintf("EVENT: L=%u, R=%u (TABLE L=%u , R=%u), LEXT=%d, REXT=%d -- %s\n", left_last_base, right_first_base, this_event -> event_small_side, this_event -> event_large_side, left_extend, right_extend, read_name);
1567 		#endif
1568 
1569 		if(this_event -> event_small_side == left_last_base &&  this_event -> event_large_side == right_first_base){
1570 			if(is_indel && this_event -> indel_length == indel_len){
1571 				this_event -> final_counted_reads ++;
1572 				is_found = 1;
1573 				break;
1574 			}
1575 			if(!is_indel){
1576 				this_event -> final_counted_reads ++;
1577 
1578 				//
1579 				if(read_length >= 80)
1580 				{
1581 					if(left_extend > 30 && right_extend > 30) this_event -> critical_supporting_reads ++;
1582 				}else{
1583 					if(left_extend > 18 && right_extend > 18) this_event -> critical_supporting_reads ++;
1584 				}
1585 				this_event -> junction_flanking_left = max(this_event -> junction_flanking_left, left_extend);
1586 				this_event -> junction_flanking_right = max(this_event -> junction_flanking_right, right_extend);
1587 				is_found = 1;
1588 				break;
1589 			}
1590 		}
1591 	}
1592 
1593 	if(!is_found){
1594 		//SUBREADprintf("\nEVENT NOT FOUND.\n\n");
1595 		return 1;
1596 	}
1597 	return 0;
1598 }
1599 
1600 int get_junction_right_extension(char * remainder_cigar){
1601 	int ret = 0;
1602 	unsigned int tmpi = 0;
1603 	int cigar_cursor, nch;
1604 
1605 	for(cigar_cursor = 0;;cigar_cursor++){
1606 		nch = remainder_cigar[cigar_cursor];
1607 		if(0 == nch)break;
1608 		if(isdigit(nch))tmpi = tmpi*10 + nch - '0';
1609 		else{
1610 			if(nch == 'M' || nch == 'D')
1611 				ret += tmpi;
1612 			if(nch == 'N' || nch == 'n' || nch == 'B' || nch == 'b')
1613 				break;
1614 
1615 			tmpi = 0;
1616 		}
1617 	}
1618 	return ret;
1619 }
1620 
1621 void remove_soft_clipping(char * dst, char * src){
1622 	unsigned int tmpi = 0, head_clip = 0, tail_clip = 0, last_M = 0;
1623 	int cigar_cursor, is_first_s = 1, nch, nch2;
1624 
1625 	dst[0] = 0;
1626 
1627 	for(cigar_cursor = 0;;cigar_cursor++){
1628 		nch = src[cigar_cursor];
1629 		nch2 = src[cigar_cursor+1];
1630 
1631 		if(0 == nch)break;
1632 		if(isdigit(nch))tmpi = tmpi*10 + nch - '0';
1633 		else{
1634 			if('S' == nch){
1635 				if(is_first_s)	head_clip = tmpi;
1636 				if(nch2 == 0)	tail_clip = tmpi;
1637 			}
1638 			else if('M' == nch){
1639 				last_M = tmpi;
1640 			}
1641 			else{
1642 				if(last_M){
1643 					sprintf(dst + strlen(dst), "%uM", last_M + head_clip);
1644 					last_M = 0;
1645 					head_clip = 0;
1646 				}
1647 				sprintf(dst + strlen(dst), "%u%c", tmpi, nch);
1648 			}
1649 			tmpi=0;
1650 			is_first_s = 0;
1651 		}
1652 	}
1653 	if(last_M){
1654 		sprintf(dst + strlen(dst), "%uM" , last_M + tail_clip + head_clip);
1655 	}
1656 
1657 }
1658 
1659 int getFirstM(char * cig){
1660 	int tmpi = 0, nch, x1;
1661 
1662 	for(x1=0; 0!=(nch = cig[x1]) ; x1++){
1663 		if(isdigit(nch)){
1664 			tmpi=tmpi*10 + nch - '0';
1665 		}else break;
1666 	}
1667 	return tmpi;
1668 }
1669 
1670 int calc_tlen(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int read_len_1, int  read_len_2);
1671 
1672 int calc_flags(global_context_t * global_context, thread_context_t * thread_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int is_second_read, int read_len_1, int read_len_2, int current_location_no , int tlen, int this_OK, int mate_OK)
1673 {
1674 	int ret, is_TLEN_wrong = 0;
1675 
1676 	if(global_context->input_reads.is_paired_end_reads)
1677 	{
1678 		ret  = SAM_FLAG_PAIRED_TASK;
1679 		ret |= is_second_read?SAM_FLAG_SECOND_READ_IN_PAIR:SAM_FLAG_FIRST_READ_IN_PAIR;
1680 
1681 		subread_output_tmp_t * this_rec = is_second_read?rec2:rec1;
1682 		subread_output_tmp_t * mate_rec = is_second_read?rec1:rec2;
1683 
1684 		if(this_OK == 0)  ret |= SAM_FLAG_UNMAPPED;
1685 		else
1686 			if(this_rec->strand + is_second_read == 1) ret |= SAM_FLAG_REVERSE_STRAND_MATCHED;
1687 
1688 		if(mate_OK == 0)  ret |= SAM_FLAG_MATE_UNMATCHED;
1689 		else
1690 			if(mate_rec->strand + is_second_read != 1) ret |= SAM_FLAG_MATE_REVERSE_STRAND_MATCHED;
1691 
1692 		if(rec1 && rec2)
1693 		{
1694 			int TLEN = tlen;//calc_tlen(global_context , rec1, rec2, read_len_1, read_len_2);
1695 			int is_PEM = 0;
1696 			if( rec1 -> chro == rec2 -> chro /* two pointers can be directly compared. */ && TLEN >= global_context->config.minimum_pair_distance &&
1697 			    TLEN <= global_context-> config.maximum_pair_distance &&  this_rec->strand == mate_rec->strand )
1698 			{
1699 				if(global_context -> config.is_first_read_reversed && !(global_context -> config.is_second_read_reversed))
1700 				{
1701 					if(this_rec -> strand == 0) {
1702 						if((is_second_read + (mate_rec-> offset > this_rec -> offset) == 1) || mate_rec-> offset == this_rec -> offset)
1703 							is_PEM = 1;
1704 						else is_TLEN_wrong = 1;
1705 					}
1706 				}
1707 				else
1708 				{
1709 					if(this_rec -> strand) {
1710 						if((is_second_read + (mate_rec-> offset < this_rec -> offset) == 1) || mate_rec-> offset == this_rec -> offset) is_PEM = 1;
1711 						else is_TLEN_wrong = 1;
1712 					}else {
1713 						if((is_second_read + (mate_rec-> offset > this_rec -> offset) == 1) || mate_rec-> offset == this_rec -> offset) is_PEM = 1;
1714 						else is_TLEN_wrong = 1;
1715 					}
1716 				}
1717 			}
1718 			if(is_PEM) ret |= SAM_FLAG_MATCHED_IN_PAIR;
1719 			else if(is_second_read){
1720 				if(rec1 -> chro != rec2 -> chro){
1721 					if(thread_context) thread_context -> not_properly_pairs_different_chro ++;
1722 					else global_context -> not_properly_pairs_different_chro++;
1723 				}else if(this_rec->strand != mate_rec->strand){
1724 					if(thread_context) thread_context -> not_properly_different_strands ++;
1725 					else global_context -> not_properly_different_strands ++;
1726 				}else if(TLEN < global_context->config.minimum_pair_distance || TLEN > global_context-> config.maximum_pair_distance){
1727 					if(thread_context) thread_context -> not_properly_pairs_TLEN_wrong ++;
1728 					else global_context -> not_properly_pairs_TLEN_wrong ++;
1729 				}else if(is_TLEN_wrong){
1730 					if(thread_context) thread_context -> not_properly_pairs_wrong_arrangement ++;
1731 					else global_context  -> not_properly_pairs_wrong_arrangement ++;
1732 				}
1733 
1734 			}
1735 		}
1736 	}
1737 	else
1738 	{
1739 		ret = 0;
1740 
1741 		if(this_OK == 0)  ret |= SAM_FLAG_UNMAPPED;
1742 		else
1743 			if(rec1->strand) ret |= SAM_FLAG_REVERSE_STRAND_MATCHED;
1744 	}
1745 
1746 	if(current_location_no>0){
1747 		if((rec1 && !is_second_read)||(rec2 && is_second_read))
1748 			ret |= SAM_FLAG_SECONDARY_MAPPING;
1749 	}
1750 
1751 	return ret;
1752 }
1753 
1754 
1755 int calc_tlen(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int read_len_1, int  read_len_2)
1756 {
1757 	int ret = -1;
1758 
1759 	unsigned int r1_head_pos = rec1 -> offset;// - rec1 -> soft_clipping_movements;
1760 	unsigned int r2_head_pos = rec2 -> offset;// - rec2 -> soft_clipping_movements;
1761 
1762 	if(r1_head_pos == r2_head_pos)
1763 		ret = max(read_len_1, read_len_2);	// the two reads are fully overlapping
1764 	else{
1765 		int is_r2_smaller = r2_head_pos < r1_head_pos;
1766 		subread_output_tmp_t * smaller_rec = is_r2_smaller?rec2:rec1;
1767 		unsigned int smaller_head_pos = is_r2_smaller?r2_head_pos:r1_head_pos;
1768 		unsigned int larger_head_pos = is_r2_smaller?r1_head_pos:r2_head_pos;
1769 
1770 		int len_larger_rec = is_r2_smaller?read_len_1:read_len_2;
1771 		int len_smaller_rec = is_r2_smaller?read_len_2:read_len_1;
1772 
1773 		unsigned int tmpi = 0;
1774 		unsigned int chro_cursor = smaller_head_pos;
1775 		unsigned int section_end = 0;
1776 		unsigned int read_cursor = 0;
1777 		int cigar_cursor, nch, nch2;
1778 		for(cigar_cursor = 0; ; cigar_cursor++){
1779 			nch = smaller_rec -> cigar[cigar_cursor];
1780 			nch2 = smaller_rec -> cigar[cigar_cursor+1];
1781 
1782 			if(nch > 0){
1783 				if(isdigit(nch)) tmpi = tmpi * 10 + nch - '0';
1784 				else{
1785 					if(nch == 'M' || nch == 'S'){
1786 						chro_cursor += tmpi;
1787 						read_cursor += tmpi;
1788 						section_end = chro_cursor;
1789 					}
1790 					if(nch == 'N' || nch == 'B' || nch == 'b' || nch == 'n' || nch == 'I' || nch == 'D' || nch2 == 0){
1791 						if(nch == 'N' || nch == 'D')
1792 							chro_cursor += tmpi;
1793 
1794 						//SUBREADprintf("NCH=%c, NCH2=%d, SEC_END=%u, LARGE_HEAD=%u\n", nch, nch2, section_end, larger_head_pos);
1795 						if(section_end >= larger_head_pos)
1796 						{
1797 							ret = read_cursor + larger_head_pos - section_end + len_larger_rec;
1798 							break;
1799 						}
1800 					}
1801 
1802 					if(nch == 'I'){
1803 						read_cursor += tmpi;
1804 					}
1805 
1806 					if(nch == 'B' || nch == 'b' || nch == 'n') break;	// fusion block, unable to determine the TLEN after the fusion point.
1807 
1808 					tmpi = 0;
1809 				}
1810 			}else{
1811 				break;
1812 			}
1813 
1814 		}
1815 
1816 		if(ret < 0)
1817 			ret  = larger_head_pos - section_end + len_larger_rec + len_smaller_rec;
1818 	}
1819 
1820 
1821 	//SUBREADprintf("TLEN = %d :: \tREC1 : %s:%u (%s  - %d) ; REC2: %s:%u (%s  - %d )\n", ret, rec1 -> chro, rec1 -> offset, rec1 -> cigar, rec1 -> soft_clipping_movements,
1822 	//								       rec2 -> chro, rec2 -> offset, rec2 -> cigar, rec2 -> soft_clipping_movements);
1823 
1824 	return ret;
1825 
1826 }
1827 
1828 int calc_should_reverse(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int is_second_read)
1829 {
1830 	subread_output_tmp_t * cur_rec = is_second_read?rec2:rec1;
1831 
1832 	if(cur_rec)
1833 		return cur_rec->strand;
1834 	else
1835 	{
1836 		return is_second_read;
1837 		//subread_output_tmp_t * mate_rec = is_second_read?rec2:rec1;
1838 		//return mate_rec?mate_rec->strand:is_second_read;
1839 	}
1840 }
1841 
1842 void remove_nm_i(char * st)
1843 {
1844 	char * st_nm = strstr(st, "\tNM:i:");
1845 	if(st_nm)
1846 	{
1847 		char * write_cursor = st_nm;
1848 		int started = 0;
1849 		st_nm++;
1850 		while(1)
1851 		{
1852 			char nch = *st_nm;
1853 			if(!nch)
1854 			{
1855 				*write_cursor=0;
1856 				break;
1857 			}
1858 			if(nch == '\t') started = 1;
1859 			if(started)
1860 			{
1861 				*write_cursor=*st_nm;
1862 				write_cursor++;
1863 			}
1864 			st_nm++;
1865 		}
1866 	}
1867 }
1868 
1869 double last_t1 = 0;
1870 int for_other_thread = 0;
1871 int for_one_threads = 0;
1872 
1873 
1874 void add_buffered_fragment(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number ,
1875 	char * read_name1, unsigned int flags1, char * chro_name1, unsigned int chro_position1, int mapping_quality1, char * cigar1,
1876 	char * next_chro_name1, unsigned int next_chro_pos1, int temp_len1, int read_len1,
1877 	char * read_text1, char * qual_text1, char * additional_columns1,
1878 	char * read_name2, unsigned int flags2, char * chro_name2, unsigned int chro_position2, int mapping_quality2, char * cigar2,
1879 	char * next_chro_name2, unsigned int next_chro_pos2, int temp_len2, int read_len2,
1880 	char * read_text2, char * qual_text2, char * additional_columns2,
1881 	int all_locations, int this_location
1882 	){
1883 
1884 	assert(global_context -> config.all_threads > 1);
1885 
1886 	if( global_context -> config.is_BAM_output  && !global_context -> config.is_input_read_order_required){
1887 		//SUBREADprintf("MTBAM : THR_%d CORE: %s\n", thread_context -> thread_id, read_name1);
1888 		SamBam_writer_add_read(global_context -> output_bam_writer, thread_context -> thread_id, read_name1, flags1, chro_name1, chro_position1, mapping_quality1, cigar1, next_chro_name1, next_chro_pos1, temp_len1, read_len1, read_text1, qual_text1, additional_columns1, !global_context->input_reads.is_paired_end_reads);
1889 		if(global_context->input_reads.is_paired_end_reads)
1890 			SamBam_writer_add_read(global_context -> output_bam_writer, thread_context -> thread_id, read_name2, flags2, chro_name2, chro_position2, mapping_quality2, cigar2, next_chro_name2, next_chro_pos2, temp_len2, read_len2, read_text2, qual_text2, additional_columns2, 1);
1891 	}else{
1892 		while(1){
1893 			int is_fin = 0;
1894 			subread_lock_occupy(&global_context -> output_lock);
1895 			//SUBREADprintf("THWT : TH_%d IS AFTER %lld ; LAST=%lld (%d/%d)\n", thread_context -> thread_id, pair_number -1, global_context -> last_written_fragment_number, this_location, all_locations);
1896 			if(global_context -> last_written_fragment_number == pair_number-1){
1897 				if(global_context -> config.is_BAM_output){
1898 					SamBam_writer_add_read(global_context -> output_bam_writer, -1, read_name1, flags1,  chro_name1 , chro_position1, mapping_quality1, cigar1, next_chro_name1 , next_chro_pos1, temp_len1, read_len1, read_text1, qual_text1, additional_columns1, !global_context->input_reads.is_paired_end_reads);
1899 
1900 					if(global_context->input_reads.is_paired_end_reads)
1901 						SamBam_writer_add_read(global_context -> output_bam_writer, -2, read_name2, flags2,  chro_name2 , chro_position2, mapping_quality2, cigar2, next_chro_name2 , next_chro_pos2, temp_len2, read_len2, read_text2, qual_text2, additional_columns2, 1);
1902 
1903 				}else{
1904 					int write_len_2 = 100, write_len = sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%d\t%s\t%s%s%s\n", read_name1, flags1, chro_name1, chro_position1, mapping_quality1, cigar1, next_chro_name1, next_chro_pos1,  temp_len1, read_text1, qual_text1, additional_columns1[0]?"\t":"", additional_columns1);
1905 					if(global_context->input_reads.is_paired_end_reads)
1906 						write_len_2 = sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%d\t%s\t%s%s%s\n", read_name2, flags2, chro_name2, chro_position2, mapping_quality2, cigar2, next_chro_name2, next_chro_pos2,  temp_len2, read_text2, qual_text2, additional_columns2[0]?"\t":"", additional_columns2);
1907 
1908 					if( write_len < 10 || write_len_2 < 10 ){
1909 						global_context -> output_sam_is_full = 1;
1910 					}
1911 				}
1912 				if(all_locations <= this_location +1)global_context -> last_written_fragment_number = pair_number;
1913 				is_fin = 1;
1914 			}
1915 			subread_lock_release(&global_context -> output_lock);
1916 			if(is_fin)break;
1917 			usleep(2);
1918 		}
1919 	}
1920 }
1921 
1922 #define write_chunk_results_145 write_chunk_results
1923 
1924 // rec1 or rec2 is OK if they are not NULL.
1925 void write_single_fragment(global_context_t * global_context, thread_context_t * thread_context, subread_output_tmp_t * rec1, realignment_result_t * raw_r1, subread_output_tmp_t * rec2, realignment_result_t * raw_r2, int all_locations , int current_location , char * read_name_1, char * read_name_2, int read_len_1, int read_len_2, char * read_text_1, char * read_text_2, char * qual_text_1, char * qual_text_2, subread_read_number_t pair_number, int non_informative_subread_r1, int non_informative_subread_r2, int is_R1_OK, int is_R2_OK)
1926 {
1927 
1928 	//assert(all_locations <= global_context -> config.reported_multi_best_reads);
1929 	int tlen = 0;
1930 
1931 	if(is_R1_OK && is_R2_OK && rec1->chro == rec2->chro)tlen = calc_tlen(global_context , rec1, rec2,  read_len_1,  read_len_2);
1932 
1933 	//#warning "SUBREAD_151 ============= REMOVE '+1' when not doing structure variance detection for the proposal  =========="
1934 	if(0)
1935 	if((global_context ->  config.do_fusion_detection || global_context ->  config.do_long_del_detection) && rec1 && rec2 && current_location == 0 && rec1 -> chimeric_sections == 1 && rec2 -> chimeric_sections == 1){
1936 		// when current_location == 0, both r1 and r2 were on the same strand.
1937 		int is_funky = is_funky_fragment(global_context, read_name_1, rec1 -> chro, rec1 -> offset, read_len_1, rec1 -> strand, rec1 -> out_cigars[0], read_text_1, read_name_2, rec2 -> chro, rec2 -> offset, read_len_2, rec2 -> strand, rec2 -> out_cigars[0] , read_text_2, tlen);
1938 		if (0 &&is_funky)
1939 			SUBREADprintf("RNAME %s is %d, CHRO = '%s' %p  '%s' %p, POS=%u, %u\n", read_name_1, is_funky, rec1 -> chro,rec1 -> chro,rec2 -> chro,rec2 -> chro, rec1 -> offset, rec2 -> offset);
1940 
1941 		if(is_funky & FUNKY_FRAGMENT_A){
1942 			fraglist_append(&global_context -> funky_list_A, pair_number);
1943 		}
1944 		if(1)if(is_funky & FUNKY_FRAGMENT_BC){
1945 			//#warning "LOGIC WRONG: R1 AND R2 SHOULD BE DECIDED BY THEIR MAPPING POSITIONS"
1946 			bktable_append(&global_context -> funky_table_BC, rec1 -> chro, rec1 -> offset , NULL + (2*pair_number));
1947 			bktable_append(&global_context -> funky_table_BC, rec2 -> chro, rec2 -> offset , NULL + (2*pair_number+1));
1948 		}
1949 		if(1)if(is_funky & FUNKY_FRAGMENT_DE){
1950 			fraglist_append(&global_context -> funky_list_DE, pair_number);
1951 			bktable_append(&global_context -> funky_table_DE, rec1 -> chro, rec1 -> offset , NULL + (2*pair_number + (rec1 -> offset > rec2 -> offset ? 1:0)));
1952 			bktable_append(&global_context -> funky_table_DE, rec2 -> chro, rec2 -> offset , NULL + (2*pair_number + (rec1 -> offset < rec2 -> offset ? 1:0)));
1953 		}
1954 	}
1955 
1956 	int flag1 = calc_flags( global_context, thread_context , rec1, rec2, 0,  read_len_1,  read_len_2, current_location, tlen, is_R1_OK, is_R2_OK);
1957 
1958 	int flag2 = -1;
1959 
1960 	if(global_context->input_reads.is_paired_end_reads)
1961 	{
1962 		flag2 = calc_flags( global_context , thread_context, rec1, rec2, 1,  read_len_1,  read_len_2, current_location, tlen, is_R2_OK, is_R1_OK);
1963 		if((0 == current_location) && (flag2 & SAM_FLAG_MATCHED_IN_PAIR)){
1964 			if(thread_context)thread_context->all_correct_PE_reads  ++;
1965 			else global_context->all_correct_PE_reads  ++;
1966 		}
1967 	}
1968 
1969 	int applied_reverse_space;
1970 	applied_reverse_space = global_context->config.space_type;
1971 	if(global_context -> config.convert_color_to_base)
1972 	{
1973 
1974 		//SUBREADprintf("BEST_READ_NO = %d / %d\n", current_location, all_locations);
1975 		//SUBREADprintf("ORGI: %s\n", read_text_1);
1976 		colorread2base(read_text_1, read_len_1+1);
1977 		//SUBREADprintf("CONV: %s\n\n", read_text_1);
1978 
1979 		if(global_context->input_reads.is_paired_end_reads)
1980 			colorread2base(read_text_2, read_len_2+1);
1981 
1982 		applied_reverse_space = GENE_SPACE_BASE;
1983 	}
1984 
1985 	int should_1_reverse = calc_should_reverse( global_context , rec1, rec2, 0);
1986 
1987 	if(should_1_reverse)
1988 	{
1989 		reverse_read(read_text_1, read_len_1 + global_context->config.convert_color_to_base, applied_reverse_space);
1990 		reverse_quality(qual_text_1, read_len_1);
1991 	}
1992 
1993 	if(global_context->input_reads.is_paired_end_reads)
1994 	{
1995 		int should_2_reverse = calc_should_reverse( global_context , rec1, rec2, 1);
1996 
1997 		if(should_2_reverse)
1998 		{
1999 			reverse_read(read_text_2, read_len_2 + global_context->config.convert_color_to_base, applied_reverse_space);
2000 			reverse_quality(qual_text_2, read_len_2);
2001 		}
2002 	}
2003 	remove_backslash(read_name_1);
2004 	if(global_context->input_reads.is_paired_end_reads) remove_backslash(read_name_2);
2005 
2006 	int display_offset1 = 0, display_tailgate1 = 0;
2007 	int display_offset2 = 0, display_tailgate2 = 0;
2008 
2009 	if(global_context -> config.space_type == GENE_SPACE_COLOR)
2010 	{
2011 
2012 		if(rec1 && rec1 -> strand)
2013 		{
2014 			display_offset1 = 0;
2015 			display_tailgate1 = 1;
2016 		}
2017 		else
2018 		{
2019 			if(!global_context -> config.convert_color_to_base)
2020 			{
2021 				// the first base was a fake prime base; the second base is the first meaningful base.
2022 				char second_char = read_text_1[1];
2023 				read_text_1[1] = color2char(second_char, read_text_1[0]);
2024 			}
2025 			display_offset1 = 1;
2026 			display_tailgate1 = 0;
2027 		}
2028 
2029 		if(rec2 && rec2 -> strand)
2030 		{
2031 			if(!global_context -> config.convert_color_to_base)
2032 			{
2033 				// the first base was a fake prime base; the second base is the first meaningful base.
2034 				char second_char = read_text_2[1];
2035 				read_text_2[1] = color2char(second_char, read_text_2[0]);
2036 			}
2037 			display_offset2 = 1;
2038 			display_tailgate2 = 0;
2039 		}
2040 		else
2041 		{
2042 			display_offset2 = 0;
2043 			display_tailgate2 = 1;
2044 		}
2045 		assert(display_offset1 + display_tailgate1 == 1);
2046 		assert(display_offset2 + display_tailgate2 == 1);
2047 	}
2048 	if(!qual_text_1[0])
2049 	{
2050 		int xi2;
2051 		for(xi2=display_offset1;read_text_1[xi2 + display_tailgate1];xi2++) qual_text_1[xi2 - display_offset1] = 'I';
2052 		qual_text_1[xi2 - display_offset1]=0;
2053 		for(xi2=display_offset2;read_text_2[xi2 + display_tailgate2];xi2++) qual_text_2[xi2 - display_offset2] = 'I';
2054 		qual_text_2[xi2 - display_offset2]=0;
2055 
2056 	}
2057 
2058 
2059 
2060 	if(display_tailgate1)
2061 		read_text_1[strlen(read_text_1)-1]=0;
2062 	if(display_tailgate2)
2063 		read_text_2[strlen(read_text_2)-1]=0;
2064 
2065 
2066 	if(global_context -> config.space_type == GENE_SPACE_BASE){
2067 		if(is_R1_OK) {// && !strstr(rec1->additional_information, "\tNM:i:")){
2068 			short rec1_edit = calc_edit_dist(global_context, rec1->raw_result, rec1->cigar , rec1->linear_position, read_text_1, raw_r1 -> final_mismatched_bases);
2069 			sprintf(rec1->additional_information + strlen( rec1->additional_information), "\tNM:i:%d", rec1_edit );
2070 		}
2071 		if(global_context->input_reads.is_paired_end_reads && is_R2_OK) //&& !strstr(rec2->additional_information, "\tNM:i:"))
2072 		{
2073 			short rec2_edit = calc_edit_dist(global_context, rec2->raw_result, rec2->cigar , rec2->linear_position, read_text_2, raw_r2 -> final_mismatched_bases);
2074 			sprintf(rec2->additional_information + strlen( rec2->additional_information), "\tNM:i:%d", rec2_edit );
2075 		}
2076 	}
2077 
2078 	char extra_additional_1 [1000+CORE_ADDITIONAL_INFO_LENGTH], extra_additional_2[1000+CORE_ADDITIONAL_INFO_LENGTH];
2079 	int extra_additional_1_ptr=0, extra_additional_2_ptr=0;
2080 
2081 	extra_additional_1[0] = extra_additional_2[0]=0;
2082 
2083 	if(is_R1_OK || is_R2_OK){	// no NH and HI only if both ends are unmapped.
2084 		extra_additional_1_ptr +=snprintf(extra_additional_1, 310, "HI:i:%d\tNH:i:%d", current_location+1, all_locations);
2085 		extra_additional_2_ptr +=snprintf(extra_additional_2, 310, "HI:i:%d\tNH:i:%d", current_location+1, all_locations);
2086 	}
2087 
2088 	if(global_context->config.read_group_id[0])
2089 	{
2090 		extra_additional_1_ptr += snprintf(extra_additional_1+ extra_additional_1_ptr, 310, "\tRG:Z:%s", global_context->config.read_group_id);
2091 		extra_additional_2_ptr += snprintf(extra_additional_2+ extra_additional_2_ptr, 310, "\tRG:Z:%s", global_context->config.read_group_id);
2092 	}
2093 
2094 	char * out_chro1, * out_chro2, *out_cigar1, *out_cigar2;
2095 	if(is_R1_OK)
2096 	{
2097 		strcpy(extra_additional_1 + extra_additional_1_ptr, rec1->additional_information);
2098 		out_chro1 = rec1->chro;
2099 		out_cigar1 = rec1->cigar;
2100 	}
2101 	else
2102 	{
2103 		out_chro1 = "*";
2104 		out_cigar1 = "*";
2105 	}
2106 
2107 	if(is_R2_OK)
2108 	{
2109 		strcpy(extra_additional_2 + extra_additional_2_ptr, rec2->additional_information);
2110 		out_chro2 = rec2->chro;
2111 		out_cigar2 = rec2->cigar;
2112 	}
2113 	else
2114 	{
2115 		out_chro2 = "*";
2116 		out_cigar2 = "*";
2117 	}
2118 
2119 	int out_offset1=0, out_offset2=0;
2120 	long long int out_tlen1, out_tlen2;
2121 	int  out_mapping_quality1 = 0, out_mapping_quality2 = 0;
2122 
2123 	out_tlen1 = tlen;
2124 	out_tlen2 = tlen;
2125 	if(is_R1_OK && is_R2_OK)
2126 	{
2127 		if( rec1->offset >  rec2->offset) out_tlen1 = - out_tlen1;
2128 		else if(rec2->offset > rec1->offset) out_tlen2 = -out_tlen2;
2129 		else{
2130 			if( rec1 -> strand )  out_tlen1 = - out_tlen1;
2131 			else out_tlen2 = -out_tlen2;
2132 		}
2133 	}
2134 
2135 	if(0==current_location)
2136 	{
2137 		if(global_context -> input_reads.is_paired_end_reads)
2138 		{
2139 			if(is_R1_OK || is_R2_OK){
2140 				//SUBREADprintf("MAPTOLEN\n");
2141 				if(thread_context)
2142 					thread_context -> all_mapped_reads ++;
2143 				else
2144 					global_context -> all_mapped_reads ++;
2145 			}
2146 		}
2147 		else if(is_R1_OK){
2148 			if(thread_context)
2149 				thread_context -> all_mapped_reads ++;
2150 			else
2151 				global_context -> all_mapped_reads ++;
2152 		}
2153 	}
2154 
2155 	if(is_R1_OK){
2156 		out_offset1 = max(1, rec1->offset);
2157 		out_mapping_quality1 = rec1->mapping_quality;
2158 	}
2159 	if(is_R2_OK){
2160 		out_offset2 = max(1, rec2->offset);
2161 		out_mapping_quality2 = rec2->mapping_quality;
2162 	}
2163 
2164 
2165 	char * mate_chro_for_1  = out_chro2;
2166 	char * mate_chro_for_2  = out_chro1;
2167 
2168 	if(out_chro1 == out_chro2 && out_chro1 && out_chro1[0]!='*'){
2169 		mate_chro_for_1="=";
2170 		mate_chro_for_2="=";
2171 	}
2172 
2173 	//if(pair_number<10)SUBREADprintf("FIN_ADD #%d: %s in %p \n", pair_number, read_name_1, thread_context);
2174 
2175 	if(thread_context)
2176 		add_buffered_fragment(global_context, thread_context, pair_number,
2177 			read_name_1, flag1,  out_chro1 , out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1 , out_offset2, out_tlen1, read_len_1,
2178 			read_text_1 + display_offset1, qual_text_1, extra_additional_1,
2179 			read_name_2, flag2,  out_chro2 , out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2 , out_offset1, out_tlen2, read_len_2,
2180 			read_text_2 + display_offset2, qual_text_2, extra_additional_2, all_locations, current_location);
2181 	else{
2182 		//subread_lock_occupy(&global_context -> output_lock);
2183 		if(global_context -> config.is_BAM_output){
2184 			SamBam_writer_add_read(global_context -> output_bam_writer, -1, read_name_1, flag1,  out_chro1 , out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1 , out_offset2, out_tlen1, read_len_1, read_text_1 + display_offset1, qual_text_1, extra_additional_1, !global_context->input_reads.is_paired_end_reads);
2185 
2186 			if(global_context->input_reads.is_paired_end_reads)
2187 				SamBam_writer_add_read(global_context -> output_bam_writer, -1, read_name_2, flag2,  out_chro2 , out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2 , out_offset1, out_tlen2, read_len_2, read_text_2 + display_offset2, qual_text_2, extra_additional_2, 1);
2188 		}
2189 		else
2190 		{
2191 			int write_len_2 = 100,
2192 			#ifdef __MINGW32__
2193 			write_len = sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%I64d\t%s\t%s%s%s\n", read_name_1, flag1, out_chro1, out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1, out_offset2, out_tlen1, read_text_1 + display_offset1, qual_text_1, extra_additional_1[0]?"\t":"", extra_additional_1);
2194 			#else
2195 			write_len = sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", read_name_1, flag1, out_chro1, out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1, out_offset2, out_tlen1, read_text_1 + display_offset1, qual_text_1, extra_additional_1[0]?"\t":"", extra_additional_1);
2196 			#endif
2197 			if(global_context->input_reads.is_paired_end_reads)
2198 			#ifdef __MINGW32__
2199 				write_len_2 = sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%I64d\t%s\t%s%s%s\n", read_name_2, flag2, out_chro2, out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2, out_offset1, out_tlen2, read_text_2 + display_offset2, qual_text_2, extra_additional_2[0]?"\t":"", extra_additional_2);
2200 			#else
2201 				write_len_2 = sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", read_name_2, flag2, out_chro2, out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2, out_offset1, out_tlen2, read_text_2 + display_offset2, qual_text_2, extra_additional_2[0]?"\t":"", extra_additional_2);
2202 			#endif
2203 
2204 			if( write_len < 10 || write_len_2 < 10 ){
2205 				global_context -> output_sam_is_full = 1;
2206 			}
2207 		}
2208 		//subread_lock_release(&global_context -> output_lock);
2209 	}
2210 
2211 
2212 	if(is_R1_OK) rec1->raw_result->selected_position += rec1->soft_clipping_movements;
2213 	if(is_R2_OK) rec2->raw_result->selected_position += rec2->soft_clipping_movements;
2214 
2215 }
2216 
2217 
2218 void init_chunk_scanning_parameters(global_context_t * global_context, thread_context_t * thread_context, gene_input_t ** ginp1, gene_input_t ** ginp2)
2219 {
2220 	*ginp2 = NULL;
2221 	*ginp1 = &global_context->input_reads.first_read_file;
2222 	if(global_context->input_reads.is_paired_end_reads)
2223 		*ginp2 = &global_context->input_reads.second_read_file;
2224 }
2225 
2226 gene_value_index_t * find_current_value_index(global_context_t * global_context, unsigned int pos, int len)
2227 {
2228 	int block_no;
2229 	if(global_context->index_block_number<2)
2230 	{
2231 		unsigned index_begin = global_context -> all_value_indexes [0] . start_base_offset;
2232 		unsigned index_end = global_context -> all_value_indexes [0] . start_base_offset + global_context -> all_value_indexes [0] . length;
2233 
2234 		if(pos>=index_begin && pos + len<index_end)
2235 			return & global_context->all_value_indexes [0];
2236 		else return NULL;
2237 	}
2238 	else
2239 		for(block_no=0; block_no<global_context->index_block_number; block_no++)
2240 		{
2241 			unsigned index_begin = global_context -> all_value_indexes [block_no] . start_base_offset;
2242 			unsigned index_end = global_context -> all_value_indexes [block_no] . start_base_offset + global_context -> all_value_indexes [block_no] . length;
2243 			if((block_no == 0 && pos >=  index_begin && pos < index_end - 1000000) ||
2244 			   (block_no>0 && block_no<global_context->index_block_number-1 && pos >= index_begin+ 1000000 && pos < index_end - 1000000) ||
2245 			   (block_no == global_context->index_block_number-1 && pos >= index_begin + 1000000 && pos < index_end ))
2246 			{
2247 				return & global_context -> all_value_indexes [block_no];
2248 			}
2249 		}
2250 	return NULL;
2251 }
2252 //this function selects the correct all_value_indexes from global_context and put it to global_context or thread_context if thread_context is not NULL.
2253 int locate_current_value_index(global_context_t * global_context, thread_context_t * thread_context, mapping_result_t * result, int rlen)
2254 {
2255 	int block_no;
2256 
2257 	if(global_context->index_block_number<2)
2258 	{
2259 		unsigned index_begin = global_context -> all_value_indexes [0] . start_base_offset;
2260 		unsigned index_end = global_context -> all_value_indexes [0] . start_base_offset + global_context -> all_value_indexes [0] . length;
2261 
2262 		//SUBREADprintf("RESET2 : %u should <= %u\n",  result->selected_position + rlen, index_end);
2263 
2264 		if(result->selected_position>=index_begin && result->selected_position + rlen<=index_end)
2265 		{
2266 			if(thread_context)thread_context->current_value_index = & global_context->all_value_indexes [0];
2267 			else global_context->current_value_index =& global_context->all_value_indexes [0];
2268 			return 0;
2269 		}
2270 		else return 1;
2271 	}
2272 	for(block_no=0; block_no<global_context->index_block_number; block_no++)
2273 	{
2274 		unsigned index_begin = global_context -> all_value_indexes [block_no] . start_base_offset;
2275 		unsigned index_end = global_context -> all_value_indexes [block_no] . start_base_offset + global_context -> all_value_indexes [block_no] . length;
2276 		if((block_no == 0 && result->selected_position >=  index_begin && result->selected_position < index_end - 1000000) ||
2277 		   (block_no>0 && block_no<global_context->index_block_number-1 && result->selected_position >= index_begin+ 1000000 && result->selected_position < index_end - 1000000) ||
2278 		   (block_no == global_context->index_block_number-1 && result->selected_position >= index_begin + 1000000 && result->selected_position < index_end ))
2279 		{
2280 			if(thread_context)thread_context->current_value_index =& global_context -> all_value_indexes [block_no];
2281 			else global_context->current_value_index =& global_context -> all_value_indexes [block_no];
2282 			return 0;
2283 		}
2284 	}
2285 	return 1;
2286 }
2287 
2288 
2289 
2290 
2291 int do_iteration_one(global_context_t * global_context, thread_context_t * thread_context)
2292 {
2293 	assert(0); // not needed anymore
2294 	return 0;
2295 }
2296 
2297 
2298 
2299 int finish_iteration_three(global_context_t * global_context, thread_context_t * thread_context)
2300 {
2301 	return 0;
2302 }
2303 int do_iteration_three(global_context_t * global_context, thread_context_t * thread_context)
2304 {
2305 	int ret;
2306 	gene_input_t * ginp1 = NULL , * ginp2 = NULL;
2307 	subread_read_number_t current_read_number=0;
2308 	char read_text_1[MAX_READ_LENGTH+1], read_text_2[MAX_READ_LENGTH+1];
2309 	char qual_text_1[MAX_READ_LENGTH+1], qual_text_2[MAX_READ_LENGTH+1];
2310 	char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
2311 	int read_len_1, read_len_2=0, sqr_interval, sqr_read_number = 0;
2312 
2313 	//unsigned int low_index_border = global_context -> current_value_index -> start_base_offset;
2314 	//unsigned int high_index_border = global_context -> current_value_index -> start_base_offset + global_context -> current_value_index -> length;
2315 
2316 	print_in_box(80,0,0,"Prepare for long indel deleteion...");
2317 	init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
2318 	sqr_interval = max(5000,global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads);
2319 
2320 	while(1)
2321 	{
2322 		int is_second_read;
2323 
2324 		sqr_read_number++;
2325 		ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 1, &current_read_number);
2326 		if(ret) break;
2327 
2328 		int best_read_id = 0;
2329 		for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
2330 		{
2331 			int is_reversed_already = 0;
2332 			for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
2333 			{
2334 				mapping_result_t current_result_body;
2335 				mapping_result_t mate_result_body;
2336 
2337 				mapping_result_t * current_result = &current_result_body, *mate_result = NULL;
2338 				bigtable_readonly_result(global_context, NULL, current_read_number, best_read_id, is_second_read, current_result, NULL);
2339 
2340 				if(global_context -> input_reads.is_paired_end_reads){
2341 					mate_result   = &mate_result_body;
2342 					bigtable_readonly_result(global_context, NULL, current_read_number, best_read_id,!is_second_read, mate_result, NULL);
2343 				}
2344 
2345 				//if(best_read_id && (current_result->selected_votes <1)) break;
2346 
2347 				char * current_read_name =  is_second_read?read_name_2 : read_name_1;
2348 				char * current_read =  is_second_read?read_text_2 : read_text_1;
2349 				char * current_qual =  is_second_read?qual_text_2 : qual_text_1;
2350 				int current_rlen = is_second_read?read_len_2:read_len_1;
2351 				int mate_rlen = is_second_read?read_len_1:read_len_2;
2352 
2353 				if((current_result->result_flags & CORE_IS_FULLY_EXPLAINED) || (global_context -> input_reads.is_paired_end_reads &&(mate_result -> result_flags & CORE_IS_FULLY_EXPLAINED)))
2354 				{
2355 					//SUBREADprintf("BUILD BLOCK FOR READ_%d '%s' BEST=%d\n", is_second_read + 1, current_read_name , best_read_id);
2356 					if(current_result->result_flags & CORE_IS_FULLY_EXPLAINED)
2357 					{
2358 						int is_negative_strand = ((current_result  -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0);
2359 
2360 						if(is_negative_strand + is_reversed_already ==1)
2361 						{
2362 							reverse_read(current_read, current_rlen,  global_context->config.space_type);
2363 							reverse_quality(current_qual, current_rlen);
2364 							is_reversed_already=!is_reversed_already;
2365 						}
2366 
2367 						build_local_reassembly(global_context , thread_context , current_read_number, current_read_name , current_read , current_qual , current_rlen, 0 , is_second_read, best_read_id, 0, current_result, mate_result);
2368 
2369 					}
2370 					else if(global_context -> input_reads.is_paired_end_reads && (mate_result -> result_flags & CORE_IS_FULLY_EXPLAINED))
2371 					{
2372 						int is_negative_strand = ((mate_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0);
2373 						if(is_negative_strand+is_reversed_already==1)
2374 						{
2375 							reverse_read(current_read, current_rlen,  global_context->config.space_type);
2376 							reverse_quality(current_qual, current_rlen);
2377 							is_reversed_already=!is_reversed_already;
2378 						}
2379 
2380 						build_local_reassembly(global_context , thread_context , current_read_number , current_read_name , current_read , current_qual , current_rlen, mate_rlen , is_second_read, best_read_id, 1, current_result, mate_result);
2381 					}
2382 				}
2383 			}
2384 		}
2385 
2386 		if(!thread_context || thread_context->thread_id == 0)
2387 		{
2388 			if(sqr_read_number>sqr_interval)
2389 			{
2390 				show_progress(global_context, thread_context, current_read_number, STEP_ITERATION_THREE);
2391 				sqr_read_number = 0;
2392 			}
2393 		}
2394 	}
2395 
2396 	return 0;
2397 
2398 }
2399 
2400 
2401 void add_realignment_event_support(global_context_t * global_context , realignment_result_t * res){
2402 	int xk1;
2403 	indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
2404 
2405 	for(xk1 = 0; xk1 < MAX_EVENTS_IN_READ ; xk1++){
2406 		chromosome_event_t *sup = res -> supporting_chromosome_events[xk1];
2407 		if(!sup)break;
2408 
2409 		int lock_hash = sup -> global_event_id % EVENT_BODY_LOCK_BUCKETS;
2410 		subread_lock_occupy(indel_context -> event_body_locks+lock_hash);
2411 		sup -> final_counted_reads ++;
2412 		sup -> junction_flanking_left = max(sup -> junction_flanking_left, res -> flanking_size_left[xk1]);
2413 		sup -> junction_flanking_right = max(sup -> junction_flanking_right, res -> flanking_size_right[xk1]);
2414 		subread_lock_release(indel_context -> event_body_locks+lock_hash);
2415 	}
2416 }
2417 
2418 unsigned int calc_end_pos(unsigned int p, char * cigar, unsigned int * all_skipped_len, int * is_exonic_regions, global_context_t * global_context);
2419 void test_PE_and_same_chro_align(global_context_t * global_context , realignment_result_t * res1, realignment_result_t * res2, int * is_exonic_regions, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2, char * rname, int * res_tlen);
2420 void write_realignments_for_fragment(global_context_t * global_context, thread_context_t * thread_context, subread_output_context_t * out_context, unsigned int read_number, realignment_result_t * res1, realignment_result_t * res2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, char * qual_text_1, char * qual_text_2 , int rlen1 , int rlen2, int multi_mapping_number, int this_multi_mapping_i, int non_informative_subreads_r1, int non_informative_subreads_r2){
2421 
2422 	int is_2_OK = 0, is_1_OK = 0;
2423 
2424 	if(res1){
2425 		is_1_OK = convert_read_to_tmp(global_context , out_context, read_number,  0, rlen1, read_text_1, qual_text_1, res1, out_context -> r1, read_name_1);
2426 		if(is_1_OK) add_realignment_event_support(global_context, res1);
2427 	}
2428 	if(res2){
2429 		is_2_OK = convert_read_to_tmp(global_context , out_context, read_number,  1, rlen2, read_text_2, qual_text_2, res2, out_context -> r2, read_name_2);
2430 		if(is_2_OK) add_realignment_event_support(global_context, res2);
2431 	}
2432 
2433 	subread_output_tmp_t * r1_output = NULL;
2434 	subread_output_tmp_t * r2_output = NULL;
2435 
2436 	if(res1){
2437 		r1_output = out_context -> r1;
2438 	}
2439 
2440 	if(res2){
2441 		r2_output = out_context -> r2;
2442 	}
2443 
2444 	if(this_multi_mapping_i < 1){
2445 		if( is_1_OK == 0 && is_2_OK == 0)
2446 			if(thread_context) thread_context -> all_unmapped_reads ++;
2447 			else global_context -> all_unmapped_reads ++;
2448 		else if( is_1_OK == 0 || is_2_OK == 0){
2449 			if(thread_context) thread_context -> not_properly_pairs_only_one_end_mapped ++;
2450 			else global_context -> not_properly_pairs_only_one_end_mapped ++;
2451 
2452 			if((is_1_OK && (res1 -> realign_flags & CORE_IS_BREAKEVEN )) ||
2453 			   (is_2_OK && (res2 -> realign_flags & CORE_IS_BREAKEVEN))) {
2454 				if(thread_context) thread_context -> all_multimapping_reads ++;
2455 				else global_context -> all_multimapping_reads ++;
2456 			}else{
2457 				if(thread_context) thread_context -> all_uniquely_mapped_reads ++;
2458 				else global_context -> all_uniquely_mapped_reads ++;
2459 			}
2460 		}else{
2461 			if(res1 -> realign_flags & CORE_IS_BREAKEVEN){
2462 				if(thread_context) thread_context -> all_multimapping_reads ++;
2463 				else global_context -> all_multimapping_reads ++;
2464 			}else{
2465 				if(thread_context) thread_context -> all_uniquely_mapped_reads ++;
2466 				else global_context -> all_uniquely_mapped_reads ++;
2467 			}
2468 		}
2469 	}
2470 
2471 
2472 	if((!global_context->config.ignore_unmapped_reads) || (is_2_OK || is_1_OK))
2473 		write_single_fragment(global_context, thread_context, r1_output, res1, r2_output, res2, multi_mapping_number , this_multi_mapping_i , read_name_1, read_name_2, rlen1, rlen2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_number, non_informative_subreads_r1, non_informative_subreads_r2, is_1_OK, is_2_OK);
2474 }
2475 
2476 
2477 void clear_repeated_buffer(global_context_t * global_context, unsigned int * repeated_buffer_position, char ** repeated_buffer_cigar, int * repeated_count){
2478 	(*repeated_count) = 0;
2479 }
2480 int add_repeated_buffer(global_context_t * global_context, unsigned int * repeated_buffer_position, char ** repeated_buffer_cigar, int * repeated_count, realignment_result_t * res1, realignment_result_t * res2){
2481 	int x1, is_repeated = 0;
2482 	char * r1_cigar = "*";
2483 	unsigned int r1_pos = 0;
2484 
2485 	if(res1){
2486 		r1_cigar = res1 -> cigar_string;
2487 		r1_pos = res1 -> first_base_position;
2488 	}
2489 
2490 	char * r2_cigar = "*";
2491 	unsigned int r2_pos = 0;
2492 
2493 	if(res2){
2494 		r2_cigar = res2 -> cigar_string;
2495 		r2_pos = res2 -> first_base_position;
2496 	}
2497 
2498 	for(x1 = 0; x1 < (*repeated_count); x1 += 2){
2499 		//if(strcmp(r1_cigar, "48M2769N53M")==0)
2500 		//	SUBREADprintf("KYKY %u=%u  %s=%s\n" , r1_pos, repeated_buffer_position[x1], r1_cigar , repeated_buffer_cigar[x1]);
2501 		if(repeated_buffer_position[x1] == r1_pos && repeated_buffer_position[x1+1] == r2_pos)
2502 			if(strcmp(repeated_buffer_cigar[x1], r1_cigar) == 0 && strcmp(repeated_buffer_cigar[x1+1], r2_cigar) == 0)
2503 			{
2504 				is_repeated = 1;
2505 				break;
2506 			}
2507 	}
2508 
2509 	if( (!is_repeated) && (*repeated_count) < MAX_ALIGNMENT_PER_ANCHOR * 2 * global_context -> config.reported_multi_best_reads){
2510 
2511 
2512 		//if(strcmp(r1_cigar, "48M2769N53M")==0)
2513 		//	SUBREADprintf("CPCP %u=%u  %s=%s\n" , r1_pos, repeated_buffer_position[x1], r1_cigar , repeated_buffer_cigar[x1]);
2514 		repeated_buffer_position[*repeated_count] = r1_pos;
2515 		repeated_buffer_position[1 + *repeated_count] = r2_pos;
2516 		strcpy(repeated_buffer_cigar[*repeated_count], r1_cigar);
2517 		strcpy(repeated_buffer_cigar[1 + *repeated_count], r2_cigar);
2518 		(*repeated_count) +=2;
2519 	}
2520 
2521 	return is_repeated;
2522 }
2523 int do_iteration_two(global_context_t * global_context, thread_context_t * thread_context)
2524 {
2525 	gene_input_t * ginp1 = NULL , * ginp2 = NULL;
2526 	subread_read_number_t current_read_number=0;
2527 	char read_text_1[MAX_READ_LENGTH+1], read_text_2[MAX_READ_LENGTH+1];
2528 	char qual_text_1[MAX_READ_LENGTH+1], qual_text_2[MAX_READ_LENGTH+1];
2529 
2530 	char raw_read_text_1[MAX_READ_LENGTH+1], raw_read_text_2[MAX_READ_LENGTH+1];
2531 	char raw_qual_text_1[MAX_READ_LENGTH+1], raw_qual_text_2[MAX_READ_LENGTH+1];
2532 	char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
2533 	char * repeated_buffer_cigars[MAX_ALIGNMENT_PER_ANCHOR *  2 * global_context -> config.reported_multi_best_reads];
2534 	unsigned int repeated_buffer_pos[MAX_ALIGNMENT_PER_ANCHOR *  2 * global_context -> config.reported_multi_best_reads];
2535 	int repeated_count;
2536 	int read_len_1=0, read_len_2=0;
2537 	int sqr_interval, sqr_read_number=0;
2538 
2539 	int * final_MATCH_buffer1, *final_MISMATCH_buffer1, *final_PENALTY_buffer1;
2540 	int * final_MATCH_buffer2, *final_MISMATCH_buffer2, *final_PENALTY_buffer2, non_informative_subreads_r1=0, non_informative_subreads_r2=0;
2541 	int * final_realignment_index1, *final_realignment_index2;
2542 	unsigned int *final_realignment_number;
2543 	unsigned long long * final_SCORE_buffer;
2544 	unsigned int * final_TLEN_buffer;
2545 
2546 	realignment_result_t * final_realignments;
2547 	subread_output_context_t out_context;
2548 
2549 	//SUBREADprintf("THREAD_OPT2_start\n");
2550 	init_output_context(global_context, &out_context);
2551 
2552 	for(repeated_count = 0;repeated_count < MAX_ALIGNMENT_PER_ANCHOR  *  2 * global_context -> config.reported_multi_best_reads ; repeated_count ++ ){
2553 		repeated_buffer_cigars[repeated_count] = malloc(2*CORE_MAX_CIGAR_STR_LEN);
2554 	}
2555 
2556 	final_MATCH_buffer1 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2557 	final_MISMATCH_buffer1 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2558 	final_PENALTY_buffer1 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2559 	final_realignment_index1 = malloc(sizeof(int) * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2560 
2561 	final_MATCH_buffer2 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2562 	final_MISMATCH_buffer2 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2563 	final_PENALTY_buffer2 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2564 	final_realignment_index2 = malloc(sizeof(int) * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
2565 
2566 	final_SCORE_buffer = malloc(sizeof(long long) * global_context -> config.multi_best_reads * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR*MAX_ALIGNMENT_PER_ANCHOR);
2567 	final_TLEN_buffer = malloc(sizeof(int) * global_context -> config.multi_best_reads * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR*MAX_ALIGNMENT_PER_ANCHOR);
2568 
2569 	final_realignments = malloc(sizeof(realignment_result_t) * global_context -> config.multi_best_reads * 2 * MAX_ALIGNMENT_PER_ANCHOR);
2570 	final_realignment_number = malloc(sizeof(int) * global_context -> config.multi_best_reads * 2);
2571 
2572 	mapping_result_t * r1_align_result_buffer, * r2_align_result_buffer;
2573 	subjunc_result_t * r1_subjunc_result_buffer, * r2_subjunc_result_buffer;
2574 
2575 	r1_align_result_buffer = malloc(sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
2576 	r2_align_result_buffer = malloc(sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
2577 
2578 	r1_subjunc_result_buffer = malloc(sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
2579 	r2_subjunc_result_buffer = malloc(sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
2580 
2581 	//unsigned int low_index_border = global_context -> current_value_index -> start_base_offset;
2582 	//unsigned int high_index_border = global_context -> current_value_index -> start_base_offset + global_context -> current_value_index -> length;
2583 
2584 	init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
2585 	sqr_interval = max(5000,global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads);
2586 
2587 
2588 	while(1)
2589 	{
2590 		int is_second_read;
2591 		int max_votes;
2592 
2593 		sqr_read_number++;
2594 		fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 0, &current_read_number);
2595 		//if(current_read_number%500000==0)SUBREADprintf("THREAD_OPT2_fetch %s  %s ; RNO=%lld\n", read_name_1, read_text_1, current_read_number);
2596 		strcpy(raw_read_text_1, read_text_1);
2597 		strcpy(raw_qual_text_1, qual_text_1);
2598 		//printf("OCT27-STEPB - %s\n", read_name_1);
2599 
2600 		if(global_context -> input_reads.is_paired_end_reads){
2601 			strcpy(raw_read_text_2, read_text_2);
2602 			strcpy(raw_qual_text_2, qual_text_2);
2603 		}
2604 
2605 		if(global_context->config.space_type == GENE_SPACE_COLOR){
2606 			if(isalpha(read_text_1[0])){
2607 				//SUBREADprintf("FIRST : %s\n\n", read_text_1);
2608 				int xk1;
2609 				for(xk1=2; read_text_1[xk1]; xk1++){
2610 					read_text_1[xk1-2] = read_text_1[xk1];
2611 				}
2612 				read_text_1[xk1-2] = 0;
2613 			}
2614 
2615 			if(global_context -> input_reads.is_paired_end_reads){
2616 				//SUBREADprintf("SECOND: %s\n\n", read_text_2);
2617 				if(isalpha(read_text_2[0])){
2618 					int xk1;
2619 					for(xk1=1; read_text_2[xk1]; xk1++){
2620 						read_text_2[xk1-1] = read_text_2[xk1];
2621 					}
2622 					read_text_2[xk1-1] = 0;
2623 				}
2624 			}
2625 		}
2626 
2627 		if(raw_qual_text_1[0] && global_context -> config.phred_score_format == FASTQ_PHRED64){
2628 			fastq_64_to_33(raw_qual_text_1);
2629 			if(global_context->input_reads.is_paired_end_reads)
2630 				fastq_64_to_33(raw_qual_text_2);
2631 		}
2632 
2633 		if((global_context -> config.is_BAM_output && global_context -> output_bam_writer -> is_internal_error) ||  global_context -> input_reads.is_internal_error ||
2634 		   (global_context -> output_sam_is_full))break;
2635 		if(current_read_number < 0) break;
2636 
2637 		// if no more reads
2638 		if( global_context -> input_reads.is_paired_end_reads)
2639 			max_votes = max(_global_retrieve_alignment_ptr(global_context, current_read_number, 0, 0)->selected_votes, _global_retrieve_alignment_ptr(global_context, current_read_number, 1, 0)->selected_votes);
2640 		else	max_votes = _global_retrieve_alignment_ptr(global_context, current_read_number, 0, 0)->selected_votes;
2641 
2642 		//if(  current_read_number < 10 ) SUBREADprintf("BSSS2 : %s : %d\n", read_name_1, max_votes);
2643 
2644 		int best_read_id=0;
2645 
2646 		//memset(final_MATCH_buffer, 0, sizeof(int) * 2 * global_context -> config.multi_best_reads);
2647 		//memset(final_MISMATCH_buffer, 0, sizeof(int) * 2 * global_context -> config.multi_best_reads);
2648 		int r1_candidate_locations = 0, r2_candidate_locations = 0;
2649 		int r1_step2_locations = 0, r2_step2_locations = 0;
2650 
2651 		clear_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count);
2652 		for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
2653 		{
2654 			char * current_read_name =  is_second_read?read_name_2 : read_name_1;
2655 			int is_reversed_already = 0, realignment_i;
2656 			int * current_candidate_locations = is_second_read?&r2_candidate_locations:&r1_candidate_locations;
2657 
2658 			int * current_MATCH_buffer = is_second_read?final_MATCH_buffer2:final_MATCH_buffer1;
2659 			int * current_MISMATCH_buffer = is_second_read?final_MISMATCH_buffer2:final_MISMATCH_buffer1;
2660 			int * current_PENALTY_buffer = is_second_read?final_PENALTY_buffer2:final_PENALTY_buffer1;
2661 			int * current_realignment_index = is_second_read?final_realignment_index2:final_realignment_index1;
2662 
2663 
2664 			for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
2665 			{
2666 				mapping_result_t *current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
2667 
2668 				if(best_read_id == 0){
2669 					if(is_second_read)
2670 						non_informative_subreads_r2 = current_result -> noninformative_subreads_in_vote;
2671 					else
2672 						non_informative_subreads_r1 = current_result -> noninformative_subreads_in_vote;
2673 				}
2674 
2675 				char * current_read =  is_second_read?read_text_2 : read_text_1;
2676 				char * current_qual =  is_second_read?qual_text_2 : qual_text_1;
2677 				int current_rlen = is_second_read?read_len_2:read_len_1;
2678 
2679 				if(current_result->selected_votes < global_context->config.minimum_subread_for_second_read || max_votes < global_context->config.minimum_subread_for_first_read)
2680 				{
2681 				//	if(current_read_number == 111 || current_read_number == 112)
2682 					//SUBREADprintf("RESET0 [%lld] R_%d SEL=%d MAX=%d\n", current_read_number, is_second_read + 1, current_result->selected_votes, max_votes);
2683 					current_result -> selected_votes = 0;
2684 					continue;
2685 				}
2686 				if(locate_current_value_index(global_context, thread_context, current_result, current_rlen))
2687 				{
2688 					current_result -> selected_votes = 0;
2689 					//SUBREADprintf("RESET1 Read position excesses index boundary.\n");
2690 					continue;
2691 				}
2692 
2693 				//#warning ">>>>>>> COMMENT THIS <<<<<<<"
2694 				//printf("OCT27-STEP21-%s:%d-ALN%02d-THRE %d in MM=%d\n", current_read_name, is_second_read + 1, best_read_id, thread_context -> thread_id, global_context -> config.multi_best_reads);
2695 
2696 				int is_negative_strand = (current_result  -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
2697 
2698 				if(is_negative_strand + is_reversed_already == 1)
2699 				{
2700 					reverse_read(current_read, current_rlen,  global_context->config.space_type);
2701 					reverse_quality(current_qual, current_rlen);
2702 					is_reversed_already = !is_reversed_already;
2703 				}
2704 
2705 				current_result -> result_flags &= ~CORE_IS_FULLY_EXPLAINED;
2706 
2707 
2708 				if(is_second_read) r2_step2_locations = best_read_id + 1;
2709 				else r1_step2_locations = best_read_id + 1;
2710 
2711 				unsigned int final_alignments = explain_read(global_context, thread_context , final_realignments + (is_second_read + 2 * best_read_id) * MAX_ALIGNMENT_PER_ANCHOR,
2712 							   current_read_number, current_rlen, current_read_name, current_read, current_qual, is_second_read, best_read_id, is_negative_strand);
2713 
2714 				final_realignment_number[ best_read_id * 2 + is_second_read ] = final_alignments;
2715 
2716 				for(realignment_i = 0 ; realignment_i < final_alignments ; realignment_i ++){
2717 					if((* current_candidate_locations) >= global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR) break;
2718 
2719 					int realign_index = (is_second_read + 2 * best_read_id)* MAX_ALIGNMENT_PER_ANCHOR + realignment_i;
2720 					int final_MATCH = final_realignments[realign_index].final_matched_bases;
2721 					int final_PENALTY = final_realignments[realign_index].final_penalty;
2722 
2723 					if((current_result -> result_flags & CORE_IS_FULLY_EXPLAINED) && final_MATCH>0) {
2724 						int final_MISMATCH = final_realignments[realign_index].final_mismatched_bases;
2725 						//#warning ">>>>>>>>>>>>>>>>>>>>>>>>>>>>> REMOVE THIS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
2726 						//printf("OCT27-STEPMSM-MMM %s M=%d MM=%d\n", read_name_1, final_MATCH ,final_MISMATCH);
2727 
2728 						current_PENALTY_buffer[*current_candidate_locations] = final_PENALTY;
2729 						current_MATCH_buffer[*current_candidate_locations] = final_MATCH;
2730 						current_MISMATCH_buffer[*current_candidate_locations] = final_MISMATCH;
2731 						current_realignment_index[*current_candidate_locations] = realign_index;
2732 						(*current_candidate_locations) ++;
2733 					}
2734 				}
2735 				//#warning ">>>>>>> COMMENT THIS <<<<<<<"
2736 				//printf("OCT27-FI-%s:%d-ALN%02d-THRE %d\n", current_read_name, is_second_read + 1, best_read_id, thread_context -> thread_id);
2737 			}
2738 		}
2739 
2740 		//if(161430 <= current_read_number) SUBREADprintf("LOC1=%d, LOC2=%d\n", r1_candidate_locations, r2_candidate_locations);
2741 
2742 		int need_expect_TLEN = r2_candidate_locations && r1_candidate_locations && global_context -> config.reported_multi_best_reads<2 && global_context -> expected_TLEN_read_numbers < READPAIRS_FOR_CALC_EXPT_TLEN;
2743 		int output_cursor = 0;
2744 		if(r2_candidate_locations == 0 || r1_candidate_locations == 0) {
2745 			int is_second_read, highest_score_occurence =0;
2746 			for(is_second_read = 0; is_second_read < 1+ global_context -> input_reads.is_paired_end_reads; is_second_read++)
2747 			{
2748 				int current_candidate_locations = is_second_read?r2_candidate_locations:r1_candidate_locations;
2749 				if(current_candidate_locations > 0){
2750 
2751 					int * current_MATCH_buffer = is_second_read?final_MATCH_buffer2:final_MATCH_buffer1;
2752 					int * current_PENALTY_buffer = is_second_read?final_PENALTY_buffer2:final_PENALTY_buffer1;
2753 					int * current_MISMATCH_buffer = is_second_read?final_MISMATCH_buffer2:final_MISMATCH_buffer1;
2754 					int * current_realignment_index = is_second_read?final_realignment_index2:final_realignment_index1;
2755 
2756 					int read_record_i;
2757 					unsigned long long int best_score_highest = 0;
2758 					unsigned long long int scores_array [global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR];
2759 
2760 					for(read_record_i = 0; read_record_i < current_candidate_locations; read_record_i++){
2761 						realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
2762 
2763 						unsigned int this_MATCH = current_MATCH_buffer[read_record_i];
2764 						unsigned int this_MISMATCH = current_MISMATCH_buffer[read_record_i];
2765 						unsigned int this_PENALTY = current_PENALTY_buffer[read_record_i];
2766 						unsigned long long this_SCORE;
2767 
2768 						if(global_context -> config.experiment_type == CORE_EXPERIMENT_DNASEQ){
2769 							this_SCORE = this_MATCH * 100000llu + (10000 - this_MISMATCH);
2770 						}else{
2771 							unsigned int skip = 0; int is_exonic_regions = 1;
2772 							if(global_context -> exonic_region_bitmap)calc_end_pos(current_realignment_result -> first_base_position, current_realignment_result -> cigar_string, &skip, &is_exonic_regions, global_context  );
2773 
2774 							if(global_context -> config.scRNA_input_mode)
2775 								this_SCORE =((100000llu * (10000 - this_MISMATCH + 2*is_exonic_regions) + this_MATCH)*50llu - this_PENALTY)*20llu+ current_realignment_result -> known_junction_supp;
2776 							else this_SCORE =((100000llu * (10000 - this_MISMATCH) + this_MATCH)*50llu - this_PENALTY)*20llu+ current_realignment_result -> known_junction_supp;
2777 						}
2778 
2779 						best_score_highest = max(best_score_highest, this_SCORE);
2780 						scores_array[read_record_i] = this_SCORE;
2781 					}
2782 
2783 					for(read_record_i = 0; read_record_i <  current_candidate_locations ; read_record_i++){
2784 						realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
2785 						if( scores_array[read_record_i] >= best_score_highest && (current_realignment_result -> realign_flags & CORE_TOO_MANY_MISMATCHES)==0) {
2786 							int is_repeated = 0;
2787 
2788 							is_repeated = add_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count, is_second_read?NULL:current_realignment_result , is_second_read?current_realignment_result:NULL);
2789 							if(is_repeated)
2790 								scores_array[read_record_i] = 0;
2791 							else
2792 								highest_score_occurence ++;
2793 						}
2794 					}
2795 
2796 					if(highest_score_occurence<2 ||  global_context -> config.report_multi_mapping_reads){
2797 						int is_break_even = 0;
2798 						if(highest_score_occurence>1)	is_break_even = 1;
2799 
2800 						highest_score_occurence = min(highest_score_occurence, global_context -> config.reported_multi_best_reads);
2801 						for(read_record_i = 0; read_record_i <  current_candidate_locations ; read_record_i++){
2802 							realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
2803 
2804 							if( scores_array[read_record_i] >= best_score_highest && (current_realignment_result -> realign_flags & CORE_TOO_MANY_MISMATCHES)==0
2805 							  && output_cursor < global_context -> config.reported_multi_best_reads){
2806 								strcpy(read_text_1, raw_read_text_1);
2807 								strcpy(read_text_2, raw_read_text_2);
2808 								strcpy(qual_text_1, raw_qual_text_1);
2809 								strcpy(qual_text_2, raw_qual_text_2);
2810 
2811 								if(is_break_even) current_realignment_result -> realign_flags |= CORE_IS_BREAKEVEN;
2812 								current_realignment_result -> MAPQ_adjustment = current_MISMATCH_buffer [read_record_i] + ( is_second_read?(r2_step2_locations): (r1_step2_locations));
2813 
2814 //		SUBREADprintf("THREAD_OPT1_write %s\n", read_name_1);
2815 								if(is_second_read)
2816 									write_realignments_for_fragment(global_context, thread_context, &out_context, current_read_number, NULL, current_realignment_result, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_len_1, read_len_2, highest_score_occurence, output_cursor,  non_informative_subreads_r1, non_informative_subreads_r2);
2817 								else write_realignments_for_fragment(global_context, thread_context, &out_context , current_read_number, current_realignment_result, NULL, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_len_1, read_len_2, highest_score_occurence, output_cursor,  non_informative_subreads_r1, non_informative_subreads_r2);
2818 								output_cursor ++;
2819 							}
2820 						}
2821 
2822 						assert(output_cursor == highest_score_occurence);
2823 					}
2824 				}
2825 			}
2826 		} else {
2827 			int r1_best_id, r2_best_id, highest_score_occurence = 0;
2828 			int expected_TLEN;
2829 			if(global_context -> expected_TLEN_read_numbers >= READPAIRS_FOR_CALC_EXPT_TLEN)
2830 				expected_TLEN = global_context -> expected_TLEN_sum / global_context -> expected_TLEN_read_numbers;
2831 			else expected_TLEN = (global_context -> config.minimum_pair_distance + global_context -> config.maximum_pair_distance)/2;
2832 
2833 			unsigned long long highest_score = 0;
2834 			if(need_expect_TLEN) memset(final_TLEN_buffer, 0 , sizeof(int)  * global_context -> config.multi_best_reads * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR*MAX_ALIGNMENT_PER_ANCHOR );
2835 			memset(final_SCORE_buffer, 0 , sizeof(long long)  * global_context -> config.multi_best_reads * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR*MAX_ALIGNMENT_PER_ANCHOR );
2836 			for(r1_best_id = 0; r1_best_id < r1_candidate_locations; r1_best_id ++) {
2837 				int r1_matched = final_MATCH_buffer1[r1_best_id];
2838 				if(r1_matched < 1) continue;
2839 				realignment_result_t * realignment_result_R1 = final_realignments + final_realignment_index1[r1_best_id];
2840 
2841 				for(r2_best_id = 0; r2_best_id < r2_candidate_locations; r2_best_id ++) {
2842 					int r2_matched = final_MATCH_buffer2[r2_best_id];
2843 					if(r2_matched < 1) continue;
2844 
2845 					realignment_result_t * realignment_result_R2 = final_realignments + final_realignment_index2[r2_best_id];
2846 					int is_PE = 0, tlen=0;
2847 					int is_same_chro = 0, is_exonic_regions = 0;
2848 					unsigned long long final_SCORE = 0;
2849 
2850 					test_PE_and_same_chro_align(global_context , realignment_result_R1 , realignment_result_R2, &is_exonic_regions, &is_PE, &is_same_chro , read_len_1, read_len_2, read_name_1, &tlen);
2851 
2852 					unsigned long long TLEN_exp_score = 0;
2853 					if(is_PE && global_context -> config.no_TLEN_preference == 0 && global_context -> config.reported_multi_best_reads<2){
2854 						TLEN_exp_score = (tlen > expected_TLEN)?(tlen - expected_TLEN):(expected_TLEN - tlen);
2855 						if(TLEN_exp_score>999) TLEN_exp_score=0;
2856 						else TLEN_exp_score = 999-TLEN_exp_score;
2857 					}
2858 					if(global_context -> config.experiment_type == CORE_EXPERIMENT_DNASEQ){
2859 						int weight;
2860 
2861 						//#warning " ============ USE THE FIRST THREE WEIGHTS! ======== "
2862 
2863 						if(is_PE)
2864 							weight = 120;
2865 							//weight = 300;
2866 						else if(is_same_chro)
2867 							weight = 100;
2868 						else	weight = 80;
2869 							//weight = 30;
2870 						final_SCORE = weight * (final_MATCH_buffer1[r1_best_id] + final_MATCH_buffer2[r2_best_id]);
2871 						//#warning "=========== ADD BY YANG LIAO FOR MORE MAPPED READS WITH '-u' OPTION ================"
2872 						final_SCORE = final_SCORE * 1000llu - final_MISMATCH_buffer1[r1_best_id] - final_MISMATCH_buffer2[r2_best_id];
2873 						final_SCORE = final_SCORE * 20llu - final_PENALTY_buffer1[r1_best_id] - final_PENALTY_buffer2[r2_best_id];
2874 						final_SCORE = final_SCORE * 1000llu + TLEN_exp_score;
2875 						if(is_PE && need_expect_TLEN) final_TLEN_buffer[r1_best_id * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR + r2_best_id] = tlen;
2876 						//SUBREADprintf("%s FINAL_SCORE=%llu  TLEN_SCORE=%llu    DIST=%d    EXP_TLEN=%d\n", read_name_1, final_SCORE, TLEN_exp_score, tlen, expected_TLEN);
2877 					} else if (global_context -> config.experiment_type == CORE_EXPERIMENT_RNASEQ) {
2878 						int weight;
2879 
2880 
2881 						if(1){
2882 
2883 							if(is_exonic_regions && is_PE)
2884 								weight = 5000;
2885 							else if(is_PE || is_exonic_regions)
2886 								weight = 3000;
2887 							else if(is_same_chro)
2888 								weight = 1000;
2889 							else	weight = 300;
2890 						}else{
2891 							if(is_PE)
2892 								weight = 3000;
2893 							else if(is_same_chro)
2894 								weight = 1000;
2895 							else	weight = 300;
2896 
2897 						}
2898 
2899 						//#warning "=========== ADD BY YANG LIAO  ' + 2' ===================="
2900 						final_SCORE = weight / (final_MISMATCH_buffer1[r1_best_id] + final_MISMATCH_buffer2[r2_best_id] + 1 + 2);
2901 
2902 						//#warning "=========== ADD BY YANG LIAO FOR MORE MAPPED READS WITH '-u' OPTION ================"
2903 						final_SCORE = final_SCORE * 3000llu + (final_MATCH_buffer1[r1_best_id] + final_MATCH_buffer2[r2_best_id]);
2904 						final_SCORE = final_SCORE * 20 + realignment_result_R2 -> known_junction_supp + realignment_result_R1 -> known_junction_supp;
2905 						final_SCORE = final_SCORE * 20 - final_PENALTY_buffer1[r1_best_id] - final_PENALTY_buffer2[r2_best_id];
2906 						final_SCORE = final_SCORE * 1000 + TLEN_exp_score;
2907 
2908 						//#warning ">>>>>>>>>>>>>>>>>>>>>>>>>>>>> REMOVE THIS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
2909 						//printf("OCT27-STEPMSM-FNSC %s M=%d,%d MM=%d,%d  CHROSAME=%d FSCR=%llu\n", read_name_1, final_MATCH_buffer1[r1_best_id], final_MATCH_buffer2[r2_best_id], final_MISMATCH_buffer1[r1_best_id] , final_MISMATCH_buffer2[r2_best_id], is_same_chro, final_SCORE );
2910 						if(is_PE && need_expect_TLEN) final_TLEN_buffer[r1_best_id * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR + r2_best_id] = tlen;
2911 					} else assert(0);
2912 
2913 					assert(final_SCORE > 0);
2914 					final_SCORE_buffer[r1_best_id * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR + r2_best_id] = final_SCORE;
2915 
2916 					if(final_SCORE > highest_score) {
2917 						//#warning ">>>>>>>>>>>> COMMENT THIS <<<<<<<<<<<<<<<<<<<"
2918 						//printf("OCT27-STEPMSM-REPL %s : SCORE %llu -> %llu, pos=%u,%u\n", read_name_1, final_SCORE, highest_score , realignment_result_R1->mapping_result->selected_position, realignment_result_R2->mapping_result->selected_position );
2919 						highest_score_occurence = 1;
2920 						highest_score = final_SCORE;
2921 						//SUBREADprintf("29NOV2016-FIRSTBEST %s [%d : score %llu] LOC = %d,%d\n", read_name_1, highest_score_occurence, final_SCORE, r1_best_id, r2_best_id);
2922 						clear_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count);
2923 						add_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count, realignment_result_R1, realignment_result_R2);
2924 					} else if(final_SCORE == highest_score) {
2925 						int is_repeat = 0;
2926 
2927 						if(global_context -> config.reported_multi_best_reads)
2928 							is_repeat = add_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count, realignment_result_R1, realignment_result_R2);
2929 
2930 						if(is_repeat)
2931 							final_SCORE_buffer[r1_best_id * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR + r2_best_id] = 0;
2932 						else{
2933 							highest_score_occurence ++;
2934 						//	SUBREADprintf("29NOV2016-BEST %s [%d : score %llu] LOC = %d,%d\n", read_name_1, highest_score_occurence, final_SCORE, r1_best_id, r2_best_id);
2935 						}
2936 					}
2937 				}
2938 			}
2939 
2940 			//SUBREADprintf("Highest score = %llu, Occurance = %d\n", highest_score , highest_score_occurence);
2941 			// Then, copy the (R1, R2) that have the highest score into the align_res buffer.
2942 
2943 			//#warning ">>>>>>> COMMENT THIS <<<<<<<"
2944 			//printf("OCT27-WRITE-BOTHMAOOED-%s-THRE %d ; occu=%d\n", read_name_1, thread_context -> thread_id, highest_score_occurence);
2945 
2946 			if(highest_score_occurence <= 1 || global_context -> config.report_multi_mapping_reads){
2947 
2948 				int is_break_even = 0;
2949 				if(highest_score_occurence>1)is_break_even = 1;
2950 
2951 				highest_score_occurence = min(highest_score_occurence, global_context -> config.reported_multi_best_reads);
2952 				for(r1_best_id = 0; r1_best_id < r1_candidate_locations; r1_best_id ++)
2953 				{
2954 					int r1_matched = final_MATCH_buffer1[r1_best_id];
2955 					if(r1_matched < 1) continue;
2956 
2957 					for(r2_best_id = 0; r2_best_id < r2_candidate_locations; r2_best_id ++)
2958 					{
2959 						int r2_matched = final_MATCH_buffer2[r2_best_id];
2960 						if(r2_matched < 1) continue;
2961 
2962 						if(final_SCORE_buffer[r1_best_id * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR + r2_best_id] == highest_score &&
2963 							output_cursor < global_context -> config.reported_multi_best_reads){
2964 								if(need_expect_TLEN){
2965 									int this_tlen = final_TLEN_buffer[r1_best_id * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR + r2_best_id];
2966 									if(this_tlen>0){
2967 										subread_lock_occupy(&global_context -> output_lock);
2968 										global_context -> expected_TLEN_read_numbers++;
2969 										global_context -> expected_TLEN_sum += this_tlen;
2970 										if(global_context -> expected_TLEN_read_numbers == READPAIRS_FOR_CALC_EXPT_TLEN)
2971 											print_in_box(80,0,0,"  Estimated fragment length : %d bp\n",  (int)(global_context -> expected_TLEN_sum / global_context -> expected_TLEN_read_numbers ));
2972 										subread_lock_release(&global_context -> output_lock);
2973 									}
2974 								}
2975 								realignment_result_t * r1_realign = final_realignments + final_realignment_index1[r1_best_id];
2976 								realignment_result_t * r2_realign = final_realignments + final_realignment_index2[r2_best_id];
2977 
2978 								strcpy(read_text_1, raw_read_text_1);
2979 								strcpy(read_text_2, raw_read_text_2);
2980 								strcpy(qual_text_1, raw_qual_text_1);
2981 								strcpy(qual_text_2, raw_qual_text_2);
2982 
2983 								if(is_break_even){
2984 									r1_realign -> realign_flags |= CORE_IS_BREAKEVEN;
2985 									r2_realign -> realign_flags |= CORE_IS_BREAKEVEN;
2986 								}
2987 
2988 								r1_realign -> MAPQ_adjustment = r1_step2_locations + final_MISMATCH_buffer1[r1_best_id];
2989 								r2_realign -> MAPQ_adjustment = r2_step2_locations + final_MISMATCH_buffer2[r2_best_id];
2990 
2991 //		SUBREADprintf("THREAD_OPT2_write %s\n", read_name_1);
2992 								write_realignments_for_fragment(global_context, thread_context, &out_context, current_read_number, r1_realign, r2_realign, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_len_1, read_len_2, highest_score_occurence, output_cursor,  non_informative_subreads_r1, non_informative_subreads_r2);
2993 								output_cursor ++;
2994 						}
2995 					}
2996 				}
2997 				assert(output_cursor == highest_score_occurence );
2998 			}
2999 		}
3000 
3001 		//if(  current_read_number < 10 ) SUBREADprintf("BEEE2 : %s : %d\n", read_name_1, max_votes);
3002 		//#warning ">>>>>>> COMMENT THIS <<<<<<<"
3003 		//printf("OCT27-WRITE-UNMAP?-%s-THRE %d\n", read_name_1, thread_context -> thread_id);
3004 		if(output_cursor<1) {
3005 			strcpy(read_text_1, raw_read_text_1);
3006 			strcpy(read_text_2, raw_read_text_2);
3007 			strcpy(qual_text_1, raw_qual_text_1);
3008 			strcpy(qual_text_2, raw_qual_text_2);
3009 //		SUBREADprintf("THREAD_OPT3_write %s\n", read_name_1);
3010 			write_realignments_for_fragment(global_context, thread_context, &out_context, current_read_number, NULL, NULL, read_name_1, read_name_2, read_text_1, read_text_2, raw_qual_text_1, raw_qual_text_2, read_len_1, read_len_2, 0, 0, non_informative_subreads_r1, non_informative_subreads_r2);
3011 		}
3012 
3013 		if(!thread_context || thread_context->thread_id == 0)
3014 		{
3015 			if(sqr_read_number>sqr_interval)
3016 			{
3017 				show_progress(global_context, thread_context, current_read_number, STEP_ITERATION_TWO);
3018 				sqr_read_number=0;
3019 			}
3020 		}
3021 		//#warning ">>>>>>> COMMENT THIS <<<<<<<"
3022 		//printf("OCT27-FIN-%s-THRE %d\n", read_name_1, thread_context -> thread_id);
3023 		//bigtable_release_result(global_context, thread_context, current_read_number, 1);
3024 	}
3025 
3026 	free(final_realignments);
3027 	free(final_realignment_number);
3028 	free(final_MATCH_buffer1);
3029 	free(final_MISMATCH_buffer1);
3030 	free(final_PENALTY_buffer1);
3031 	free(final_realignment_index1);
3032 
3033 	free(final_MATCH_buffer2);
3034 	free(final_MISMATCH_buffer2);
3035 	free(final_PENALTY_buffer2);
3036 	free(final_realignment_index2);
3037 
3038 	free(final_SCORE_buffer);
3039 	free(final_TLEN_buffer);
3040 
3041 	free(r1_align_result_buffer);
3042 	free(r1_subjunc_result_buffer);
3043 	free(r2_align_result_buffer);
3044 	free(r2_subjunc_result_buffer);
3045 
3046 	for(repeated_count = 0;repeated_count < MAX_ALIGNMENT_PER_ANCHOR  *  2 * global_context -> config.reported_multi_best_reads ; repeated_count ++ ){
3047 		free(repeated_buffer_cigars[repeated_count]);
3048 	}
3049 
3050 	destroy_output_context(global_context, &out_context);
3051 	//SUBREADprintf("OPT2-FINISHED\n");
3052 	if(global_context ->config.all_threads >1 && global_context ->config.is_BAM_output && !global_context ->config.is_input_read_order_required)SamBam_writer_finalise_thread(global_context -> output_bam_writer, thread_context -> thread_id);
3053 	if(global_context ->config.all_threads <=1 && global_context ->config.is_BAM_output && global_context ->config.sort_reads_by_coordinates)SamBam_writer_finalise_one_thread(global_context -> output_bam_writer);
3054 	return 0;
3055 }
3056 
3057 int core_get_subread_quality(global_context_t * global_context, thread_context_t * thread_context, char * qual, int qual_len)
3058 {
3059 	int x1, ret=1;
3060 
3061 	if(!qual)return 1;
3062 	if(!qual[0])return 1;
3063 
3064 	int offset = (global_context->config.phred_score_format == FASTQ_PHRED33)?33:64;
3065 
3066 	for(x1=0; (x1 < qual_len) && qual[x1]; x1++)
3067 		ret +=  (qual[x1] - offset);
3068 
3069 	return  ret;
3070 }
3071 
3072 int has_better_mapping(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t current_read_number, int is_second_read, int this_aln_id){
3073 	int better_read_id;
3074 
3075 	mapping_result_t * this_r = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, this_aln_id);
3076 	for(better_read_id = 0; better_read_id < this_aln_id; better_read_id++){
3077 		mapping_result_t * better_r = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, better_read_id);
3078 		if( this_r -> selected_position >= better_r -> selected_position - global_context -> config.max_indel_length - 1
3079 		 && this_r -> selected_position <=  better_r -> selected_position +  global_context -> config.max_indel_length +1 )
3080 		if( this_r -> confident_coverage_start >= better_r -> confident_coverage_start
3081 		 && this_r -> confident_coverage_end <= better_r -> confident_coverage_end) return 1;
3082 	}
3083 	return 0;
3084 }
3085 
3086 int do_voting(global_context_t * global_context, thread_context_t * thread_context)
3087 {
3088 	int xk1;
3089 	gene_input_t * ginp1 = NULL , * ginp2 = NULL;
3090 	subread_read_number_t current_read_number=0, last_shown_curr = 0;
3091 	char * read_text_1, * read_text_2;
3092 	char * qual_text_1, * qual_text_2;
3093 	char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
3094 	int read_len_1, read_len_2=0;
3095 	int min_first_read_votes = global_context -> config.minimum_subread_for_first_read;
3096 	int voting_max_indel_length = min(16, global_context->config.max_indel_length);
3097 	int sqr_interval=10000, sqr_read_number = 0;
3098 
3099 	read_text_1 = malloc(MAX_READ_LENGTH+1);
3100 	read_text_2 = malloc(MAX_READ_LENGTH+1);
3101 	qual_text_1 = malloc(MAX_READ_LENGTH+1);
3102 	qual_text_2 = malloc(MAX_READ_LENGTH+1);
3103 
3104 	gene_vote_t * vote_1, * vote_2, * vote_fg;
3105 
3106 	vote_1 = (gene_vote_t *) malloc(sizeof(gene_vote_t));
3107 	vote_2 = (gene_vote_t *) malloc(sizeof(gene_vote_t));
3108 	vote_fg = (gene_vote_t *) malloc(sizeof(gene_vote_t));
3109 
3110 	if(!(vote_1&&vote_2))
3111 	{
3112 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_FATAL,"Cannot allocate voting memory.");
3113 		return 1;
3114 	}
3115 
3116 	init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
3117 
3118 	unsigned int low_index_border = global_context -> current_value_index -> start_base_offset;
3119 	unsigned int high_index_border = global_context -> current_value_index -> start_base_offset + global_context -> current_value_index -> length;
3120 	int has_second_read = 1 + global_context -> input_reads.is_paired_end_reads;
3121 
3122 	if(thread_context)
3123 		thread_context -> current_value_index = global_context -> current_value_index;
3124 
3125 	int GENE_SLIDING_STEP = global_context -> current_index -> index_gap;
3126 	int need_junction_step = global_context -> config.do_breakpoint_detection || global_context ->  config.do_fusion_detection ||global_context ->  config.do_long_del_detection;
3127 
3128 	while(1)
3129 	{
3130 		int is_second_read;
3131 		int subread_no;
3132 		int is_reversed, applied_subreads = 0, v1_all_subreads=0, v2_all_subreads=0;
3133 
3134 		fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2,1, &current_read_number);
3135 		//SUBREADprintf("VOTING READ # %lld: %s %s %d\n", current_read_number, read_name_1, read_text_1, read_len_1);
3136 
3137 		if(current_read_number < 0){
3138 		//	#warning ">>>>>>>>>>>>> COMMENT THIS <<<<<<<<<<<<<<<<<<<"
3139 		//	printf("OCT27-STEPB-QUIT :T%d\n", thread_context -> thread_id);
3140 			break;
3141 		}
3142 
3143 		for(is_reversed = 0; is_reversed<2; is_reversed++)
3144 		{
3145 			//printf("MAP_REA = %s / %s\n", read_text_1, read_text_2);
3146 
3147 			for (is_second_read = 0; is_second_read < has_second_read; is_second_read ++)
3148 			{
3149 				gene_vote_t * current_vote = is_second_read?vote_2: vote_1;
3150 				char * current_read =  is_second_read?read_text_2 : read_text_1;
3151 				int current_rlen = is_second_read?read_len_2:read_len_1;
3152 				int subread_step;
3153 				if(current_rlen< 16) continue;
3154 				int CR15GLS = (current_rlen - 15 - GENE_SLIDING_STEP)<<16;
3155 				if(current_rlen<= EXON_LONG_READ_LENGTH)
3156 				{
3157 					subread_step =  CR15GLS /(global_context -> config.total_subreads -1);
3158 					if(subread_step<(GENE_SLIDING_STEP<<16))subread_step = GENE_SLIDING_STEP<<16;
3159 				}else{
3160 					subread_step = 6<<16;
3161 					if( CR15GLS / subread_step > 62)
3162 						subread_step = CR15GLS/62;
3163 				}
3164 
3165 				int noninformative_subreads_for_each_gap[GENE_SLIDING_STEP];
3166 
3167 				applied_subreads = 1 + CR15GLS / subread_step;
3168 				if(is_second_read) v2_all_subreads = applied_subreads;
3169 				else	 v1_all_subreads = applied_subreads;
3170 
3171 				//SUBREADprintf("NSUBR=%d\tAPPLIED_SUBR=%d\tSTEP=%d\n", global_context -> config.total_subreads, applied_subreads, subread_step);
3172 
3173 				unsigned int current_high_border = high_index_border -  current_rlen;
3174 
3175 				if(global_context->config.do_breakpoint_detection && current_rlen > EXON_LONG_READ_LENGTH)//&& global_context->config.all_threads<2)
3176 				{
3177 					char * current_qual =  is_second_read?qual_text_2 : qual_text_1;
3178 					core_fragile_junction_voting(global_context, thread_context, read_name_1, current_read, current_qual, current_rlen, is_reversed, global_context->config.space_type, low_index_border, current_high_border, vote_fg);
3179 
3180 				}
3181 				if(global_context->config.SAM_extra_columns)
3182 				{
3183 					for(xk1=0;xk1<GENE_SLIDING_STEP;xk1++)
3184 						noninformative_subreads_for_each_gap[xk1]=0;
3185 				}
3186 
3187 				int allow_indel_i;
3188 				unsigned int shift_indel_locs[ GENE_VOTE_TABLE_SIZE * GENE_VOTE_SPACE ];
3189 				unsigned int shift_indel_NO = 0;
3190 				for(allow_indel_i = 0; allow_indel_i<2; allow_indel_i++){
3191 					if(is_reversed==0 || !(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)){
3192 						if(is_second_read == 0)init_gene_vote(vote_1);
3193 						if(is_second_read == 1)init_gene_vote(vote_2);
3194 					}
3195 
3196 					for(subread_no=0; subread_no < applied_subreads ; subread_no++)
3197 					{
3198 						for(xk1=0; xk1<GENE_SLIDING_STEP ; xk1++)
3199 						{
3200 
3201 							if(global_context->config.SAM_extra_columns)
3202 							{
3203 								current_vote -> noninformative_subreads = noninformative_subreads_for_each_gap[xk1];
3204 							}
3205 
3206 							int subread_offset = ((subread_step * subread_no) >> 16);
3207 							if(GENE_SLIDING_STEP > 1)
3208 								subread_offset -= subread_offset%(GENE_SLIDING_STEP) - xk1;
3209 
3210 							char * subread_string = current_read + subread_offset;
3211 
3212 							gehash_key_t subread_integer = genekey2int(subread_string, global_context->config.space_type);
3213 							if(global_context->config.is_methylation_reads)
3214 								 gehash_go_q_CtoT(global_context->current_index, subread_integer , subread_offset, current_rlen, is_reversed, current_vote, 1, 0xffffff, voting_max_indel_length, subread_no, 1,  low_index_border, high_index_border - current_rlen);
3215 							else
3216 								 gehash_go_X(global_context->current_index, subread_integer , subread_offset, current_rlen, is_reversed, current_vote,  voting_max_indel_length, subread_no,  low_index_border, current_high_border, allow_indel_i, shift_indel_locs, &shift_indel_NO);
3217 							if(global_context->config.SAM_extra_columns)
3218 								noninformative_subreads_for_each_gap[xk1] = current_vote -> noninformative_subreads;
3219 						}
3220 
3221 					}
3222 					if(shift_indel_NO == 0 || global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection)break;
3223 				}
3224 
3225 				if(global_context->config.SAM_extra_columns)
3226 				{
3227 					short max_noninformative_subreads = -1;
3228 
3229 					for(xk1=0;xk1<GENE_SLIDING_STEP;xk1++)
3230 						if(noninformative_subreads_for_each_gap[xk1] > max_noninformative_subreads)
3231 							max_noninformative_subreads = noninformative_subreads_for_each_gap[xk1];
3232 
3233 					current_vote -> noninformative_subreads = max_noninformative_subreads;
3234 				}
3235 			}
3236 
3237 
3238 
3239 			if(is_reversed==1 || !(global_context-> config.do_fusion_detection || global_context-> config.do_long_del_detection))
3240 			{
3241 //#warning "====== CHECK PRINTING !!! =========="
3242 				if(0&&current_read_number == 4000){
3243 					SUBREADprintf(">>>%llu<<<\n%s [%d]  %s\n%s [%d]  %s  VOTE1_MAX=%d >= %d\n", current_read_number, read_name_1, read_len_1, read_text_1, read_name_2, read_len_2, read_text_2, vote_1->max_vote, min_first_read_votes);
3244 					SUBREADprintf(" ======= PAIR %s = %llu ; NON_INFORMATIVE = %d, %d =======\n", read_name_1, current_read_number, vote_1 -> noninformative_subreads, vote_2 -> noninformative_subreads);
3245 					print_votes(vote_1, global_context -> config.index_prefix);
3246 					print_votes(vote_2, global_context -> config.index_prefix);
3247 				}
3248 
3249 				if(global_context -> input_reads.is_paired_end_reads)
3250 					process_voting_junction(global_context, thread_context, current_read_number, vote_1, vote_2, read_name_1, read_name_2, read_text_1, read_text_2, read_len_1, read_len_2, is_reversed, v1_all_subreads, v2_all_subreads);
3251 				else{
3252 					if(vote_1->max_vote >= min_first_read_votes)
3253 						process_voting_junction(global_context, thread_context, current_read_number, vote_1, vote_2, read_name_1, NULL ,  read_text_1, NULL, read_len_1, 0, is_reversed, v1_all_subreads, 0);
3254 					else if(_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0) -> selected_votes < 1)
3255 					{
3256 						mapping_result_t * allll = _global_retrieve_alignment_ptr(global_context, current_read_number, 0,0);
3257 						allll -> noninformative_subreads_in_vote = 0;
3258 
3259 
3260 						_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->used_subreads_in_vote = max(_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->used_subreads_in_vote, applied_subreads);
3261 						_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->noninformative_subreads_in_vote = max(_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->noninformative_subreads_in_vote, vote_1 -> noninformative_subreads);
3262 					}
3263 				}
3264 			}
3265 
3266 			if(is_reversed == 0)
3267 			{
3268 				reverse_read(read_text_1, read_len_1,  global_context->config.space_type);
3269 				if(global_context -> input_reads.is_paired_end_reads)
3270 					reverse_read(read_text_2, read_len_2,  global_context->config.space_type);
3271 			}
3272 		}
3273 
3274 		int read_1_reversed = 1;
3275 		int read_2_reversed = 1;
3276 		int best_read_id;
3277 
3278 		if(global_context -> is_final_voting_run){
3279 			for(is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
3280 			{
3281 				int * this_read_has_reversed = is_second_read ? &read_2_reversed:&read_1_reversed;
3282 				char * current_read_name =  is_second_read?read_name_2 : read_name_1;
3283 				char * current_read =  is_second_read?read_text_2 : read_text_1;
3284 				char * current_qual =  is_second_read?qual_text_2 : qual_text_1;
3285 				int current_rlen = is_second_read?read_len_2:read_len_1;
3286 
3287 				for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
3288 				{
3289 					mapping_result_t * current_r = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read,best_read_id);
3290 					if(current_r -> selected_votes < 1) continue;
3291 
3292 					int this_read_should_be_reversed = (current_r -> result_flags & CORE_IS_NEGATIVE_STRAND) ? 1:0;
3293 
3294 					//SUBREADprintf("DETECT INDEL: should_reverse = %d, this_has_reversed = %d\n", this_read_should_be_reversed, *this_read_has_reversed);
3295 
3296 					if(this_read_should_be_reversed != (*this_read_has_reversed))
3297 					{
3298 						(*this_read_has_reversed) = !(*this_read_has_reversed);
3299 						reverse_read(current_read, current_rlen, global_context->config.space_type);
3300 						if(current_qual)
3301 							reverse_quality(current_qual , current_rlen);
3302 					}
3303 
3304 					gene_value_index_t * curr_val_index = thread_context? thread_context -> current_value_index: global_context -> current_value_index;
3305 					locate_current_value_index(global_context, thread_context, current_r, current_rlen);
3306 
3307 			//		#warning "==== UNCOMMENT THE NEXT LINE ===="
3308 
3309 					if(0 && FIXLENstrcmp("R00000003493", current_read_name)==0)
3310 						SUBREADprintf("TEST_BETTER: better_id = %d, votes = %d, has=%d\n", best_read_id, current_r -> selected_votes, has_better_mapping(global_context, thread_context, current_read_number,  is_second_read,best_read_id));
3311 
3312 					if(!has_better_mapping(global_context, thread_context, current_read_number,  is_second_read,best_read_id))
3313 						find_new_indels(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
3314 					if(need_junction_step)
3315 						find_new_junctions(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
3316 
3317 			//		#warning "==== UNCOMMENT THE NEXT LINE ===="
3318 					if(0 && FIXLENstrcmp("R00000003493", current_read_name)==0){
3319 						SUBREADprintf("TEST_BETTER_END: better_id = %d, votes = %d, has=%d\n", best_read_id, current_r -> selected_votes, has_better_mapping(global_context, thread_context, current_read_number,  is_second_read,best_read_id));
3320 						exit(1);
3321 					}
3322 
3323 					if(thread_context) thread_context -> current_value_index = curr_val_index;
3324 					else global_context -> current_value_index = curr_val_index;
3325 				}
3326 			}
3327 		}
3328 
3329 		if(!thread_context || thread_context->thread_id == 0)
3330 		{
3331 			if(0 == global_context -> config.scRNA_input_mode  && sqr_read_number > sqr_interval)
3332 			{
3333 				show_progress(global_context, thread_context, current_read_number, STEP_VOTING);
3334 				sqr_read_number = 0;
3335 				unsigned long long total_file_size = global_context -> input_reads.first_read_file_size;
3336 				unsigned long long guessed_all_reads = total_file_size / global_context -> input_reads . avg_read_length;// / (1+global_context -> config.is_SAM_file_input);
3337 				sqr_interval = max(10000,guessed_all_reads / global_context -> config.all_threads/10);
3338 			}
3339 			if(global_context -> config.scRNA_input_mode && current_read_number - last_shown_curr >= 1000000){
3340 				show_progress(global_context, thread_context, current_read_number + global_context -> all_processed_reads, STEP_VOTING);
3341 				last_shown_curr = current_read_number;
3342 			}
3343 		}
3344 
3345 
3346 		//bigtable_release_result(global_context, thread_context, current_read_number, 1);
3347 		sqr_read_number++;
3348 
3349 	}
3350 
3351 	free(vote_1);
3352 	free(vote_2);
3353 	free(vote_fg);
3354 	free(read_text_1);
3355 	free(read_text_2);
3356 	free(qual_text_1);
3357 	free(qual_text_2);
3358 
3359 	return 0;
3360 }
3361 
3362 void subread_init_topKbuff(global_context_t * global_context, topK_buffer_t * topKbuff){
3363 	topKbuff -> vote_simple_1_buffer = malloc(global_context -> config.max_vote_simples * sizeof(simple_mapping_t));
3364 	topKbuff -> vote_simple_2_buffer = malloc(global_context -> config.max_vote_simples * sizeof(simple_mapping_t));
3365 
3366 	topKbuff -> junction_tmp_r1 = malloc(sizeof(subjunc_result_t) * global_context->config.multi_best_reads);
3367 	topKbuff -> junction_tmp_r2 = malloc(sizeof(subjunc_result_t) * global_context->config.multi_best_reads);
3368 
3369 	topKbuff -> alignment_tmp_r1 = malloc(sizeof(mapping_result_t) * global_context->config.multi_best_reads);
3370 	topKbuff -> alignment_tmp_r2 = malloc(sizeof(mapping_result_t) * global_context->config.multi_best_reads);
3371 
3372 	topKbuff -> comb_buffer = malloc(global_context -> config.max_vote_combinations * sizeof(vote_combination_t));
3373 }
3374 
3375 void subread_free_topKbuff(global_context_t * global_context, topK_buffer_t * topKbuff){
3376 
3377 	free(topKbuff ->junction_tmp_r1);
3378 	free(topKbuff ->junction_tmp_r2);
3379 	free(topKbuff ->alignment_tmp_r1);
3380 	free(topKbuff ->alignment_tmp_r2);
3381 	free(topKbuff ->comb_buffer);
3382 	free(topKbuff ->vote_simple_1_buffer);
3383 	free(topKbuff ->vote_simple_2_buffer);
3384 }
3385 
3386 
3387 
3388 void * run_in_thread(void * pthread_param)
3389 {
3390 	void ** parameters = (void **)pthread_param;
3391 	global_context_t * global_context = (global_context_t * ) parameters[0];
3392 	thread_context_t * thread_context = (thread_context_t *) parameters[1];
3393 	int task = *((int *)(parameters[2]));
3394 	subread_lock_t * thread_init_lock = (subread_lock_t * ) parameters[3];
3395 	int * ret_value_pointer = (int *)parameters[4];
3396 
3397 	if(thread_init_lock)
3398 		subread_lock_release(thread_init_lock);
3399 
3400 
3401 	switch(task)
3402 	{
3403 		case STEP_VOTING:
3404 			*ret_value_pointer = do_voting(global_context, thread_context);
3405 		break;
3406 		case STEP_ITERATION_TWO:
3407 			*ret_value_pointer = do_iteration_two(global_context, thread_context);
3408 		break;
3409 
3410 	}
3411 
3412 	//sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_DETAILS, "finished running %d", task);
3413 	return NULL;
3414 }
3415 
3416 int run_maybe_threads(global_context_t *global_context, int task)
3417 {
3418 	void * thr_parameters [5];
3419 	int ret_value =0;
3420 
3421 	if(task == STEP_ITERATION_TWO){
3422 		global_context -> last_written_fragment_number = -1;
3423 	}
3424 
3425 
3426 	if(global_context->config.all_threads<2) {
3427 		thr_parameters[0] = global_context;
3428 		thr_parameters[1] = NULL;
3429 		thr_parameters[2] = &task;
3430 		thr_parameters[3] = NULL;
3431 		thr_parameters[4] = &ret_value;
3432 
3433 		run_in_thread(thr_parameters);
3434 		if(STEP_VOTING == task){
3435 			// sort and merge events from all threads and the global event space.
3436 			sort_global_event_table(global_context);
3437 			// sort the event entry table at each location.
3438 			//#warning "==== UNCOMMENT NEXT LINE ======"
3439 			sort_junction_entry_table(global_context);
3440 		}
3441 	} else {
3442 		int current_thread_no ;
3443 		thread_context_t thread_contexts[64];
3444 		int ret_values[64];
3445 
3446 		memset(thread_contexts, 0, sizeof(thread_context_t)*64);
3447 		global_context -> all_thread_contexts = thread_contexts;
3448 
3449 		for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++) {
3450 			thread_contexts[current_thread_no].thread_id = current_thread_no;
3451 			init_indel_thread_contexts(global_context, thread_contexts+current_thread_no, task);
3452 			if((global_context->config.do_breakpoint_detection || global_context-> config.do_fusion_detection || global_context->config.do_breakpoint_detection || global_context-> config.do_long_del_detection))
3453 				init_junction_thread_contexts(global_context, thread_contexts+current_thread_no, task);
3454 
3455 			if(STEP_VOTING == task) subread_init_topKbuff(global_context,&thread_contexts[current_thread_no].topKbuff);
3456 
3457 			subread_lock_occupy(&global_context -> thread_initial_lock);
3458 			thr_parameters[0] = global_context;
3459 			thr_parameters[1] = thread_contexts+current_thread_no;
3460 			thr_parameters[2] = &task;
3461 			thr_parameters[3] = (void *)&global_context -> thread_initial_lock;
3462 			thr_parameters[4] = ret_values + current_thread_no;
3463 
3464 			pthread_create(&thread_contexts[current_thread_no].thread, NULL,  run_in_thread, &thr_parameters);
3465 		}
3466 
3467 		for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++) {
3468 			pthread_join(thread_contexts[current_thread_no].thread, NULL);
3469 
3470 			if(STEP_ITERATION_TWO == task){
3471 				global_context -> all_mapped_reads += thread_contexts[current_thread_no].all_mapped_reads;
3472 				global_context -> all_correct_PE_reads += thread_contexts[current_thread_no].all_correct_PE_reads;
3473 				global_context -> not_properly_pairs_wrong_arrangement += thread_contexts[current_thread_no].not_properly_pairs_wrong_arrangement;
3474 				global_context -> not_properly_pairs_different_chro += thread_contexts[current_thread_no].not_properly_pairs_different_chro;
3475 				global_context -> not_properly_different_strands += thread_contexts[current_thread_no].not_properly_different_strands;
3476 				global_context -> not_properly_pairs_TLEN_wrong += thread_contexts[current_thread_no].not_properly_pairs_TLEN_wrong;
3477 				global_context -> all_unmapped_reads += thread_contexts[current_thread_no].all_unmapped_reads;
3478 				global_context -> not_properly_pairs_only_one_end_mapped += thread_contexts[current_thread_no].not_properly_pairs_only_one_end_mapped;
3479 				global_context -> all_multimapping_reads += thread_contexts[current_thread_no].all_multimapping_reads;
3480 				global_context -> all_uniquely_mapped_reads += thread_contexts[current_thread_no].all_uniquely_mapped_reads;
3481 
3482 			}
3483 			if(STEP_VOTING == task) subread_free_topKbuff(global_context,&thread_contexts[current_thread_no].topKbuff);
3484 			ret_value += *(ret_values + current_thread_no);
3485 			if(ret_value)break;
3486 		}
3487 
3488 		// sort and merge events from all threads and the global event space.
3489 		finalise_indel_and_junction_thread(global_context, thread_contexts, task);
3490 		if(STEP_VOTING == task)
3491 			// sort the event entry table at each location.
3492 			sort_junction_entry_table(global_context);
3493 	}
3494 
3495 	if(CORE_SOFT_BR_CHAR == '\r')
3496 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "");
3497 	return ret_value;
3498 }
3499 
3500 void clean_context_after_chunk(global_context_t * context)
3501 {
3502 	context -> running_processed_reads_in_chunk = 0;
3503 	context -> processed_reads_in_chunk = 0;
3504 	init_bigtable_results(context, 1);
3505 
3506 	indel_context_t * indel_context = (indel_context_t *)context -> module_contexts[MODULE_INDEL_ID];
3507 	chromosome_event_t * event_space = indel_context -> event_space_dynamic;
3508 
3509 	int event_id;
3510 	//memset(context -> big_margin_record  , 0 , sizeof(*context -> big_margin_record) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.big_margin_record_size);
3511 	for(event_id = 0; event_id < indel_context->total_events; event_id++){
3512 		chromosome_event_t * event_body = event_space + event_id;
3513 		event_body -> critical_read_id = 0xffffffffffffffffllu;
3514 	}
3515 }
3516 
3517 #define SKIP_CORE_NOEMPTY(fp_loc, buf_loc)	{ while(1){char *ret_loc = fgets(buf_loc, 3000, fp_loc); if(buf_loc[0]!='\n' || !ret_loc) break; } }
3518 
3519 void locate_read_files(global_context_t * global_context, int type)
3520 {
3521 	// The BCL input module uses its own chunking algorithm.
3522 	if(global_context -> input_reads.first_read_file.file_type == GENE_INPUT_BCL) return;
3523 
3524 	if(type==SEEK_SET) {
3525 		geinput_tell(&global_context -> input_reads.first_read_file, &global_context -> current_circle_start_position_file1);
3526 		if(global_context ->input_reads.is_paired_end_reads)
3527 			geinput_tell(&global_context -> input_reads.second_read_file, &global_context -> current_circle_start_position_file2);
3528 	} else {
3529 		geinput_tell(&global_context -> input_reads.first_read_file, &global_context -> current_circle_end_position_file1);
3530 		if(global_context ->input_reads.is_paired_end_reads)
3531 			geinput_tell(&global_context -> input_reads.second_read_file, &global_context -> current_circle_end_position_file2);
3532 	}
3533 	if(global_context -> input_reads.first_read_file.file_type == GENE_INPUT_SCRNA_FASTQ) return;
3534 	if(global_context -> input_reads.first_read_file.file_type == GENE_INPUT_SCRNA_BAM) return;
3535 
3536 	// This variable is only used to display the running progress.
3537 	// That is why the scRNA-seq mode doesn't need to keep the start pos.
3538 	if(type==SEEK_SET)
3539 		global_context -> current_circle_start_abs_offset_file1 = geinput_file_offset(&(global_context -> input_reads.first_read_file));
3540 
3541 }
3542 
3543 void rewind_read_files(global_context_t * global_context, int type)
3544 {
3545 	if(type==SEEK_SET)
3546 	{
3547 		geinput_seek(&global_context -> input_reads.first_read_file, & global_context -> current_circle_start_position_file1);
3548 		if(global_context ->input_reads.is_paired_end_reads)
3549 			geinput_seek(&global_context -> input_reads.second_read_file, & global_context -> current_circle_start_position_file2);
3550 	}
3551 	else
3552 	{
3553 		geinput_seek(&global_context -> input_reads.first_read_file, & global_context -> current_circle_end_position_file1);
3554 		if(global_context ->input_reads.is_paired_end_reads)
3555 			geinput_seek(&global_context -> input_reads.second_read_file, & global_context -> current_circle_end_position_file2);
3556 
3557 	}
3558 }
3559 
3560 void go_chunk_start( global_context_t * global_context ){
3561 	if(global_context -> input_reads.first_read_file.file_type == GENE_INPUT_BCL)
3562 		cacheBCL_go_chunk_start(&global_context -> input_reads.first_read_file.bcl_input);
3563 	else
3564 		rewind_read_files(global_context, SEEK_SET);
3565 	global_context -> running_processed_reads_in_chunk=0;
3566 }
3567 
3568 void go_chunk_nextchunk( global_context_t * global_context ){
3569 	if(global_context -> input_reads.first_read_file.file_type == GENE_INPUT_BCL)
3570 		cacheBCL_go_chunk_end(&global_context -> input_reads.first_read_file.bcl_input);
3571 	else
3572 		rewind_read_files(global_context, SEEK_END);
3573 	global_context -> running_processed_reads_in_chunk=0;
3574 }
3575 
3576 int read_chunk_circles(global_context_t *global_context)
3577 {
3578 	int block_no;
3579 
3580 //	printf("GINP1 AT %llu\n", ftello(global_context -> input_reads.first_read_file.input_fp));
3581 
3582 	unsigned int chunk_no = 0;
3583 
3584 	global_context -> current_index = (gehash_t*) malloc(sizeof(gehash_t));
3585 	global_context -> current_value_index = global_context -> all_value_indexes;
3586 	global_context -> running_processed_reads_in_chunk=0;
3587 	global_context -> processed_reads_in_chunk=0;
3588 
3589 	double time_load_index = miltime();
3590 	for(block_no = 0; block_no< global_context->index_block_number; block_no++)
3591 	{
3592 		char tmp_fname[MAX_FILE_NAME_LENGTH+ 30];
3593 		sprintf(tmp_fname, "%s.%02d.%c.array", global_context->config.index_prefix, block_no,  global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
3594 		if(gvindex_load(&global_context -> all_value_indexes[block_no], tmp_fname)) return -1;
3595 	}
3596 	double period_load_index = miltime() - time_load_index;
3597 	global_context -> timecost_load_index += period_load_index;
3598 
3599 	while(1)
3600 	{
3601 		int ret;
3602 
3603 		locate_read_files(global_context, SEEK_SET);
3604 		for(global_context->current_index_block_number = 0; global_context->current_index_block_number < global_context->index_block_number; global_context->current_index_block_number++)
3605 		{
3606 			char tmp_fname[MAX_FILE_NAME_LENGTH+30];
3607 			time_load_index = miltime();
3608 			if(global_context->index_block_number > 1 || chunk_no == 0)
3609 			{
3610 				sprintf(tmp_fname, "%s.%02d.%c.tab", global_context->config.index_prefix, global_context->current_index_block_number,  global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
3611 				print_in_box(80,0,0, "Load the %d-th index block...",1+ global_context->current_index_block_number);
3612 
3613 				if(gehash_load(global_context -> current_index, tmp_fname)) return -1;
3614 				print_in_box(80,0,0, "The index block has been loaded.");
3615 				sprintf(tmp_fname, "%s.%02d.%c.array", global_context->config.index_prefix, global_context->current_index_block_number, global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
3616 			}
3617 			period_load_index = miltime() - time_load_index;
3618 			global_context -> timecost_load_index += period_load_index;
3619 			global_context -> current_value_index = global_context -> all_value_indexes + global_context->current_index_block_number;
3620 
3621 			if(global_context->current_index_block_number ==0 && global_context -> all_processed_reads==0)
3622 				global_context->align_start_time = miltime();
3623 
3624 			if(global_context->index_block_number == global_context->current_index_block_number + 1)
3625 				global_context -> is_final_voting_run = 1;
3626 			else	global_context -> is_final_voting_run = 0;
3627 
3628 			print_in_box(80,0,0, "Start read mapping in chunk.");
3629 			double time_start_voting = miltime();
3630 			ret = run_maybe_threads(global_context, STEP_VOTING);
3631 			double period_voting = miltime() - time_start_voting;
3632 			global_context -> timecost_voting += period_voting;
3633 
3634 			if(0 == global_context->current_index_block_number){
3635 				locate_read_files(global_context, SEEK_END);
3636 				global_context -> processed_reads_in_chunk = global_context -> running_processed_reads_in_chunk;
3637 			}
3638 
3639 			if(global_context->current_index_block_number < global_context->index_block_number -1)
3640 				go_chunk_start(global_context);
3641 			//	rewind_read_files(global_context, SEEK_SET);
3642 
3643 			int is_last_chunk = global_context -> processed_reads_in_chunk < global_context->config.reads_per_chunk;
3644 
3645 			if(global_context->index_block_number > 1 || is_last_chunk)
3646 				gehash_destory_fast(global_context -> current_index);
3647 
3648 			if(ret) break;
3649 			if(!global_context -> processed_reads_in_chunk) break;
3650 		}
3651 
3652 
3653 		// after the voting step, all subread index blocks are released and all base index blocks are loaded at once.
3654 		double time_before_realign = miltime();
3655 
3656 		if(0 == chunk_no && global_context->config.exon_annotation_file[0] && global_context->config.do_breakpoint_detection){
3657 			ret = ret || load_known_junctions(global_context);
3658 			if(!ret){
3659 				// sort and merge events from all threads and the global event space.
3660 				sort_global_event_table(global_context);
3661 				// sort the event entry table at each location.
3662 				sort_junction_entry_table(global_context);
3663 			}
3664 		}
3665 
3666 		ret = ret || anti_supporting_read_scan(global_context);
3667 		remove_neighbour(global_context);
3668 
3669 		go_chunk_start(global_context);
3670 	//	rewind_read_files(global_context, SEEK_SET);
3671 		double period_before_realign = miltime() - time_before_realign;
3672 		global_context -> timecost_before_realign += period_before_realign;
3673 
3674 		double time_realign = miltime();
3675 		ret = ret || run_maybe_threads(global_context, STEP_ITERATION_TWO);
3676 		double period_realign = miltime() - time_realign;
3677 
3678 		global_context -> timecost_for_realign += period_realign;
3679 
3680 		if(global_context -> config.is_third_iteration_running)
3681 		{
3682 			go_chunk_start(global_context);
3683 			//rewind_read_files(global_context, SEEK_SET);
3684 			ret = ret || do_iteration_three(global_context, NULL);
3685 		}
3686 
3687 		go_chunk_nextchunk(global_context);
3688 		//rewind_read_files(global_context, SEEK_END);
3689 
3690 		global_context -> all_processed_reads+= global_context ->processed_reads_in_chunk;
3691 
3692 		if(ret) return ret;
3693 
3694 		if(global_context -> processed_reads_in_chunk < global_context->config.reads_per_chunk ||
3695 		  (global_context -> config.is_BAM_output && global_context -> output_bam_writer -> is_internal_error) || global_context -> input_reads.is_internal_error  ||
3696 		  (global_context -> output_sam_is_full))
3697 			// base value indexes loaded in the last circle are not destroyed and are used in writting the indel VCF.
3698 			// the indexes will be destroyed in destroy_global_context
3699 			break;
3700 
3701 		clean_context_after_chunk(global_context);
3702 		chunk_no++;
3703 	}
3704 
3705 	free(global_context -> current_index);
3706 
3707 	if(global_context -> config.is_third_iteration_running)
3708 		finalise_long_insertions(global_context);
3709 
3710 	return 0;
3711 }
3712 
3713 void char_strftime(char * tbuf){
3714 	time_t rawtime;
3715 	struct tm * timeinfo;
3716 
3717 	time (&rawtime);
3718 	timeinfo = localtime (&rawtime);
3719 	strftime (tbuf,80,"%d-%b-%Y %H:%M:%S",timeinfo);
3720 
3721 }
3722 
3723 void print_subread_logo()
3724 {
3725 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"       %c[44;37m ========== %c[0m%c[36m    _____ _    _ ____  _____  ______          _____  ", CHAR_ESC, CHAR_ESC, CHAR_ESC);
3726 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"       %c[44;37m =====      %c[0m%c[36m   / ____| |  | |  _ \\|  __ \\|  ____|   /\\   |  __ \\ ", CHAR_ESC, CHAR_ESC, CHAR_ESC);
3727 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"       %c[44;37m   =====    %c[0m%c[36m  | (___ | |  | | |_) | |__) | |__     /  \\  | |  | |", CHAR_ESC, CHAR_ESC, CHAR_ESC);
3728 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"       %c[44;37m     ====   %c[0m%c[36m   \\___ \\| |  | |  _ <|  _  /|  __|   / /\\ \\ | |  | |", CHAR_ESC, CHAR_ESC, CHAR_ESC);
3729 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"       %c[44;37m       ==== %c[0m%c[36m   ____) | |__| | |_) | | \\ \\| |____ / ____ \\| |__| |", CHAR_ESC, CHAR_ESC, CHAR_ESC);
3730 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"       %c[44;37m ========== %c[0m%c[36m  |_____/ \\____/|____/|_|  \\_\\______/_/    \\_\\_____/%c[0m", CHAR_ESC, CHAR_ESC, CHAR_ESC, CHAR_ESC);
3731 	#ifdef MAKE_STANDALONE
3732 	char * spaces = "";
3733 	if(strlen(SUBREAD_VERSION) == 8) spaces = "";
3734 	else if(strlen(SUBREAD_VERSION) == 5) spaces = "  ";
3735 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"	%sv%s",spaces,SUBREAD_VERSION);
3736 	#else
3737 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO ,"       %s",SUBREAD_VERSION);
3738 	#endif
3739 }
3740 
3741 
3742 
3743 int print_configuration(global_context_t * context)
3744 {
3745 	setlocale(LC_NUMERIC, "");
3746 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"");
3747 	print_subread_logo();
3748 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"");
3749 	print_in_box(80, 1, 1, context->config.entry_program_name == CORE_PROGRAM_SUBJUNC?"setting":"setting");
3750 	print_in_box(80, 0, 1, "");
3751 
3752 	if(context->config.do_breakpoint_detection)
3753 	{
3754 	    if(context-> config.do_fusion_detection)
3755 			print_in_box(80, 0, 0, "Function      : Read alignment + Junction/Fusion detection%s", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?" (DNA-Seq)":" (RNA-Seq)");
3756 	    else if(context-> config.do_long_del_detection)
3757 			print_in_box(80, 0, 0, "Function      : Read alignment + Long Deletion detection%s", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?" (DNA-Seq)":" (RNA-Seq)");
3758 		else
3759 			print_in_box(80, 0, 0, "Function      : Read alignment + Junction detection (%s)", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?"DNA-Seq":"RNA-Seq");
3760 	}
3761 	else
3762 	        print_in_box(80, 0, 0, "Function      : Read alignment%s", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?" (DNA-Seq)":" (RNA-Seq)");
3763 	if( context->config.second_read_file[0]){
3764 	        print_in_box(80, 0, 0, "Input file 1  : %s", get_short_fname(context->config.first_read_file));
3765 	        print_in_box(80, 0, 0, "Input file 2  : %s", get_short_fname(context->config.second_read_file));
3766 	}else if(context->config.scRNA_input_mode == GENE_INPUT_SCRNA_FASTQ){
3767 			const char *strmm_tmp = context->config.first_read_file;
3768 			int sample_no = 1;
3769 			while((strmm_tmp = strstr(strmm_tmp, SCRNA_FASTA_SPLIT1))!=NULL) {
3770 				sample_no++;
3771 				strmm_tmp++;
3772 			}
3773 	        print_in_box(80, 0, 0, "Input file    : %d samples from scRNA-seq", sample_no);
3774 	}else if(context->config.scRNA_input_mode == GENE_INPUT_BCL){
3775 	        print_in_box(80, 0, 0, "Input file    : %s%s", get_short_fname(context->config.first_read_file), " (scRNA)");
3776 	}else if(context->config.scRNA_input_mode == GENE_INPUT_SCRNA_BAM){
3777 	        print_in_box(80, 0, 0, "Input file    : %s%s", get_short_fname(context->config.first_read_file), " (10X BAM)");
3778 	}else{
3779 	        print_in_box(80, 0, 0, "Input file    : %s%s", get_short_fname(context->config.first_read_file), context->config.is_SAM_file_input?(context->config.is_BAM_input?" (BAM)":" (SAM)"):(""));
3780 	}
3781 
3782 	if(context->config.output_prefix [0])
3783 	        print_in_box(80, 0, 0, "Output file   : %s (%s)%s", get_short_fname(context->config.output_prefix), context->config.is_BAM_output?"BAM":"SAM", context->config.is_input_read_order_required?", Keep Order":(context->config.sort_reads_by_coordinates?", Sorted":""));
3784 	else
3785 	        print_in_box(80, 0, 0, "Output method : STDOUT (%s)" , context->config.is_BAM_output?"BAM":"SAM");
3786 
3787 	print_in_box(80, 0, 0,         "Index name    : %s", get_short_fname(context->config.index_prefix));
3788 	print_in_box(80, 0, 0, "");
3789 	print_in_box(80, 0, 1, "------------------------------------");
3790 	print_in_box(80, 0, 0, "");
3791 	print_in_box(80, 0, 0, "                              Threads : %d", context->config.all_threads);
3792 	print_in_box(80, 0, 0, "                         Phred offset : %d", (context->config.phred_score_format == FASTQ_PHRED33)?33:64);
3793 	if( context->config.second_read_file[0])
3794 	{
3795 	print_in_box(80, 0, 0, "              # of extracted subreads : %d", context->config.total_subreads);
3796 	print_in_box(80, 0, 0, "                       Min read1 vote : %d", context->config.minimum_subread_for_first_read);
3797 	print_in_box(80, 0, 0, "                       Min read2 vote : %d", context->config.minimum_subread_for_second_read);
3798 	print_in_box(80, 0, 0, "                    Max fragment size : %d", context->config.maximum_pair_distance);
3799 	print_in_box(80, 0, 0, "                    Min fragment size : %d", context->config.minimum_pair_distance);
3800 	}
3801 	else
3802 	print_in_box(80, 0, 0, "                            Min votes : %d / %d", context->config.minimum_subread_for_first_read, context->config.total_subreads);
3803 
3804 	print_in_box(80, 0, 0, "                       Max mismatches : %d", context->config.max_mismatch_exonic_reads);
3805 	print_in_box(80, 0, 0, "                     Max indel length : %d", context->config.max_indel_length);
3806 	print_in_box(80, 0, 0, "           Report multi-mapping reads : %s", context->config.report_multi_mapping_reads?"yes":"no");
3807 	print_in_box(80, 0, 0, "Max alignments per multi-mapping read : %d", context->config.multi_best_reads);
3808 
3809 	if(context->config.exon_annotation_file[0]){
3810 	if(context->config.exon_annotation_file_screen_out[0])
3811 	print_in_box(80, 0, 0, "                          Annotations : %s", context->config.exon_annotation_file_screen_out);
3812 	else
3813 	print_in_box(80, 0, 0, "                          Annotations : %s (%s)",get_short_fname( context->config.exon_annotation_file), context->config.exon_annotation_file_type==FILE_TYPE_GTF?"GTF":"SAF");
3814 	}
3815 
3816 	if(context->config.max_insertion_at_junctions)
3817 	print_in_box(80, 0, 0, "                   Insertions at junc : %d", context->config.max_insertion_at_junctions);
3818 
3819 	if(context->config.read_group_id[0])
3820 	print_in_box(80, 0, 0, "                      Read group name : %s", context->config.read_group_id);
3821 
3822 	print_in_box(80, 0, 1, "");
3823 	print_in_box(80, 2, 1, "");
3824 	sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"");
3825 
3826 
3827 	if(!context->config.experiment_type){
3828 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"You have to specify the experiment type by using the '-t' option.\n");
3829 		return -1;
3830 	}
3831 
3832 	if(!context->config.first_read_file[0])
3833 	{
3834 	        sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"You have to specify at least one input file in the FASTQ/FASTA/PLAIN format using the '-r' option.\n");
3835 	        return -1;
3836 	}
3837 
3838 	if(0 && !context->config.output_prefix[0])
3839 	{
3840 	        sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"You have to specify the path of output using the '-o' option.\n");
3841 	        return -1;
3842 	}
3843 
3844 	if(!context->config.index_prefix[0])
3845 	{
3846 	        sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"You have to specify the prefix of the index files using the '-i' option.\n");
3847 	        return -1;
3848 	}
3849 	char tbuf[90];
3850 	char_strftime(tbuf);
3851 
3852 	print_in_box(80,1,1,"Running (%s, pid=%d)", tbuf, getpid());
3853 	print_in_box(80,0,1,"");
3854 
3855 
3856 	return 0;
3857 }
3858 
3859 
3860 
3861 int init_paired_votes(global_context_t *context)
3862 {
3863 	init_bigtable_results(context, 0);
3864 	return 0;
3865 }
3866 
3867 #define FETCH_SEQ_LEN(x1) (context->chromosome_table.read_offsets[xk1] - last_offset+16 - ( 2 * context->chromosome_table.padding ))
3868 
3869 void write_sam_headers(global_context_t * context)
3870 {
3871 	char *  sorting_str = context -> config.sort_reads_by_coordinates?"coordinate":"unsorted";
3872 	if(context -> config.is_BAM_output)
3873 	{
3874 		char header_buff[100];
3875 		sprintf(header_buff, "@HD\tVN:1.0\tSO:%s", sorting_str);
3876 		SamBam_writer_add_header(context -> output_bam_writer, header_buff, 0);
3877 		int xk1;
3878 		int last_offset = 0;
3879 		char * obuf = malloc(10000+5000);
3880 		for(xk1=0; xk1< context->chromosome_table.total_offsets; xk1++)
3881 		{
3882 			int seq_len = FETCH_SEQ_LEN(x1);
3883 			//context->chromosome_table.read_offsets[xk1] - last_offset+16
3884 			SamBam_writer_add_chromosome(context -> output_bam_writer, context->chromosome_table.read_names+ xk1 * MAX_CHROMOSOME_NAME_LEN, seq_len, 1);
3885 			last_offset = context->chromosome_table.read_offsets[xk1];
3886 		}
3887 
3888 
3889 		if(context->config.read_group_id[0])
3890 		{
3891 			snprintf(obuf,10000, "@RG\tID:%s%s",context->config.read_group_id, context->config.read_group_txt);
3892 			SamBam_writer_add_header(context -> output_bam_writer,obuf, 0);
3893 		}
3894 		snprintf(obuf,9999+4900, "@PG\tID:subread\tPN:subread\tVN:%s\tCL:%s", SUBREAD_VERSION, context->rebuilt_command_line);
3895 		SamBam_writer_add_header(context -> output_bam_writer,obuf, 0);
3896 		SamBam_writer_finish_header(context -> output_bam_writer);
3897 		free(obuf);
3898 	}
3899 	else
3900 	{
3901 		sambamout_fprintf(context -> output_sam_fp, "@HD\tVN:1.0\tSO:%s\n", sorting_str);
3902 		int xk1;
3903 		int last_offset = 0;
3904 		for(xk1=0; xk1< context->chromosome_table.total_offsets; xk1++)
3905 		{
3906 			int seq_len = FETCH_SEQ_LEN(x1);
3907 			sambamout_fprintf(context -> output_sam_fp, "@SQ\tSN:%s\tLN:%u\n", context->chromosome_table.read_names+ xk1 * MAX_CHROMOSOME_NAME_LEN, seq_len);
3908 			last_offset = context->chromosome_table.read_offsets[xk1];
3909 		}
3910 
3911 		if(context->config.read_group_id[0])
3912 			sambamout_fprintf(context -> output_sam_fp, "@RG\tID:%s%s\n",context->config.read_group_id, context->config.read_group_txt);
3913 		sambamout_fprintf(context -> output_sam_fp, "@PG\tID:subread\tPN:subread\tVN:%s\tCL:%s\n", SUBREAD_VERSION, context->rebuilt_command_line);
3914 
3915 	}
3916 }
3917 
3918 #define EXONIC_REGION_RESOLUTION 16
3919 
3920 int is_pos_in_annotated_exon_regions(global_context_t * global_context, unsigned int pos){
3921 	int exonic_map_byte = pos / EXONIC_REGION_RESOLUTION / 8;
3922 	int exonic_map_bit = (pos / EXONIC_REGION_RESOLUTION) % 8;
3923 
3924 	return (global_context ->exonic_region_bitmap [exonic_map_byte] & (1<<exonic_map_bit))?1:0;
3925 
3926 }
3927 
3928 char * get_sam_chro_name_from_alias(HashTable * tab, char * anno_chro){
3929 	KeyValuePair * cursor = NULL;
3930 	long x1;
3931 	for(x1 = 0; x1 < tab -> numOfBuckets; x1 ++){
3932 		cursor = tab -> bucketArray[x1];
3933 		while(1){
3934 			if(NULL == cursor)break;
3935 			char * tab_anno_chro = cursor -> value;
3936 			if(strcmp(tab_anno_chro, anno_chro) == 0) return (char *)cursor -> key;
3937 			cursor = cursor -> next;
3938 		}
3939 	}
3940 	return NULL;
3941 }
3942 
3943 int do_anno_bitmap_add_feature(char * gene_name, char * tracnscript_id, char * chro_name, unsigned int feature_start, unsigned int feature_end, int is_negative_strand, void * context){
3944 	global_context_t * global_context = context;
3945 	if(global_context -> sam_chro_to_anno_chr_alias){
3946 		char * sam_chro = get_sam_chro_name_from_alias(global_context -> sam_chro_to_anno_chr_alias, chro_name);
3947 		if(sam_chro!=NULL) chro_name = sam_chro;
3948 	}
3949 
3950 	if(!HashTableGet(global_context -> annotation_chro_table,chro_name))
3951 		HashTablePut(global_context -> annotation_chro_table, memstrcpy( chro_name ), NULL+1);
3952 
3953 	char tmp_chro_name[MAX_CHROMOSOME_NAME_LEN];
3954 	int access_n = HashTableGet( global_context -> chromosome_table.read_name_to_index, chro_name ) - NULL;
3955 	if(access_n < 1){
3956 		if(chro_name[0]=='c' && chro_name[1]=='h' && chro_name[2]=='r'){
3957 			chro_name += 3;
3958 		}else{
3959 			strcpy(tmp_chro_name, "chr");
3960 			strcat(tmp_chro_name, chro_name);
3961 			chro_name = tmp_chro_name;
3962 		}
3963 	}
3964 
3965 	unsigned int exonic_map_start = linear_gene_position(&global_context->chromosome_table , chro_name, feature_start);
3966 	unsigned int exonic_map_stop = linear_gene_position(&global_context->chromosome_table , chro_name, feature_end);
3967 	if(exonic_map_start > 0xffffff00 || exonic_map_stop > 0xffffff00){
3968 		return -1;
3969 	}
3970 
3971 	exonic_map_start -= exonic_map_start%EXONIC_REGION_RESOLUTION;
3972 	exonic_map_stop -= exonic_map_stop%EXONIC_REGION_RESOLUTION;
3973 
3974 	for(; exonic_map_start <= exonic_map_stop; exonic_map_start+=EXONIC_REGION_RESOLUTION){
3975 		int exonic_map_byte = exonic_map_start / EXONIC_REGION_RESOLUTION / 8;
3976 		int exonic_map_bit = (exonic_map_start / EXONIC_REGION_RESOLUTION) % 8;
3977 
3978 		global_context ->exonic_region_bitmap[exonic_map_byte] |= (1<<exonic_map_bit);
3979 	}
3980 	return 0;
3981 }
3982 
3983 void convert_table_key_to_alias(void * ky ,void * va, HashTable * tab){
3984 	HashTable * alias_index_to_annot = tab -> appendix2;
3985 	HashTable * result_annot_chros = tab -> appendix1;
3986 
3987 	char * index_name = ky;
3988 	char * annot_name = HashTableGet(alias_index_to_annot, index_name);
3989 	if(annot_name==NULL) annot_name = index_name;
3990 	HashTablePut( result_annot_chros, annot_name, va );
3991 }
3992 
3993 void warning_anno_vs_index(HashTable * anno_chros_tab, gene_offset_t * index_chros_offset, HashTable * alias_index_to_annot){
3994 
3995 	HashTable * index_chros_tab = index_chros_offset -> read_name_to_index;
3996 	HashTable * chros_in_annot_using_index_names = anno_chros_tab ;
3997 	HashTable * chros_in_index_using_annot_names = index_chros_tab ;
3998 	HashTable * alias_annot_to_index = NULL;
3999 
4000 	if(alias_index_to_annot){
4001 		chros_in_annot_using_index_names = StringTableCreate(1000);
4002 		chros_in_index_using_annot_names = StringTableCreate(1000);
4003 
4004 		alias_annot_to_index = StringTableReverse(alias_index_to_annot);
4005 		anno_chros_tab -> appendix1 = chros_in_annot_using_index_names;
4006 		anno_chros_tab -> appendix2 = alias_annot_to_index;
4007 		HashTableIteration(anno_chros_tab, convert_table_key_to_alias);
4008 
4009 		index_chros_tab -> appendix1 = chros_in_index_using_annot_names;
4010 		index_chros_tab -> appendix2 = alias_index_to_annot;
4011 		HashTableIteration(index_chros_tab, convert_table_key_to_alias);
4012 	}
4013 
4014 	warning_hash_hash(anno_chros_tab, chros_in_index_using_annot_names, "Chromosomes/contigs in annotation but not in index :");
4015 	warning_hash_hash(index_chros_tab, chros_in_annot_using_index_names, "Chromosomes/contigs in index but not in annotation :");
4016 
4017 	if(alias_index_to_annot){
4018 		HashTableDestroy(alias_annot_to_index);
4019 		HashTableDestroy(chros_in_annot_using_index_names);
4020 		HashTableDestroy(chros_in_index_using_annot_names);
4021 	}
4022 }
4023 
4024 int load_annotated_exon_regions(global_context_t * global_context){
4025 	int bitmap_size = (4096 / EXONIC_REGION_RESOLUTION / 8)*1024*1024;
4026 	global_context ->exonic_region_bitmap = malloc(bitmap_size);
4027 	memset( global_context ->exonic_region_bitmap , 0, bitmap_size );
4028 	global_context -> annotation_chro_table = HashTableCreate(1003);
4029 	HashTableSetDeallocationFunctions( global_context -> annotation_chro_table, free, NULL);
4030 	HashTableSetKeyComparisonFunction( global_context -> annotation_chro_table, my_strcmp);
4031 	HashTableSetHashFunction( global_context -> annotation_chro_table, fc_chro_hash);
4032 
4033 	int loaded_features = load_features_annotation(global_context->config.exon_annotation_file, global_context->config.exon_annotation_file_type, global_context->config.exon_annotation_gene_id_column, NULL, global_context->config.exon_annotation_feature_name_column, global_context, do_anno_bitmap_add_feature);
4034 	if(loaded_features < 0)return -1;
4035 	else print_in_box(80,0,0,"%d annotation records were loaded.\n", loaded_features);
4036 	return 0;
4037 }
4038 
4039 int load_global_context(global_context_t * context)
4040 {
4041 	char tmp_fname [MAX_FILE_NAME_LENGTH + 50];
4042 	int min_phred_score = -1 , max_phred_score = -1;
4043 	context -> is_phred_warning = 0;
4044 
4045 	if(context->config.multi_best_reads>1 && ! context->config.report_multi_mapping_reads){
4046 		print_in_box(80,0,0,"WARNING: Multi-mapping reads are reported.");
4047 		context->config.report_multi_mapping_reads = 1;
4048 	}
4049 
4050 	if(context->config.scRNA_input_mode){
4051 		context -> config.multi_best_reads = 3;
4052 		context -> config.do_remove_neighbour_for_scRNA = 1;
4053 		context -> config.reads_per_chunk = 30000000*context -> config.multi_best_reads;
4054 		context -> config.multi_best_reads = max(context -> config.multi_best_reads , context -> config.reported_multi_best_reads);
4055 		// opening a BCL input needs the exact chunk size.
4056 		if(context->config.multi_best_reads>1) context -> config.reads_per_chunk /= context->config.multi_best_reads;
4057 		char * reads_per_chunk = getenv("CC_READS_PER_CHUNK");
4058 		char * remove_beighbour = getenv("CC_REMOVE_NEIGHBOUR");
4059 		if(remove_beighbour) context -> config.do_remove_neighbour_for_scRNA = remove_beighbour[0]-'0';
4060 		if(reads_per_chunk) context -> config.reads_per_chunk = atoi(reads_per_chunk);
4061 	}
4062 	print_in_box(80,0,0,"Check the input reads.");
4063 	subread_init_lock(&context->input_reads.input_lock);
4064 	if(core_geinput_open(context, &context->input_reads.first_read_file, 1)) {
4065 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Unable to open the input file '%s'\n", context->config.first_read_file);
4066 		return -1;
4067 	}
4068 
4069 	context->config.space_type = context->input_reads.first_read_file.space_type;
4070 	print_in_box(89,0,0,"The input file contains %c[36m%s%c[0m space reads.", CHAR_ESC, context->config.space_type == GENE_SPACE_COLOR?"color":"base", CHAR_ESC);
4071 	if(context->config.space_type == GENE_SPACE_COLOR && context->config.is_BAM_output && !context->config.convert_color_to_base)
4072 	{
4073 		print_in_box(80,0,0,"The color-space bases will be converted into base space in the BAM output.");
4074 		context->config.convert_color_to_base=1;
4075 	}
4076 	else if(context->config.space_type == GENE_SPACE_BASE && context->config.convert_color_to_base)
4077 	{
4078 		print_in_box(80,0,0,"The reads will not be converted into base space.");
4079 		context->config.convert_color_to_base=0;
4080 	}
4081 
4082 
4083 
4084 	if(context->input_reads.is_paired_end_reads)
4085 	{
4086 		if(core_geinput_open(context, &context->input_reads.second_read_file, 2))
4087 		{
4088 			//sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Unable to open '%s' as input. Please check if it exists, you have the permission to read it, and it is in the correct format.\n", context->config.second_read_file);
4089 			return -1;
4090 		}
4091 
4092 		context -> config.max_vote_combinations = 3;
4093 		context -> config.multi_best_reads = 3;
4094 		context -> config.max_vote_simples = 64;
4095 		context -> config.max_vote_number_cutoff = 2;
4096 	}else{
4097 		context -> config.max_vote_combinations = 3;
4098 		context -> config.multi_best_reads = 3;
4099 		context -> config.max_vote_simples = 3;
4100 		context -> config.max_vote_number_cutoff = 2;
4101 	}
4102 
4103 	print_in_box(80,0,0,"Initialise the memory objects.");
4104 	context -> config.max_vote_simples = max(context -> config.max_vote_simples ,  context -> config.reported_multi_best_reads);
4105 	context -> config.max_vote_combinations = max(context -> config.max_vote_combinations ,  context -> config.reported_multi_best_reads);
4106 
4107 	if(!context->config.scRNA_input_mode){
4108 		// if it is BCL input then these two parameters have been decided .
4109 		if(context->config.multi_best_reads>1) context->config.reads_per_chunk /= context->config.multi_best_reads;
4110 		if(context->input_reads.is_paired_end_reads) context->config.reads_per_chunk /= 2;
4111 	}
4112 
4113 	struct stat ginp1_stat;
4114 	int guess_tested_reads = 0;
4115 	stat(context->config.first_read_file , &ginp1_stat);
4116 	context->input_reads.first_read_file_size = ginp1_stat.st_size;
4117 
4118 	print_in_box(80,0,0,"Estimate the mean read length.");
4119 	context -> input_reads.avg_read_length = 100;
4120 	if(context->config.scRNA_input_mode != GENE_INPUT_SCRNA_BAM && context->config.scRNA_input_mode != GENE_INPUT_SCRNA_FASTQ) context -> input_reads.avg_read_length = guess_reads_density_format(context->config.first_read_file , context->config.is_SAM_file_input?1:0, &min_phred_score, &max_phred_score , &guess_tested_reads);
4121 	if(context -> input_reads.avg_read_length<0 )context -> input_reads.avg_read_length = 250;
4122 //	SUBREADprintf("QR=[%d,%d]; ALEN=%f\n",  min_phred_score, max_phred_score, context -> input_reads.avg_read_length);
4123 	if(max_phred_score>=0)
4124 	{
4125 		int inferred_offset;
4126 
4127 		if(abs(min_phred_score - 33) < abs(min_phred_score - 64))  inferred_offset = 33;
4128 		else	inferred_offset = 64;
4129 
4130 		if(context -> input_reads.first_read_file.file_type != GENE_INPUT_GZIP_FASTA){
4131 			if((context->config.phred_score_format == FASTQ_PHRED64 && inferred_offset == 33) ||
4132 			   (context->config.phred_score_format == FASTQ_PHRED33 && inferred_offset == 64))
4133 			{
4134 				print_in_box(80,0,0, "WARNING  - The specified Phred score offset (%d) seems incorrect.", context->config.phred_score_format == FASTQ_PHRED33?33:64);
4135 				print_in_box(80,0,0, "           ASCII values of the quality scores of read bases included in");
4136 				print_in_box(80,0,0, "           the first %d reads were found to be within the range of",guess_tested_reads);
4137 				print_in_box(80,0,0, "           [%d,%d].", min_phred_score, max_phred_score);
4138 				print_in_box(80,0,0, "");
4139 				context -> is_phred_warning = 1;
4140 			}
4141 			else{
4142 				print_in_box(80,0,0, "The range of Phred scores observed in the data is [%d,%d]", min_phred_score - inferred_offset, max_phred_score - inferred_offset);
4143 			}
4144 		}
4145 	}
4146 
4147 	subread_init_lock(&context -> output_lock);
4148 
4149 	if(context->config.report_sam_file && context -> config.output_prefix[0])
4150 	{
4151 		// ====== open output files ======
4152 		// Only the sam file is opened here; other files like bed, indel and etc are opened in init_modules()
4153 		sprintf(tmp_fname,"%s", context->config.output_prefix);
4154 
4155 		print_in_box(80,0,0,"Create the output %s file.", context -> config.is_BAM_output?"BAM":"SAM");
4156 		if(context -> config.is_BAM_output)
4157 		{
4158 			context -> output_bam_writer = malloc(sizeof(SamBam_Writer));
4159 			SamBam_writer_create(context -> output_bam_writer , tmp_fname, context -> config.is_input_read_order_required?1:context -> config.all_threads,  context -> config.sort_reads_by_coordinates, context->config.scRNA_input_mode, context -> config.temp_file_prefix);
4160 			context -> output_sam_fp = NULL;
4161 		}
4162 		else
4163 		{
4164 			context -> output_sam_fp = f_subr_open(tmp_fname,"wb");
4165 			//context -> output_sam_inner_buffer = malloc(OUTPUT_BUFFER_SIZE);
4166 			//setvbuf (context -> output_sam_fp, context -> output_sam_inner_buffer, _IOFBF, OUTPUT_BUFFER_SIZE);
4167 			context -> output_bam_writer = NULL;
4168 		}
4169 		if((!context -> output_bam_writer) && (!context->output_sam_fp))
4170 		{
4171 			sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Unable to open '%s' to write. Please check if the path exists and you have the permission to create/write this file", tmp_fname);
4172 			return -1;
4173 		}
4174 	}
4175 	else
4176 	{
4177 		if(context -> config.is_BAM_output)
4178 		{
4179 			context -> output_bam_writer = malloc(sizeof(SamBam_Writer));
4180 			SamBam_writer_create(context -> output_bam_writer ,NULL, context -> config.is_input_read_order_required?1:context -> config.all_threads,  context -> config.sort_reads_by_coordinates, 0,  context -> config.temp_file_prefix);
4181 		}
4182 		context->output_sam_fp = NULL;
4183 	}
4184 
4185 	// ====== check index files, count blocks and load chro table ======
4186 	print_in_box(80,0,0,"Check the index.");
4187 	sprintf(tmp_fname, "%s.reads", context->config.index_prefix);
4188 	if(!does_file_exist(tmp_fname))
4189 	{
4190 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Unable top open index '%s'. Please make sure that the correct prefix is specified and you have the permission to read these files. For example, if there are files '/opt/my_index.reads', '/opt/my_index.files' and etc, the index prefix should be specified as '/opt/my_index' without any suffix. \n", context->config.index_prefix);
4191 		return -1;
4192 	}
4193 
4194 	context->current_index_block_number = 0;
4195 	if(load_offsets(&context->chromosome_table, context->config.index_prefix)){
4196 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"\nThe index was built by using an old version of Subread; its format is no longer supported. Please use the current version of the index builder to rebuild it.\n");
4197 		return 1;
4198 	}
4199 
4200 	if(context->config.space_type == GENE_SPACE_COLOR)
4201 		sprintf(tmp_fname, "%s.00.c.tab", context->config.index_prefix);
4202 	else
4203 		sprintf(tmp_fname, "%s.00.b.tab", context->config.index_prefix);
4204 	if(!does_file_exist(tmp_fname))
4205 	{
4206 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Your reads are in the %s space but the index was not built in the same space. Unable to precess the reads.\n", context->config.space_type == GENE_SPACE_COLOR?"color":"base");
4207 		return -1;
4208 	}
4209 
4210 	context->index_block_number = 0;
4211 	while(1)
4212 	{
4213 		sprintf(tmp_fname, "%s.%02d.%c.tab", context->config.index_prefix, context->index_block_number, context->config.space_type == GENE_SPACE_COLOR?'c':'b');
4214 		if(!does_file_exist(tmp_fname))break;
4215 		context->index_block_number ++;
4216 		if(context->index_block_number>=2 && context->config.max_indel_length > 16)
4217 		{
4218 			print_in_box(80,0,0,"ERROR You cannot use multi-block index for very-long indel detection.");
4219 			print_in_box(80,0,0,"Please set the maximum indel length <= 16.");
4220 			return -1;
4221 		}
4222 	}
4223 
4224 	if(context->config.report_sam_file)
4225 		write_sam_headers(context);
4226 
4227 	print_in_box(80,0,0,"Init the voting space.");
4228 	// ====== init other variables ======
4229 	if(context -> config.all_threads>1)
4230 		subread_init_lock(&context -> thread_initial_lock);
4231 	else subread_init_topKbuff(context, &context -> topKbuff);
4232 
4233 	if(init_paired_votes(context))
4234 	{
4235 		sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Cannot initialise the voting space. You need at least 2GB of empty physical memory to run this program.\n");
4236 		return 1;
4237 	}
4238 	sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "load_global_context: finished");
4239 
4240 	memset( context->all_value_indexes , 0 , 100 * sizeof(gene_value_index_t));
4241 
4242 	context -> sam_chro_to_anno_chr_alias = NULL;
4243 	if(context->config.exon_annotation_file[0]){
4244 		print_in_box(80,0,0,"Load the annotation file.");
4245 		if( load_annotated_exon_regions( context ) ) return -1;
4246 		if(context->config.exon_annotation_alias_file[0])
4247 			 context -> sam_chro_to_anno_chr_alias = load_alias_table(context->config.exon_annotation_alias_file);
4248 		warning_anno_vs_index(context -> annotation_chro_table, &context -> chromosome_table, context -> sam_chro_to_anno_chr_alias);
4249 		HashTableDestroy(context -> annotation_chro_table);
4250 	} else context -> exonic_region_bitmap = NULL;
4251 	print_in_box(80,0,0,"Global environment is initialised.");
4252 	return 0;
4253 }
4254 
4255 
4256 int init_modules(global_context_t * context)
4257 {
4258 	sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "init_modules: begin");
4259 	int ret = init_indel_tables(context);
4260 	if((context->config.do_breakpoint_detection || context-> config.do_fusion_detection || context->config.do_breakpoint_detection || context-> config.do_long_del_detection))
4261 		ret = ret || init_junction_tables(context);
4262 
4263 	sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "init_modules: finished: %d",ret);
4264 	return ret;
4265 }
4266 
4267 int destroy_modules(global_context_t * context)
4268 {
4269 	destroy_indel_module(context);
4270 	if((context->config.do_breakpoint_detection || context-> config.do_fusion_detection || context->config.do_breakpoint_detection || context-> config.do_long_del_detection))
4271 		destroy_junction_tables(context);
4272 	return 0;
4273 }
4274 
4275 int destroy_global_context(global_context_t * context)
4276 {
4277 	int xk1, block_no, ret = 0;
4278 
4279 	if(context -> exonic_region_bitmap) free(context -> exonic_region_bitmap);
4280 
4281 	for(block_no = 0; block_no< context->index_block_number; block_no++)
4282 		gvindex_destory(&context -> all_value_indexes[block_no]);
4283 
4284 	if(context->config.all_threads<2){
4285 		subread_free_topKbuff( context,&context -> topKbuff );
4286 	}
4287 	if(context->output_sam_fp) {
4288 		if(context -> output_sam_is_full){
4289 			unlink(context->config.output_prefix);
4290 			SUBREADprintf("\nERROR: cannot finish the SAM file. Please check the disk space in the output directory.\nNo output file was generated.\n");
4291 			ret = 1;
4292 		}
4293 		fclose(context -> output_sam_fp);
4294 	}
4295 
4296 	if(context->input_reads.is_internal_error){
4297 		unlink(context->config.output_prefix);
4298 		return 1;
4299 	}
4300 
4301 	if(context->output_bam_writer) {
4302 		SamBam_writer_close(context->output_bam_writer);
4303 		if(context->output_bam_writer -> is_internal_error){
4304 			unlink(context->config.output_prefix);
4305 			SUBREADprintf("\nERROR: cannot finish the BAM file. Please check the disk space in the output directory.\nNo output file was generated.\n");
4306 			ret = 1;
4307 		}
4308 		free(context->output_bam_writer);
4309 		context->output_bam_writer=NULL;
4310 	}
4311 	//free(context->big_margin_record);
4312 
4313 	for(xk1=0; xk1<5; xk1++)
4314 		if(context->module_contexts[xk1])free(context->module_contexts[xk1]);
4315 	geinput_close(&context -> input_reads.first_read_file);
4316 	if(context->input_reads.is_paired_end_reads) geinput_close(&context -> input_reads.second_read_file);
4317 	destroy_offsets(&context->chromosome_table);
4318 	finalise_bigtable_results(context);
4319 
4320 	if((context -> will_remove_input_file & 1) && strstr(context ->config.first_read_file, "/core-temp")) unlink(context ->config.first_read_file);
4321 	if((context -> will_remove_input_file & 2) && strstr(context ->config.second_read_file, "/core-temp")) unlink(context ->config.second_read_file);
4322 	free(context -> rebuilt_command_line);
4323 
4324 	return ret;
4325 }
4326 
4327 void subread_rebuild_cmd(int argc, char ** argv, global_context_t * global_context){
4328 	global_context -> rebuilt_command_line_size = rebuild_command_line(&global_context -> rebuilt_command_line , argc, argv);
4329 }
4330 
4331 
4332 int write_bincigar_part(char * bincigar, int chropt, unsigned int optlen, int bincigar_len)
4333 {
4334 	int binopt, binbytes, x1;
4335 
4336 	if(optlen<1) return -1;
4337 
4338 	if(optlen < 4) binbytes=1;
4339 	else if(optlen < 1024) binbytes=2;
4340 	else if(optlen < 262144) binbytes=3;
4341 	else if(optlen < 67108864) binbytes=4;
4342 	else binbytes=5;
4343 
4344 	if(bincigar_len<binbytes) return -1;
4345 
4346 	switch(chropt)
4347 	{
4348 		case 'S':
4349 			binopt = CORE_CIGAR_OPT_S;
4350 			break;
4351 		case 'M':
4352 			binopt = CORE_CIGAR_OPT_M;
4353 			break;
4354 		case 'I':
4355 			binopt = CORE_CIGAR_OPT_I;
4356 			break;
4357 		case 'D':
4358 			binopt = CORE_CIGAR_OPT_D;
4359 			break;
4360 		case 'N':
4361 			binopt = CORE_CIGAR_OPT_N;
4362 			break;
4363 		case 'n':
4364 			binopt = CORE_CIGAR_OPT_NN;
4365 			break;
4366 		case 'B':
4367 			binopt = CORE_CIGAR_OPT_B;
4368 			break;
4369 		case 'b':
4370 			binopt = CORE_CIGAR_OPT_BB;
4371 			break;
4372 		default:
4373 			return -1;
4374 	}
4375 
4376 	bincigar[0]=binopt | (binbytes << 3) | ((optlen & 3)<< 6);
4377 	optlen >>= 2;
4378 	for(x1=1;x1<binbytes; x1++)
4379 	{
4380 		bincigar[x1] = optlen&0xff;
4381 		optlen>>=8;
4382 	}
4383 
4384 	return binbytes;
4385 }
4386 
4387 // function returns the actual length of bincigar, or -1 if anything is wrong, e.g., bincigar_len is too short or unrecognized operations.
4388 int OLD_cigar2bincigar(char *cigar, char *bincigar, int bincigar_len)
4389 {
4390 	int xk1=0;
4391 	unsigned int tmpv=0, bincigar_cursor=0;
4392 	while(1)
4393 	{
4394 		int nch = cigar[xk1];
4395 		if(!nch) break;
4396 		xk1++;
4397 
4398 		if(isdigit(nch)) tmpv=tmpv*10+(nch-'0');
4399 		else
4400 		{
4401 			int bincigar_sec_len = write_bincigar_part(bincigar+bincigar_cursor, nch, tmpv, bincigar_len-bincigar_cursor);
4402 			if(bincigar_sec_len<0){
4403 				bincigar[0]=0;
4404 				return -1;
4405 			}
4406 			bincigar_cursor += bincigar_sec_len;
4407 			tmpv=0;
4408 		}
4409 	}
4410 
4411 	if(bincigar_cursor<bincigar_len) bincigar[bincigar_cursor] = 0;
4412 
4413 	//printf("%s : BL=%d\n", cigar, bincigar_cursor);
4414 
4415 	return bincigar_cursor;
4416 }
4417 
4418 
4419 int write_cigar_part(char *bincigar, char *cigar, int cigar_len , int * bincigar_move)
4420 {
4421 	int binbytes, x1, binopt, charopt;
4422 	unsigned int tmpv = 0;
4423 	char sec_buf[13];
4424 
4425 	binbytes = 7& (bincigar[0] >> 3);
4426 	binopt = 7 & bincigar[0];
4427 
4428 	switch(binopt)
4429 	{
4430 		case CORE_CIGAR_OPT_D:
4431 			charopt='D';
4432 			break;
4433 		case CORE_CIGAR_OPT_I:
4434 			charopt='I';
4435 			break;
4436 		case CORE_CIGAR_OPT_M:
4437 			charopt='M';
4438 			break;
4439 		case CORE_CIGAR_OPT_S:
4440 			charopt='S';
4441 			break;
4442 		case CORE_CIGAR_OPT_B:
4443 			charopt='B';
4444 			break;
4445 		case CORE_CIGAR_OPT_BB:
4446 			charopt='b';
4447 			break;
4448 		case CORE_CIGAR_OPT_N:
4449 			charopt='N';
4450 			break;
4451 		default:
4452 			charopt='n';
4453 			break;
4454 	}
4455 
4456 	tmpv = (bincigar[0]>>6) & 3;
4457 	for(x1 = 1; x1 < binbytes; x1++)
4458 	{
4459 		unsigned int dtmpv = 0xff & bincigar[x1];
4460 		dtmpv <<= (x1*8 - 6);
4461 		tmpv += dtmpv;
4462 	}
4463 
4464 	int added_len = sprintf(sec_buf, "%u%c", tmpv, charopt);
4465 	if(added_len > cigar_len)
4466 		return -1;
4467 	memcpy(cigar, sec_buf, added_len);
4468 	(*bincigar_move) = binbytes;
4469 
4470 	return added_len;
4471 }
4472 
4473 int OLD_bincigar2cigar(char * cigar, int cigar_len, char * bincigar, int bincigar_max_len, int read_len)
4474 {
4475 
4476 	int cigar_cursor = 0, bincigar_cursor = 0;
4477 
4478 	while(1)
4479 	{
4480 		int bincigar_move = 0;
4481 		int cigar_sec_len = write_cigar_part(bincigar + bincigar_cursor, cigar+cigar_cursor, cigar_len-cigar_cursor-1, &bincigar_move);
4482 		if(cigar_sec_len<0){
4483 			sprintf(cigar,"%dM", read_len);
4484 			return -1;
4485 		}
4486 		//printf("NPC=%s\n", cigar);
4487 		cigar_cursor += cigar_sec_len;
4488 		bincigar_cursor += bincigar_move;
4489 		if(bincigar_cursor>=bincigar_max_len) break;
4490 		if(bincigar[bincigar_cursor] == 0) break;
4491 	}
4492 	cigar[cigar_cursor] = 0;
4493 
4494 	return cigar_cursor;
4495 }
4496 
4497 int term_strncpy(char * dst, char * src, int max_dst_mem)
4498 {
4499 	int i;
4500 
4501 	for(i=0; i<max_dst_mem; i++)
4502 	{
4503 		if(!src[i]) break;
4504 		dst[i]=src[i];
4505 		if(i == max_dst_mem-1)
4506 			SUBREADprintf("String out of memory limit: '%s'\n", src);
4507 	}
4508 	if(i >= max_dst_mem) i = max_dst_mem-1;
4509 	dst[i] = 0;
4510 
4511 	return 0;
4512 }
4513 void absoffset_to_posstr(global_context_t * global_context, unsigned int pos, char * res){
4514 	char * ch;
4515 	int off;
4516 
4517 	locate_gene_position(pos, &global_context -> chromosome_table, &  ch, &off);
4518 
4519 	sprintf(res, "%s:%u", ch, off);
4520 }
4521 
4522 int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * perfect_lens, char * read_name)
4523 {
4524 	unsigned int current_perfect_map_start = sel_pos;
4525 	int current_perfect_section_no = 0;
4526 	unsigned int current_perfect_cursor = sel_pos;
4527 	int is_reversed = is_first_section_reversed;
4528 	int is_negative = is_first_section_negative_strand;
4529 	int read_cursor = 0;
4530 	int out_cigar_writer_ptr = 0;
4531 	unsigned int tmpi = 0;
4532 
4533 	short perfect_len = 0;
4534 
4535 	int cigar_cursor;
4536 
4537 	out_poses[0] = current_perfect_map_start - (is_reversed?1:0);
4538 	out_strands[0] = is_negative?'-':'+';
4539 	char main_piece_strand = (is_first_section_negative_strand == is_first_section_reversed)?'+':'-';
4540 
4541 	for(cigar_cursor=0;;cigar_cursor++)
4542 	{
4543 		char ncch = in_cigar[cigar_cursor];
4544 		int is_chimeric_section_end = 0;
4545 
4546 		if(!ncch){
4547 			perfect_lens [current_perfect_section_no] = perfect_len ;
4548 			current_perfect_section_no++;
4549 			break;
4550 		}
4551 
4552 		if(toupper(ncch)=='N'||toupper(ncch)=='B')
4553 		{
4554 
4555 			unsigned int jummped_location;
4556 			int is_chro_jump = 0, is_long_jump = 0;
4557 
4558 			if(is_reversed)
4559 			{
4560 				if(toupper(ncch)=='N')
4561 					jummped_location = current_perfect_map_start - 1 + tmpi;
4562 				else
4563 					jummped_location = current_perfect_map_start - 1 - tmpi;
4564 			}
4565 			else
4566 			{
4567 				if(toupper(ncch)=='N')
4568 					jummped_location = current_perfect_cursor + tmpi;
4569 				else
4570 					jummped_location = current_perfect_cursor - tmpi;
4571 
4572 			}
4573 
4574 			if(ncch == 'N')
4575 			{
4576 				char * curr_chr, * new_chr;
4577 				int curr_offset, new_offset;
4578 				locate_gene_position_max(current_perfect_cursor, &global_context -> chromosome_table, & curr_chr, & curr_offset, NULL, NULL, 1);
4579 				locate_gene_position_max(jummped_location      , &global_context -> chromosome_table, &  new_chr, &  new_offset, NULL, NULL, 1);
4580 				if( curr_chr == NULL || new_chr == NULL ){
4581 					/*
4582 					char outpos[100];
4583 					absoffset_to_posstr(global_context, sel_pos + 1, outpos);
4584 					SUBREADprintf("Wrong CIGAR: mapped to %s, CIGAR=%s\n", outpos , in_cigar);
4585 					*/
4586 					return -1;
4587 				}
4588 				assert(curr_chr);
4589 				assert(new_chr);
4590 				is_chro_jump = (curr_chr != new_chr);
4591 
4592 				long long int dist = (long long int)current_perfect_cursor;
4593 				dist -= (long long int)jummped_location;
4594 				if(abs(dist) >= global_context -> config.maximum_intron_length)
4595 					is_long_jump = 1;
4596 
4597 			//#warning ">>>>>> COMMENT WHEN RELEASE <<<<<<"
4598 			if(0 && FIXLENstrcmp("R000000007", read_name)==0)
4599 				SUBREADprintf("dist=%lld, abs=%lld, %u, %u\n",dist,abs(dist), current_perfect_cursor, jummped_location);
4600 				// A long jump is the jump longer than 2^27.
4601 				// Picard does not like it!!
4602 			}
4603 
4604 			// is_long_jump is true only if the two sections are on different chromosomes.
4605 			//#warning ">>>>>> COMMENT WHEN RELEASE <<<<<<"
4606 			if(0 && FIXLENstrcmp("R000000007", read_name)==0)
4607 				SUBREADprintf("CHR_JMP=%d, NCCH=%c, LONG_JMP=%d\n", is_chro_jump, ncch, is_long_jump);
4608 			if(is_chro_jump || islower(ncch) || ncch == 'B' || is_long_jump)
4609 			{
4610 				current_perfect_cursor = jummped_location;
4611 
4612 				if(islower(ncch)){
4613 					is_reversed = !is_reversed;
4614 					is_negative = !is_negative;
4615 
4616 					if(is_reversed)
4617 						current_perfect_cursor --;
4618 				}
4619 
4620 				current_perfect_map_start = current_perfect_cursor;
4621 				tmpi = 0;
4622 				if(read_cursor<read_len)
4623 					sprintf(out_cigars[current_perfect_section_no] + out_cigar_writer_ptr,"%dS", read_len - read_cursor);
4624 
4625 				perfect_lens [current_perfect_section_no] = perfect_len ;
4626 				perfect_len = 0;
4627 
4628 				current_perfect_section_no++;
4629 				if(current_perfect_section_no>=CIGAR_PERFECT_SECTIONS)break;
4630 
4631 				out_poses[current_perfect_section_no] = current_perfect_map_start - read_cursor;
4632 				out_strands[current_perfect_section_no] = is_negative?'-':'+';
4633 				out_cigar_writer_ptr = sprintf(out_cigars[current_perfect_section_no],"%dS", read_cursor);
4634 				is_chimeric_section_end  = 1;
4635 			}
4636 		}
4637 
4638 		if(!is_chimeric_section_end)
4639 		{
4640 			if(isalpha(ncch))
4641 			{
4642 				out_cigar_writer_ptr+=sprintf(out_cigars[current_perfect_section_no]+out_cigar_writer_ptr, "%u%c", tmpi, ncch);
4643 			}
4644 			if(ncch == 'M'|| ncch == 'S')
4645 			{
4646 				read_cursor += tmpi;
4647 				if(ncch == 'M')
4648 					perfect_len += tmpi;
4649 				if(is_reversed)
4650 					out_poses[current_perfect_section_no] += tmpi;
4651 				else
4652 					current_perfect_cursor += tmpi;
4653 				tmpi = 0;
4654 			}
4655 			else if(ncch == 'D' || ncch == 'N')
4656 			{
4657 				if(is_reversed)
4658 					out_poses[current_perfect_section_no] += tmpi;
4659 				else
4660 					current_perfect_cursor += tmpi;
4661 				tmpi = 0;
4662 			}
4663 			else if(ncch == 'I')
4664 			{
4665 				read_cursor += tmpi;
4666 				tmpi = 0;
4667 			}
4668 			else if(isdigit(ncch))
4669 				tmpi = tmpi*10+(ncch-'0');
4670 		}
4671 	}
4672 
4673 	int xk1 = 0, best_match = -9999;
4674 
4675 	for(xk1=0; xk1<current_perfect_section_no;xk1++)
4676 	{
4677 		if(best_match < 0 || (main_piece_strand == out_strands[xk1] && perfect_lens[xk1]>perfect_lens[best_match]))
4678 			best_match = xk1;
4679 	}
4680 
4681 	if(best_match>0)
4682 	{
4683 		unsigned int tmpv;
4684 		char cigar_sec[100];
4685 		tmpv = out_poses[0];
4686 		out_poses[0]=out_poses[best_match];
4687 		out_poses[best_match] = tmpv;
4688 
4689 		tmpv = out_strands[0];
4690 		out_strands[0] = out_strands[best_match];
4691 		out_strands[best_match] = tmpv;
4692 
4693 		strcpy(cigar_sec, out_cigars[0]);
4694 		strcpy(out_cigars[0], out_cigars[best_match]);
4695 		strcpy(out_cigars[best_match] , cigar_sec);
4696 
4697 		tmpv = perfect_lens[0];
4698 		perfect_lens[0] = perfect_lens[best_match];
4699 		perfect_lens[best_match] = tmpv;
4700 	}
4701 
4702 	return current_perfect_section_no;
4703 }
4704 
4705 void quick_sort_run(void * arr, int spot_low,int spot_high, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r));
4706 
4707 void quick_sort(void * arr, int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r))
4708 {
4709 	quick_sort_run(arr, 0, arr_size-1, compare, exchange);
4710 }
4711 
4712 
4713 void quick_sort_run(void * arr, int spot_low,int spot_high, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r))
4714 {
4715 	// https://en.wikipedia.org/wiki/Quicksort
4716 	// Lomuto partition scheme
4717 
4718 	int pivot,j,i;
4719 
4720 	if(spot_high <= spot_low) return;
4721 	pivot = spot_high;
4722 	i = spot_low;
4723 
4724 	for(j = spot_low+1; j < spot_high; j++)
4725 		if(compare(arr, j, pivot)<=0) {
4726 			exchange(arr,i,j);
4727 			i++;
4728 		}
4729 
4730 	exchange(arr, i, spot_high);
4731 
4732 	quick_sort_run(arr, spot_low, i-1, compare, exchange);
4733 	quick_sort_run(arr, i+1, spot_high, compare, exchange);
4734 
4735 }
4736 void basic_sort_run(void * arr, int start, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r)){
4737 	int i, j;
4738 	for(i=start; i< start + items - 1; i++)
4739 	{
4740 		int min_j = i;
4741 		for(j=i + 1; j< start + items; j++)
4742 		{
4743 			if(compare(arr, min_j, j) > 0)
4744 				min_j = j;
4745 		}
4746 		if(i!=min_j)
4747 			exchange(arr, i, min_j);
4748 	}
4749 }
4750 
4751 void basic_sort(void * arr, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r)){
4752 	basic_sort_run(arr, 0, items, compare, exchange);
4753 }
4754 
4755 
4756 void merge_sort_run(void * arr, int start, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2))
4757 {
4758 	if(items > 11)
4759 	{
4760 		int half_point = items/2;
4761 		merge_sort_run(arr, start, half_point, compare, exchange, merge);
4762 		merge_sort_run(arr, start + half_point, items - half_point, compare, exchange, merge);
4763 		merge(arr, start, half_point, items - half_point);
4764 	}
4765 	else
4766 	{
4767 		basic_sort_run(arr, start, items, compare, exchange);
4768 	}
4769 }
4770 void merge_sort(void * arr, int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2))
4771 {
4772 	merge_sort_run(arr, 0, arr_size, compare, exchange, merge);
4773 }
4774 
4775 unsigned int calc_end_pos(unsigned int p, char * cigar, unsigned int * all_skipped_len, int * is_exonic_regions, global_context_t * global_context){
4776 	unsigned int cursor = p, tmpi=0;
4777 	int nch, cigar_cursor;
4778 	for(cigar_cursor = 0; 0!=(nch = cigar[cigar_cursor]); cigar_cursor++){
4779 		if(isdigit(nch)){
4780 			tmpi = tmpi * 10 + (nch - '0');
4781 		}else{
4782 			if((nch == 'S' && cursor == p) || nch == 'M' || nch == 'N' || nch == 'D'){
4783 				if(nch == 'M' && global_context -> exonic_region_bitmap){
4784 					if(global_context -> config.do_breakpoint_detection){
4785 						if(is_pos_in_annotated_exon_regions(global_context, cursor) == 0 || is_pos_in_annotated_exon_regions(global_context, cursor + tmpi - 1) == 0)  ( * is_exonic_regions) = 0;
4786 					} else {
4787 						if(is_pos_in_annotated_exon_regions(global_context, cursor + tmpi/2) == 0) ( * is_exonic_regions) = 0;
4788 					}
4789 				}
4790 
4791 				cursor += tmpi;
4792 				if(nch == 'N' || nch == 'D')(*all_skipped_len) += tmpi;
4793 			}
4794 			tmpi = 0;
4795 		}
4796 	}
4797 	return cursor;
4798 
4799 }
4800 
4801 void test_PE_and_same_chro_cigars(global_context_t * global_context , unsigned int pos1, unsigned int pos2, int * is_exonic_regions, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2, char * cigar1, char * cigar2, char *read_name, int * res_tlen){
4802  	char * r1_chr = NULL, * r2_chr = NULL;
4803 	int r1_pos, r2_pos;
4804 
4805 	(*is_same_chromosome) = 0;
4806 	(*is_PE_distance) = 0;
4807 	(*is_exonic_regions) = 1;
4808 
4809 	locate_gene_position(pos1, &global_context -> chromosome_table, & r1_chr, & r1_pos);
4810 	locate_gene_position(pos2, &global_context -> chromosome_table, & r2_chr, & r2_pos);
4811 
4812 	if(r1_chr == r2_chr){
4813 		unsigned int skip_1 = 0;
4814 		unsigned int skip_2 = 0;
4815 		unsigned int r1_end_pos = calc_end_pos(pos1, cigar1, &skip_1, is_exonic_regions, global_context  );
4816 		unsigned int r2_end_pos = calc_end_pos(pos2, cigar2, &skip_2, is_exonic_regions, global_context  );
4817 
4818 		unsigned int tlen = max(r1_end_pos, r2_end_pos) - min(pos1, pos2);
4819 		if(tlen > skip_1) tlen -= skip_1;
4820 		if(tlen > skip_2) tlen -= skip_2;
4821 
4822 		(*is_same_chromosome) = 1;
4823 
4824 		if(tlen >= global_context -> config.minimum_pair_distance && tlen <= global_context -> config.maximum_pair_distance)
4825 			(* is_PE_distance) = 1;
4826 		*res_tlen = tlen;
4827 	}else *res_tlen = 0x7fffffff;
4828 }
4829 
4830 void test_PE_and_same_chro_align(global_context_t * global_context , realignment_result_t * res1, realignment_result_t * res2, int * is_exonic_regions, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2, char * read_name, int *res_tlen){
4831 	return test_PE_and_same_chro_cigars(global_context, res1 -> first_base_position, res2 -> first_base_position, is_exonic_regions, is_PE_distance, is_same_chromosome , read_len_1 , read_len_2, res1 -> cigar_string, res2 -> cigar_string, read_name, res_tlen);
4832 }
4833 
4834 
4835 
4836 void test_PE_and_same_chro(global_context_t * global_context , unsigned int pos1, unsigned int pos2, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2)
4837 {
4838  	char * r1_chr, * r2_chr;
4839 	int r1_pos, r2_pos;
4840 
4841 	(*is_same_chromosome) = 0;
4842 	(*is_PE_distance) = 0;
4843 
4844 	int re1 = locate_gene_position(pos1, &global_context -> chromosome_table, & r1_chr, & r1_pos);
4845 	int re2 = locate_gene_position(pos2, &global_context -> chromosome_table, & r2_chr, & r2_pos);
4846 
4847 	if(re1 ==0 && 0 ==re2) {
4848 		long long tlen = r1_pos;
4849 		tlen  -= r2_pos;
4850 		tlen = abs(tlen);
4851 		tlen += (r1_pos > r2_pos)?read_len_1:read_len_2;
4852 		unsigned int tlenI = (unsigned int) tlen;
4853 
4854 		//SUBREADprintf("TEST PE: %p == %p , TLEN=%u\n", r1_chr, r2_chr, tlenI);
4855 
4856 		if(r1_chr == r2_chr){
4857 			(*is_same_chromosome) = 1;
4858 			if(tlenI >= global_context -> config.minimum_pair_distance && tlenI <= global_context -> config.maximum_pair_distance)
4859 				(* is_PE_distance) = 1;
4860 		}
4861 	}
4862 }
4863 
4864 int FIXLENstrcmp(char * flen, char * rname){
4865 	char * tt=NULL;
4866 	char * fixed_len = malloc(strlen(flen)+1);
4867 	strcpy(fixed_len, flen);
4868 	char * s1 = strtok_r(fixed_len, "\n", &tt);
4869 
4870 	int found = 0;
4871 	while(1){
4872 		int this_ok = 1,x=0;
4873 		for(; s1[x]; x++){
4874 			if(rname[x]!=s1[x]) {
4875 				this_ok = 0;
4876 				break;
4877 			}
4878 		}
4879 		if(this_ok){
4880 			found = 1;
4881 			break;
4882 		}
4883 		s1 = strtok_r(NULL, "\n", &tt);
4884 		if(!s1)break;
4885 	}
4886 	free(fixed_len);
4887 	return !found;
4888 }
4889