1 static char rcsid[] = "$Id: samprint.c 222839 2020-06-04 21:49:59Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "samprint.h"
7 #include "samflags.h"
8 #include <stdlib.h>
9 #include <ctype.h>
10 #include <string.h>
11
12 #include "mem.h"
13 #include "complement.h"
14 #include "mapq.h"
15 #include "assert.h"
16 #include "cigar.h"
17 #include "transcript.h"
18 #include "method.h"
19 #include "simplepair.h"
20
21
22 #define SANGER_ILLUMINA_DIFF 31
23 /* #define PRINT_ALTS_COORDS 1 */
24
25 /* BAM appears to truncate the H information on the ends of a cigar */
26 /* Also, this provides the information needed for getting term information */
27
28
29 #ifdef DEBUG
30 #define debug(x) x
31 #else
32 #define debug(x)
33 #endif
34
35
36 /* overlap */
37 #ifdef DEBUG3
38 #define debug3(x) x
39 #else
40 #define debug3(x)
41 #endif
42
43
44
45 static bool add_paired_nomappers_p;
46 static bool paired_flag_means_concordant_p;
47 static bool quiet_if_excessive_p;
48 static int maxpaths_report;
49 static char *failedinput_root;
50 static bool fastq_format_p;
51 static bool hide_soft_clips_p;
52 static bool method_print_p;
53
54 static bool clip_overlap_p;
55 static bool merge_overlap_p;
56 static bool merge_samechr_p;
57
58 static bool sam_multiple_primaries_p;
59 static bool sam_sparse_secondaries_p;
60
61 static bool force_xs_direction_p;
62 static IIT_T snps_iit;
63
64 static bool omit_concordant_uniq_p = false;
65 static bool omit_concordant_mult_p = false;
66
67 static bool find_dna_chimeras_p;
68 static bool splicingp;
69 static int donor_typeint;
70 static int acceptor_typeint;
71
72 static Univ_IIT_T chromosome_iit;
73 static Univ_IIT_T transcript_iit;
74
75 static char complCode[128] = COMPLEMENT_LC;
76
77
78 void
SAM_setup(bool add_paired_nomappers_p_in,bool paired_flag_means_concordant_p_in,bool omit_concordant_uniq_p_in,bool omit_concordant_mult_p_in,bool quiet_if_excessive_p_in,int maxpaths_report_in,char * failedinput_root_in,bool fastq_format_p_in,bool hide_soft_clips_p_in,bool method_print_p_in,bool clip_overlap_p_in,bool merge_overlap_p_in,bool merge_samechr_p_in,bool sam_multiple_primaries_p_in,bool sam_sparse_secondaries_p_in,bool force_xs_direction_p_in,IIT_T snps_iit_in,bool find_dna_chimeras_p_in,bool splicingp_in,int donor_typeint_in,int acceptor_typeint_in,Univ_IIT_T chromosome_iit_in,Univ_IIT_T transcript_iit_in)79 SAM_setup (bool add_paired_nomappers_p_in, bool paired_flag_means_concordant_p_in,
80 bool omit_concordant_uniq_p_in, bool omit_concordant_mult_p_in,
81 bool quiet_if_excessive_p_in, int maxpaths_report_in,
82 char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in, bool method_print_p_in,
83 bool clip_overlap_p_in, bool merge_overlap_p_in, bool merge_samechr_p_in,
84 bool sam_multiple_primaries_p_in, bool sam_sparse_secondaries_p_in,
85 bool force_xs_direction_p_in, IIT_T snps_iit_in,
86 bool find_dna_chimeras_p_in, bool splicingp_in, int donor_typeint_in, int acceptor_typeint_in,
87 Univ_IIT_T chromosome_iit_in, Univ_IIT_T transcript_iit_in) {
88 add_paired_nomappers_p = add_paired_nomappers_p_in;
89 paired_flag_means_concordant_p = paired_flag_means_concordant_p_in;
90
91 omit_concordant_uniq_p = omit_concordant_uniq_p_in;
92 omit_concordant_mult_p = omit_concordant_mult_p_in;
93
94 quiet_if_excessive_p = quiet_if_excessive_p_in;
95 failedinput_root = failedinput_root_in;
96 fastq_format_p = fastq_format_p_in;
97 hide_soft_clips_p = hide_soft_clips_p_in;
98 method_print_p = method_print_p_in;
99
100 clip_overlap_p = clip_overlap_p_in;
101 merge_overlap_p = merge_overlap_p_in;
102 merge_samechr_p = merge_samechr_p_in;
103 maxpaths_report = maxpaths_report_in;
104 sam_multiple_primaries_p = sam_multiple_primaries_p_in;
105 sam_sparse_secondaries_p = sam_sparse_secondaries_p_in;
106
107 force_xs_direction_p = force_xs_direction_p_in;
108 snps_iit = snps_iit_in;
109
110 find_dna_chimeras_p = find_dna_chimeras_p_in;
111 splicingp = splicingp_in;
112 donor_typeint = donor_typeint_in;
113 acceptor_typeint = acceptor_typeint_in;
114
115 chromosome_iit = chromosome_iit_in;
116 transcript_iit = transcript_iit_in;
117
118 return;
119 }
120
121
122 static unsigned int
compute_flag(bool plusp,Stage3end_T mate,bool mate_plusp,Resulttype_T resulttype,bool first_read_p,int pathnum,int npaths,bool artificial_mate_p,int npaths_mate,int absmq_score,int first_absmq,bool invertp,bool invert_mate_p,bool supplementaryp)123 compute_flag (bool plusp, Stage3end_T mate, bool mate_plusp, Resulttype_T resulttype,
124 bool first_read_p, int pathnum, int npaths, bool artificial_mate_p, int npaths_mate,
125 int absmq_score, int first_absmq, bool invertp, bool invert_mate_p,
126 bool supplementaryp) {
127 unsigned int flag = 0U;
128
129
130 debug(printf("compute_flag: resulttype %s, mate %p, mate_plusp %d\n",
131 Resulttype_string(resulttype),mate_plusp));
132
133 if (npaths == 0) {
134 debug(printf("npaths = 0, so QUERY_UNMAPPED %d\n",QUERY_UNMAPPED));
135 flag |= QUERY_UNMAPPED;
136 } else if (plusp == invertp) {
137 debug(printf("plusp %d and invertp %d, so QUERY_MINUSP %d\n",
138 plusp,invertp,QUERY_MINUSP));
139 flag |= QUERY_MINUSP;
140 }
141
142 if (resulttype == SINGLEEND_NOMAPPING || resulttype == SINGLEEND_UNIQ || resulttype == SINGLEEND_TRANSLOC || resulttype == SINGLEEND_MULT) {
143 /* No first or second read or mate */
144 } else {
145 debug(printf("PAIRED_READ %d\n",PAIRED_READ));
146 flag |= PAIRED_READ;
147 if (first_read_p == true) {
148 debug(printf("FIRST_READ %d\n",FIRST_READ_P));
149 flag |= FIRST_READ_P;
150 } else {
151 debug(printf("SECOND_READ %d\n",SECOND_READ_P));
152 flag |= SECOND_READ_P;
153 }
154 if (artificial_mate_p == true) {
155 debug(printf("MATE_UNMAPPED %d\n",MATE_UNMAPPED));
156 flag |= MATE_UNMAPPED;
157
158 } else if (npaths_mate == 0) {
159 debug(printf("MATE_UNMAPPED %d\n",MATE_UNMAPPED));
160 flag |= MATE_UNMAPPED;
161
162 } else if (quiet_if_excessive_p && npaths_mate > maxpaths_report) {
163 debug(printf("MATE_UNMAPPED %d\n",MATE_UNMAPPED));
164 flag |= MATE_UNMAPPED;
165
166 } else if (mate == NULL) {
167 /* Unpaired; no mate. Not clear if should be MATE_UNMAPPED. */
168 /* Picard says MATE_UNMAPPED flag should not be set for unpaired reads */
169
170 } else {
171 if (mate_plusp == invert_mate_p) {
172 debug(printf("MATE_MINUSP %d\n",MATE_MINUSP));
173 flag |= MATE_MINUSP;
174 }
175
176 if (npaths == 0) {
177 /* Need to check npaths == 0 in case clipping of overlaps results in a nomapping */
178
179 } else if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
180 /* Can distinguish concordant mappings by presence of insert length */
181 debug(printf("PAIRED_MAPPING %d\n",PAIRED_MAPPING));
182 flag |= PAIRED_MAPPING;
183
184 } else if (resulttype == PAIRED_UNIQ || resulttype == PAIRED_MULT) {
185 /* Note: We are counting PAIRED_UNIQ and PAIRED_MULT as "paired" mappings.
186 However, we are no longer counting UNPAIRED_UNIQ as a "paired" mapping. */
187 if (paired_flag_means_concordant_p == true) {
188 /* Don't turn on paired flag */
189 } else {
190 debug(printf("PAIRED_MAPPING %d\n",PAIRED_MAPPING));
191 flag |= PAIRED_MAPPING;
192 }
193
194 } else {
195 /* Not paired */
196 }
197 }
198 }
199
200 if (pathnum > 1) {
201 if (sam_multiple_primaries_p == false) {
202 debug(printf("NOT_PRIMARY %d\n",NOT_PRIMARY));
203 flag |= NOT_PRIMARY;
204 } else if (absmq_score != first_absmq) {
205 debug(printf("NOT_PRIMARY %d\n",NOT_PRIMARY));
206 flag |= NOT_PRIMARY;
207 } else {
208 /* Just as good as first alignment, so don't mark as altloc */
209 }
210 }
211
212 if (supplementaryp == true) {
213 flag |= SUPPLEMENTARY;
214 }
215
216 debug(printf("Returning flag %d\n",flag));
217 return flag;
218 }
219
220
221 #if 0
222 /* Previously called by output.c */
223 /* Use Cigar_compute instead */
224 /* Returns chrpos_low */
225 Chrpos_T
226 SAM_compute_chrpos (Chrnum_T *chrnum, int hardclip_low, int hardclip_high,
227 Stage3end_T this, int querylength, bool first_read_p) {
228 Substring_T low_substring, high_substring, substring;
229 Hittype_T hittype;
230
231 if (this == NULL) {
232 *chrnum = 0;
233 return 0U;
234
235 } else if ((hittype = Stage3end_hittype(this)) == TRANSLOC_SPLICE ||
236 (hittype == SAMECHR_SPLICE && merge_samechr_p == false)) {
237
238 /* Want concordant substring for both chrpos_low and chrpos_high */
239 substring = Stage3end_substring_for_concordance(this);
240 *chrnum = Substring_chrnum(substring);
241 return Substring_compute_chrpos(substring,hardclip_low,/*hardclip_high,*/hide_soft_clips_p);
242
243 } else if ((low_substring = Stage3end_substring_low(this,hardclip_low)) != NULL) {
244 *chrnum = Substring_chrnum(low_substring);
245 return Substring_compute_chrpos(low_substring,hardclip_low,/*hardclip_high,*/hide_soft_clips_p);
246
247 } else if ((high_substring = Stage3end_substring_high(this,hardclip_high)) != NULL) {
248 *chrnum = Substring_chrnum(high_substring);
249 return Substring_compute_chrpos(high_substring,hardclip_low,/*hardclip_high,*/hide_soft_clips_p);
250
251 } else {
252 /* Both hardclips are invalid */
253 fprintf(stderr,"Both hardclips %d and %d are invalid\n",hardclip_low,hardclip_high);
254 abort();
255 }
256 }
257 #endif
258
259 #if 0
260 static void
261 print_chromosomal_pos (Filestring_T fp, Chrnum_T chrnum, Chrpos_T chrpos, Chrpos_T chrlength,
262 Univ_IIT_T chromosome_iit) {
263 bool allocp;
264 char *chr;
265
266 #if 0
267 if (chrpos == 0U) {
268 /* No mapping */
269 FPRINTF(fp,"\t*\t0");
270 return;
271 }
272 #endif
273
274 if (chrnum == 0) {
275 /* Interchromosomal splice */
276 fprintf(stderr,"Trying to print interchrosomal splice in one line\n");
277 abort();
278
279 } else {
280 chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
281
282 /* chrpos already in 1-based coordinates */
283 if (chrpos > chrlength) {
284 FPRINTF(fp,"\t%s\t%u",chr,chrpos - chrlength /*+1U*/);
285 } else {
286 FPRINTF(fp,"\t%s\t%u",chr,chrpos /*+1U*/);
287 }
288
289 if (allocp == true) {
290 FREE(chr);
291 }
292
293 return;
294 }
295 }
296 #endif
297
298 #if 0
299 /* first_read_p here is that of the printed end, not the mate */
300 static void
301 print_mate_chromosomal_pos (Filestring_T fp, Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
302 Chrpos_T mate_chrlength, Chrnum_T anchor_chrnum, Chrpos_T anchor_chrpos,
303 Univ_IIT_T chromosome_iit) {
304 bool allocp;
305 char *chr;
306
307 if (mate_chrpos_low == 0U) {
308 FPRINTF(fp,"\t*\t0");
309 return;
310
311 } else if (mate_chrnum == 0) {
312 /* Abort because chrpos should have been 0 */
313 abort();
314
315 } else if (anchor_chrpos > 0U && anchor_chrnum > 0 && mate_chrnum == anchor_chrnum) {
316 /* chrpos already in 1-based coordinates */
317 if (mate_chrpos_low > mate_chrlength) {
318 FPRINTF(fp,"\t=\t%u",mate_chrpos_low - mate_chrlength /*+1U*/);
319 } else {
320 FPRINTF(fp,"\t=\t%u",mate_chrpos_low /*+1U*/);
321 }
322
323 return;
324
325 } else {
326 chr = Univ_IIT_label(chromosome_iit,mate_chrnum,&allocp);
327
328 /* chrpos already in 1-based coordinates */
329 if (mate_chrpos_low > mate_chrlength) {
330 FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos_low - mate_chrlength /*+1U*/);
331 } else {
332 FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos_low /*+1U*/);
333 }
334
335 if (allocp == true) {
336 FREE(chr);
337 }
338
339 return;
340 }
341 }
342 #endif
343
344
345
346
347
348 /* npaths could be non-zero, if user selected --quiet-if-excessive */
349 void
SAM_print_nomapping(Filestring_T fp,char * abbrev,Shortread_T queryseq,char * acc1,char * acc2,Univ_IIT_T chromosome_iit,Resulttype_T resulttype,bool first_read_p,int pathnum,int npaths_primary,int npaths_altloc,bool artificial_mate_p,int npaths_mate,Stage3end_T mate,int mate_querylength,int mate_hardclip_low,int mate_hardclip_high,int quality_shift,char * sam_read_group_id,bool invertp,bool invert_mate_p)350 SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, char *acc1, char *acc2,
351 Univ_IIT_T chromosome_iit, Resulttype_T resulttype, bool first_read_p,
352 int pathnum, int npaths_primary, int npaths_altloc, bool artificial_mate_p, int npaths_mate,
353
354 Stage3end_T mate, int mate_querylength, int mate_hardclip_low, int mate_hardclip_high,
355
356 int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
357 unsigned int flag;
358
359 int sequence_hardclip_low, sequence_hardclip_high;
360 List_T prevp, nextp, finalp;
361
362 bool mate_first_read_p, mate_plusp;
363 Chrnum_T mate_chrnum;
364 Chrpos_T mate_chrpos_low;
365 Filestring_T mate_cigar_fp, mate_md_fp;
366 int mate_nmismatches_refdiff, mate_nmismatches_bothdiff;
367 int nindels;
368
369 char *chr;
370 bool allocp;
371
372
373 mate_first_read_p = (first_read_p == true ? false : true);
374 debug(printf("Calling Cigar_compute_main on mate %p with hardclips %d and %d\n",
375 mate,mate_hardclip_low,mate_hardclip_high));
376 Cigar_compute_main(&mate_cigar_fp,&mate_md_fp,
377 &mate_nmismatches_refdiff,&mate_nmismatches_bothdiff,
378 &sequence_hardclip_low,&sequence_hardclip_high,&prevp,&nextp,&finalp,
379 &nindels,&mate_plusp,&mate_chrnum,&mate_chrpos_low,
380 mate,mate_querylength,mate_first_read_p,/*mate*/NULL,
381 mate_hardclip_low,mate_hardclip_high,hide_soft_clips_p);
382
383 /* 1. QNAME */
384 if (acc2 == NULL) {
385 FPRINTF(fp,"%s",acc1);
386 } else {
387 FPRINTF(fp,"%s,%s",acc1,acc2);
388 }
389
390 /* 2. FLAG */
391 /* 3. RNAME: chr */
392 /* 4. POS: chrpos */
393 /* 5. MAPQ: Mapping quality. Picard says MAPQ should be 0 for an unmapped read */
394 /* 6. CIGAR */
395 flag = compute_flag(/*plusp (NA)*/true,mate,mate_plusp,resulttype,first_read_p,
396 /*pathnum*/0,/*npaths*/0,artificial_mate_p,npaths_mate,
397 /*absmq_score*/0,/*first_absmq*/0,invertp,invert_mate_p,
398 /*supplementaryp*/false);
399 FPRINTF(fp,"\t%u\t*\t0\t0\t*",flag);
400
401 /* 7. MRNM: Mate chr */
402 /* 8. MPOS: Mate chrpos */
403 if (mate_chrpos_low == 0U) {
404 FPRINTF(fp,"\t*\t0");
405 #if 0
406 } else if (mate_chrnum == chrnum) {
407 FPRINTF(fp,"\t=\t%u",mate_chrpos_low /*+1U*/);
408 #endif
409 } else {
410 chr = Univ_IIT_label(chromosome_iit,mate_chrnum,&allocp);
411 FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos_low /*+1U*/);
412 if (allocp == true) {
413 FREE(chr);
414 }
415 }
416
417
418 /* 9. ISIZE: Insert size */
419 FPRINTF(fp,"\t0");
420
421 /* 10. SEQ: queryseq and 11. QUAL: quality scores */
422 /* Since there is no mapping, we print the original query sequence. */
423 if (sam_sparse_secondaries_p == true && (flag & NOT_PRIMARY) != 0) {
424 /* SAM format specification says that secondary mappings should not print SEQ or QUAL to reduce file size */
425 FPRINTF(fp,"\t*\t*");
426
427 } else if (invertp == false) {
428 Shortread_print_chopped_sam(fp,queryseq,/*sequence_hardclip_low*/0,/*sequence_hardclip_high*/0);
429 Shortread_print_quality(fp,queryseq,/*sequence_hardclip_low*/0,/*sequence_hardclip_high*/0,
430 quality_shift,/*show_chopped_p*/false);
431 } else {
432 Shortread_print_chopped_revcomp_sam(fp,queryseq,/*sequence_hardclip_low*/0,/*sequence_hardclip_high*/0);
433 Shortread_print_quality_revcomp(fp,queryseq,/*sequence_hardclip_low*/0,/*sequence_hardclip_high*/0,
434 quality_shift,/*show_chopped_p*/false);
435 }
436
437 /* 12. TAGS: XM */
438 if (mate == NULL || (flag & MATE_UNMAPPED) != 0) {
439 /* Previously checked if queryseq_mate == NULL */
440 /* Unpaired alignment. Don't print XM. */
441 } else {
442 FPRINTF(fp,"\tXM:Z:");
443 Filestring_merge(fp,mate_cigar_fp);
444 }
445
446 /* 12. TAGS: RG */
447 if (sam_read_group_id != NULL) {
448 FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
449 }
450
451 /* 12. TAGS: NH */
452 if (npaths_primary + npaths_altloc > 0) {
453 FPRINTF(fp,"\tNH:i:%d",npaths_primary + npaths_altloc);
454 if (add_paired_nomappers_p == true) {
455 FPRINTF(fp,"\tHI:i:%d",pathnum);
456 }
457 }
458
459 /* 12. TAGS: XB */
460 Shortread_print_barcode(fp,queryseq);
461
462 /* 12. TAGS: XP */
463 Shortread_print_chop(fp,queryseq,invertp);
464
465 /* 12. TAGS: XO */
466 FPRINTF(fp,"\tXO:Z:%s",abbrev);
467
468 FPRINTF(fp,"\n");
469
470 Filestring_free(&mate_md_fp);
471 Filestring_free(&mate_cigar_fp);
472
473 return;
474 }
475
476
477 static void
print_substrings(Filestring_T fp,char * abbrev,Stage3pair_T stage3pair,Stage3end_T stage3end,int querylength,char * acc1,char * acc2,int pathnum,int npaths_primary,int npaths_altloc,int absmq_score,int first_absmq,int second_absmq,int mapq_score,Shortread_T queryseq,int pairedlength,int pair_relationship,int hardclip_low,int hardclip_high,Stage3end_T mate,int mate_querylength,int mate_hardclip_low,int mate_hardclip_high,Resulttype_T resulttype,bool first_read_p,bool artificial_mate_p,int npaths_mate,int quality_shift,char * sam_read_group_id,bool invertp,bool invert_mate_p,bool circularp,bool supplementaryp)478 print_substrings (Filestring_T fp, char *abbrev, Stage3pair_T stage3pair, Stage3end_T stage3end,
479 int querylength,
480
481 char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
482 int absmq_score, int first_absmq, int second_absmq, int mapq_score,
483 Shortread_T queryseq, int pairedlength, int pair_relationship,
484
485 int hardclip_low, int hardclip_high,
486
487 Stage3end_T mate, int mate_querylength, int mate_hardclip_low, int mate_hardclip_high,
488 Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
489 int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
490 bool circularp, bool supplementaryp) {
491 unsigned int flag = 0U;
492 bool mate_first_read_p, mate_plusp;
493 Substring_T substringL, substringH, substringM;
494 int nindels;
495
496 int sequence_hardclip_low, sequence_hardclip_high;
497 List_T prevp, nextp, finalp;
498
499 Chrnum_T chrnum, mate_chrnum;
500 Chrpos_T chrpos_low, mate_chrpos_low;
501 Filestring_T cigar_fp, mate_cigar_fp, md_fp, mate_md_fp;
502
503 char *chr;
504 bool allocp;
505
506 int nmismatches_refdiff, nmismatches_bothdiff, mate_nmismatches_refdiff, mate_nmismatches_bothdiff;
507 int sensedir;
508 bool plusp;
509 bool ambigL, ambigH;
510 int n, i;
511 Univcoord_T *alts_coords, splicecoord;
512 #ifdef PRINT_ALTS_COORDS
513 Univcoord_T chroffset;
514 #endif
515
516
517 /* Compute on mate first, so we don't use its nindels and MD pointers */
518 debug(printf("Calling Cigar_compute_main on mate %p with hardclips %d and %d\n",
519 mate,mate_hardclip_low,mate_hardclip_high));
520 mate_first_read_p = (first_read_p == true ? false : true);
521 Cigar_compute_main(&mate_cigar_fp,&mate_md_fp,
522 &mate_nmismatches_refdiff,&mate_nmismatches_bothdiff,
523 &sequence_hardclip_low,&sequence_hardclip_high,&prevp,&nextp,&finalp,
524 &nindels,&mate_plusp,&mate_chrnum,&mate_chrpos_low,
525 mate,mate_querylength,mate_first_read_p,/*mate*/stage3end,
526 mate_hardclip_low,mate_hardclip_high,hide_soft_clips_p);
527
528 if (supplementaryp == true) {
529 debug(printf("Calling Cigar_compute_supplemental on self with hardclips %d and %d\n",
530 hardclip_low,hardclip_high));
531 Cigar_compute_supplemental(&cigar_fp,&md_fp,
532 &nmismatches_refdiff,&nmismatches_bothdiff,
533 &sequence_hardclip_low,&sequence_hardclip_high,&prevp,&nextp,&finalp,
534 &nindels,&plusp,&chrnum,&chrpos_low,
535 stage3end,querylength,first_read_p,mate,
536 hardclip_low,hardclip_high,hide_soft_clips_p);
537 } else {
538 debug(printf("Calling Cigar_compute_main on self with hardclips %d and %d\n",
539 hardclip_low,hardclip_high));
540 Cigar_compute_main(&cigar_fp,&md_fp,
541 &nmismatches_refdiff,&nmismatches_bothdiff,
542 &sequence_hardclip_low,&sequence_hardclip_high,&prevp,&nextp,&finalp,
543 &nindels,&plusp,&chrnum,&chrpos_low,
544 stage3end,querylength,first_read_p,mate,
545 hardclip_low,hardclip_high,hide_soft_clips_p);
546 }
547
548
549 if (chrnum == 0) {
550 /* Unexpected: All substrings were hard-clipped */
551 SAM_print_nomapping(fp,abbrev,queryseq,acc1,acc2,chromosome_iit,resulttype,first_read_p,
552 pathnum,npaths_primary,npaths_altloc,artificial_mate_p,npaths_mate,
553 mate,mate_querylength,mate_hardclip_low,mate_hardclip_high,
554 quality_shift,sam_read_group_id,invertp,invert_mate_p);
555 return;
556 }
557
558
559 #if 1
560 if ((sensedir = Stage3end_sensedir(stage3end)) == SENSE_NULL && mate != NULL) {
561 sensedir = Stage3end_sensedir(mate);
562 }
563 #else
564 /* If we use this, we need to change code in pair.c also */
565 sensedir = Stage3end_sensedir(stage3end);
566 #endif
567 /* sensep = (sensedir == SENSE_ANTI) ? false : true; */
568
569
570 /* 1. QNAME */
571 if (acc2 == NULL) {
572 FPRINTF(fp,"%s",acc1);
573 } else {
574 FPRINTF(fp,"%s,%s",acc1,acc2);
575 }
576
577 /* 2. FLAG */
578 flag = compute_flag(plusp,mate,mate_plusp,resulttype,first_read_p,
579 pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
580 absmq_score,first_absmq,invertp,invert_mate_p,supplementaryp);
581 FPRINTF(fp,"\t%u",flag);
582
583 /* 3. RNAME: chr */
584 /* 4. POS: chrpos */
585 chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
586 FPRINTF(fp,"\t%s\t%u",chr,chrpos_low /*+1U*/);
587 if (allocp == true) {
588 FREE(chr);
589 }
590
591 /* 5. MAPQ: Mapping quality */
592 FPRINTF(fp,"\t%d\t",mapq_score);
593
594 /* 6. CIGAR */
595 Filestring_merge(fp,cigar_fp);
596
597 /* 7. MRNM: Mate chr */
598 /* 8. MPOS: Mate chrpos */
599 if (mate_chrpos_low == 0U) {
600 FPRINTF(fp,"\t*\t0");
601 } else if (mate_chrnum == chrnum) {
602 FPRINTF(fp,"\t=\t%u",mate_chrpos_low /*+1U*/);
603 } else {
604 chr = Univ_IIT_label(chromosome_iit,mate_chrnum,&allocp);
605 FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos_low /*+1U*/);
606 if (allocp == true) {
607 FREE(chr);
608 }
609 }
610
611
612 /* 9. ISIZE: Insert size */
613 if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
614 if (pair_relationship > 0) {
615 if (first_read_p == true) {
616 FPRINTF(fp,"\t%d",pairedlength);
617 } else {
618 FPRINTF(fp,"\t%d",-pairedlength);
619 }
620
621 } else if (pair_relationship < 0) {
622 if (first_read_p == true) {
623 FPRINTF(fp,"\t%d",-pairedlength);
624 } else {
625 FPRINTF(fp,"\t%d",pairedlength);
626 }
627
628 } else if (plusp == invertp) {
629 FPRINTF(fp,"\t%d",-pairedlength);
630 } else {
631 FPRINTF(fp,"\t%d",pairedlength);
632 }
633
634 } else if (mate_chrpos_low == 0) {
635 FPRINTF(fp,"\t%d",pairedlength);
636 } else if (chrpos_low < mate_chrpos_low) {
637 FPRINTF(fp,"\t%d",pairedlength);
638 } else if (chrpos_low > mate_chrpos_low) {
639 FPRINTF(fp,"\t%d",-pairedlength);
640 } else if (first_read_p == true) {
641 FPRINTF(fp,"\t%d",pairedlength);
642 } else {
643 FPRINTF(fp,"\t%d",-pairedlength);
644 }
645
646
647 /* 10. SEQ: queryseq and 11. QUAL: quality scores */
648 /* Queryseq has already been inverted, so just measure plusp relative to its current state */
649 if (sam_sparse_secondaries_p == true && (flag & NOT_PRIMARY) != 0) {
650 /* SAM format specification says that secondary mappings should not print SEQ or QUAL to reduce file size */
651 FPRINTF(fp,"\t*\t*");
652
653 } else if (plusp == true) {
654 Shortread_print_chopped_sam(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
655 Shortread_print_quality(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high,
656 quality_shift,/*show_chopped_p*/false);
657 } else {
658 Shortread_print_chopped_revcomp_sam(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
659 Shortread_print_quality_revcomp(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high,
660 quality_shift,/*show_chopped_p*/false);
661 }
662
663
664 /* 12. TAGS: XM */
665 if (mate == NULL || (flag & MATE_UNMAPPED) != 0) {
666 /* Previously checked if queryseq_mate == NULL */
667 /* Unpaired alignment. Don't print XM. */
668 } else {
669 FPRINTF(fp,"\tXM:Z:");
670 Filestring_merge(fp,mate_cigar_fp);
671 }
672
673 /* 12. TAGS: RG */
674 if (sam_read_group_id != NULL) {
675 FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
676 }
677
678 /* 12. TAGS: XH and XI */
679 if (sequence_hardclip_low > 0 || sequence_hardclip_high > 0) {
680 FPRINTF(fp,"\tXH:Z:");
681 if (plusp == true) {
682 Shortread_print_chopped_end(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
683 } else {
684 Shortread_print_chopped_end_revcomp(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
685 }
686
687 if (Shortread_quality_string(queryseq) != NULL) {
688 FPRINTF(fp,"\tXI:Z:");
689 if (plusp == true) {
690 Shortread_print_chopped_end_quality(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
691 } else {
692 Shortread_print_chopped_end_quality_reverse(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
693 }
694 }
695 }
696
697 /* 12. TAGS: XB */
698 Shortread_print_barcode(fp,queryseq);
699
700 /* 12. TAGS: XP. Logically should be last in reconstructing a read. */
701 Shortread_print_chop(fp,queryseq,invertp);
702
703 /* 12. TAGS: MD */
704 FPRINTF(fp,"\tMD:Z:");
705 Filestring_merge(fp,md_fp);
706
707 /* 12. TAGS: NH */
708 /* 12. TAGS: HI */
709 /* 12. TAGS: NM */
710 FPRINTF(fp,"\tNH:i:%d\tHI:i:%d\tNM:i:%d",npaths_primary + npaths_altloc,pathnum,nmismatches_refdiff + nindels);
711
712 if (snps_iit) {
713 /* 12. TAGS: XW and XV */
714 FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
715 FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
716 }
717
718 /* 12. TAGS: SM */
719 /* 12. TAGS: XQ */
720 /* 12. TAGS: X2 */
721 FPRINTF(fp,"\tSM:i:%d\tXQ:i:%d\tX2:i:%d",mapq_score,absmq_score,second_absmq);
722
723 /* 12. TAGS: XO */
724 FPRINTF(fp,"\tXO:Z:%s",abbrev);
725
726 /* 12. TAGS: XS */
727 if (transcript_iit == NULL && splicingp == false) {
728 /* Do not print XS field */
729
730 } else if (sensedir == SENSE_FORWARD) {
731 if (plusp == true) {
732 FPRINTF(fp,"\tXS:A:+");
733 } else {
734 FPRINTF(fp,"\tXS:A:-");
735 }
736
737 } else if (sensedir == SENSE_ANTI) {
738 if (plusp == true) {
739 FPRINTF(fp,"\tXS:A:-");
740 } else {
741 FPRINTF(fp,"\tXS:A:+");
742 }
743 #if 0
744 /* Don't print XS field for SENSE_NULL */
745
746 } else if (force_xs_direction_p == true) {
747 FPRINTF(fp,"\tXS:A:+");
748
749 } else {
750 FPRINTF(fp,"\tXS:A:?");
751 #endif
752 }
753
754
755 /* 12. TAGS: XA */
756 if (prevp == NULL) {
757 /* substringL = (Substring_T) NULL; */
758 ambigL = false;
759 } else {
760 substringL = (Substring_T) List_head(prevp);
761 ambigL = Substring_has_alts_p(substringL);
762 }
763 if (nextp == NULL) {
764 ambigH = false;
765 } else {
766 substringH = (Substring_T) List_head(nextp);
767 ambigH = Substring_has_alts_p(substringH);
768 }
769
770 if (ambigL == true || ambigH == true) {
771 FPRINTF(fp,"\tXA:Z:");
772
773 if (ambigL == true) {
774 alts_coords = Substring_alts_coords(substringL);
775 n = Substring_alts_ncoords(substringL);
776 #ifdef PRINT_ALTS_COORDS
777 chroffset = Substring_chroffset(substringL);
778 FPRINTF(fp,"%u",alts_coords[0] - chroffset + 1U);
779 for (i = 1; i < n; i++) {
780 FPRINTF(fp,",%u",alts_coords[i] - chroffset + 1U);
781 }
782 #else
783 substringM = (Substring_T) List_head(List_next(prevp));
784 if (plusp == true) {
785 splicecoord = Substring_alignstart_trim(substringM);
786 } else {
787 splicecoord = Substring_alignend_trim(substringM);
788 }
789 FPRINTF(fp,"%u",splicecoord - alts_coords[0]);
790 for (i = 1; i < n; i++) {
791 FPRINTF(fp,",%u",splicecoord - alts_coords[i]);
792 }
793 #endif
794 }
795 FPRINTF(fp,"|");
796 if (ambigH == true) {
797 alts_coords = Substring_alts_coords(substringH);
798 n = Substring_alts_ncoords(substringH);
799 #ifdef PRINT_ALTS_COORDS
800 chroffset = Substring_chroffset(substringH);
801 FPRINTF(fp,"%u",alts_coords[0] - chroffset + 1U);
802 for (i = 1; i < n; i++) {
803 FPRINTF(fp,",%u",alts_coords[i] - chroffset + 1U);
804 }
805 #else
806 substringM = (Substring_T) List_head(finalp);
807 if (plusp == true) {
808 splicecoord = Substring_alignend_trim(substringM);
809 } else {
810 splicecoord = Substring_alignstart_trim(substringM);
811 }
812 FPRINTF(fp,"%u",alts_coords[0] - splicecoord);
813 for (i = 1; i < n; i++) {
814 FPRINTF(fp,",%u",alts_coords[i] - splicecoord);
815 }
816 #endif
817 }
818 }
819
820
821 /* 12. TAGS: XX, XY */
822 if (first_read_p == true) {
823 if (stage3pair != NULL && Transcript_concordant_p(Stage3end_transcripts(stage3end),Stage3end_transcripts(mate)) == true) {
824 Transcript_concordance_print(fp,Stage3end_transcripts(stage3end),Stage3end_transcripts(mate),
825 transcript_iit,/*concordantp*/true,/*first_read_p*/true,invertp,/*header*/"\tXX:Z:");
826 Transcript_concordance_print(fp,Stage3end_transcripts(stage3end),Stage3end_transcripts(mate),
827 transcript_iit,/*concordantp*/false,/*first_read_p*/true,invertp,/*header*/"\tXY:Z:");
828 } else {
829 Transcript_singleend_print(fp,Stage3end_transcripts(stage3end),transcript_iit,invertp,
830 /*header*/"\tXX:Z:");
831 }
832
833 } else {
834 if (stage3pair != NULL && Transcript_concordant_p(Stage3end_transcripts(mate),Stage3end_transcripts(stage3end)) == true) {
835 Transcript_concordance_print(fp,Stage3end_transcripts(mate),Stage3end_transcripts(stage3end),
836 transcript_iit,/*concordantp*/true,/*first_read_p*/false,invertp,/*header*/"\tXX:Z:");
837 Transcript_concordance_print(fp,Stage3end_transcripts(mate),Stage3end_transcripts(stage3end),
838 transcript_iit,/*concordantp*/false,/*first_read_p*/false,invertp,/*header*/"\tXY:Z:");
839 } else {
840 Transcript_singleend_print(fp,Stage3end_transcripts(stage3end),transcript_iit,invertp,
841 /*header*/"\tXX:Z:");
842 }
843 }
844
845 #if 0
846 /* 12. TAGS: XZ (only for splices) */
847 if (Stage3end_transcripts_other(stage3end) != NULL) {
848 Transcript_singleend_print(fp,Stage3end_transcripts_other(stage3end),transcript_iit,invertp,
849 /*header*/"\tXZ:Z");
850 }
851 #endif
852
853 /* 12. TAGS: XC */
854 if (circularp == true) {
855 FPRINTF(fp,"\tXC:A:+");
856 }
857
858 /* 12. TAGS: XG */
859 if (method_print_p == true) {
860 Method_samprint(fp,Stage3end_method(stage3end));
861 }
862
863 #if 0
864 /* 12. TAGS: XE (BLAST E-value) */
865 FPRINTF(fp,"\tXE:f:%.2g",Stage3end_min_evalue(stage3end));
866 #endif
867
868
869 FPRINTF(fp,"\n");
870
871 Filestring_free(&mate_md_fp);
872 Filestring_free(&md_fp);
873 Filestring_free(&mate_cigar_fp);
874 Filestring_free(&cigar_fp);
875
876 return;
877 }
878
879
880 static void
halfdonor_dinucleotide(char * donor1,char * donor2,Substring_T donor,int sensedir)881 halfdonor_dinucleotide (char *donor1, char *donor2, Substring_T donor, int sensedir) {
882 char *genomic;
883 int substring_start, substring_end;
884
885 genomic = Substring_genomic_refdiff(donor);
886
887 if (genomic == NULL) {
888 *donor1 = *donor2 = 'X';
889
890 } else if (sensedir == SENSE_FORWARD) {
891 substring_end = Substring_queryend(donor);
892 *donor1 = toupper(genomic[substring_end]);
893 *donor2 = toupper(genomic[substring_end+1]);
894
895 } else if (sensedir == SENSE_ANTI) {
896 substring_start = Substring_querystart(donor);
897 *donor2 = toupper(complCode[(int) genomic[substring_start-2]]);
898 *donor1 = toupper(complCode[(int) genomic[substring_start-1]]);
899
900 } else {
901 *donor1 = *donor2 = 'X';
902 }
903
904 return;
905 }
906
907 static void
halfacceptor_dinucleotide(char * acceptor2,char * acceptor1,Substring_T acceptor,int sensedir)908 halfacceptor_dinucleotide (char *acceptor2, char *acceptor1, Substring_T acceptor, int sensedir) {
909 char *genomic;
910 int substring_start, substring_end;
911
912 genomic = Substring_genomic_refdiff(acceptor);
913
914 if (genomic == NULL) {
915 *acceptor1 = *acceptor2 = 'X';
916
917 } else if (sensedir == SENSE_FORWARD) {
918 substring_start = Substring_querystart(acceptor);
919 *acceptor2 = toupper(genomic[substring_start-2]);
920 *acceptor1 = toupper(genomic[substring_start-1]);
921
922 } else if (sensedir == SENSE_ANTI) {
923 substring_end = Substring_queryend(acceptor);
924 *acceptor1 = toupper(complCode[(int) genomic[substring_end]]);
925 *acceptor2 = toupper(complCode[(int) genomic[substring_end+1]]);
926
927 } else {
928 *acceptor1 = *acceptor2 = 'X';
929 }
930
931 return;
932 }
933
934
935 static void
print_xt_info(Filestring_T fp,Substring_T donor,Substring_T acceptor,int sensedir)936 print_xt_info (Filestring_T fp, Substring_T donor, Substring_T acceptor, int sensedir) {
937 char donor_strand, acceptor_strand;
938 char *donor_chr, *acceptor_chr;
939 bool alloc1p, alloc2p;
940
941 Chrpos_T donor_coord, acceptor_coord;
942 char donor1, donor2, acceptor2, acceptor1;
943 double donor_prob, acceptor_prob;
944
945
946 halfdonor_dinucleotide(&donor1,&donor2,donor,sensedir);
947 halfacceptor_dinucleotide(&acceptor2,&acceptor1,acceptor,sensedir);
948 donor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(donor),&alloc1p);
949 acceptor_chr = Univ_IIT_label(chromosome_iit,Substring_chrnum(acceptor),&alloc2p);
950 donor_prob = Substring_siteD_prob(donor);
951 acceptor_prob = Substring_siteA_prob(acceptor);
952
953 donor_strand = Substring_chimera_strand(donor);
954 acceptor_strand = Substring_chimera_strand(acceptor);
955
956 donor_coord = Substring_chr_splicecoord_D(donor,donor_strand);
957 acceptor_coord = Substring_chr_splicecoord_A(acceptor,acceptor_strand);
958 FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
959 FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_coord,
960 acceptor_strand,acceptor_chr,acceptor_coord);
961
962 if (alloc1p) {
963 FREE(donor_chr);
964 }
965 if (alloc2p) {
966 FREE(acceptor_chr);
967 }
968
969 return;
970 }
971
972
973 static void
print_splice(Filestring_T fp,char * abbrev,Substring_T donor,Substring_T acceptor,Stage3pair_T stage3pair,Stage3end_T stage3end,int querylength,char * acc1,char * acc2,int pathnum,int npaths_primary,int npaths_altloc,int absmq_score,int first_absmq,int second_absmq,int mapq_score,Shortread_T queryseq,int pairedlength,int hardclip_low,int hardclip_high,Stage3end_T mate,int mate_querylength,int mate_hardclip_low,int mate_hardclip_high,Resulttype_T resulttype,bool first_read_p,bool artificial_mate_p,int npaths_mate,int quality_shift,char * sam_read_group_id,bool invertp,bool invert_mate_p,bool supplementaryp)974 print_splice (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T acceptor,
975 Stage3pair_T stage3pair, Stage3end_T stage3end, int querylength,
976
977 char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
978 int absmq_score, int first_absmq, int second_absmq, int mapq_score,
979 Shortread_T queryseq, int pairedlength,
980
981 int hardclip_low, int hardclip_high,
982
983 Stage3end_T mate, int mate_querylength, int mate_hardclip_low, int mate_hardclip_high,
984 Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
985 int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
986 bool supplementaryp) {
987 unsigned int flag = 0U;
988 bool mate_first_read_p, mate_plusp;
989 Substring_T substring;
990 int nindels;
991
992 int sequence_hardclip_low, sequence_hardclip_high;
993 List_T prevp, nextp, finalp;
994
995 Chrnum_T chrnum, mate_chrnum;
996 Chrpos_T chrpos_low, mate_chrpos_low;
997 Filestring_T cigar_fp, mate_cigar_fp, md_fp, mate_md_fp;
998
999 char *chr;
1000 bool allocp;
1001
1002 int nmismatches_refdiff, nmismatches_bothdiff, mate_nmismatches_refdiff, mate_nmismatches_bothdiff;
1003 bool plusp;
1004 bool start_ambig, end_ambig;
1005 int n, i;
1006 Univcoord_T *start_alts_coords, *end_alts_coords, splicecoord;
1007 #ifdef PRINT_ALTS_COORDS
1008 Univcoord_T chroffset;
1009 #endif
1010 char substring_strand;
1011
1012
1013 /* Compute on mate first, so we don't use its nindels and MD
1014 pointers, although this procedure does not use them */
1015 debug(printf("Calling Cigar_compute_main on mate %p with hardclips %d and %d\n",
1016 mate,mate_hardclip_low,mate_hardclip_high));
1017 mate_first_read_p = (first_read_p == true ? false : true);
1018 Cigar_compute_main(&mate_cigar_fp,&mate_md_fp,
1019 &mate_nmismatches_refdiff,&mate_nmismatches_bothdiff,
1020 &sequence_hardclip_low,&sequence_hardclip_high,&prevp,&nextp,&finalp,
1021 &nindels,&mate_plusp,&mate_chrnum,&mate_chrpos_low,
1022 mate,mate_querylength,mate_first_read_p,/*mate*/stage3end,
1023 mate_hardclip_low,mate_hardclip_high,hide_soft_clips_p);
1024
1025 if (supplementaryp == true) {
1026 debug(printf("Calling Cigar_compute_supplemental on self with hardclips %d and %d\n",
1027 hardclip_low,hardclip_high));
1028 substring = (Stage3end_donor_concordant_p(stage3end,first_read_p) == true ? acceptor : donor);
1029 Cigar_compute_supplemental(&cigar_fp,&md_fp,
1030 &nmismatches_refdiff,&nmismatches_bothdiff,
1031 &sequence_hardclip_low,&sequence_hardclip_high,&prevp,&nextp,&finalp,
1032 &nindels,&plusp,&chrnum,&chrpos_low,
1033 stage3end,querylength,first_read_p,mate,
1034 hardclip_low,hardclip_high,hide_soft_clips_p);
1035 } else {
1036 debug(printf("Calling Cigar_compute_main on self with hardclips %d and %d\n",
1037 hardclip_low,hardclip_high));
1038 substring = (Stage3end_donor_concordant_p(stage3end,first_read_p) == true ? donor : acceptor);
1039 Cigar_compute_main(&cigar_fp,&md_fp,
1040 &nmismatches_refdiff,&nmismatches_bothdiff,
1041 &sequence_hardclip_low,&sequence_hardclip_high,&prevp,&nextp,&finalp,
1042 &nindels,&plusp,&chrnum,&chrpos_low,
1043 stage3end,querylength,first_read_p,mate,
1044 hardclip_low,hardclip_high,hide_soft_clips_p);
1045 }
1046 plusp = Substring_plusp(substring);
1047 substring_strand = Substring_chimera_strand(substring);
1048
1049
1050 /* 1. QNAME */
1051 if (acc2 == NULL) {
1052 FPRINTF(fp,"%s",acc1);
1053 } else {
1054 FPRINTF(fp,"%s,%s",acc1,acc2);
1055 }
1056
1057 /* 2. FLAG */
1058 flag = compute_flag(plusp,mate,mate_plusp,resulttype,first_read_p,
1059 pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
1060 absmq_score,first_absmq,invertp,invert_mate_p,supplementaryp);
1061 FPRINTF(fp,"\t%u",flag);
1062
1063 /* 3. RNAME: chr */
1064 /* 4. POS: chrpos */
1065 /* chrnum and chrpos here are for the substring */
1066 chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
1067 FPRINTF(fp,"\t%s\t%u",chr,chrpos_low /*+1U*/);
1068 if (allocp == true) {
1069 FREE(chr);
1070 }
1071
1072 /* 5. MAPQ: Mapping quality */
1073 FPRINTF(fp,"\t%d\t",mapq_score);
1074
1075 /* 6. CIGAR */
1076 Filestring_merge(fp,cigar_fp);
1077
1078 /* 7. MRNM: Mate chr */
1079 /* 8. MPOS: Mate chrpos */
1080 if (mate_chrpos_low == 0U) {
1081 FPRINTF(fp,"\t*\t0");
1082 } else if (mate_chrnum == chrnum) {
1083 FPRINTF(fp,"\t=\t%u",mate_chrpos_low /*+1U*/);
1084 } else {
1085 chr = Univ_IIT_label(chromosome_iit,mate_chrnum,&allocp);
1086 FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos_low /*+1U*/);
1087 if (allocp == true) {
1088 FREE(chr);
1089 }
1090 }
1091
1092
1093 /* 9. ISIZE: Insert size */
1094 if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
1095 if (plusp == invertp) {
1096 FPRINTF(fp,"\t%d",-pairedlength);
1097 } else {
1098 FPRINTF(fp,"\t%d",pairedlength);
1099 }
1100 } else if (mate_chrpos_low == 0) {
1101 FPRINTF(fp,"\t%d",pairedlength);
1102 #if 0
1103 } else if (concordant_chrpos < mate_chrpos_low) {
1104 FPRINTF(fp,"\t%d",pairedlength);
1105 } else if (concordant_chrpos > mate_chrpos_low) {
1106 FPRINTF(fp,"\t%d",-pairedlength);
1107 #endif
1108 } else if (first_read_p == true) {
1109 FPRINTF(fp,"\t%d",pairedlength);
1110 } else {
1111 FPRINTF(fp,"\t%d",-pairedlength);
1112 }
1113
1114
1115 /* 10. SEQ: queryseq and 11. QUAL: quality scores */
1116 /* Queryseq has already been inverted, so just measure plusp relative to its current state */
1117 if (sam_sparse_secondaries_p == true && (flag & NOT_PRIMARY) != 0) {
1118 /* SAM format specification says that secondary mappings should not print SEQ or QUAL to reduce file size */
1119 FPRINTF(fp,"\t*\t*");
1120
1121 } else if (plusp == true) {
1122 Shortread_print_chopped_sam(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
1123 Shortread_print_quality(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high,
1124 quality_shift,/*show_chopped_p*/false);
1125 } else {
1126 Shortread_print_chopped_revcomp_sam(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
1127 Shortread_print_quality_revcomp(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high,
1128 quality_shift,/*show_chopped_p*/false);
1129 }
1130
1131
1132 /* 12. TAGS: XM */
1133 if (mate == NULL || (flag & MATE_UNMAPPED) != 0) {
1134 /* Previously checked if queryseq_mate == NULL */
1135 /* Unpaired alignment. Don't print XM. */
1136 } else {
1137 FPRINTF(fp,"\tXM:Z:");
1138 Filestring_merge(fp,mate_cigar_fp);
1139 }
1140
1141 /* 12. TAGS: RG */
1142 if (sam_read_group_id != NULL) {
1143 FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
1144 }
1145
1146 /* 12. TAGS: XH and XI */
1147 if (sequence_hardclip_low > 0 || sequence_hardclip_high > 0) {
1148 FPRINTF(fp,"\tXH:Z:");
1149 if (plusp == true) {
1150 Shortread_print_chopped_end(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
1151 } else {
1152 Shortread_print_chopped_end_revcomp(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
1153 }
1154
1155 if (Shortread_quality_string(queryseq) != NULL) {
1156 FPRINTF(fp,"\tXI:Z:");
1157 if (plusp == true) {
1158 Shortread_print_chopped_end_quality(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
1159 } else {
1160 Shortread_print_chopped_end_quality_reverse(fp,queryseq,sequence_hardclip_low,sequence_hardclip_high);
1161 }
1162 }
1163 }
1164
1165 /* 12. TAGS: XB */
1166 Shortread_print_barcode(fp,queryseq);
1167
1168 /* 12. TAGS: XP. Logically should be last in reconstructing a read. */
1169 Shortread_print_chop(fp,queryseq,invertp);
1170
1171 /* 12. TAGS: MD */
1172 FPRINTF(fp,"\tMD:Z:");
1173 Filestring_merge(fp,md_fp);
1174
1175 /* 12. TAGS: NH */
1176 /* 12. TAGS: HI */
1177 /* 12. TAGS: NM */
1178 FPRINTF(fp,"\tNH:i:%d\tHI:i:%d\tNM:i:%d",npaths_primary + npaths_altloc,pathnum,nmismatches_refdiff);
1179
1180 if (snps_iit) {
1181 /* 12. TAGS: XW and XV */
1182 FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
1183 FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
1184 }
1185
1186 /* 12. TAGS: SM */
1187 /* 12. TAGS: XQ */
1188 /* 12. TAGS: X2 */
1189 FPRINTF(fp,"\tSM:i:%d\tXQ:i:%d\tX2:i:%d",mapq_score,absmq_score,second_absmq);
1190
1191 /* 12. TAGS: XO */
1192 FPRINTF(fp,"\tXO:Z:%s",abbrev);
1193
1194 /* 12. TAGS: XS */
1195 /* Doesn't hold for DNA-Seq chimeras */
1196 /* assert(donor_sensedir != SENSE_NULL); */
1197 if (Stage3end_sensedir(stage3end) != SENSE_NULL) {
1198 FPRINTF(fp,"\tXS:A:%c",substring_strand);
1199 }
1200
1201
1202 /* 12. TAGS: XA */
1203 /* Probably not relevant, since splices don't allow for ambiguity anymore */
1204 if ((start_ambig = Stage3end_start_has_alts_p(stage3end)) == true ||
1205 (end_ambig = Stage3end_end_has_alts_p(stage3end)) == true) {
1206 FPRINTF(fp,"\tXA:Z:");
1207
1208 if (plusp == true) {
1209 if ((n = Stage3end_start_alts_ncoords(stage3end)) > 0) {
1210 /* assert(sensep == false); */
1211 start_alts_coords = Stage3end_start_alts_coords(stage3end);
1212 splicecoord = Substring_alignstart_trim(substring);
1213 #ifdef PRINT_ALTS_COORDS
1214 chroffset = Substring_chroffset(substring);
1215 FPRINTF(fp,"%u",start_alts_coords[0] - chroffset + 1U);
1216 for (i = 1; i < n; i++) {
1217 FPRINTF(fp,",%u",start_alts_coords[i] - chroffset + 1U);
1218 }
1219 #else
1220 splicecoord = Substring_alignstart_trim(substring);
1221 FPRINTF(fp,"%u",splicecoord - start_alts_coords[0]);
1222 for (i = 1; i < n; i++) {
1223 FPRINTF(fp,",%u",splicecoord - start_alts_coords[i]);
1224 }
1225 #endif
1226 }
1227 FPRINTF(fp,"|");
1228 if ((n = Stage3end_end_alts_ncoords(stage3end)) > 0) {
1229 /* assert(sensep == true); */
1230 end_alts_coords = Stage3end_end_alts_coords(stage3end);
1231 #ifdef PRINT_ALTS_COORDS
1232 chroffset = Substring_chroffset(substring);
1233 FPRINTF(fp,"%u",end_alts_coords[0] - chroffset + 1U);
1234 for (i = 1; i < n; i++) {
1235 FPRINTF(fp,",%u",end_alts_coords[i] - chroffset + 1U);
1236 }
1237 #else
1238 splicecoord = Substring_alignend_trim(substring);
1239 FPRINTF(fp,"%u",end_alts_coords[0] - splicecoord);
1240 for (i = 1; i < n; i++) {
1241 FPRINTF(fp,",%u",end_alts_coords[i] - splicecoord);
1242 }
1243 #endif
1244 }
1245
1246 } else {
1247 if ((n = Stage3end_end_alts_ncoords(stage3end)) > 0) {
1248 /* assert(sensep == true); */
1249 end_alts_coords = Stage3end_end_alts_coords(stage3end);
1250 #ifdef PRINT_ALTS_COORDS
1251 chroffset = Substring_chroffset(substring);
1252 FPRINTF(fp,"%u",end_alts_coords[0] - chroffset + 1U);
1253 for (i = 1; i < n; i++) {
1254 FPRINTF(fp,",%u",end_alts_coords[i] - chroffset + 1U);
1255 }
1256 #else
1257 splicecoord = Substring_alignend_trim(substring);
1258 FPRINTF(fp,"%u",splicecoord - end_alts_coords[0]);
1259 for (i = 1; i < n; i++) {
1260 FPRINTF(fp,",%u",splicecoord - end_alts_coords[i]);
1261 }
1262 #endif
1263 }
1264 FPRINTF(fp,"|");
1265 if ((n = Stage3end_start_alts_ncoords(stage3end)) > 0) {
1266 /* assert(sensep == false); */
1267 start_alts_coords = Stage3end_start_alts_coords(stage3end);
1268 #ifdef PRINT_ALTS_COORDS
1269 chroffset = Substring_chroffset(substring);
1270 FPRINTF(fp,"%u",start_alts_coords[0] - chroffset + 1U);
1271 for (i = 1; i < n; i++) {
1272 FPRINTF(fp,",%u",start_alts_coords[i] - chroffset + 1U);
1273 }
1274 #else
1275 splicecoord = Substring_alignstart_trim(substring);
1276 FPRINTF(fp,"%u",start_alts_coords[0] - splicecoord);
1277 for (i = 1; i < n; i++) {
1278 FPRINTF(fp,",%u",start_alts_coords[i] - splicecoord);
1279 }
1280 #endif
1281 }
1282 }
1283 }
1284
1285 /* 12. TAGS: XT */
1286 print_xt_info(fp,donor,acceptor,Stage3end_sensedir(stage3end));
1287
1288
1289 /* 12. TAGS: XX, XY */
1290 if (stage3pair != NULL && Transcript_concordant_p(Stage3pair_transcripts5(stage3pair),
1291 Stage3pair_transcripts3(stage3pair)) == true) {
1292 Transcript_concordance_print(fp,Stage3pair_transcripts5(stage3pair),Stage3pair_transcripts3(stage3pair),
1293 transcript_iit,/*concordantp*/true,first_read_p,invertp,/*header*/"\tXX:Z:");
1294 Transcript_concordance_print(fp,Stage3pair_transcripts5(stage3pair),Stage3pair_transcripts3(stage3pair),
1295 transcript_iit,/*concordantp*/false,first_read_p,invertp,/*header*/"\tXY:Z:");
1296 } else {
1297 Transcript_singleend_print(fp,Stage3end_transcripts(stage3end),transcript_iit,invertp,
1298 /*header*/"\tXX:Z:");
1299 }
1300
1301 /* 12. TAGS: XZ */
1302 if (Stage3end_transcripts_other(stage3end) != NULL) {
1303 Transcript_singleend_print(fp,Stage3end_transcripts_other(stage3end),transcript_iit,invertp,
1304 /*header*/"\tXZ:Z");
1305 }
1306
1307 #if 0
1308 /* 12. TAGS: XC */
1309 if (circularp == true) {
1310 FPRINTF(fp,"\tXC:A:+");
1311 }
1312 #endif
1313
1314 /* 12. TAGS: XG */
1315 if (method_print_p == true) {
1316 Method_samprint(fp,Stage3end_method(stage3end));
1317 }
1318
1319 #if 0
1320 /* 12. TAGS: XE (BLAST E-value) */
1321 FPRINTF(fp,"\tXE:f:%.2g",Stage3end_min_evalue(stage3end));
1322 #endif
1323
1324 FPRINTF(fp,"\n");
1325
1326 Filestring_free(&mate_md_fp);
1327 Filestring_free(&md_fp);
1328 Filestring_free(&mate_cigar_fp);
1329 Filestring_free(&cigar_fp);
1330
1331 return;
1332 }
1333
1334
1335 void
SAM_print(Filestring_T fp,Filestring_T fp_failedinput,char * abbrev,Stage3pair_T stage3pair,Stage3end_T this,int querylength,char * acc1,char * acc2,int pathnum,int npaths_primary,int npaths_altloc,int absmq_score,int first_absmq,int second_absmq,int mapq_score,Univ_IIT_T chromosome_iit,Shortread_T queryseq,Shortread_T queryseq_mate,int pairedlength,int pair_relationship,int hardclip_low,int hardclip_high,Stage3end_T mate,int mate_querylength,int mate_hardclip_low,int mate_hardclip_high,Resulttype_T resulttype,bool first_read_p,bool artificial_mate_p,int npaths_mate,int quality_shift,char * sam_read_group_id,bool invertp,bool invert_mate_p)1336 SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev, Stage3pair_T stage3pair,
1337 Stage3end_T this, int querylength, char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
1338 int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit,
1339 Shortread_T queryseq, Shortread_T queryseq_mate, int pairedlength, int pair_relationship,
1340
1341 int hardclip_low, int hardclip_high,
1342 Stage3end_T mate, int mate_querylength, int mate_hardclip_low, int mate_hardclip_high,
1343
1344 Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
1345 int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
1346 Hittype_T hittype;
1347 Substring_T donor, acceptor;
1348
1349 debug(printf("Entered SAM_print of hit %p with hittype %s\n",this,Stage3end_hittype_string(this)));
1350
1351 /* Test for nomapping was chrpos == 0, but we can actually align to chrpos 0 */
1352 /* Also, can use this test here because --quiet-if-excessive cases go directly to SAM_print_nomapping */
1353 if (npaths_primary + npaths_altloc == 0) {
1354 SAM_print_nomapping(fp,abbrev,queryseq,acc1,acc2,chromosome_iit,resulttype,first_read_p,
1355 /*pathnum*/0,/*npaths_primary*/0,/*npaths_altloc*/0,artificial_mate_p,npaths_mate,
1356 mate,mate_querylength,mate_hardclip_low,mate_hardclip_high,
1357 quality_shift,sam_read_group_id,invertp,invert_mate_p);
1358
1359 if (fp_failedinput != NULL) {
1360 if (first_read_p == true) {
1361 Shortread_print_query_singleend(fp_failedinput,queryseq,/*headerseq*/queryseq);
1362 } else {
1363 Shortread_print_query_singleend(fp_failedinput,queryseq,/*headerseq*/queryseq_mate);
1364 }
1365 }
1366
1367 } else if ((hittype = Stage3end_hittype(this)) == TRANSLOC_SPLICE ||
1368 (hittype == SAMECHR_SPLICE && merge_samechr_p == false)) {
1369 /* Translocation: Let SAM_print and Cigar_compute procedures decide which parts are main and supplemental */
1370 donor = Stage3end_substring_donor(this);
1371 acceptor = Stage3end_substring_acceptor(this);
1372
1373 print_splice(fp,abbrev,donor,acceptor,stage3pair,this,querylength,
1374 acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1375 absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
1376 hardclip_low,hardclip_high,
1377 mate,mate_querylength,mate_hardclip_low,mate_hardclip_high,
1378 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
1379 invertp,invert_mate_p,/*supplementaryp*/false);
1380
1381 print_splice(fp,abbrev,donor,acceptor,stage3pair,this,querylength,
1382 acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1383 absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
1384 hardclip_low,hardclip_high,
1385 mate,mate_querylength,mate_hardclip_low,mate_hardclip_high,
1386 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
1387 invertp,invert_mate_p,/*supplementaryp*/true);
1388
1389 } else if (Stage3end_circularpos(this) <= 0) {
1390 print_substrings(fp,abbrev,stage3pair,this,querylength,
1391 acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1392 absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,pair_relationship,
1393 hardclip_low,hardclip_high,
1394 mate,mate_querylength,mate_hardclip_low,mate_hardclip_high,
1395 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
1396 invertp,invert_mate_p,/*circularp*/false,/*supplementaryp*/false);
1397
1398 } else {
1399 /* Circular: Let SAM_print and Cigar_compute procedures decide which parts are main and supplemental */
1400 print_substrings(fp,abbrev,stage3pair,this,querylength,
1401 acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1402 absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,pair_relationship,
1403 hardclip_low,hardclip_high,
1404 mate,mate_querylength,mate_hardclip_low,mate_hardclip_high,
1405 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
1406 invertp,invert_mate_p,/*circularp*/true,/*supplementaryp*/false);
1407
1408 print_substrings(fp,abbrev,stage3pair,this,querylength,
1409 acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1410 absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,pair_relationship,
1411 hardclip_low,hardclip_high,
1412 mate,mate_querylength,mate_hardclip_low,mate_hardclip_high,
1413 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
1414 invertp,invert_mate_p,/*circularp*/true,/*supplementaryp*/true);
1415 }
1416
1417 return;
1418 }
1419
1420
1421 void
SAM_print_paired(Filestring_T fp,Filestring_T fp_failedinput_1,Filestring_T fp_failedinput_2,Result_T result,Resulttype_T resulttype,Univ_IIT_T chromosome_iit,Shortread_T queryseq1,Shortread_T queryseq2,bool invert_first_p,bool invert_second_p,bool nofailsp,bool failsonlyp,int quality_shift,char * sam_read_group_id)1422 SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2,
1423 Result_T result, Resulttype_T resulttype, Univ_IIT_T chromosome_iit,
1424 Shortread_T queryseq1, Shortread_T queryseq2, bool invert_first_p, bool invert_second_p,
1425 bool nofailsp, bool failsonlyp, int quality_shift, char *sam_read_group_id) {
1426 Stage3pair_T *stage3pairarray, stage3pair;
1427 Stage3end_T *stage3array1, *stage3array2, stage3, mate, hit5, hit3;
1428 int npaths_primary, npaths_altloc, npaths_max, npaths_primary_max, npaths_altloc_max,
1429 npaths1_primary, npaths1_altloc, npaths2_primary, npaths2_altloc, pathnum;
1430 int first_absmq, second_absmq, first_absmq1, second_absmq1, first_absmq2, second_absmq2;
1431 int querylength5, querylength3;
1432 int hardclip5_low = 0, hardclip5_high = 0, hardclip3_low = 0, hardclip3_high = 0, clipdir;
1433 char *acc1, *acc2;
1434 Pairtype_T pairtype;
1435 char *abbrev;
1436
1437 struct Simplepair_T *pairarray;
1438 int npairs;
1439 char *queryseq_merged, *quality_merged;
1440 int querylength_merged;
1441 int flag;
1442 Chrpos_T chrpos_low;
1443
1444
1445 acc1 = Shortread_accession(queryseq1);
1446 acc2 = Shortread_accession(queryseq2); /* NULL, unless --allow-pe-name-mismatch is specified */
1447
1448 debug(printf("Entered SAM_print_paired with resulttype %d\n",resulttype));
1449
1450 if (resulttype == PAIREDEND_NOMAPPING) {
1451 if (nofailsp == true) {
1452 /* No output */
1453 return;
1454
1455 } else {
1456 Filestring_set_split_output(fp,OUTPUT_NM);
1457 SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,queryseq1,acc1,acc2,chromosome_iit,resulttype,
1458 /*first_read_p*/true,/*pathnum*/0,/*npaths_primary*/0,/*npaths_altloc*/0,
1459 /*artificial_mate_p*/false,/*npaths_mate*/0,
1460 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1461 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1462 SAM_print_nomapping(fp,ABBREV_NOMAPPING_2,queryseq2,acc1,acc2,chromosome_iit,resulttype,
1463 /*first_read_p*/false,/*pathnum*/0,/*npaths_primary*/0,/*npaths_altloc*/0,
1464 /*artificial_mate_p*/false,/*npaths_mate*/0,
1465 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1466 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1467
1468 if (fp_failedinput_1 != NULL) {
1469 Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
1470 }
1471 }
1472
1473 } else {
1474 querylength5 = Shortread_fulllength(queryseq1);
1475 querylength3 = Shortread_fulllength(queryseq2);
1476
1477 if (failsonlyp == true) {
1478 /* Unwanted success: skip */
1479
1480 } else if (resulttype == CONCORDANT_UNIQ) {
1481 stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
1482 /* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
1483
1484 stage3pair = stage3pairarray[0];
1485 hit5 = Stage3pair_hit5(stage3pair);
1486 hit3 = Stage3pair_hit3(stage3pair);
1487
1488 if (Stage3pair_circularp(stage3pair) == true) {
1489 /* Don't resolve overlaps on a circular alignment */
1490 clipdir = 0;
1491 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1492 Filestring_set_split_output(fp,OUTPUT_CC);
1493 abbrev = ABBREV_CONCORDANT_CIRCULAR;
1494
1495 } else if (omit_concordant_uniq_p == true) {
1496 Filestring_set_split_output(fp,OUTPUT_NONE);
1497
1498 } else {
1499 if (clip_overlap_p == false && merge_overlap_p == false) {
1500 clipdir = 0;
1501 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1502 Filestring_set_split_output(fp,OUTPUT_CU);
1503 abbrev = ABBREV_CONCORDANT_UNIQ;
1504
1505 } else {
1506 clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
1507 debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
1508 clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
1509 Filestring_set_split_output(fp,OUTPUT_CU);
1510 abbrev = ABBREV_CONCORDANT_UNIQ;
1511 }
1512 }
1513
1514 #if 0
1515 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
1516 hit5,querylength5,/*first_read_p*/true);
1517 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
1518 hit3,querylength3,/*first_read_p*/false);
1519 #endif
1520
1521 if (merge_overlap_p == false || clipdir == 0) {
1522 /* print first end */
1523 SAM_print(fp,fp_failedinput_1,abbrev,stage3pair,hit5,querylength5,
1524 acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
1525 Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
1526 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1527 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
1528 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1529 /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
1530 /*mate*/hit3,/*mate_querylength*/querylength3,/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
1531 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1532 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1533
1534 /* print second end */
1535 SAM_print(fp,fp_failedinput_2,abbrev,stage3pair,hit3,querylength3,
1536 acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
1537 Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
1538 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1539 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
1540 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1541 /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
1542 /*mate*/hit5,/*mate_querylength*/querylength5,/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
1543 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1544 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1545
1546 } else {
1547 /* merge_overlap_p == true and overlap was found */
1548 pairarray = Stage3pair_merge(&npairs,&querylength_merged,&queryseq_merged,&quality_merged,
1549 stage3pair,queryseq1,queryseq2,
1550 /*querylength5*/Stage3end_querylength(hit5),
1551 /*querylength3*/Stage3end_querylength(hit3),
1552 clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
1553 /* printf("queryseq_merged: %s\n",queryseq_merged); */
1554 #if 0
1555 if (clipdir >= 0) {
1556 chrnum = chrnum5;
1557 chrpos_low = chrpos_low_5;
1558 } else {
1559 chrnum = chrnum3;
1560 chrpos_low = chrpos_low_3;
1561 }
1562 #else
1563 chrpos_low = Simplepair_genomicpos_low(/*hardclip_low*/0,/*hardclip_high*/0,pairarray,npairs,
1564 querylength_merged,/*plusp*/Stage3end_plusp(hit5),
1565 hide_soft_clips_p);
1566 #endif
1567
1568 /* merging changes resulttype from UNPAIRED_UNIQ to SINGLEEND_UNIQ */
1569
1570 flag = compute_flag(Stage3end_plusp(hit5),/*mate*/NULL,/*mate_plusp*/true,
1571 /*resulttype*/SINGLEEND_UNIQ,/*first_read_p*/true,
1572 /*pathnum*/1,/*npaths*/1,/*artificial_mate_p*/false,/*npaths_mate*/0,
1573 Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
1574 /*invert_mate_p*/false,/*supplementaryp*/false);
1575
1576 Filestring_set_split_output(fp,OUTPUT_UU);
1577 Simplepair_overlap_print_sam(fp,/*abbrev*/ABBREV_UNPAIRED_UNIQ,pairarray,npairs,
1578 acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,
1579 /*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
1580 /*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
1581 Stage3end_plusp(hit5),Stage3end_sensedir(hit5),
1582 quality_shift,/*first_read_p*/true,
1583 /*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
1584 #if 0
1585 Stage3pair_absmq_score(stage3pair),/*second_absmq*/0,
1586 #else
1587 /*absmq_score*/MAX_QUALITY_SCORE,/*second_absmq*/0,
1588 #endif
1589 chrpos_low,Stage3end_chrlength(hit5),/*queryseq*/NULL,flag,
1590 /*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
1591 stage3pair,hit5,sam_read_group_id);
1592
1593 if (quality_merged != NULL) {
1594 FREE_OUT(quality_merged);
1595 }
1596 FREE_OUT(queryseq_merged);
1597 FREE_OUT(pairarray);
1598 }
1599
1600 } else if (resulttype == CONCORDANT_TRANSLOC) {
1601 Filestring_set_split_output(fp,OUTPUT_CT);
1602 stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
1603
1604 if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths_report) {
1605 /* Print as nomapping, but send to fp_concordant_transloc */
1606 SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,queryseq1,acc1,acc2,chromosome_iit,resulttype,
1607 /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
1608 /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1609 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1610 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1611 SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,queryseq2,acc1,acc2,chromosome_iit,resulttype,
1612 /*first_read_p*/false,/*pathnum*/1,npaths_primary,npaths_altloc,
1613 /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1614 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1615 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1616
1617 } else {
1618 /* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
1619 /* Note: We are ignoring merge_overlap for concordant_transloc */
1620 for (pathnum = 1; pathnum <= npaths_primary + npaths_altloc && pathnum <= maxpaths_report; pathnum++) {
1621
1622 stage3pair = stage3pairarray[pathnum-1];
1623 hit5 = Stage3pair_hit5(stage3pair);
1624 hit3 = Stage3pair_hit3(stage3pair);
1625
1626 if (Stage3pair_circularp(stage3pair) == true) {
1627 /* Don't resolve overlaps on a circular alignment */
1628 clipdir = 0;
1629 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1630
1631 } else if (clip_overlap_p == false) {
1632 clipdir = 0;
1633 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1634
1635 } else {
1636 clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
1637 debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
1638 clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
1639 }
1640
1641 #if 0
1642 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
1643 hit5,querylength5,/*first_read_p*/true);
1644 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
1645 hit3,querylength3,/*first_read_p*/false);
1646 #endif
1647
1648 /* print first end */
1649 SAM_print(fp,fp_failedinput_1,ABBREV_CONCORDANT_TRANSLOC,stage3pair,
1650 hit5,querylength5,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1651 Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
1652 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1653 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
1654 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1655 /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
1656 /*mate*/hit3,/*mate_querylength*/querylength3,/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
1657 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1658 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1659
1660 /* print second end */
1661 SAM_print(fp,fp_failedinput_2,ABBREV_CONCORDANT_TRANSLOC,stage3pair,
1662 hit3,querylength3,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1663 Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
1664 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1665 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
1666 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1667 /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
1668 /*mate*/hit5,/*mate_querylength*/querylength5,/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
1669 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1670 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1671 }
1672 }
1673
1674 } else if (resulttype == CONCORDANT_MULT) {
1675 stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
1676
1677 if (omit_concordant_mult_p == true) {
1678 /* Skip printing */
1679 Filestring_set_split_output(fp,OUTPUT_NONE);
1680
1681 } else if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths_report) {
1682 /* Print as nomapping, but send to fp_concordant_mult_xs */
1683 Filestring_set_split_output(fp,OUTPUT_CX);
1684 SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,queryseq1,acc1,acc2,chromosome_iit,resulttype,
1685 /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
1686 /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1687 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1688 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1689 SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,queryseq2,acc1,acc2,chromosome_iit,resulttype,
1690 /*first_read_p*/false,/*pathnum*/1,npaths_primary,npaths_altloc,
1691 /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1692 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1693 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1694
1695 if (fp_failedinput_1 != NULL) {
1696 Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
1697
1698 }
1699
1700 } else {
1701 /* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
1702 Filestring_set_split_output(fp,OUTPUT_CM);
1703 for (pathnum = 1; pathnum <= npaths_primary + npaths_altloc && pathnum <= maxpaths_report; pathnum++) {
1704
1705 stage3pair = stage3pairarray[pathnum-1];
1706 hit5 = Stage3pair_hit5(stage3pair);
1707 hit3 = Stage3pair_hit3(stage3pair);
1708
1709 if (Stage3pair_circularp(stage3pair) == true) {
1710 /* Don't resolve overlaps on a circular alignment */
1711 clipdir = 0;
1712 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1713
1714 } else if (clip_overlap_p == false && merge_overlap_p == false) {
1715 clipdir = 0;
1716 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1717
1718 } else {
1719 clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
1720 debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
1721 clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
1722 }
1723
1724 #if 0
1725 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
1726 hit5,querylength5,/*first_read_p*/true);
1727 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
1728 hit3,querylength3,/*first_read_p*/false);
1729 #endif
1730
1731 if (merge_overlap_p == false || clipdir == 0) {
1732 /* print first end */
1733 SAM_print(fp,fp_failedinput_1,ABBREV_CONCORDANT_MULT,stage3pair,
1734 hit5,querylength5,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1735 Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
1736 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1737 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
1738 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1739 /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
1740 /*mate*/hit3,/*mate_querylength*/querylength3,/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
1741 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1742 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1743
1744 /* print second end */
1745 SAM_print(fp,fp_failedinput_2,ABBREV_CONCORDANT_MULT,stage3pair,
1746 hit3,querylength3,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1747 Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
1748 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1749 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
1750 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1751 /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
1752 /*mate*/hit5,/*mate_querylength*/querylength5,/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
1753 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1754 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1755
1756 } else {
1757 /* merge_overlap_p == true and overlap was found */
1758 pairarray = Stage3pair_merge(&npairs,&querylength_merged,&queryseq_merged,&quality_merged,
1759 stage3pair,queryseq1,queryseq2,
1760 /*querylength5*/Stage3end_querylength(hit5),
1761 /*querylength3*/Stage3end_querylength(hit3),
1762 clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
1763 /* printf("queryseq_merged: %s\n",queryseq_merged); */
1764 #if 0
1765 if (clipdir >= 0) {
1766 chrnum = chrnum5;
1767 chrpos_low = chrpos_low_5;
1768 } else {
1769 chrnum = chrnum3;
1770 chrpos_low = chrpos_low_3;
1771 }
1772 #else
1773 chrpos_low = Simplepair_genomicpos_low(/*hardclip_low*/0,/*hardclip_high*/0,pairarray,npairs,
1774 querylength_merged,/*plusp*/Stage3end_plusp(hit5),
1775 hide_soft_clips_p);
1776 #endif
1777
1778 /* merging changes resulttype from UNPAIRED_UNIQ to SINGLEEND_UNIQ */
1779 flag = compute_flag(Stage3end_plusp(hit5),/*mate*/NULL,/*mate_plusp*/true,
1780 /*resulttype*/SINGLEEND_UNIQ,/*first_read_p*/true,
1781 /*pathnum*/1,/*npaths*/1,/*artificial_mate_p*/false,/*npaths_mate*/0,
1782 Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
1783 /*invert_mate_p*/false,/*supplementaryp*/false);
1784
1785 Simplepair_overlap_print_sam(fp,ABBREV_CONCORDANT_MULT,pairarray,npairs,
1786 acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,
1787 /*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
1788 /*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
1789 Stage3end_plusp(hit5),Stage3end_sensedir(hit5),
1790 quality_shift,/*first_read_p*/true,pathnum,npaths_primary,npaths_altloc,
1791 #if 0
1792 Stage3pair_absmq_score(stage3pair),/*second_absmq*/0,
1793 #else
1794 /*absmq_score*/MAX_QUALITY_SCORE,/*second_absmq*/0,
1795 #endif
1796 chrpos_low,Stage3end_chrlength(hit5),/*queryseq*/NULL,flag,
1797 /*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
1798 stage3pair,hit5,sam_read_group_id);
1799
1800 if (quality_merged != NULL) {
1801 FREE_OUT(quality_merged);
1802 }
1803 FREE_OUT(queryseq_merged);
1804 FREE_OUT(pairarray);
1805 }
1806 }
1807 }
1808
1809 } else if (resulttype == PAIRED_UNIQ) {
1810 stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
1811 /* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
1812
1813 stage3pair = stage3pairarray[0];
1814 if (Stage3pair_circularp(stage3pair) == true) {
1815 Filestring_set_split_output(fp,OUTPUT_PC);
1816 abbrev = ABBREV_PAIRED_UNIQ_CIRCULAR;
1817 } else if ((pairtype = Stage3pair_determine_pairtype(stage3pair)) == PAIRED_INVERSION) {
1818 Filestring_set_split_output(fp,OUTPUT_PI);
1819 abbrev = ABBREV_PAIRED_UNIQ_INV;
1820 } else if (pairtype == PAIRED_SCRAMBLE) {
1821 Filestring_set_split_output(fp,OUTPUT_PS);
1822 abbrev = ABBREV_PAIRED_UNIQ_SCR;
1823 } else if (pairtype == PAIRED_TOOLONG) {
1824 Filestring_set_split_output(fp,OUTPUT_PL);
1825 abbrev = ABBREV_PAIRED_UNIQ_LONG;
1826 } else if (pairtype == CONCORDANT) {
1827 /* Possible when re-mapping to the transcriptome */
1828 Filestring_set_split_output(fp,OUTPUT_CU);
1829 abbrev = ABBREV_CONCORDANT_UNIQ;
1830 } else {
1831 fprintf(stderr,"Unexpected pairtype %d\n",pairtype);
1832 abort();
1833 }
1834
1835 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1836
1837 hit5 = Stage3pair_hit5(stage3pair);
1838 hit3 = Stage3pair_hit3(stage3pair);
1839 #if 0
1840 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
1841 hit5,querylength5,/*first_read_p*/true);
1842 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
1843 hit3,querylength3,/*first_read_p*/false);
1844 #endif
1845
1846 /* print first end */
1847 SAM_print(fp,fp_failedinput_1,abbrev,stage3pair,hit5,querylength5,
1848 acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
1849 Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
1850 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1851 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
1852 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1853 /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
1854 /*mate*/hit3,/*mate_querylength*/querylength3,/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
1855 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1856 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1857
1858 /* print second end */
1859 SAM_print(fp,fp_failedinput_2,abbrev,stage3pair,hit3,querylength3,
1860 acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
1861 Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
1862 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1863 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
1864 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1865 /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
1866 /*mate*/hit5,/*mate_querylength*/querylength5,/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
1867 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1868 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1869
1870 } else if (resulttype == PAIRED_MULT) {
1871 stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
1872
1873 if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths_report) {
1874 /* Print as nomapping, but send to fp_paired_mult */
1875 Filestring_set_split_output(fp,OUTPUT_PX);
1876 SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,queryseq1,acc1,acc2,chromosome_iit,resulttype,
1877 /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
1878 /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1879 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1880 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1881 SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,queryseq2,acc1,acc2,chromosome_iit,resulttype,
1882 /*first_read_p*/false,/*pathnum*/1,npaths_primary,npaths_altloc,
1883 /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1884 /*mate*/(Stage3end_T) NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
1885 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1886
1887 if (fp_failedinput_1 != NULL) {
1888 Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
1889 }
1890
1891 } else {
1892 /* Stage3pair_eval(stage3pairarray,npaths,maxpaths_report,queryseq1,queryseq2); */
1893 Filestring_set_split_output(fp,OUTPUT_PM);
1894 for (pathnum = 1; pathnum <= npaths_primary + npaths_altloc && pathnum <= maxpaths_report; pathnum++) {
1895
1896 stage3pair = stage3pairarray[pathnum-1];
1897 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1898
1899 hit5 = Stage3pair_hit5(stage3pair);
1900 hit3 = Stage3pair_hit3(stage3pair);
1901 #if 0
1902 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
1903 hit5,querylength5,/*first_read_p*/true);
1904 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
1905 hit3,querylength3,/*first_read_p*/false);
1906 #endif
1907
1908 /* print first end */
1909 SAM_print(fp,fp_failedinput_1,ABBREV_PAIRED_MULT,stage3pair,
1910 hit5,querylength5,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1911 Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
1912 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1913 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
1914 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1915 /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
1916 /*mate*/hit3,/*mate_querylength*/querylength3,/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
1917 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1918 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
1919
1920 /* print second end */
1921 SAM_print(fp,fp_failedinput_2,ABBREV_PAIRED_MULT,stage3pair,
1922 hit3,querylength3,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
1923 Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
1924 Stage3pair_mapq_score(stage3pair),chromosome_iit,
1925 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
1926 Stage3pair_pairlength(stage3pair),Stage3pair_relationship(stage3pair),
1927 /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
1928 /*mate*/hit5,/*mate_querylength*/querylength5,/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
1929 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
1930 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
1931 }
1932 }
1933
1934 } else if (resulttype == UNPAIRED_UNIQ) {
1935 /* Even though they are not related, we should print mate information in this situation */
1936 stage3array1 = (Stage3end_T *) Result_array(&npaths1_primary,&npaths1_altloc,&first_absmq1,&second_absmq1,result);
1937 stage3array2 = (Stage3end_T *) Result_array2(&npaths2_primary,&npaths2_altloc,&first_absmq2,&second_absmq2,result);
1938
1939 hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
1940
1941 hit5 = stage3array1[0];
1942 hit3 = stage3array2[0];
1943 #if 0
1944 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
1945 hit5,querylength5,/*first_read_p*/true);
1946 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
1947 hit3,querylength3,/*first_read_p*/false);
1948 #endif
1949
1950 if (Stage3end_circularpos(hit5) > 0 || Stage3end_circularpos(hit3) > 0) {
1951 Filestring_set_split_output(fp,OUTPUT_UC);
1952 abbrev = ABBREV_UNPAIRED_CIRCULAR;
1953 } else {
1954 Filestring_set_split_output(fp,OUTPUT_UU);
1955 abbrev = ABBREV_UNPAIRED_UNIQ;
1956 }
1957
1958 /* print first end */
1959 /* Stage3end_eval_and_sort(stage3array1,npaths1,maxpaths_report,queryseq1); */
1960 /* Previously set hardclips to be 0. Not sure why. */
1961 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,hit5,querylength5,
1962 acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
1963 Stage3end_absmq_score(stage3array1[0]),first_absmq1,/*second_absmq*/0,
1964 Stage3end_mapq_score(stage3array1[0]),chromosome_iit,
1965 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
1966 /*pairedlength*/0U,/*pair_relationship*/0,
1967 /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
1968 /*mate*/hit3,/*mate_querylength*/querylength3,/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
1969 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/1,quality_shift,sam_read_group_id,
1970 invert_first_p,invert_second_p);
1971
1972 /* Note: Do not act on add_paired_nomappers_p, since the two reads are artificially paired up already */
1973
1974 /* print second end */
1975 /* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
1976 /* Previously set hardclips to be 0. Not sure why. */
1977 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,hit3,querylength3,
1978 acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
1979 Stage3end_absmq_score(stage3array2[0]),first_absmq2,/*second_absmq*/0,
1980 Stage3end_mapq_score(stage3array2[0]),chromosome_iit,
1981 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
1982 /*pairedlength*/0U,/*pair_relationship*/0,
1983 /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
1984 /*mate*/hit5,/*mate_querylength*/querylength5,/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
1985 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/1,quality_shift,sam_read_group_id,
1986 invert_second_p,invert_first_p);
1987
1988 } else if (resulttype == UNPAIRED_MULT || resulttype == UNPAIRED_TRANSLOC) {
1989 if (resulttype == UNPAIRED_MULT) {
1990 if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report &&
1991 npaths2_primary + npaths2_altloc > maxpaths_report) {
1992 Filestring_set_split_output(fp,OUTPUT_UX);
1993 } else {
1994 Filestring_set_split_output(fp,OUTPUT_UM);
1995 }
1996 abbrev = ABBREV_UNPAIRED_MULT;
1997 } else {
1998 Filestring_set_split_output(fp,OUTPUT_UT);
1999 abbrev = ABBREV_UNPAIRED_TRANSLOC;
2000 }
2001
2002 stage3array1 = (Stage3end_T *) Result_array(&npaths1_primary,&npaths1_altloc,&first_absmq1,&second_absmq1,result);
2003 stage3array2 = (Stage3end_T *) Result_array2(&npaths2_primary,&npaths2_altloc,&first_absmq2,&second_absmq2,result);
2004
2005 #if 0
2006 /* Do eval and sorting first */
2007 if (npaths1 == 1) {
2008 /* Stage3end_eval_and_sort(stage3array1,npaths1,maxpaths_report,queryseq1); */
2009 } else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
2010 /* Don't sort */
2011 } else {
2012 /* Stage3end_eval_and_sort(stage3array1,npaths1,maxpaths_report,queryseq1); */
2013 }
2014 #endif
2015
2016 #if 0
2017 if (npaths2 == 1) {
2018 /* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
2019 } else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
2020 /* Don't sort */
2021 } else {
2022 /* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
2023 }
2024 #endif
2025
2026 if (add_paired_nomappers_p == true) {
2027 /* Artificially pair up results */
2028 if (npaths1_primary + npaths1_altloc > npaths2_primary + npaths2_altloc) {
2029 npaths_primary_max = npaths1_primary;
2030 npaths_altloc_max = npaths1_altloc;
2031 npaths_max = npaths1_primary + npaths1_altloc;
2032 } else {
2033 npaths_primary_max = npaths2_primary;
2034 npaths_altloc_max = npaths2_altloc;
2035 npaths_max = npaths2_primary + npaths2_altloc;
2036 }
2037 for (pathnum = 1; pathnum <= npaths1_primary + npaths1_altloc &&
2038 pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
2039 #if 0
2040 /* hardclip5_low = hardclip5_high = 0; */
2041 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
2042 /*stage3*/stage3array1[pathnum-1],
2043 querylength5,/*first_read_p*/true);
2044
2045 /* hardclip3_low = hardclip3_high = 0; */
2046 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2047 /*stage3*/stage3array2[pathnum-1],
2048 querylength3,/*first_read_p*/false);
2049 #endif
2050
2051 stage3 = stage3array1[pathnum-1];
2052 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2053 acc1,acc2,pathnum,npaths_primary_max,npaths_altloc_max,
2054 Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
2055 Stage3end_mapq_score(stage3),chromosome_iit,
2056 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2057 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2058 /*mate*/stage3array2[pathnum-1],/*mate_querylength*/querylength3,
2059 /*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2060 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2061 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2062
2063 stage3 = stage3array2[pathnum-1];
2064 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,stage3,querylength3,
2065 acc1,acc2,pathnum,npaths_primary_max,npaths_altloc_max,
2066 Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
2067 Stage3end_mapq_score(stage3),chromosome_iit,
2068 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2069 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2070 /*mate*/stage3array1[pathnum-1],/*mate_querylength*/querylength5,
2071 /*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2072 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2073 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2074 }
2075
2076 /* Print remaining results with non-mappers */
2077 if (npaths1_primary + npaths1_altloc > npaths2_primary + npaths2_altloc) {
2078 for ( ; pathnum <= npaths1_primary + npaths1_altloc && pathnum <= maxpaths_report; pathnum++) {
2079 stage3 = stage3array1[pathnum-1];
2080 #if 0
2081 /* hardclip5_low = hardclip5_high = 0; */
2082 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
2083 stage3,querylength5,/*first_read_p*/true);
2084 chrnum3 = 0;
2085 chrpos_low_3 = 0;
2086 #endif
2087
2088 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2089 acc1,acc2,pathnum,npaths_primary_max,npaths_altloc_max,
2090 Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
2091 Stage3end_mapq_score(stage3),chromosome_iit,
2092 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2093 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2094 /*mate*/NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2095 resulttype,/*first_read_p*/true,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
2096 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2097
2098 /* matching nomapper for second end */
2099 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,resulttype,
2100 /*first_read_p*/false,pathnum,npaths_primary_max,npaths_altloc_max,
2101 /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2102 /*mate*/stage3,/*mate_querylength*/querylength5,
2103 /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
2104 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2105 }
2106
2107 } else if (npaths2_primary + npaths2_altloc > npaths1_primary + npaths1_altloc) {
2108 for ( ; pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
2109 stage3 = stage3array2[pathnum-1];
2110 #if 0
2111 /* hardclip3_low = hardclip3_high = 0; */
2112 chrnum5 = 0;
2113 chrpos_low_5 = 0;
2114 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2115 stage3,querylength3,/*first_read_p*/false);
2116 #endif
2117
2118 /* matching nomapper for first end */
2119 SAM_print_nomapping(fp,abbrev,queryseq1,acc1,acc2,chromosome_iit,resulttype,
2120 /*first_read_p*/true,pathnum,npaths_primary_max,npaths_altloc_max,
2121 /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2122 /*mate*/stage3,/*mate_querylength*/querylength3,
2123 /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
2124 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2125
2126 SAM_print(fp,fp_failedinput_2,abbrev,/*stagepair*/NULL,stage3,querylength3,
2127 acc1,acc2,pathnum,npaths_primary_max,npaths_altloc_max,
2128 Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
2129 Stage3end_mapq_score(stage3),chromosome_iit,
2130 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2131 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2132 /*mate*/NULL,/*mate_querylength*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2133 resulttype,/*first_read_p*/false,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
2134 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2135 }
2136 }
2137
2138 } else {
2139 /* print first end results */
2140 if (npaths2_primary + npaths2_altloc == 0) {
2141 mate = (Stage3end_T) NULL;
2142 /* chrnum3 = 0; */
2143 /* chrpos_low_3 = 0U; */
2144 } else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
2145 mate = (Stage3end_T) NULL;
2146 /* chrnum3 = 0; */
2147 /* chrpos_low_3 = 0U; */
2148 } else {
2149 mate = stage3array2[0];
2150 hardclip3_low = hardclip3_high = 0;
2151 #if 0
2152 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2153 mate,querylength3,/*first_read_p*/false);
2154 #endif
2155 }
2156
2157 if (npaths1_primary + npaths1_altloc == 1) {
2158 stage3 = stage3array1[0];
2159 hardclip5_low = hardclip5_high = 0;
2160 #if 0
2161 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
2162 stage3,querylength5,/*first_read_p*/true);
2163 #endif
2164
2165 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2166 acc1,acc2,/*pathnum*/1,npaths1_primary,npaths1_altloc,
2167 Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
2168 Stage3end_mapq_score(stage3),chromosome_iit,
2169 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2170 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2171 mate,/*mate_querylength*/querylength3,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2172 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
2173 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2174
2175 } else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
2176 /* Just printing one end as nomapping */
2177 SAM_print_nomapping(fp,abbrev,queryseq1,acc1,acc2,chromosome_iit,resulttype,
2178 /*first_read_p*/true,/*pathnum*/1,npaths1_primary,npaths1_altloc,
2179 /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
2180 mate,/*mate_querylength*/querylength3,
2181 /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
2182 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2183
2184 } else {
2185 for (pathnum = 1; pathnum <= npaths1_primary + npaths1_altloc && pathnum <= maxpaths_report; pathnum++) {
2186 stage3 = stage3array1[pathnum-1];
2187 hardclip5_low = hardclip5_high = 0;
2188 #if 0
2189 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,
2190 stage3,querylength5,/*first_read_p*/true);
2191 #endif
2192
2193 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2194 acc1,acc2,pathnum,npaths1_primary,npaths1_altloc,
2195 Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
2196 Stage3end_mapq_score(stage3),chromosome_iit,
2197 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2198 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2199 mate,/*mate_querylength*/querylength3,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2200 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
2201 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2202 }
2203 }
2204
2205 /* print second end results */
2206 if (npaths1_primary + npaths1_altloc == 0) {
2207 mate = (Stage3end_T) NULL;
2208 /* chrnum5 = 0; */
2209 /* chrpos_low_5 = 0U; */
2210 } else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
2211 mate = (Stage3end_T) NULL;
2212 /* chrnum5 = 0;*/
2213 /* chrpos_low_5 = 0U; */
2214 } else {
2215 mate = stage3array1[0];
2216 hardclip5_low = hardclip5_high = 0;
2217 #if 0
2218 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
2219 mate,querylength5,/*first_read_p*/true);
2220 #endif
2221 }
2222
2223 if (npaths2_primary + npaths2_altloc == 1) {
2224 stage3 = stage3array2[0];
2225 hardclip3_low = hardclip3_high = 0;
2226 #if 0
2227 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2228 stage3,querylength3,/*first_read_p*/false);
2229 #endif
2230
2231 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,stage3,querylength3,
2232 acc1,acc2,/*pathnum*/1,npaths2_primary,npaths2_altloc,
2233 Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
2234 Stage3end_mapq_score(stage3),chromosome_iit,
2235 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2236 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2237 mate,/*mate_querylength*/querylength5,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2238 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
2239 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2240
2241 } else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
2242 /* Just printing one end as nomapping */
2243 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,resulttype,
2244 /*first_read_p*/false,/*pathnum*/1,npaths2_primary,npaths2_altloc,
2245 /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
2246 mate,/*mate_querylength*/querylength5,
2247 /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
2248 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2249
2250 } else {
2251 for (pathnum = 1; pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
2252 stage3 = stage3array2[pathnum-1];
2253 hardclip3_low = hardclip3_high = 0;
2254 #if 0
2255 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2256 stage3,querylength3,/*first_read_p*/false);
2257 #endif
2258
2259 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,stage3,querylength3,
2260 acc1,acc2,pathnum,npaths2_primary,npaths2_altloc,
2261 Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
2262 Stage3end_mapq_score(stage3),chromosome_iit,
2263 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2264 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2265 mate,/*mate_querylength*/querylength5,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2266 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
2267 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2268 }
2269 }
2270 }
2271
2272 } else {
2273 stage3array1 = (Stage3end_T *) Result_array(&npaths1_primary,&npaths1_altloc,&first_absmq1,&second_absmq1,result);
2274 stage3array2 = (Stage3end_T *) Result_array2(&npaths2_primary,&npaths2_altloc,&first_absmq2,&second_absmq2,result);
2275
2276 if (resulttype == HALFMAPPING_UNIQ) {
2277 if (npaths1_primary + npaths1_altloc == 1 && Stage3end_circularpos(stage3array1[0]) > 0) {
2278 Filestring_set_split_output(fp,OUTPUT_HC);
2279 abbrev = ABBREV_HALFMAPPING_CIRCULAR;
2280 } else if (npaths2_primary + npaths2_altloc == 1 && Stage3end_circularpos(stage3array2[0]) > 0) {
2281 Filestring_set_split_output(fp,OUTPUT_HC);
2282 abbrev = ABBREV_HALFMAPPING_CIRCULAR;
2283 } else {
2284 Filestring_set_split_output(fp,OUTPUT_HU);
2285 abbrev = ABBREV_HALFMAPPING_UNIQ;
2286 }
2287 } else if (resulttype == HALFMAPPING_TRANSLOC) {
2288 Filestring_set_split_output(fp,OUTPUT_HT);
2289 abbrev = ABBREV_HALFMAPPING_TRANSLOC;
2290 } else if (resulttype == HALFMAPPING_MULT) {
2291 if (quiet_if_excessive_p == true && npaths1_primary + npaths1_altloc > maxpaths_report && npaths2_primary + npaths2_altloc > maxpaths_report) {
2292 Filestring_set_split_output(fp,OUTPUT_HX);
2293 abbrev = ABBREV_HALFMAPPING_MULT_XS;
2294 } else {
2295 Filestring_set_split_output(fp,OUTPUT_HM);
2296 abbrev = ABBREV_HALFMAPPING_MULT;
2297 }
2298 } else {
2299 abort();
2300 }
2301
2302 #if 0
2303 /* Do eval and sorting first */
2304 if (npaths1 == 0) {
2305 /* Nothing to sort */
2306 } else if (npaths1 == 1) {
2307 /* Stage3end_eval_and_sort(stage3array1,npaths1,maxpaths_report,queryseq1); */
2308 } else if (quiet_if_excessive_p && npaths1 > maxpaths_report) {
2309 /* Don't sort */
2310 } else {
2311 /* Stage3end_eval_and_sort(stage3array1,npaths1,maxpaths_report,queryseq1); */
2312 }
2313 #endif
2314
2315 #if 0
2316 if (npaths2 == 0) {
2317 /* Nothing to sort */
2318 } else if (npaths2 == 1) {
2319 /* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
2320 } else if (quiet_if_excessive_p && npaths2 > maxpaths_report) {
2321 /* Don't sort */
2322 } else {
2323 /* Stage3end_eval_and_sort(stage3array2,npaths2,maxpaths_report,queryseq2); */
2324 }
2325 #endif
2326
2327
2328 /* print first end results */
2329 if (npaths2_primary + npaths2_altloc == 0) {
2330 mate = (Stage3end_T) NULL;
2331 /* chrnum3 = 0; */
2332 /* chrpos_low_3 = 0U; */
2333 } else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
2334 mate = (Stage3end_T) NULL;
2335 /* chrnum3 = 0; */
2336 /* chrpos_low_3 = 0U; */
2337 } else {
2338 mate = stage3array2[0];
2339 hardclip3_low = hardclip3_high = 0;
2340 #if 0
2341 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2342 mate,querylength3,/*first_read_p*/false);
2343 #endif
2344 }
2345
2346 if (npaths1_primary + npaths1_altloc == 0) {
2347 /* just printing one end as nomapping */
2348 /* mate should be non-NULL here */
2349 if (add_paired_nomappers_p == true) {
2350 /* Handle nomappers with each mapped mate */
2351 } else {
2352 SAM_print_nomapping(fp,abbrev,queryseq1,acc1,acc2,chromosome_iit,resulttype,
2353 /*first_read_p*/true,/*pathnum*/0,npaths1_primary,npaths1_altloc,
2354 /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
2355 mate,/*mate_querylength*/querylength3,
2356 /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
2357 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2358 }
2359
2360 } else if (npaths1_primary + npaths1_altloc == 1) {
2361 /* mate should be NULL here */
2362
2363 stage3 = stage3array1[0];
2364 hardclip5_low = hardclip5_high = 0;
2365 #if 0
2366 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
2367 stage3,querylength5,/*first_read_p*/true);
2368 #endif
2369
2370 if (add_paired_nomappers_p == true) {
2371 /* matching nomapper for second end */
2372 npaths_max = npaths1_primary + npaths1_altloc; /* since npaths2_primary + npaths2_altloc == 0 */
2373 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2374 acc1,acc2,/*pathnum*/1,npaths1_primary,npaths1_altloc,
2375 Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
2376 Stage3end_mapq_score(stage3),chromosome_iit,
2377 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2378 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2379 mate,/*mate_querylength*/querylength3,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2380 resulttype,/*first_read_p*/true,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
2381 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2382 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,
2383 resulttype,/*first_read_p*/false,/*pathnum*/1,npaths1_primary,npaths1_altloc,
2384 /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2385 /*mate*/stage3,/*mate_querylength*/querylength5,
2386 /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
2387 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2388 } else {
2389 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2390 acc1,acc2,/*pathnum*/1,npaths1_primary,npaths1_altloc,
2391 Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
2392 Stage3end_mapq_score(stage3),chromosome_iit,
2393 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2394 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2395 mate,/*mate_querylength*/querylength3,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2396 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
2397 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2398 }
2399
2400 } else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
2401 /* Just printing one end as nomapping */
2402 /* mate should be NULL here */
2403 if (add_paired_nomappers_p == true) {
2404 /* Handle nomappers with each mapped mate */
2405 } else {
2406 SAM_print_nomapping(fp,abbrev,queryseq1,acc1,acc2,chromosome_iit,resulttype,
2407 /*first_read_p*/true,/*pathnum*/1,npaths1_primary,npaths1_altloc,
2408 /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
2409 mate,/*mate_querylength*/querylength3,
2410 /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
2411 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2412 }
2413
2414 } else {
2415 /* mate should be NULL here */
2416 for (pathnum = 1; pathnum <= npaths1_primary + npaths1_altloc && pathnum <= maxpaths_report; pathnum++) {
2417 stage3 = stage3array1[pathnum-1];
2418 hardclip5_low = hardclip5_high = 0;
2419 #if 0
2420 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
2421 stage3,querylength5,/*first_read_p*/true);
2422 #endif
2423
2424 if (add_paired_nomappers_p == true) {
2425 /* matching nomapper for second end */
2426 npaths_max = npaths1_primary + npaths1_altloc; /* since npaths2 == 0 */
2427 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,resulttype,
2428 /*first_read_p*/false,pathnum,npaths1_primary,npaths1_altloc,
2429 /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2430 /*mate*/stage3,/*mate_querylength*/querylength5,
2431 /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
2432 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2433 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2434 acc1,acc2,pathnum,npaths1_primary,npaths1_altloc,
2435 Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
2436 Stage3end_mapq_score(stage3),chromosome_iit,
2437 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2438 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2439 mate,/*mate_querylength*/querylength3,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2440 resulttype,/*first_read_p*/true,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
2441 invert_first_p,invert_second_p);
2442
2443 } else {
2444 SAM_print(fp,fp_failedinput_1,abbrev,/*stage3pair*/NULL,stage3,querylength5,
2445 acc1,acc2,pathnum,npaths1_primary,npaths1_altloc,
2446 Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
2447 Stage3end_mapq_score(stage3),chromosome_iit,
2448 /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
2449 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2450 mate,/*mate_querylength*/querylength3,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2451 resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
2452 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2453 }
2454 }
2455 }
2456
2457 /* print second end results */
2458 if (npaths1_primary + npaths1_altloc == 0) {
2459 mate = (Stage3end_T) NULL;
2460 /* chrnum5 = 0; */
2461 /* chrpos_low_5 = 0U; */
2462 } else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
2463 mate = (Stage3end_T) NULL;
2464 /* chrnum5 = 0; */
2465 /* chrpos_low_5 = 0U; */
2466 } else {
2467 mate = stage3array1[0];
2468 hardclip5_low = hardclip5_high = 0;
2469 #if 0
2470 chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
2471 mate,querylength5,/*first_read_p*/true);
2472 #endif
2473 }
2474
2475 if (npaths2_primary + npaths2_altloc == 0) {
2476 /* Just printing one end as nomapping */
2477 /* mate should be non-NULL here */
2478 if (add_paired_nomappers_p == true) {
2479 /* Handle nomappers with each mapped mate */
2480 } else {
2481 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,resulttype,
2482 /*first_read_p*/false,/*pathnum*/0,npaths2_primary,npaths2_altloc,
2483 /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
2484 mate,/*mate_querylength*/querylength5,
2485 /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
2486 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2487 }
2488
2489 } else if (npaths2_primary + npaths2_altloc == 1) {
2490 /* mate should be NULL here */
2491
2492 stage3 = stage3array2[0];
2493 hardclip3_low = hardclip3_high = 0;
2494 #if 0
2495 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2496 stage3,querylength3,/*first_read_p*/false);
2497 #endif
2498
2499 if (add_paired_nomappers_p == true) {
2500 /* matching nomapper for first end */
2501 npaths_max = npaths2_primary + npaths2_altloc; /* since npaths1_primary + npaths1_altloc == 0 */
2502 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,resulttype,
2503 /*first_read_p*/true,/*pathnum*/1,npaths2_primary,npaths2_altloc,
2504 /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2505 /*mate*/stage3,/*mate_querylength*/querylength3,
2506 /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
2507 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2508 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,stage3,querylength3,
2509 acc1,acc2,/*pathnum*/1,npaths2_primary,npaths2_altloc,
2510 Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
2511 Stage3end_mapq_score(stage3),chromosome_iit,
2512 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2513 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2514 mate,/*mate_querylength*/querylength5,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2515 resulttype,/*first_read_p*/false,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
2516 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2517
2518 } else {
2519 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,stage3,querylength3,
2520 acc1,acc2,/*pathnum*/1,npaths2_primary,npaths2_altloc,
2521 Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
2522 Stage3end_mapq_score(stage3),chromosome_iit,
2523 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2524 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2525 mate,/*mate_querylength*/querylength5,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2526 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
2527 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2528 }
2529
2530 } else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
2531 /* Just printing one end as nomapping */
2532 /* mate should be NULL here */
2533 if (add_paired_nomappers_p == true) {
2534 /* Handle nomappers with each mapped mate */
2535 } else {
2536 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,resulttype,
2537 /*first_read_p*/false,/*pathnum*/1,npaths2_primary,npaths2_altloc,
2538 /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
2539 mate,/*mate_querylength*/querylength5,
2540 /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
2541 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2542 }
2543
2544 } else {
2545 /* mate should be NULL here */
2546 for (pathnum = 1; pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
2547 stage3 = stage3array2[pathnum-1];
2548 hardclip3_low = hardclip3_high = 0;
2549 #if 0
2550 chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
2551 stage3,querylength3,/*first_read_p*/false);
2552 #endif
2553
2554 if (add_paired_nomappers_p == true) {
2555 /* matching nomapper for first end */
2556 npaths_max = npaths2_primary + npaths2_altloc; /* since npaths1_primary + npaths1_altloc == 0 */
2557 SAM_print_nomapping(fp,abbrev,queryseq2,acc1,acc2,chromosome_iit,resulttype,
2558 /*first_read_p*/true,pathnum,npaths2_primary,npaths2_altloc,
2559 /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
2560 /*mate*/stage3,/*mate_querylength*/querylength3,
2561 /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
2562 quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
2563 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,stage3,querylength3,
2564 acc1,acc2,pathnum,npaths2_primary,npaths2_altloc,
2565 Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
2566 Stage3end_mapq_score(stage3),chromosome_iit,
2567 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2568 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2569 mate,/*mate_querylength*/querylength5,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2570 resulttype,/*first_read_p*/false,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
2571 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2572
2573 } else {
2574 SAM_print(fp,fp_failedinput_2,abbrev,/*stage3pair*/NULL,stage3,querylength3,
2575 acc1,acc2,pathnum,npaths2_primary,npaths2_altloc,
2576 Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
2577 Stage3end_mapq_score(stage3),chromosome_iit,
2578 /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
2579 /*pairedlength*/0U,/*pair_relationship*/0,/*hardclip_low*/0,/*hardclip_high*/0,
2580 mate,/*mate_querylength*/querylength5,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
2581 resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
2582 quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
2583 }
2584 }
2585 }
2586
2587 }
2588 }
2589
2590 return;
2591 }
2592
2593