1 static char rcsid[] = "$Id: concordance.c 222730 2020-05-29 13:10:09Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "concordance.h"
7 
8 #include <stdlib.h>		/* For qsort */
9 #include <stdio.h>
10 #include <string.h>
11 #include <strings.h>
12 
13 #include "assert.h"
14 #include "mem.h"
15 #include "univdiag.h"
16 #include "univdiagdef.h"
17 #include "transcript.h"
18 #include "stage3hrdef.h"
19 
20 
21 static int subopt_levels;
22 
23 static Chrpos_T pairmax_transcriptome;
24 static Chrpos_T pairmax_linear;	/* For two ends that both lack a splice */
25 static Chrpos_T pairmax_circular;
26 
27 static Chrpos_T expected_pairlength;
28 static Chrpos_T pairlength_deviation;
29 static Chrpos_T adjacent_pairlength; /* For two ends, one of which has a splice */
30 
31 static bool *circularp;
32 static bool merge_samechr_p;
33 
34 
35 /* #define MAX_HITS 1000 */
36 #define MAX_OVERLAP 5
37 
38 #define T Stage3end_T
39 
40 #ifdef DEBUG
41 #define debug(x) x
42 #else
43 #define debug(x)
44 #endif
45 
46 /* Details */
47 #ifdef DEBUG1
48 #define debug1(x) x
49 #else
50 #define debug1(x)
51 #endif
52 
53 
54 
55 #if 0
56 /* Previously called by stage1hr.c, but now we use Transcript_concordance */
57 /* Since we haven't called merge procedure yet, we can just look at the Intlist_head */
58 List_T
59 Stage3hr_filter_concordant_tr (List_T *disjoint, List_T hits, List_T mates,
60 			       Hitlistpool_T hitlistpool) {
61   List_T common = NULL, p, q;
62   T hit, mate;
63   int trnum;
64   Chrpos_T start1, end2;
65   bool concordantp;
66 
67   *disjoint = (List_T) NULL;
68   for (p = hits; p != NULL; p = List_next(p)) {
69     hit = (T) List_head(p);
70     trnum = Intlist_head(hit->trnums);
71     concordantp = false;
72 
73     for (q = mates; q != NULL; q = List_next(q)) {
74       mate = (T) List_head(q);
75       if (Intlist_head(mate->trnums) == trnum) {
76 	start1 = (Chrpos_T) Intlist_head(hit->trstarts);
77 	end2 = (Chrpos_T) Intlist_head(mate->trends);
78 	if (start1 < end2) {
79 	  if (end2 <= start1 + pairmax_transcriptome) {
80 	    concordantp = true;
81 	  }
82 	} else {
83 	  if (start1 <= end2 + pairmax_transcriptome) {
84 	    concordantp = true;
85 	  }
86 	}
87       }
88     }
89 
90     if (concordantp == true) {
91       debug7(printf("trnum %d is concordant\n",trnum));
92       common = Hitlist_push(common,hitlistpool,(void *) hit);
93     } else {
94       debug7(printf("trnum %d is discordant\n",trnum));
95       *disjoint = Hitlist_push(*disjoint,hitlistpool,(void *) hit);
96     }
97   }
98 
99   debug7(printf("Returning %d common hits\n",List_length(common)));
100   return common;
101 }
102 #endif
103 
104 
105 #ifdef DEBUG
106 static char *
print_sense(int sense)107 print_sense (int sense) {
108   if (sense == SENSE_NULL) {
109     return "sense:null";
110   } else if (sense == SENSE_ANTI) {
111     return "sense:anti";
112   } else if (sense == SENSE_FORWARD) {
113     return "sense:fwd";
114   } else {
115     abort();
116   }
117 }
118 #endif
119 
120 
121 #if 0
122 static int
123 do_transcriptome_plus (int *concordant_score_overall, List_T *hitpairs,
124 		       T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
125 		       int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
126 		       Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
127 		       Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
128 #if 0
129 		       char *queryuc_ptr_5, char *queryuc_ptr_3,
130 		       Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
131 		       Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
132 #endif
133 		       Listpool_T listpool, Hitlistpool_T hitlistpool) {
134 
135   int nconcordant = 0;
136   int i, j;
137   Stage3pair_T stage3pair;
138   T hit5, hit3;
139   int min_insertlength;
140   int score;
141   Chrnum_T chrnum;
142 
143   if (nhits5 > 0 && nhits3 > 0) {
144     i = j = 0;
145     while (i < nhits5) {
146       hit5 = hits5[i];
147       chrnum = hit5->effective_chrnum;
148 
149 #ifdef DEBUG
150       printf("plus/plus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
151 	     i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
152 	     print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5->circularalias,hit5);
153       if (j >= 0 && j < nhits3) {
154 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
155 	       hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
156       }
157       printf("\n");
158 #endif
159 
160       while (j >= 0 && hits3[j]->effective_chrnum == chrnum) {
161 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
162 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
163 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
164 	j--;
165       }
166       j++;		/* Finish backup */
167 
168       /* No need for advance */
169 
170       while (j < nhits3 && hits3[j]->effective_chrnum == chrnum) {
171 	debug(printf("  samechr: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
172 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
173 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
174 	hit3 = hits3[j];
175 
176 	if (Transcript_intersect_p(&min_insertlength,hit5->transcripts,hit3->transcripts) == false) {
177 	  debug(printf(" => transcripts do not intersect:\n"));
178 	  debug(Transcript_print_nums(hit5->transcripts));
179 	  debug(Transcript_print_nums(hit5->transcripts));
180 	  debug(printf("\n"));
181 
182 	} else {
183 	  debug(printf(" => concordant transcriptome with min_insertlength %d\n",min_insertlength));
184 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
185 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
186 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
187 #if 0
188 					   queryuc_ptr_5,queryuc_ptr_3,
189 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
190 #endif
191 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/true)) != NULL) {
192 	    stage3pair->insertlength = min_insertlength;
193 
194 	    /* No need to update found_score */
195 	    debug(printf(" Have new pair with scores %d + %d\n",stage3pair->hit5->ref_score_overall,stage3pair->hit3->ref_score_overall));
196 
197 	    if ((score = (stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall)) < *concordant_score_overall) {
198 	      *concordant_score_overall = score;
199 	      assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
200 	      assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
201 	      debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
202 			   *concordant_score_overall,stage3pair->hit5->ref_score_overall,stage3pair->hit3->ref_score_overall));
203 	    }
204 	    *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
205 	    nconcordant++;
206 	  }
207 	}
208 	debug(printf("\n"));
209 
210 	j++;
211       }
212       j--;		/* Finish advance */
213 
214       i++;
215     }
216   }
217 
218   return nconcordant;
219 }
220 #endif
221 
222 
223 #if 0
224 static int
225 do_transcriptome_minus (int *concordant_score_overall, List_T *hitpairs,
226 			T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
227 			int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
228 			Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
229 			Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
230 #if 0
231 			char *queryuc_ptr_5, char *queryuc_ptr_3,
232 			Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
233 			Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
234 #endif
235 			Listpool_T listpool, Hitlistpool_T hitlistpool) {
236 
237   int nconcordant = 0;
238   int i, j;
239   Stage3pair_T stage3pair;
240   T hit5, hit3;
241   int min_insertlength;
242   int score;
243   Chrnum_T chrnum;
244 
245   if (nhits3 > 0 && nhits5 > 0) {
246     i = j = 0;
247     while (i < nhits3) {
248       hit3 = hits3[i];
249       chrnum = hit3->effective_chrnum;
250 
251 #ifdef DEBUG
252       printf("minus/minus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
253 	     i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
254 	     print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3->circularalias,hit3);
255       if (j >= 0 && j < nhits5) {
256 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
257 	       hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
258       }
259       printf("\n");
260 #endif
261 
262       while (j >= 0 && hits5[j]->effective_chrnum == chrnum) {
263 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
264 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
265 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
266 	j--;
267       }
268       j++;			/* Finish backup */
269 
270       /* No need for advance */
271 
272       while (j < nhits5 && hits5[j]->effective_chrnum == chrnum) {
273 	debug(printf("  samechr: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
274 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
275 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
276 	hit5 = hits5[j];
277 
278 	if (Transcript_intersect_p(&min_insertlength,hit5->transcripts,hit3->transcripts) == false) {
279 	  debug(printf(" => transcripts do not intersect:\n"));
280 	  debug(Transcript_print_nums(hit5->transcripts));
281 	  debug(Transcript_print_nums(hit5->transcripts));
282 	  debug(printf("\n"));
283 
284 	} else {
285 	  debug(printf(" => concordant transcriptome with min_insertlength %d\n",min_insertlength));
286 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
287 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
288 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
289 #if 0
290 					   queryuc_ptr_5,queryuc_ptr_3,
291 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
292 #endif
293 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/true)) != NULL) {
294 	    stage3pair->insertlength = min_insertlength;
295 
296 	    if ((score = (stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall)) < *concordant_score_overall) {
297 	      *concordant_score_overall = score;
298 	      assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
299 	      assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
300 	      debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
301 			   *concordant_score_overall,stage3pair->hit5->ref_score_overall,stage3pair->hit3->ref_score_overall));
302 	    }
303 	    *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
304 	    nconcordant++;
305 	  }
306 	}
307 	debug(printf("\n"));
308 
309 	j++;
310       }
311       j--;		/* Finish advance */
312 
313       i++;
314     }
315   }
316 
317   return nconcordant;
318 }
319 #endif
320 
321 
322 #if 0
323 List_T
324 Concordance_pair_up_transcriptome (bool *abort_pairing_p, int *concordant_score_overall, List_T hitpairs,
325 
326 				   Ladder_T ladder5_plus, Ladder_T ladder5_minus,
327 				   Ladder_T ladder3_plus, Ladder_T ladder3_minus,
328 				   int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
329 				   Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
330 				   Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
331 #if 0
332 				   char *queryuc_ptr_5, char *queryuc_ptr_3,
333 				   Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
334 				   Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
335 #endif
336 				   Listpool_T listpool, Hitlistpool_T hitlistpool,
337 				   int maxpairedpaths, int genestrand) {
338   int nconcordant = 0, n;  /* was List_length(hitpairs), but this hurts minus if there are too many plus hits */
339 
340   T *hits5, *hits3;
341   int nhits5, nhits3;
342   int max_frontier_score, frontier_score, cutoff_level, level, score5, score3;
343 
344 
345   debug(printf("Starting Concordance_pair_up_transcriptome\n"));
346 
347   /* Initial value, which resets to frontier_score + subopt_levels upon our first hitpair */
348   cutoff_level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(ladder3_plus);
349   if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
350     cutoff_level = level;
351   }
352   max_frontier_score = cutoff_level;
353 
354   frontier_score = 0;
355   while (*abort_pairing_p == false && frontier_score <= cutoff_level) {
356     debug(printf("frontier_score = %d\n",frontier_score));
357     for (score5 = 0; score5 <= frontier_score; score5++) {
358       score3 = frontier_score - score5;
359       debug(printf("score5 = %d, score3 = %d\n",score5,score3));
360 
361       /* plus/plus: hits5_plus against hits3_plus (really on minus) */
362       if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(ladder3_plus)) {
363 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
364 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
365 	debug(printf("at score %d+%d, nhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
366 
367 	if ((n = do_transcriptome_plus(&(*concordant_score_overall),&hitpairs,
368 				       hits5,nhits5,hits3,nhits3,genestrand,sensedir,
369 				       mismatch_positions_alloc_5,mismatch_positions_alloc_3,
370 				       query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
371 #if 0
372 				       queryuc_ptr_5,queryuc_ptr_3,
373 				       pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
374 #endif
375 				       listpool,hitlistpool)) > 0) {
376 	  if (nconcordant == 0) {
377 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
378 	      cutoff_level = max_frontier_score;
379 	    }
380 	  }
381 	  if ((nconcordant += n) > maxpairedpaths) {
382 	    debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
383 	    *abort_pairing_p = true;
384 	  }
385 	}
386       }
387 
388       /* minus/minus: hits3_minus (really on plus) against hits5_minus */
389       if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(ladder3_minus)) {
390 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
391 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
392 	debug(printf("at score %d+%d, nhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
393 	if ((n = do_transcriptome_minus(&(*concordant_score_overall),&hitpairs,
394 					hits5,nhits5,hits3,nhits3,genestrand,sensedir,
395 					mismatch_positions_alloc_5,mismatch_positions_alloc_3,
396 					query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
397 #if 0
398 					queryuc_ptr_5,queryuc_ptr_3,
399 					pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
400 #endif
401 					listpool,hitlistpool)) > 0) {
402 	  if (nconcordant == 0) {
403 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
404 	      cutoff_level = max_frontier_score;
405 	    }
406 	  }
407 	  if ((nconcordant += n) > maxpairedpaths) {
408 	    debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
409 	    *abort_pairing_p = true;
410 	  }
411 	}
412       }
413     }
414 
415     frontier_score++;
416   }
417 
418   debug(printf("Finished with Concordance_pair_up_transcriptome: %d concordant\n",List_length(hitpairs)));
419 
420   return hitpairs;
421 }
422 #endif
423 
424 
425 static int
do_genome_plus(int * adjacent_score,int * concordant_score_overall,int * concordant_score_within_trims,List_T * local_hitpairs,List_T * distant_hitpairs,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)426 do_genome_plus (int *adjacent_score, int *concordant_score_overall, int *concordant_score_within_trims,
427 		List_T *local_hitpairs, List_T *distant_hitpairs, T *hits5, int nhits5, T *hits3, int nhits3,
428 		int genestrand, int sensedir, int querylength5, int querylength3,
429 		int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
430 		Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
431 		Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
432 #if 0
433 		char *queryuc_ptr_5, char *queryuc_ptr_3,
434 		Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
435 		Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
436 #endif
437 		Listpool_T listpool, Hitlistpool_T hitlistpool) {
438 
439   int nconcordant = 0, noverlap;
440   int i, j;
441   Stage3pair_T stage3pair;
442   T hit5, hit3;
443   Univcoord_T insert_start;
444   int score;
445   int pairmax;
446 
447   *concordant_score_within_trims = querylength5 + querylength3;
448 
449   if (nhits5 > 0 && nhits3 > 0) {
450     i = j = 0;
451     while (i < nhits5) {
452       hit5 = hits5[i];
453       if (circularp[hit5->effective_chrnum] == true) {
454 	pairmax = pairmax_circular;
455       } else {
456 	pairmax = pairmax_linear;
457       }
458 
459       insert_start = hit5->genomicend - querylength5;
460 #ifdef DEBUG
461       printf("plus/plus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
462 	     i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
463 	     print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5->circularalias,hit5);
464       if (j >= 0 && j < nhits3) {
465 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
466 	       hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
467       }
468       printf("\n");
469 #endif
470 
471       while (j >= 0 &&
472 	     hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax > insert_start) {
473 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
474 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
475 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
476 	j--;
477       }
478       j++;		/* Finish backup */
479 
480       while (j < nhits3 &&
481 	     hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax <= insert_start) {
482 	debug(printf("  advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
483 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
484 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
485 	j++;
486       }
487 
488       noverlap = 0;
489       while (noverlap++ < MAX_OVERLAP && j < nhits3 && hits3[j]->genomicstart + querylength3 <= pairmax + insert_start) {
490 	debug(printf("  overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
491 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
492 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
493 	hit3 = hits3[j];
494 
495 	if (hit5->effective_chrnum != hit3->effective_chrnum) {
496 	  debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
497 
498 	} else if (SENSE_INCONSISTENT_P(hit5->sensedir_for_concordance,hit3->sensedir_for_concordance)) {
499 	  /* Use sensedir_for_concordance here and not sensedir */
500 	  debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
501 
502 	} else if (hit3->genomicend < hit5->genomicstart) {
503 	  debug(printf(" => scramble because end3 %llu < start5 %llu\n",
504 		       (unsigned long long) hit3->genomicend,(unsigned long long) hit5->genomicstart));
505 
506 	} else {
507 	  debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
508 		       hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
509 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
510 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
511 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
512 #if 0
513 					   queryuc_ptr_5,queryuc_ptr_3,
514 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
515 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
516 #endif
517 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
518 	    if (Stage3pair_distant_splice_p(stage3pair) == true) {
519 	      *distant_hitpairs = Hitlist_push(*distant_hitpairs,hitlistpool,(void *) stage3pair);
520 	    } else {
521 	      if ((score = stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall) < *concordant_score_overall) {
522 		debug(printf(" => Updating concordant_score_overall to be %d\n",score));
523 		*concordant_score_overall = score;
524 	      }
525 	      if ((score = stage3pair->hit5->refalt_score_within_trims + stage3pair->hit3->refalt_score_within_trims) < *concordant_score_within_trims) {
526 		*concordant_score_within_trims = score;
527 		assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
528 		assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
529 		debug(printf(" => Updating concordant_score_within_trims to be %d = %d + %d\n",
530 			     *concordant_score_within_trims,stage3pair->hit5->refalt_score_within_trims,
531 			     stage3pair->hit3->refalt_score_within_trims));
532 	      }
533 	      if (stage3pair->insertlength <= adjacent_pairlength && score < *adjacent_score) {
534 		*adjacent_score = score;
535 	      }
536 	      *local_hitpairs = Hitlist_push(*local_hitpairs,hitlistpool,(void *) stage3pair);
537 	      nconcordant++;
538 	    }
539 	  }
540 	}
541 	debug(printf("\n"));
542 
543 	j++;
544       }
545       j--;		/* Finish advance */
546 
547       i++;
548     }
549   }
550 
551   return nconcordant;
552 }
553 
554 
555 
556 static int
do_genome_minus(int * adjacent_score,int * concordant_score_overall,int * concordant_score_within_trims,List_T * local_hitpairs,List_T * distant_hitpairs,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)557 do_genome_minus (int *adjacent_score, int *concordant_score_overall, int *concordant_score_within_trims,
558 		 List_T *local_hitpairs, List_T *distant_hitpairs, T *hits5, int nhits5, T *hits3, int nhits3,
559 		 int genestrand, int sensedir, int querylength5, int querylength3,
560 		 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
561 		 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
562 		 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
563 #if 0
564 		 char *queryuc_ptr_5, char *queryuc_ptr_3,
565 		 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
566 		 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
567 #endif
568 		 Listpool_T listpool, Hitlistpool_T hitlistpool) {
569 
570   *concordant_score_within_trims = querylength5 + querylength3;
571 
572   int nconcordant = 0, noverlap;
573   int i, j;
574   Stage3pair_T stage3pair;
575   T hit5, hit3;
576   Univcoord_T insert_start;
577   int score;
578   int pairmax;
579 
580 
581   if (nhits3 > 0 && nhits5 > 0) {
582     i = j = 0;
583     while (i < nhits3) {
584       hit3 = hits3[i];
585       if (circularp[hit3->effective_chrnum] == true) {
586 	pairmax = pairmax_circular;
587       } else {
588 	pairmax = pairmax_linear;
589       }
590       insert_start = hit3->genomicstart - querylength3;
591 #ifdef DEBUG
592       printf("minus/minus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
593 	     i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
594 	     print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3->circularalias,hit3);
595       if (j >= 0 && j < nhits5) {
596 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
597 	       hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
598       }
599       printf("\n");
600 #endif
601 
602       while (j >= 0 &&
603 	     hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax > insert_start) {
604 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
605 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
606 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
607 	j--;
608       }
609       j++;			/* Finish backup */
610 
611       while (j < nhits5 &&
612 	     hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax <= insert_start) {
613 	debug(printf("  advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
614 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
615 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
616 	j++;
617       }
618 
619       noverlap = 0;
620       while (noverlap++ < MAX_OVERLAP && j < nhits5 && hits5[j]->genomicend + querylength5 <= pairmax + insert_start) {
621 	debug(printf("  overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
622 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
623 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
624 	hit5 = hits5[j];
625 
626 	if (hit3->effective_chrnum != hit5->effective_chrnum) {
627 	  debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
628 
629 	} else if (SENSE_INCONSISTENT_P(hit3->sensedir_for_concordance,hit5->sensedir_for_concordance)) {
630 	  /* Use sensedir_for_concordance here and not sensedir */
631 	  debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
632 
633 	} else if (hit5->genomicstart < hit3->genomicend) {
634 	  debug(printf(" => scramble because start5 %llu < end3 %llu\n",
635 		       (unsigned long long) hit5->genomicstart,(unsigned long long) hit3->genomicend));
636 
637 	} else {
638 	  debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
639 		       hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
640 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
641 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
642 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
643 #if 0
644 					   queryuc_ptr_5,queryuc_ptr_3,
645 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
646 #endif
647 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
648 	    if (Stage3pair_distant_splice_p(stage3pair) == true) {
649 	      *distant_hitpairs = Hitlist_push(*distant_hitpairs,hitlistpool,(void *) stage3pair);
650 	    } else {
651 	      if ((score = stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall) < *concordant_score_overall) {
652 		debug(printf(" => Updating concordant_score_overall to be %d\n",score));
653 		*concordant_score_overall = score;
654 	      }
655 	      if ((score = stage3pair->hit5->refalt_score_within_trims + stage3pair->hit3->refalt_score_within_trims) < *concordant_score_within_trims) {
656 		*concordant_score_within_trims = score;
657 		assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
658 		assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
659 		debug(printf(" => Updating concordant_score_within_trims to be %d = %d + %d\n",
660 			     *concordant_score_within_trims,stage3pair->hit5->refalt_score_within_trims,
661 			     stage3pair->hit3->refalt_score_within_trims));
662 	      }
663 	      if (stage3pair->insertlength <= adjacent_pairlength && score < *adjacent_score) {
664 		*adjacent_score = score;
665 	      }
666 	      *local_hitpairs = Hitlist_push(*local_hitpairs,hitlistpool,(void *) stage3pair);
667 	      nconcordant++;
668 	    }
669 	  }
670 	}
671 	debug(printf("\n"));
672 
673 	j++;
674       }
675       j--;		/* Finish advance */
676 
677       i++;
678     }
679   }
680 
681   return nconcordant;
682 }
683 
684 
685 
686 /* Finds concordant pairs if nconcordant is 0 */
687 List_T
Concordance_pair_up_genome(bool * abort_pairing_p,int * adjacent_score,int * concordant_score_overall,List_T * distant_hitpairs,List_T hitpairs,List_T hitlist5_gplus,List_T hitlist5_gminus,List_T hitlist3_gplus,List_T hitlist3_gminus,Ladder_T ladder5_plus,Ladder_T ladder5_minus,Ladder_T ladder3_plus,Ladder_T ladder3_minus,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool,int maxpairedpaths,int genestrand,int sensedir)688 Concordance_pair_up_genome (bool *abort_pairing_p, int *adjacent_score, int *concordant_score_overall,
689 			    List_T *distant_hitpairs, List_T hitpairs,
690 
691 			    List_T hitlist5_gplus, List_T hitlist5_gminus,
692 			    List_T hitlist3_gplus, List_T hitlist3_gminus,
693 
694 			    Ladder_T ladder5_plus, Ladder_T ladder5_minus,
695 			    Ladder_T ladder3_plus, Ladder_T ladder3_minus,
696 			    int querylength5, int querylength3,
697 
698 			    int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
699 			    Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
700 			    Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
701 
702 #if 0
703 			    char *queryuc_ptr_5, char *queryuc_ptr_3,
704 			    Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
705 			    Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
706 #endif
707 			    Listpool_T listpool, Hitlistpool_T hitlistpool,
708 			    int maxpairedpaths, int genestrand, int sensedir) {
709   int nconcordant = 0, n; /* Also used as an indicator of when to reset cutoff_level */
710   Ladder_T newladder5_plus, newladder5_minus, newladder3_plus, newladder3_minus;
711 
712   T *hits5, *hits3;
713   int nhits5, nhits3;
714 
715   int frontier_score, score5, score3;
716   int concordant_score_within_trims;
717   int cutoff_level, level;
718 
719 
720   debug(printf("Starting Concordance_pair_up_genome\n"));
721 
722 #if 0
723   /* Rely instead on Ladder_cutoff, in case there are a few good hits */
724   if (0 && List_length(hitlist5_gplus) > MAX_HITS) {
725     Stage3end_gc(hitlist5_gplus);
726     newladder5_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
727   } else {
728     newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
729     Ladder_merge(ladder5_plus,newladder5_plus);
730   }
731 
732   if (0 && List_length(hitlist5_gminus) > MAX_HITS) {
733     Stage3end_gc(hitlist5_gminus);
734     newladder5_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
735   } else {
736     newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
737     Ladder_merge(ladder5_minus,newladder5_minus);
738   }
739 
740   if (0 && List_length(hitlist3_gplus) > MAX_HITS) {
741     Stage3end_gc(hitlist3_gplus);
742     newladder3_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
743   } else {
744     newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
745     Ladder_merge(ladder3_plus,newladder3_plus);
746   }
747 
748   if (0 && List_length(hitlist3_gminus) > MAX_HITS) {
749     Stage3end_gc(hitlist3_gminus);
750     newladder3_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
751   } else {
752     newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
753     Ladder_merge(ladder3_minus,newladder3_minus);
754   }
755 
756 #else
757   newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
758   newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
759   newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
760   newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
761 
762   Ladder_merge(ladder5_plus,newladder5_plus,hitlistpool);
763   Ladder_merge(ladder5_minus,newladder5_minus,hitlistpool);
764   Ladder_merge(ladder3_plus,newladder3_plus,hitlistpool);
765   Ladder_merge(ladder3_minus,newladder3_minus,hitlistpool);
766 #endif
767 
768 #ifdef DEBUG
769   Ladder_dump_nhits(ladder5_plus);
770   Ladder_dump_nhits(ladder5_minus);
771   Ladder_dump_nhits(ladder3_plus);
772   Ladder_dump_nhits(ladder3_minus);
773 #endif
774 
775 
776   /* Initial value, which resets to frontier_score + subopt_levels upon our first hitpair */
777   cutoff_level = Ladder_cutoff(newladder5_plus) + Ladder_cutoff(ladder3_plus);
778   if ((level = Ladder_cutoff(newladder5_minus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
779     cutoff_level = level;
780   }
781   if ((level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(newladder3_plus)) > cutoff_level) {
782     cutoff_level = level;
783   }
784   if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(newladder3_minus)) > cutoff_level) {
785     cutoff_level = level;
786   }
787   /* max_frontier_score = cutoff_level; */
788 
789   frontier_score = 0;
790   /* Eliminating check on frontier_score against *found_score, because too greedy and can miss correct result */
791   while (*abort_pairing_p == false && frontier_score <= cutoff_level) {
792     debug(printf("frontier_score = %d, cutoff_level %d\n",frontier_score,cutoff_level));
793     for (score5 = 0; score5 <= frontier_score; score5++) {
794       score3 = frontier_score - score5;
795       debug(printf("score5 = %d, score3 = %d\n",score5,score3));
796 
797       /* New hits 5 vs all hits 3 */
798       /* plus/plus: hits5_plus against hits3_plus (really on minus) */
799       if (score5 <= Ladder_cutoff(newladder5_plus) && score3 <= Ladder_cutoff(ladder3_plus)) {
800 	hits5 = Ladder_hits_for_score(&nhits5,newladder5_plus,hitlistpool,score5);
801 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
802 	debug(printf("at score %d+%d, nnewhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
803 
804 	if ((n = do_genome_plus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
805 				&hitpairs,&(*distant_hitpairs),
806 				hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
807 				mismatch_positions_alloc_5,mismatch_positions_alloc_3,
808 				query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
809 #if 0
810 				queryuc_ptr_5,queryuc_ptr_3,
811 				pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
812 #endif
813 				listpool,hitlistpool)) > 0) {
814 #if 0
815 	  /* Too greedy */
816 	  if (nconcordant == 0) {
817 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
818 	      cutoff_level = max_frontier_score;
819 	    }
820 	  }
821 #else
822 	  if (concordant_score_within_trims + subopt_levels < cutoff_level) {
823 	    cutoff_level = concordant_score_within_trims + subopt_levels;
824 	    debug(printf("Updated cutoff level to be %d\n",cutoff_level));
825 	  }
826 #endif
827 	  if ((nconcordant += n) > maxpairedpaths) {
828 	    debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
829 	    *abort_pairing_p = true;
830 	  }
831 	}
832       }
833 
834       /* minus/minus: hits3_minus (really on plus) against newhits5_minus */
835       if (score5 <= Ladder_cutoff(newladder5_minus) && score3 <= Ladder_cutoff(ladder3_minus)) {
836 	hits5 = Ladder_hits_for_score(&nhits5,newladder5_minus,hitlistpool,score5);
837 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
838 	debug(printf("at score %d+%d, nnewhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
839 	if ((n = do_genome_minus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
840 				 &hitpairs,&(*distant_hitpairs),
841 				 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
842 				 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
843 				 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
844 #if 0
845 				 queryuc_ptr_5,queryuc_ptr_3,
846 				 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
847 #endif
848 				 listpool,hitlistpool)) > 0) {
849 #if 0
850 	  /* Too greedy */
851 	  if (nconcordant == 0) {
852 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
853 	      cutoff_level = max_frontier_score;
854 	    }
855 	  }
856 #else
857 	  if (concordant_score_within_trims + subopt_levels < cutoff_level) {
858 	    cutoff_level = concordant_score_within_trims + subopt_levels;
859 	    debug(printf("Updated cutoff level to be %d\n",cutoff_level));
860 	  }
861 #endif
862 	  if ((nconcordant += n) > maxpairedpaths) {
863 	    debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
864 	    *abort_pairing_p = true;
865 	  }
866 	}
867       }
868 
869       /* All hits 5 vs new hits 3 */
870       /* plus/plus: hits5_plus against newhits3_plus (really on minus) */
871       if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(newladder3_plus)) {
872 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
873 	hits3 = Ladder_hits_for_score(&nhits3,newladder3_plus,hitlistpool,score3);
874 	debug(printf("at score %d+%d, nhits5_plus = %d, nnewhits3_plus = %d\n",score5,score3,nhits5,nhits3));
875 
876 	if ((n = do_genome_plus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
877 				&hitpairs,&(*distant_hitpairs),
878 				hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
879 				mismatch_positions_alloc_5,mismatch_positions_alloc_3,
880 				query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
881 #if 0
882 				queryuc_ptr_5,queryuc_ptr_3,
883 				pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
884 #endif
885 				listpool,hitlistpool)) > 0) {
886 #if 0
887 	  /* Too greedy */
888 	  if (nconcordant == 0) {
889 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
890 	      cutoff_level = max_frontier_score;
891 	    }
892 	  }
893 #else
894 	  if (concordant_score_within_trims + subopt_levels < cutoff_level) {
895 	    cutoff_level = concordant_score_within_trims + subopt_levels;
896 	    debug(printf("Updated cutoff level to be %d\n",cutoff_level));
897 	  }
898 #endif
899 	  if ((nconcordant += n) > maxpairedpaths) {
900 	    debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
901 	    *abort_pairing_p = true;
902 	  }
903 	}
904       }
905 
906       /* minus/minus: newhits3_minus (really on plus) against hits5_minus */
907       if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(newladder3_minus)) {
908 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
909 	hits3 = Ladder_hits_for_score(&nhits3,newladder3_minus,hitlistpool,score3);
910 
911 	debug(printf("at score %d+%d, nhits5_minus = %d, nnewhits3_minus = %d\n",score5,score3,nhits5,nhits3));
912 	if ((n = do_genome_minus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
913 				 &hitpairs,&(*distant_hitpairs),
914 				 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
915 				 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
916 				 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
917 #if 0
918 				 queryuc_ptr_5,queryuc_ptr_3,
919 				 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
920 #endif
921 				 listpool,hitlistpool)) > 0) {
922 #if 0
923 	  /* Too greedy */
924 	  if (nconcordant == 0) {
925 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
926 	      cutoff_level = max_frontier_score;
927 	    }
928 	  }
929 #else
930 	  if (concordant_score_within_trims + subopt_levels < cutoff_level) {
931 	    cutoff_level = concordant_score_within_trims + subopt_levels;
932 	    debug(printf("Updated cutoff level to be %d\n",cutoff_level));
933 	  }
934 #endif
935 	  if ((nconcordant += n) > maxpairedpaths) {
936 	    debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
937 	    *abort_pairing_p = true;
938 	  }
939 	}
940       }
941     }
942 
943     frontier_score++;
944   }
945 
946 
947   Ladder_free(&newladder5_plus);
948   Ladder_free(&newladder5_minus);
949   Ladder_free(&newladder3_plus);
950   Ladder_free(&newladder3_minus);
951 
952   Ladder_gc_duplicates(ladder5_plus);
953   Ladder_gc_duplicates(ladder5_minus);
954   Ladder_gc_duplicates(ladder3_plus);
955   Ladder_gc_duplicates(ladder3_minus);
956 
957   debug(printf("Finished with Concordance_pair_up_genome: %d concordant\n",List_length(hitpairs)));
958 
959   return hitpairs;
960 }
961 
962 
963 static int
do_distant_plus_plus(int * concordant_score,List_T * hitpairs,List_T * conc_transloc,List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)964 do_distant_plus_plus (int *concordant_score, List_T *hitpairs,
965 		      List_T *conc_transloc, List_T *samechr,
966 		      T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
967 		      int querylength5, int querylength3,
968 		      int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
969 		      Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
970 		      Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
971 #if 0
972 		      char *queryuc_ptr_5, char *queryuc_ptr_3,
973 		      Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
974 		      Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
975 #endif
976 		      Listpool_T listpool, Hitlistpool_T hitlistpool) {
977 
978   int nfound = 0, noverlap;
979   int i, j;
980   Stage3pair_T stage3pair;
981   T hit5, hit3;
982   int score;
983   Univcoord_T insert_start;
984   Chrpos_T pairmax;
985 
986   if (nhits5 > 0 && nhits3 > 0) {
987     i = j = 0;
988     while (i < nhits5) {
989       hit5 = hits5[i];
990       if (circularp[hit5->effective_chrnum] == true) {
991 	pairmax = pairmax_circular;
992       } else {
993 	pairmax = pairmax_linear;
994       }
995 
996       insert_start = hit5->genomicend - querylength5;
997 #ifdef DEBUG
998       printf("plus/plus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
999 	     i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
1000 	     print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5->circularalias,hit5);
1001       if (j >= 0 && j < nhits3) {
1002 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
1003 	       hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
1004       }
1005       printf("\n");
1006 #endif
1007 
1008       while (j >= 0 &&
1009 	     hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax > insert_start) {
1010 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1011 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1012 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
1013 	j--;
1014       }
1015       j++;		/* Finish backup */
1016 
1017       while (j < nhits3 &&
1018 	     hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax <= insert_start) {
1019 	debug(printf("  advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1020 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1021 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
1022 	j++;
1023       }
1024 
1025       noverlap = 0;
1026       while (noverlap++ < MAX_OVERLAP && j < nhits3 && hits3[j]->genomicstart + querylength3 <= pairmax + insert_start) {
1027 	debug(printf("  overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
1028 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1029 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
1030 	hit3 = hits3[j];
1031 
1032 	if (hit5->effective_chrnum != hit3->effective_chrnum) {
1033 	  debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1034 
1035 	} else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1036 	  /* Could potentially miss an alignment if the two ends overlap */
1037 	  debug(printf(" => double splice translocations"));
1038 
1039 	} else if (hit5->distant_splice_p == true || hit3->distant_splice_p == true) {
1040 	  debug(printf(" => conc_transloc effchr %d (chrnum5 %d, chrnum3 %d)",
1041 		       hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
1042 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT_TRANSLOCATIONS,
1043 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1044 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1045 #if 0
1046 					   queryuc_ptr_5,queryuc_ptr_3,
1047 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1048 #endif
1049 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1050 	    *conc_transloc = Hitlist_push(*conc_transloc,hitlistpool,(void *) stage3pair);
1051 	    nfound++;
1052 	  }
1053 
1054 	} else if (SENSE_INCONSISTENT_P(hit5->sensedir_for_concordance,hit3->sensedir_for_concordance)) {
1055 	  /* Use sensedir_for_concordance here and not sensedir */
1056 	  debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
1057 
1058 	} else if (hit3->genomicend < hit5->genomicstart) {
1059 	  debug(printf(" => scramble because end3 %llu < start5 %llu\n",
1060 		       (unsigned long long) hit3->genomicend,(unsigned long long) hit5->genomicstart));
1061 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1062 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1063 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1064 #if 0
1065 					   queryuc_ptr_5,queryuc_ptr_3,
1066 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1067 #endif
1068 					   listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1069 	    *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1070 	    nfound++;
1071 	  }
1072 
1073 	} else {
1074 	  /* Unexpected? */
1075 	  debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
1076 		       hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
1077 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
1078 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1079 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1080 #if 0
1081 					   queryuc_ptr_5,queryuc_ptr_3,
1082 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1083 #endif
1084 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1085 
1086 	    if ((score = (stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims) +
1087 		 (stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims)) < *concordant_score) {
1088 	      *concordant_score = score;
1089 	      assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
1090 	      assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
1091 	      debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
1092 			   *concordant_score,stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims,
1093 			   stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims));
1094 	    }
1095 	    *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
1096 	    nfound++;
1097 	  }
1098 	}
1099 	debug(printf("\n"));
1100 
1101 	j++;
1102       }
1103       j--;		/* Finish advance */
1104 
1105       i++;
1106     }
1107   }
1108 
1109   return nfound;
1110 }
1111 
1112 
1113 static int
do_distant_minus_minus(int * concordant_score,List_T * hitpairs,List_T * conc_transloc,List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)1114 do_distant_minus_minus (int *concordant_score, List_T *hitpairs,
1115 			List_T *conc_transloc, List_T *samechr,
1116 			T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
1117 			int querylength5, int querylength3,
1118 			int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1119 			Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1120 			Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1121 #if 0
1122 			char *queryuc_ptr_5, char *queryuc_ptr_3,
1123 			Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1124 			Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1125 #endif
1126 			Listpool_T listpool, Hitlistpool_T hitlistpool) {
1127 
1128   int nfound = 0, noverlap;
1129   int i, j;
1130   Stage3pair_T stage3pair;
1131   T hit5, hit3;
1132   int score;
1133   Univcoord_T insert_start;
1134   Chrpos_T pairmax;
1135 
1136   if (nhits3 > 0 && nhits5 > 0) {
1137     i = j = 0;
1138     while (i < nhits3) {
1139       hit3 = hits3[i];
1140       if (circularp[hit3->effective_chrnum] == true) {
1141 	pairmax = pairmax_circular;
1142       } else {
1143 	pairmax = pairmax_linear;
1144       }
1145       insert_start = hit3->genomicstart - querylength3;
1146 #ifdef DEBUG
1147       printf("minus/minus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
1148 	     i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
1149 	     print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3->circularalias,hit3);
1150       if (j >= 0 && j < nhits5) {
1151 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
1152 	       hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
1153       }
1154       printf("\n");
1155 #endif
1156 
1157       while (j >= 0 &&
1158 	     hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax > insert_start) {
1159 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1160 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1161 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
1162 	j--;
1163       }
1164       j++;			/* Finish backup */
1165 
1166       while (j < nhits5 &&
1167 	     hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax <= insert_start) {
1168 	debug(printf("  advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1169 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1170 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
1171 	j++;
1172       }
1173 
1174       noverlap = 0;
1175       while (noverlap++ < MAX_OVERLAP && j < nhits5 && hits5[j]->genomicend + querylength5 <= pairmax + insert_start) {
1176 	debug(printf("  overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
1177 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1178 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
1179 	hit5 = hits5[j];
1180 
1181 	/* Do want to see pairs previously seen */
1182 	if (hit3->effective_chrnum != hit5->effective_chrnum) {
1183 	  debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1184 
1185 	} else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1186 	  /* Could potentially miss an alignment if the two ends overlap */
1187 	  debug(printf(" => double splice translocations"));
1188 
1189 	} else if (hit5->distant_splice_p == true || hit3->distant_splice_p == true) {
1190 	  debug(printf(" => conc_transloc effchr %d (chrnum5 %d, chrnum3 %d)",
1191 		       hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
1192 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT_TRANSLOCATIONS,
1193 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1194 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1195 #if 0
1196 					   queryuc_ptr_5,queryuc_ptr_3,
1197 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1198 #endif
1199 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1200 	    *conc_transloc = Hitlist_push(*conc_transloc,hitlistpool,(void *) stage3pair);
1201 	    nfound++;
1202 	  }
1203 
1204 	} else if (SENSE_INCONSISTENT_P(hit3->sensedir_for_concordance,hit5->sensedir_for_concordance)) {
1205 	  /* Use sensedir_for_concordance here and not sensedir */
1206 	  debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
1207 
1208 	} else if (hit5->genomicstart < hit3->genomicend) {
1209 	  debug(printf(" => scramble because start5 %llu < end3 %llu\n",
1210 		       (unsigned long long) hit5->genomicstart,(unsigned long long) hit3->genomicend));
1211 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1212 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1213 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1214 #if 0
1215 					   queryuc_ptr_5,queryuc_ptr_3,
1216 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1217 #endif
1218 					   listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1219 	    *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1220 	    nfound++;
1221 	  }
1222 
1223 	} else {
1224 	  /* Unexpected? */
1225 	  debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
1226 		       hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
1227 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
1228 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1229 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1230 #if 0
1231 					   queryuc_ptr_5,queryuc_ptr_3,
1232 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1233 #endif
1234 					   listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1235 
1236 	    if ((score = (stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims) +
1237 		 (stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims)) < *concordant_score) {
1238 	      *concordant_score = score;
1239 	      assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
1240 	      assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
1241 	      debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
1242 			   *concordant_score,stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims,
1243 			   stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims));
1244 	    }
1245 	    *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
1246 	    nfound++;
1247 	  }
1248 	}
1249 	debug(printf("\n"));
1250 
1251 	j++;
1252       }
1253       j--;		/* Finish advance */
1254 
1255       i++;
1256     }
1257   }
1258 
1259   return nfound;
1260 }
1261 
1262 
1263 static int
do_distant_plus_minus(List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)1264 do_distant_plus_minus (List_T *samechr,
1265 		       T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
1266 		       int querylength5, int querylength3,
1267 		       int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1268 		       Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1269 		       Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1270 #if 0
1271 		       char *queryuc_ptr_5, char *queryuc_ptr_3,
1272 		       Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1273 		       Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1274 #endif
1275 		       Listpool_T listpool, Hitlistpool_T hitlistpool) {
1276 
1277   int nfound = 0, noverlap;
1278   int i, j;
1279   Stage3pair_T stage3pair;
1280   T hit5, hit3;
1281   Univcoord_T insert_start;
1282   Chrpos_T pairmax;
1283 
1284   if (nhits5 > 0 && nhits3 > 0) {
1285     i = j = 0;
1286     while (i < nhits5) {
1287       hit5 = hits5[i];
1288       if (circularp[hit5->effective_chrnum] == true) {
1289 	pairmax = pairmax_circular;
1290       } else {
1291 	pairmax = pairmax_linear;
1292       }
1293       insert_start = hit5->genomicend - querylength5;
1294 #ifdef DEBUG
1295       printf("plus/minus: i=%d/%d #%d:%u..%u %s %s %p",
1296 	     i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
1297 	     print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5);
1298       if (j >= 0 && j < nhits3) {
1299 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
1300 	       hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
1301       }
1302       printf("\n");
1303 #endif
1304 
1305       while (j >= 0 &&
1306 	     hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax > insert_start) {
1307 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s %p\n",
1308 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1309 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]));
1310 	j--;
1311       }
1312       j++;		/* Finish backup */
1313 
1314       while (j < nhits3 &&
1315 	     hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax <= insert_start) {
1316 	debug(printf("  advance: j=%d/%d #%d:%u..%u %s %s %p\n",
1317 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1318 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]));
1319 	j++;
1320       }
1321 
1322       noverlap = 0;
1323       while (noverlap++ < MAX_OVERLAP && j < nhits3 && hits3[j]->genomicstart + querylength3 <= pairmax + insert_start) {
1324 	debug(printf("  overlap: j=%d/%d #%d:%u..%u %s %s %p",
1325 		     j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1326 		     print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]));
1327 	hit3 = hits3[j];
1328 
1329 	if (hit5->effective_chrnum != hit3->effective_chrnum) {
1330 	  debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1331 
1332 	} else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1333 	  debug(printf(" => double splice translocations"));
1334 
1335 	} else if (SENSE_INCONSISTENT_FOR_INVERSION_P(hit5->sensedir_for_concordance,hit3->sensedir_for_concordance)) {
1336 	  /* Use sensedir_for_concordance here and not sensedir */
1337 	  debug(printf(" => sense inconsistent for inversion"));
1338 #if 0
1339 	} else if (hits3[j]->genomicstart + querylength3 <= insert_start) {
1340 	  debug(printf(" => scramble"));
1341 	  if (nsamechr <= maxpairedpaths &&
1342 	      (stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1343 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1344 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1345 #if 0
1346 					   queryuc_ptr_5,queryuc_ptr_3,
1347 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1348 #endif
1349 					   listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1350 	    *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1351 	    nfound++;
1352 	  }
1353 #endif
1354 	} else {
1355 	  debug(printf(" => inversion effchr %d (chrnum5 %d, chrnum3 %d)",
1356 		       hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
1357 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_INVERSION,
1358 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1359 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1360 #if 0
1361 					   queryuc_ptr_5,queryuc_ptr_3,
1362 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1363 #endif
1364 					   listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1365 	    *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1366 	    nfound++;
1367 	  }
1368 	}
1369 	debug(printf("\n"));
1370 
1371 	j++;
1372       }
1373       j--;		/* Finish advance */
1374 
1375       i++;
1376     }
1377   }
1378 
1379   return nfound;
1380 }
1381 
1382 
1383 static int
do_distant_minus_plus(List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)1384 do_distant_minus_plus (List_T *samechr,
1385 		       T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
1386 		       int querylength5, int querylength3,
1387 		       int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1388 		       Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1389 		       Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1390 #if 0
1391 		       char *queryuc_ptr_5, char *queryuc_ptr_3,
1392 		       Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1393 		       Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1394 #endif
1395 		       Listpool_T listpool, Hitlistpool_T hitlistpool) {
1396 
1397   int nfound = 0, noverlap;
1398   int i, j;
1399   Stage3pair_T stage3pair;
1400   T hit5, hit3;
1401   Univcoord_T insert_start;
1402   Chrpos_T pairmax;
1403 
1404   if (nhits3 > 0 && nhits5 > 0) {
1405     i = j = 0;
1406     while (i < nhits3) {
1407       hit3 = hits3[i];
1408       if (circularp[hit3->effective_chrnum] == true) {
1409 	pairmax = pairmax_circular;
1410       } else {
1411 	pairmax = pairmax_linear;
1412       }
1413       insert_start = hit3->genomicstart - querylength3;
1414 #ifdef DEBUG
1415       printf("minus/plus: i=%d/%d #%d:%u..%u %s %s %p",
1416 	     i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
1417 	     print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3);
1418       if (j >= 0 && j < nhits5) {
1419 	printf("    j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
1420 	       hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
1421       }
1422       printf("\n");
1423 #endif
1424 
1425       while (j >= 0 &&
1426 	     hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax > insert_start) {
1427 	debug(printf("  backup: j=%d/%d #%d:%u..%u %s %s %p\n",
1428 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1429 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]));
1430 	j--;
1431       }
1432       j++;			/* Finish backup */
1433 
1434       while (j < nhits5 &&
1435 	     hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax <= insert_start) {
1436 	debug(printf("  advance: j=%d/%d #%d:%u..%u %s %s %p\n",
1437 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1438 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]));
1439 	j++;
1440       }
1441 
1442       noverlap = 0;
1443       while (noverlap++ < MAX_OVERLAP && j < nhits5 && hits5[j]->genomicend + querylength5 <= pairmax + insert_start) {
1444 	debug(printf("  overlap: j=%d/%d #%d:%u..%u %s %s %p",
1445 		     j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1446 		     print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]));
1447 	hit5 = hits5[j];
1448 
1449 	if (hit3->effective_chrnum != hit5->effective_chrnum) {
1450 	  debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1451 
1452 	} else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1453 	  debug(printf(" => double splice translocations"));
1454 
1455 	} else if (SENSE_INCONSISTENT_FOR_INVERSION_P(hit3->sensedir_for_concordance,hit5->sensedir_for_concordance)) {
1456 	  /* Use sensedir_for_concordance here and not sensedir */
1457 	  debug(printf(" => sense inconsistent for inversion"));
1458 #if 0
1459 	} else if (hits5[j]->genomicend + querylength5 <= insert_start) {
1460 	  debug(printf(" => scramble"));
1461 	  if ((*nsamechr) <= maxpairedpaths &&
1462 	      (stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1463 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1464 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1465 #if 0
1466 					   queryuc_ptr_5,queryuc_ptr_3,
1467 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1468 #endif
1469 					   listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1470 	    *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1471 	    nfound++;
1472 	  }
1473 #endif
1474 	} else {
1475 	  debug(printf(" => inversion effchr %d (chrnum5 %d, chrnum3 %d)",
1476 		       hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
1477 	  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_INVERSION,
1478 					   mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1479 					   query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1480 #if 0
1481 					   queryuc_ptr_5,queryuc_ptr_3,
1482 					   pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1483 #endif
1484 					   listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1485 	    *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1486 	    nfound++;
1487 	  }
1488 	}
1489 	debug(printf("\n"));
1490 
1491 	j++;
1492       }
1493       j--;		/* Finish advance */
1494 
1495       i++;
1496     }
1497   }
1498 
1499   return nfound;
1500 }
1501 
1502 
1503 
1504 List_T
Concordance_pair_up_distant(bool * abort_pairing_p,int * concordant_score,List_T * samechr,List_T * conc_transloc,List_T hitpairs,List_T hitlist5_gplus,List_T hitlist5_gminus,List_T hitlist3_gplus,List_T hitlist3_gminus,Ladder_T ladder5_plus,Ladder_T ladder5_minus,Ladder_T ladder3_plus,Ladder_T ladder3_minus,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool,int maxpairedpaths,int genestrand,int sensedir)1505 Concordance_pair_up_distant (bool *abort_pairing_p, int *concordant_score,
1506 			     List_T *samechr, List_T *conc_transloc, List_T hitpairs,
1507 
1508 			     List_T hitlist5_gplus, List_T hitlist5_gminus,
1509 			     List_T hitlist3_gplus, List_T hitlist3_gminus,
1510 
1511 			     Ladder_T ladder5_plus, Ladder_T ladder5_minus,
1512 			     Ladder_T ladder3_plus, Ladder_T ladder3_minus,
1513 			     int querylength5, int querylength3,
1514 
1515 			     int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1516 			     Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1517 			     Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1518 #if 0
1519 			     char *queryuc_ptr_5, char *queryuc_ptr_3,
1520 			     Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1521 			     Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1522 #endif
1523 			     Listpool_T listpool, Hitlistpool_T hitlistpool,
1524 			     int maxpairedpaths, int genestrand, int sensedir) {
1525   int nfound = 0, n;
1526   int max_frontier_score, frontier_score, score5, score3;
1527   int cutoff_level, level;
1528   Ladder_T newladder5_plus, newladder5_minus, newladder3_plus, newladder3_minus;
1529 
1530   T *hits5, *hits3;
1531   int nhits5, nhits3;
1532 
1533 
1534   debug(printf("Starting Concordance_pair_up_distant\n"));
1535 
1536 #if 0
1537   /* Rely instead on Ladder_cutoff, in case there are a few good hits */
1538   if (0 && List_length(hitlist5_gplus) > MAX_HITS) {
1539     Stage3end_gc(hitlist5_gplus);
1540     newladder5_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
1541   } else {
1542     newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
1543     Ladder_merge(ladder5_plus,newladder5_plus);
1544   }
1545 
1546   if (0 && List_length(hitlist5_gminus) > MAX_HITS) {
1547     Stage3end_gc(hitlist5_gminus);
1548     newladder5_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
1549   } else {
1550     newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
1551     Ladder_merge(ladder5_minus,newladder5_minus);
1552   }
1553 
1554   if (0 && List_length(hitlist3_gplus) > MAX_HITS) {
1555     Stage3end_gc(hitlist3_gplus);
1556     newladder3_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
1557   } else {
1558     newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
1559     Ladder_merge(ladder3_plus,newladder3_plus);
1560   }
1561 
1562   if (0 && List_length(hitlist3_gminus) > MAX_HITS) {
1563     Stage3end_gc(hitlist3_gminus);
1564     newladder3_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
1565   } else {
1566     newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
1567     Ladder_merge(ladder3_minus,newladder3_minus);
1568   }
1569 
1570 #else
1571   newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
1572   newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
1573   newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
1574   newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
1575 
1576   Ladder_merge(ladder5_plus,newladder5_plus,hitlistpool);
1577   Ladder_merge(ladder5_minus,newladder5_minus,hitlistpool);
1578   Ladder_merge(ladder3_plus,newladder3_plus,hitlistpool);
1579   Ladder_merge(ladder3_minus,newladder3_minus,hitlistpool);
1580 #endif
1581 
1582 
1583   cutoff_level = Ladder_cutoff(newladder5_plus) + Ladder_cutoff(ladder3_plus);
1584   if ((level = Ladder_cutoff(newladder5_minus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
1585     cutoff_level = level;
1586   }
1587   if ((level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(newladder3_plus)) > cutoff_level) {
1588     cutoff_level = level;
1589   }
1590   if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(newladder3_minus)) > cutoff_level) {
1591     cutoff_level = level;
1592   }
1593   if ((level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
1594     cutoff_level = level;
1595   }
1596   if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(ladder3_plus)) > cutoff_level) {
1597     cutoff_level = level;
1598   }
1599   max_frontier_score = cutoff_level;
1600 
1601   frontier_score = 0;
1602   while (frontier_score <= cutoff_level) {
1603     debug(printf("frontier_score = %d\n",frontier_score));
1604     for (score5 = 0; score5 <= frontier_score; score5++) {
1605       score3 = frontier_score - score5;
1606       debug(printf("score5 = %d, score3 = %d\n",score5,score3));
1607 
1608       /* New hits 5 vs all hits 3 */
1609       /* plus/plus: hits5_plus against hits3_plus (really on minus) */
1610       if (score5 <= Ladder_cutoff(newladder5_plus) && score3 <= Ladder_cutoff(ladder3_plus)) {
1611 	hits5 = Ladder_hits_for_score(&nhits5,newladder5_plus,hitlistpool,score5);
1612 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
1613 	debug(printf("at score %d+%d, nnewhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
1614 	if ((n = do_distant_plus_plus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1615 				      hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1616 				      mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1617 				      query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1618 #if 0
1619 				      queryuc_ptr_5,queryuc_ptr_3,
1620 				      pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1621 #endif
1622 				      listpool,hitlistpool)) > 0) {
1623 	  if (nfound == 0) {
1624 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1625 	      cutoff_level = max_frontier_score;
1626 	    }
1627 	  }
1628 	  if ((nfound += n) > maxpairedpaths) {
1629 	    debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1630 	    *abort_pairing_p = true;
1631 	  }
1632 	}
1633       }
1634 
1635       /* minus/minus: hits3_minus (really on plus) against hits5_minus */
1636       if (score5 <= Ladder_cutoff(newladder5_minus) && score3 <= Ladder_cutoff(ladder3_minus)) {
1637 	hits5 = Ladder_hits_for_score(&nhits5,newladder5_minus,hitlistpool,score5);
1638 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
1639 	debug(printf("at score %d+%d, nnewhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
1640 	if ((n = do_distant_minus_minus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1641 					hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1642 					mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1643 					query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1644 #if 0
1645 					queryuc_ptr_5,queryuc_ptr_3,
1646 					pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1647 #endif
1648 					listpool,hitlistpool)) > 0) {
1649 	  if (nfound == 0) {
1650 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1651 	      cutoff_level = max_frontier_score;
1652 	    }
1653 	  }
1654 	  if ((nfound += n) > maxpairedpaths) {
1655 	    debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1656 	    *abort_pairing_p = true;
1657 	  }
1658 	}
1659       }
1660 
1661 
1662       /* All hits 5 vs new hits 3 */
1663       /* plus/plus: hits5_plus against hits3_plus (really on minus) */
1664       if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(newladder3_plus)) {
1665 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
1666 	hits3 = Ladder_hits_for_score(&nhits3,newladder3_plus,hitlistpool,score3);
1667 	debug(printf("at score %d+%d, nnewhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
1668 	if ((n = do_distant_plus_plus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1669 				      hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1670 				      mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1671 				      query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1672 #if 0
1673 				      queryuc_ptr_5,queryuc_ptr_3,
1674 				      pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1675 #endif
1676 				      listpool,hitlistpool)) > 0) {
1677 
1678 	  if (nfound == 0) {
1679 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1680 	      cutoff_level = max_frontier_score;
1681 	    }
1682 	  }
1683 	  if ((nfound += n) > maxpairedpaths) {
1684 	    debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1685 	    *abort_pairing_p = true;
1686 	  }
1687 	}
1688       }
1689 
1690       /* minus/minus: hits3_minus (really on plus) against hits5_minus */
1691       if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(newladder3_minus)) {
1692 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
1693 	hits3 = Ladder_hits_for_score(&nhits3,newladder3_minus,hitlistpool,score3);
1694 	debug(printf("at score %d+%d, nnewhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
1695 	if ((n = do_distant_minus_minus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1696 					hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1697 					mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1698 					query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1699 #if 0
1700 					queryuc_ptr_5,queryuc_ptr_3,
1701 					pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1702 #endif
1703 					listpool,hitlistpool)) > 0) {
1704 	  if (nfound == 0) {
1705 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1706 	      cutoff_level = max_frontier_score;
1707 	    }
1708 	  }
1709 	  if ((nfound += n) > maxpairedpaths) {
1710 	    debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1711 	    *abort_pairing_p = true;
1712 	  }
1713 	}
1714       }
1715 
1716 
1717       /* All hits5 vs all hits3 */
1718       /* plus/minus (inversions): hits5_plus against hits3_minus */
1719       if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(ladder3_minus)) {
1720 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
1721 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
1722 	debug(printf("at score %d+%d, nhits5_plus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
1723 	if ((n = do_distant_plus_minus(&(*samechr),hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1724 				       mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1725 				       query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1726 #if 0
1727 				       queryuc_ptr_5,queryuc_ptr_3,
1728 				       pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1729 #endif
1730 				       listpool,hitlistpool)) > 0) {
1731 	  if (nfound == 0) {
1732 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1733 	      cutoff_level = max_frontier_score;
1734 	    }
1735 	  }
1736 	  if ((nfound += n) > maxpairedpaths) {
1737 	    debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1738 	    *abort_pairing_p = true;
1739 	  }
1740 	}
1741       }
1742 
1743       /* minus/plus (inversions): hits3_plus against hits5_minus */
1744       if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(ladder3_plus)) {
1745 	hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
1746 	hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
1747 	debug(printf("at score %d+%d, nhits5_minus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
1748 	if ((n = do_distant_minus_plus(&(*samechr),hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1749 				       mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1750 				       query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1751 #if 0
1752 				       queryuc_ptr_5,queryuc_ptr_3,
1753 				       pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1754 #endif
1755 				       listpool,hitlistpool)) > 0) {
1756 	  if (nfound == 0) {
1757 	    if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1758 	      cutoff_level = max_frontier_score;
1759 	    }
1760 	  }
1761 	  if ((nfound += n) > maxpairedpaths) {
1762 	    debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1763 	    *abort_pairing_p = true;
1764 	  }
1765 	}
1766       }
1767     }
1768 
1769     frontier_score++;
1770   }
1771 
1772 
1773   Ladder_free(&newladder5_plus);
1774   Ladder_free(&newladder5_minus);
1775   Ladder_free(&newladder3_plus);
1776   Ladder_free(&newladder3_minus);
1777 
1778   Ladder_gc_duplicates(ladder5_plus);
1779   Ladder_gc_duplicates(ladder5_minus);
1780   Ladder_gc_duplicates(ladder3_plus);
1781   Ladder_gc_duplicates(ladder3_minus);
1782 
1783   debug(printf("Finished with Concordance_pair_up_distant: %d concordant, %d samechr, %d conc_transloc\n",
1784 		List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc)));
1785 
1786   return hitpairs;
1787 }
1788 
1789 
1790 
1791 #if 0
1792 static int
1793 path_ascending_cmp (const void *a, const void *b) {
1794   List_T x = * (List_T *) a;
1795   List_T y = * (List_T *) b;
1796   Univdiag_T diagonalx, diagonaly;
1797   Univcoord_T path_start_x, path_start_y, path_end_x, path_end_y;
1798 
1799   diagonalx = List_head(x);
1800   diagonaly = List_head(y);
1801   path_start_x = diagonalx->univdiagonal;
1802   path_start_y = diagonaly->univdiagonal;
1803 
1804   if (path_start_x < path_start_y) {
1805     return -1;
1806   } else if (path_start_y < path_start_x) {
1807     return +1;
1808   } else {
1809     diagonalx = (Univdiag_T) List_last_value(x);
1810     diagonaly = (Univdiag_T) List_last_value(y);
1811     path_end_x = diagonalx->univdiagonal;
1812     path_end_y = diagonaly->univdiagonal;
1813     if (path_end_x < path_end_y) {
1814       return -1;
1815     } else if (path_end_y < path_end_x) {
1816       return +1;
1817     } else {
1818       return 0;
1819     }
1820   }
1821 }
1822 #endif
1823 
1824 
1825 #if 0
1826 static int
1827 hit_ascending_cmp (const void *a, const void *b) {
1828   T x = * (T *) a;
1829   T y = * (T *) b;
1830 
1831   if (x->high < y->high) {
1832     return -1;
1833   } else if (y->high < x->high) {
1834     return +1;
1835   } else if (x->low < y->low) {
1836     return -1;
1837   } else if (y->low < x->low) {
1838     return +1;
1839   } else {
1840     return 0;
1841   }
1842 }
1843 #endif
1844 
1845 
1846 void
Concordance_setup(int subopt_levels_in,Chrpos_T pairmax_transcriptome_in,Chrpos_T pairmax_linear_in,Chrpos_T pairmax_circular_in,Chrpos_T expected_pairlength_in,Chrpos_T pairlength_deviation_in,bool * circularp_in,bool merge_samechr_p_in)1847 Concordance_setup (int subopt_levels_in, Chrpos_T pairmax_transcriptome_in,
1848 		   Chrpos_T pairmax_linear_in, Chrpos_T pairmax_circular_in,
1849 		   Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in,
1850 		   bool *circularp_in, bool merge_samechr_p_in) {
1851   subopt_levels = subopt_levels_in;
1852 
1853   pairmax_transcriptome = pairmax_transcriptome_in;
1854   pairmax_linear = pairmax_linear_in;
1855   pairmax_circular = pairmax_circular_in;
1856 
1857   expected_pairlength = expected_pairlength_in;
1858   pairlength_deviation = pairlength_deviation_in;
1859   adjacent_pairlength = expected_pairlength + pairlength_deviation;
1860 
1861   circularp = circularp_in;
1862   merge_samechr_p = merge_samechr_p_in;
1863 
1864   return;
1865 }
1866