1 static char rcsid[] = "$Id: concordance.c 222730 2020-05-29 13:10:09Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "concordance.h"
7
8 #include <stdlib.h> /* For qsort */
9 #include <stdio.h>
10 #include <string.h>
11 #include <strings.h>
12
13 #include "assert.h"
14 #include "mem.h"
15 #include "univdiag.h"
16 #include "univdiagdef.h"
17 #include "transcript.h"
18 #include "stage3hrdef.h"
19
20
21 static int subopt_levels;
22
23 static Chrpos_T pairmax_transcriptome;
24 static Chrpos_T pairmax_linear; /* For two ends that both lack a splice */
25 static Chrpos_T pairmax_circular;
26
27 static Chrpos_T expected_pairlength;
28 static Chrpos_T pairlength_deviation;
29 static Chrpos_T adjacent_pairlength; /* For two ends, one of which has a splice */
30
31 static bool *circularp;
32 static bool merge_samechr_p;
33
34
35 /* #define MAX_HITS 1000 */
36 #define MAX_OVERLAP 5
37
38 #define T Stage3end_T
39
40 #ifdef DEBUG
41 #define debug(x) x
42 #else
43 #define debug(x)
44 #endif
45
46 /* Details */
47 #ifdef DEBUG1
48 #define debug1(x) x
49 #else
50 #define debug1(x)
51 #endif
52
53
54
55 #if 0
56 /* Previously called by stage1hr.c, but now we use Transcript_concordance */
57 /* Since we haven't called merge procedure yet, we can just look at the Intlist_head */
58 List_T
59 Stage3hr_filter_concordant_tr (List_T *disjoint, List_T hits, List_T mates,
60 Hitlistpool_T hitlistpool) {
61 List_T common = NULL, p, q;
62 T hit, mate;
63 int trnum;
64 Chrpos_T start1, end2;
65 bool concordantp;
66
67 *disjoint = (List_T) NULL;
68 for (p = hits; p != NULL; p = List_next(p)) {
69 hit = (T) List_head(p);
70 trnum = Intlist_head(hit->trnums);
71 concordantp = false;
72
73 for (q = mates; q != NULL; q = List_next(q)) {
74 mate = (T) List_head(q);
75 if (Intlist_head(mate->trnums) == trnum) {
76 start1 = (Chrpos_T) Intlist_head(hit->trstarts);
77 end2 = (Chrpos_T) Intlist_head(mate->trends);
78 if (start1 < end2) {
79 if (end2 <= start1 + pairmax_transcriptome) {
80 concordantp = true;
81 }
82 } else {
83 if (start1 <= end2 + pairmax_transcriptome) {
84 concordantp = true;
85 }
86 }
87 }
88 }
89
90 if (concordantp == true) {
91 debug7(printf("trnum %d is concordant\n",trnum));
92 common = Hitlist_push(common,hitlistpool,(void *) hit);
93 } else {
94 debug7(printf("trnum %d is discordant\n",trnum));
95 *disjoint = Hitlist_push(*disjoint,hitlistpool,(void *) hit);
96 }
97 }
98
99 debug7(printf("Returning %d common hits\n",List_length(common)));
100 return common;
101 }
102 #endif
103
104
105 #ifdef DEBUG
106 static char *
print_sense(int sense)107 print_sense (int sense) {
108 if (sense == SENSE_NULL) {
109 return "sense:null";
110 } else if (sense == SENSE_ANTI) {
111 return "sense:anti";
112 } else if (sense == SENSE_FORWARD) {
113 return "sense:fwd";
114 } else {
115 abort();
116 }
117 }
118 #endif
119
120
121 #if 0
122 static int
123 do_transcriptome_plus (int *concordant_score_overall, List_T *hitpairs,
124 T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
125 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
126 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
127 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
128 #if 0
129 char *queryuc_ptr_5, char *queryuc_ptr_3,
130 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
131 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
132 #endif
133 Listpool_T listpool, Hitlistpool_T hitlistpool) {
134
135 int nconcordant = 0;
136 int i, j;
137 Stage3pair_T stage3pair;
138 T hit5, hit3;
139 int min_insertlength;
140 int score;
141 Chrnum_T chrnum;
142
143 if (nhits5 > 0 && nhits3 > 0) {
144 i = j = 0;
145 while (i < nhits5) {
146 hit5 = hits5[i];
147 chrnum = hit5->effective_chrnum;
148
149 #ifdef DEBUG
150 printf("plus/plus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
151 i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
152 print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5->circularalias,hit5);
153 if (j >= 0 && j < nhits3) {
154 printf(" j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
155 hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
156 }
157 printf("\n");
158 #endif
159
160 while (j >= 0 && hits3[j]->effective_chrnum == chrnum) {
161 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
162 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
163 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
164 j--;
165 }
166 j++; /* Finish backup */
167
168 /* No need for advance */
169
170 while (j < nhits3 && hits3[j]->effective_chrnum == chrnum) {
171 debug(printf(" samechr: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
172 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
173 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
174 hit3 = hits3[j];
175
176 if (Transcript_intersect_p(&min_insertlength,hit5->transcripts,hit3->transcripts) == false) {
177 debug(printf(" => transcripts do not intersect:\n"));
178 debug(Transcript_print_nums(hit5->transcripts));
179 debug(Transcript_print_nums(hit5->transcripts));
180 debug(printf("\n"));
181
182 } else {
183 debug(printf(" => concordant transcriptome with min_insertlength %d\n",min_insertlength));
184 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
185 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
186 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
187 #if 0
188 queryuc_ptr_5,queryuc_ptr_3,
189 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
190 #endif
191 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/true)) != NULL) {
192 stage3pair->insertlength = min_insertlength;
193
194 /* No need to update found_score */
195 debug(printf(" Have new pair with scores %d + %d\n",stage3pair->hit5->ref_score_overall,stage3pair->hit3->ref_score_overall));
196
197 if ((score = (stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall)) < *concordant_score_overall) {
198 *concordant_score_overall = score;
199 assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
200 assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
201 debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
202 *concordant_score_overall,stage3pair->hit5->ref_score_overall,stage3pair->hit3->ref_score_overall));
203 }
204 *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
205 nconcordant++;
206 }
207 }
208 debug(printf("\n"));
209
210 j++;
211 }
212 j--; /* Finish advance */
213
214 i++;
215 }
216 }
217
218 return nconcordant;
219 }
220 #endif
221
222
223 #if 0
224 static int
225 do_transcriptome_minus (int *concordant_score_overall, List_T *hitpairs,
226 T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
227 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
228 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
229 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
230 #if 0
231 char *queryuc_ptr_5, char *queryuc_ptr_3,
232 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
233 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
234 #endif
235 Listpool_T listpool, Hitlistpool_T hitlistpool) {
236
237 int nconcordant = 0;
238 int i, j;
239 Stage3pair_T stage3pair;
240 T hit5, hit3;
241 int min_insertlength;
242 int score;
243 Chrnum_T chrnum;
244
245 if (nhits3 > 0 && nhits5 > 0) {
246 i = j = 0;
247 while (i < nhits3) {
248 hit3 = hits3[i];
249 chrnum = hit3->effective_chrnum;
250
251 #ifdef DEBUG
252 printf("minus/minus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
253 i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
254 print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3->circularalias,hit3);
255 if (j >= 0 && j < nhits5) {
256 printf(" j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
257 hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
258 }
259 printf("\n");
260 #endif
261
262 while (j >= 0 && hits5[j]->effective_chrnum == chrnum) {
263 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
264 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
265 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
266 j--;
267 }
268 j++; /* Finish backup */
269
270 /* No need for advance */
271
272 while (j < nhits5 && hits5[j]->effective_chrnum == chrnum) {
273 debug(printf(" samechr: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
274 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
275 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
276 hit5 = hits5[j];
277
278 if (Transcript_intersect_p(&min_insertlength,hit5->transcripts,hit3->transcripts) == false) {
279 debug(printf(" => transcripts do not intersect:\n"));
280 debug(Transcript_print_nums(hit5->transcripts));
281 debug(Transcript_print_nums(hit5->transcripts));
282 debug(printf("\n"));
283
284 } else {
285 debug(printf(" => concordant transcriptome with min_insertlength %d\n",min_insertlength));
286 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
287 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
288 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
289 #if 0
290 queryuc_ptr_5,queryuc_ptr_3,
291 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
292 #endif
293 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/true)) != NULL) {
294 stage3pair->insertlength = min_insertlength;
295
296 if ((score = (stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall)) < *concordant_score_overall) {
297 *concordant_score_overall = score;
298 assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
299 assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
300 debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
301 *concordant_score_overall,stage3pair->hit5->ref_score_overall,stage3pair->hit3->ref_score_overall));
302 }
303 *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
304 nconcordant++;
305 }
306 }
307 debug(printf("\n"));
308
309 j++;
310 }
311 j--; /* Finish advance */
312
313 i++;
314 }
315 }
316
317 return nconcordant;
318 }
319 #endif
320
321
322 #if 0
323 List_T
324 Concordance_pair_up_transcriptome (bool *abort_pairing_p, int *concordant_score_overall, List_T hitpairs,
325
326 Ladder_T ladder5_plus, Ladder_T ladder5_minus,
327 Ladder_T ladder3_plus, Ladder_T ladder3_minus,
328 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
329 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
330 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
331 #if 0
332 char *queryuc_ptr_5, char *queryuc_ptr_3,
333 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
334 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
335 #endif
336 Listpool_T listpool, Hitlistpool_T hitlistpool,
337 int maxpairedpaths, int genestrand) {
338 int nconcordant = 0, n; /* was List_length(hitpairs), but this hurts minus if there are too many plus hits */
339
340 T *hits5, *hits3;
341 int nhits5, nhits3;
342 int max_frontier_score, frontier_score, cutoff_level, level, score5, score3;
343
344
345 debug(printf("Starting Concordance_pair_up_transcriptome\n"));
346
347 /* Initial value, which resets to frontier_score + subopt_levels upon our first hitpair */
348 cutoff_level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(ladder3_plus);
349 if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
350 cutoff_level = level;
351 }
352 max_frontier_score = cutoff_level;
353
354 frontier_score = 0;
355 while (*abort_pairing_p == false && frontier_score <= cutoff_level) {
356 debug(printf("frontier_score = %d\n",frontier_score));
357 for (score5 = 0; score5 <= frontier_score; score5++) {
358 score3 = frontier_score - score5;
359 debug(printf("score5 = %d, score3 = %d\n",score5,score3));
360
361 /* plus/plus: hits5_plus against hits3_plus (really on minus) */
362 if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(ladder3_plus)) {
363 hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
364 hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
365 debug(printf("at score %d+%d, nhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
366
367 if ((n = do_transcriptome_plus(&(*concordant_score_overall),&hitpairs,
368 hits5,nhits5,hits3,nhits3,genestrand,sensedir,
369 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
370 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
371 #if 0
372 queryuc_ptr_5,queryuc_ptr_3,
373 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
374 #endif
375 listpool,hitlistpool)) > 0) {
376 if (nconcordant == 0) {
377 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
378 cutoff_level = max_frontier_score;
379 }
380 }
381 if ((nconcordant += n) > maxpairedpaths) {
382 debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
383 *abort_pairing_p = true;
384 }
385 }
386 }
387
388 /* minus/minus: hits3_minus (really on plus) against hits5_minus */
389 if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(ladder3_minus)) {
390 hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
391 hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
392 debug(printf("at score %d+%d, nhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
393 if ((n = do_transcriptome_minus(&(*concordant_score_overall),&hitpairs,
394 hits5,nhits5,hits3,nhits3,genestrand,sensedir,
395 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
396 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
397 #if 0
398 queryuc_ptr_5,queryuc_ptr_3,
399 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
400 #endif
401 listpool,hitlistpool)) > 0) {
402 if (nconcordant == 0) {
403 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
404 cutoff_level = max_frontier_score;
405 }
406 }
407 if ((nconcordant += n) > maxpairedpaths) {
408 debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
409 *abort_pairing_p = true;
410 }
411 }
412 }
413 }
414
415 frontier_score++;
416 }
417
418 debug(printf("Finished with Concordance_pair_up_transcriptome: %d concordant\n",List_length(hitpairs)));
419
420 return hitpairs;
421 }
422 #endif
423
424
425 static int
do_genome_plus(int * adjacent_score,int * concordant_score_overall,int * concordant_score_within_trims,List_T * local_hitpairs,List_T * distant_hitpairs,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)426 do_genome_plus (int *adjacent_score, int *concordant_score_overall, int *concordant_score_within_trims,
427 List_T *local_hitpairs, List_T *distant_hitpairs, T *hits5, int nhits5, T *hits3, int nhits3,
428 int genestrand, int sensedir, int querylength5, int querylength3,
429 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
430 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
431 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
432 #if 0
433 char *queryuc_ptr_5, char *queryuc_ptr_3,
434 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
435 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
436 #endif
437 Listpool_T listpool, Hitlistpool_T hitlistpool) {
438
439 int nconcordant = 0, noverlap;
440 int i, j;
441 Stage3pair_T stage3pair;
442 T hit5, hit3;
443 Univcoord_T insert_start;
444 int score;
445 int pairmax;
446
447 *concordant_score_within_trims = querylength5 + querylength3;
448
449 if (nhits5 > 0 && nhits3 > 0) {
450 i = j = 0;
451 while (i < nhits5) {
452 hit5 = hits5[i];
453 if (circularp[hit5->effective_chrnum] == true) {
454 pairmax = pairmax_circular;
455 } else {
456 pairmax = pairmax_linear;
457 }
458
459 insert_start = hit5->genomicend - querylength5;
460 #ifdef DEBUG
461 printf("plus/plus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
462 i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
463 print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5->circularalias,hit5);
464 if (j >= 0 && j < nhits3) {
465 printf(" j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
466 hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
467 }
468 printf("\n");
469 #endif
470
471 while (j >= 0 &&
472 hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax > insert_start) {
473 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
474 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
475 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
476 j--;
477 }
478 j++; /* Finish backup */
479
480 while (j < nhits3 &&
481 hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax <= insert_start) {
482 debug(printf(" advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
483 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
484 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
485 j++;
486 }
487
488 noverlap = 0;
489 while (noverlap++ < MAX_OVERLAP && j < nhits3 && hits3[j]->genomicstart + querylength3 <= pairmax + insert_start) {
490 debug(printf(" overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
491 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
492 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
493 hit3 = hits3[j];
494
495 if (hit5->effective_chrnum != hit3->effective_chrnum) {
496 debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
497
498 } else if (SENSE_INCONSISTENT_P(hit5->sensedir_for_concordance,hit3->sensedir_for_concordance)) {
499 /* Use sensedir_for_concordance here and not sensedir */
500 debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
501
502 } else if (hit3->genomicend < hit5->genomicstart) {
503 debug(printf(" => scramble because end3 %llu < start5 %llu\n",
504 (unsigned long long) hit3->genomicend,(unsigned long long) hit5->genomicstart));
505
506 } else {
507 debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
508 hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
509 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
510 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
511 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
512 #if 0
513 queryuc_ptr_5,queryuc_ptr_3,
514 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
515 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
516 #endif
517 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
518 if (Stage3pair_distant_splice_p(stage3pair) == true) {
519 *distant_hitpairs = Hitlist_push(*distant_hitpairs,hitlistpool,(void *) stage3pair);
520 } else {
521 if ((score = stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall) < *concordant_score_overall) {
522 debug(printf(" => Updating concordant_score_overall to be %d\n",score));
523 *concordant_score_overall = score;
524 }
525 if ((score = stage3pair->hit5->refalt_score_within_trims + stage3pair->hit3->refalt_score_within_trims) < *concordant_score_within_trims) {
526 *concordant_score_within_trims = score;
527 assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
528 assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
529 debug(printf(" => Updating concordant_score_within_trims to be %d = %d + %d\n",
530 *concordant_score_within_trims,stage3pair->hit5->refalt_score_within_trims,
531 stage3pair->hit3->refalt_score_within_trims));
532 }
533 if (stage3pair->insertlength <= adjacent_pairlength && score < *adjacent_score) {
534 *adjacent_score = score;
535 }
536 *local_hitpairs = Hitlist_push(*local_hitpairs,hitlistpool,(void *) stage3pair);
537 nconcordant++;
538 }
539 }
540 }
541 debug(printf("\n"));
542
543 j++;
544 }
545 j--; /* Finish advance */
546
547 i++;
548 }
549 }
550
551 return nconcordant;
552 }
553
554
555
556 static int
do_genome_minus(int * adjacent_score,int * concordant_score_overall,int * concordant_score_within_trims,List_T * local_hitpairs,List_T * distant_hitpairs,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)557 do_genome_minus (int *adjacent_score, int *concordant_score_overall, int *concordant_score_within_trims,
558 List_T *local_hitpairs, List_T *distant_hitpairs, T *hits5, int nhits5, T *hits3, int nhits3,
559 int genestrand, int sensedir, int querylength5, int querylength3,
560 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
561 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
562 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
563 #if 0
564 char *queryuc_ptr_5, char *queryuc_ptr_3,
565 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
566 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
567 #endif
568 Listpool_T listpool, Hitlistpool_T hitlistpool) {
569
570 *concordant_score_within_trims = querylength5 + querylength3;
571
572 int nconcordant = 0, noverlap;
573 int i, j;
574 Stage3pair_T stage3pair;
575 T hit5, hit3;
576 Univcoord_T insert_start;
577 int score;
578 int pairmax;
579
580
581 if (nhits3 > 0 && nhits5 > 0) {
582 i = j = 0;
583 while (i < nhits3) {
584 hit3 = hits3[i];
585 if (circularp[hit3->effective_chrnum] == true) {
586 pairmax = pairmax_circular;
587 } else {
588 pairmax = pairmax_linear;
589 }
590 insert_start = hit3->genomicstart - querylength3;
591 #ifdef DEBUG
592 printf("minus/minus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
593 i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
594 print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3->circularalias,hit3);
595 if (j >= 0 && j < nhits5) {
596 printf(" j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
597 hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
598 }
599 printf("\n");
600 #endif
601
602 while (j >= 0 &&
603 hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax > insert_start) {
604 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
605 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
606 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
607 j--;
608 }
609 j++; /* Finish backup */
610
611 while (j < nhits5 &&
612 hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax <= insert_start) {
613 debug(printf(" advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
614 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
615 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
616 j++;
617 }
618
619 noverlap = 0;
620 while (noverlap++ < MAX_OVERLAP && j < nhits5 && hits5[j]->genomicend + querylength5 <= pairmax + insert_start) {
621 debug(printf(" overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
622 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
623 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
624 hit5 = hits5[j];
625
626 if (hit3->effective_chrnum != hit5->effective_chrnum) {
627 debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
628
629 } else if (SENSE_INCONSISTENT_P(hit3->sensedir_for_concordance,hit5->sensedir_for_concordance)) {
630 /* Use sensedir_for_concordance here and not sensedir */
631 debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
632
633 } else if (hit5->genomicstart < hit3->genomicend) {
634 debug(printf(" => scramble because start5 %llu < end3 %llu\n",
635 (unsigned long long) hit5->genomicstart,(unsigned long long) hit3->genomicend));
636
637 } else {
638 debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
639 hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
640 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
641 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
642 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
643 #if 0
644 queryuc_ptr_5,queryuc_ptr_3,
645 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
646 #endif
647 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
648 if (Stage3pair_distant_splice_p(stage3pair) == true) {
649 *distant_hitpairs = Hitlist_push(*distant_hitpairs,hitlistpool,(void *) stage3pair);
650 } else {
651 if ((score = stage3pair->hit5->ref_score_overall + stage3pair->hit3->ref_score_overall) < *concordant_score_overall) {
652 debug(printf(" => Updating concordant_score_overall to be %d\n",score));
653 *concordant_score_overall = score;
654 }
655 if ((score = stage3pair->hit5->refalt_score_within_trims + stage3pair->hit3->refalt_score_within_trims) < *concordant_score_within_trims) {
656 *concordant_score_within_trims = score;
657 assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
658 assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
659 debug(printf(" => Updating concordant_score_within_trims to be %d = %d + %d\n",
660 *concordant_score_within_trims,stage3pair->hit5->refalt_score_within_trims,
661 stage3pair->hit3->refalt_score_within_trims));
662 }
663 if (stage3pair->insertlength <= adjacent_pairlength && score < *adjacent_score) {
664 *adjacent_score = score;
665 }
666 *local_hitpairs = Hitlist_push(*local_hitpairs,hitlistpool,(void *) stage3pair);
667 nconcordant++;
668 }
669 }
670 }
671 debug(printf("\n"));
672
673 j++;
674 }
675 j--; /* Finish advance */
676
677 i++;
678 }
679 }
680
681 return nconcordant;
682 }
683
684
685
686 /* Finds concordant pairs if nconcordant is 0 */
687 List_T
Concordance_pair_up_genome(bool * abort_pairing_p,int * adjacent_score,int * concordant_score_overall,List_T * distant_hitpairs,List_T hitpairs,List_T hitlist5_gplus,List_T hitlist5_gminus,List_T hitlist3_gplus,List_T hitlist3_gminus,Ladder_T ladder5_plus,Ladder_T ladder5_minus,Ladder_T ladder3_plus,Ladder_T ladder3_minus,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool,int maxpairedpaths,int genestrand,int sensedir)688 Concordance_pair_up_genome (bool *abort_pairing_p, int *adjacent_score, int *concordant_score_overall,
689 List_T *distant_hitpairs, List_T hitpairs,
690
691 List_T hitlist5_gplus, List_T hitlist5_gminus,
692 List_T hitlist3_gplus, List_T hitlist3_gminus,
693
694 Ladder_T ladder5_plus, Ladder_T ladder5_minus,
695 Ladder_T ladder3_plus, Ladder_T ladder3_minus,
696 int querylength5, int querylength3,
697
698 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
699 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
700 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
701
702 #if 0
703 char *queryuc_ptr_5, char *queryuc_ptr_3,
704 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
705 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
706 #endif
707 Listpool_T listpool, Hitlistpool_T hitlistpool,
708 int maxpairedpaths, int genestrand, int sensedir) {
709 int nconcordant = 0, n; /* Also used as an indicator of when to reset cutoff_level */
710 Ladder_T newladder5_plus, newladder5_minus, newladder3_plus, newladder3_minus;
711
712 T *hits5, *hits3;
713 int nhits5, nhits3;
714
715 int frontier_score, score5, score3;
716 int concordant_score_within_trims;
717 int cutoff_level, level;
718
719
720 debug(printf("Starting Concordance_pair_up_genome\n"));
721
722 #if 0
723 /* Rely instead on Ladder_cutoff, in case there are a few good hits */
724 if (0 && List_length(hitlist5_gplus) > MAX_HITS) {
725 Stage3end_gc(hitlist5_gplus);
726 newladder5_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
727 } else {
728 newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
729 Ladder_merge(ladder5_plus,newladder5_plus);
730 }
731
732 if (0 && List_length(hitlist5_gminus) > MAX_HITS) {
733 Stage3end_gc(hitlist5_gminus);
734 newladder5_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
735 } else {
736 newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
737 Ladder_merge(ladder5_minus,newladder5_minus);
738 }
739
740 if (0 && List_length(hitlist3_gplus) > MAX_HITS) {
741 Stage3end_gc(hitlist3_gplus);
742 newladder3_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
743 } else {
744 newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
745 Ladder_merge(ladder3_plus,newladder3_plus);
746 }
747
748 if (0 && List_length(hitlist3_gminus) > MAX_HITS) {
749 Stage3end_gc(hitlist3_gminus);
750 newladder3_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
751 } else {
752 newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
753 Ladder_merge(ladder3_minus,newladder3_minus);
754 }
755
756 #else
757 newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
758 newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
759 newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
760 newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
761
762 Ladder_merge(ladder5_plus,newladder5_plus,hitlistpool);
763 Ladder_merge(ladder5_minus,newladder5_minus,hitlistpool);
764 Ladder_merge(ladder3_plus,newladder3_plus,hitlistpool);
765 Ladder_merge(ladder3_minus,newladder3_minus,hitlistpool);
766 #endif
767
768 #ifdef DEBUG
769 Ladder_dump_nhits(ladder5_plus);
770 Ladder_dump_nhits(ladder5_minus);
771 Ladder_dump_nhits(ladder3_plus);
772 Ladder_dump_nhits(ladder3_minus);
773 #endif
774
775
776 /* Initial value, which resets to frontier_score + subopt_levels upon our first hitpair */
777 cutoff_level = Ladder_cutoff(newladder5_plus) + Ladder_cutoff(ladder3_plus);
778 if ((level = Ladder_cutoff(newladder5_minus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
779 cutoff_level = level;
780 }
781 if ((level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(newladder3_plus)) > cutoff_level) {
782 cutoff_level = level;
783 }
784 if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(newladder3_minus)) > cutoff_level) {
785 cutoff_level = level;
786 }
787 /* max_frontier_score = cutoff_level; */
788
789 frontier_score = 0;
790 /* Eliminating check on frontier_score against *found_score, because too greedy and can miss correct result */
791 while (*abort_pairing_p == false && frontier_score <= cutoff_level) {
792 debug(printf("frontier_score = %d, cutoff_level %d\n",frontier_score,cutoff_level));
793 for (score5 = 0; score5 <= frontier_score; score5++) {
794 score3 = frontier_score - score5;
795 debug(printf("score5 = %d, score3 = %d\n",score5,score3));
796
797 /* New hits 5 vs all hits 3 */
798 /* plus/plus: hits5_plus against hits3_plus (really on minus) */
799 if (score5 <= Ladder_cutoff(newladder5_plus) && score3 <= Ladder_cutoff(ladder3_plus)) {
800 hits5 = Ladder_hits_for_score(&nhits5,newladder5_plus,hitlistpool,score5);
801 hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
802 debug(printf("at score %d+%d, nnewhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
803
804 if ((n = do_genome_plus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
805 &hitpairs,&(*distant_hitpairs),
806 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
807 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
808 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
809 #if 0
810 queryuc_ptr_5,queryuc_ptr_3,
811 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
812 #endif
813 listpool,hitlistpool)) > 0) {
814 #if 0
815 /* Too greedy */
816 if (nconcordant == 0) {
817 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
818 cutoff_level = max_frontier_score;
819 }
820 }
821 #else
822 if (concordant_score_within_trims + subopt_levels < cutoff_level) {
823 cutoff_level = concordant_score_within_trims + subopt_levels;
824 debug(printf("Updated cutoff level to be %d\n",cutoff_level));
825 }
826 #endif
827 if ((nconcordant += n) > maxpairedpaths) {
828 debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
829 *abort_pairing_p = true;
830 }
831 }
832 }
833
834 /* minus/minus: hits3_minus (really on plus) against newhits5_minus */
835 if (score5 <= Ladder_cutoff(newladder5_minus) && score3 <= Ladder_cutoff(ladder3_minus)) {
836 hits5 = Ladder_hits_for_score(&nhits5,newladder5_minus,hitlistpool,score5);
837 hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
838 debug(printf("at score %d+%d, nnewhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
839 if ((n = do_genome_minus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
840 &hitpairs,&(*distant_hitpairs),
841 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
842 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
843 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
844 #if 0
845 queryuc_ptr_5,queryuc_ptr_3,
846 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
847 #endif
848 listpool,hitlistpool)) > 0) {
849 #if 0
850 /* Too greedy */
851 if (nconcordant == 0) {
852 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
853 cutoff_level = max_frontier_score;
854 }
855 }
856 #else
857 if (concordant_score_within_trims + subopt_levels < cutoff_level) {
858 cutoff_level = concordant_score_within_trims + subopt_levels;
859 debug(printf("Updated cutoff level to be %d\n",cutoff_level));
860 }
861 #endif
862 if ((nconcordant += n) > maxpairedpaths) {
863 debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
864 *abort_pairing_p = true;
865 }
866 }
867 }
868
869 /* All hits 5 vs new hits 3 */
870 /* plus/plus: hits5_plus against newhits3_plus (really on minus) */
871 if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(newladder3_plus)) {
872 hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
873 hits3 = Ladder_hits_for_score(&nhits3,newladder3_plus,hitlistpool,score3);
874 debug(printf("at score %d+%d, nhits5_plus = %d, nnewhits3_plus = %d\n",score5,score3,nhits5,nhits3));
875
876 if ((n = do_genome_plus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
877 &hitpairs,&(*distant_hitpairs),
878 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
879 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
880 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
881 #if 0
882 queryuc_ptr_5,queryuc_ptr_3,
883 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
884 #endif
885 listpool,hitlistpool)) > 0) {
886 #if 0
887 /* Too greedy */
888 if (nconcordant == 0) {
889 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
890 cutoff_level = max_frontier_score;
891 }
892 }
893 #else
894 if (concordant_score_within_trims + subopt_levels < cutoff_level) {
895 cutoff_level = concordant_score_within_trims + subopt_levels;
896 debug(printf("Updated cutoff level to be %d\n",cutoff_level));
897 }
898 #endif
899 if ((nconcordant += n) > maxpairedpaths) {
900 debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
901 *abort_pairing_p = true;
902 }
903 }
904 }
905
906 /* minus/minus: newhits3_minus (really on plus) against hits5_minus */
907 if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(newladder3_minus)) {
908 hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
909 hits3 = Ladder_hits_for_score(&nhits3,newladder3_minus,hitlistpool,score3);
910
911 debug(printf("at score %d+%d, nhits5_minus = %d, nnewhits3_minus = %d\n",score5,score3,nhits5,nhits3));
912 if ((n = do_genome_minus(&(*adjacent_score),&(*concordant_score_overall),&concordant_score_within_trims,
913 &hitpairs,&(*distant_hitpairs),
914 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
915 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
916 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
917 #if 0
918 queryuc_ptr_5,queryuc_ptr_3,
919 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
920 #endif
921 listpool,hitlistpool)) > 0) {
922 #if 0
923 /* Too greedy */
924 if (nconcordant == 0) {
925 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
926 cutoff_level = max_frontier_score;
927 }
928 }
929 #else
930 if (concordant_score_within_trims + subopt_levels < cutoff_level) {
931 cutoff_level = concordant_score_within_trims + subopt_levels;
932 debug(printf("Updated cutoff level to be %d\n",cutoff_level));
933 }
934 #endif
935 if ((nconcordant += n) > maxpairedpaths) {
936 debug(printf(" -- %d concordant paths exceeds %d",nconcordant,maxpairedpaths));
937 *abort_pairing_p = true;
938 }
939 }
940 }
941 }
942
943 frontier_score++;
944 }
945
946
947 Ladder_free(&newladder5_plus);
948 Ladder_free(&newladder5_minus);
949 Ladder_free(&newladder3_plus);
950 Ladder_free(&newladder3_minus);
951
952 Ladder_gc_duplicates(ladder5_plus);
953 Ladder_gc_duplicates(ladder5_minus);
954 Ladder_gc_duplicates(ladder3_plus);
955 Ladder_gc_duplicates(ladder3_minus);
956
957 debug(printf("Finished with Concordance_pair_up_genome: %d concordant\n",List_length(hitpairs)));
958
959 return hitpairs;
960 }
961
962
963 static int
do_distant_plus_plus(int * concordant_score,List_T * hitpairs,List_T * conc_transloc,List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)964 do_distant_plus_plus (int *concordant_score, List_T *hitpairs,
965 List_T *conc_transloc, List_T *samechr,
966 T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
967 int querylength5, int querylength3,
968 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
969 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
970 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
971 #if 0
972 char *queryuc_ptr_5, char *queryuc_ptr_3,
973 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
974 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
975 #endif
976 Listpool_T listpool, Hitlistpool_T hitlistpool) {
977
978 int nfound = 0, noverlap;
979 int i, j;
980 Stage3pair_T stage3pair;
981 T hit5, hit3;
982 int score;
983 Univcoord_T insert_start;
984 Chrpos_T pairmax;
985
986 if (nhits5 > 0 && nhits3 > 0) {
987 i = j = 0;
988 while (i < nhits5) {
989 hit5 = hits5[i];
990 if (circularp[hit5->effective_chrnum] == true) {
991 pairmax = pairmax_circular;
992 } else {
993 pairmax = pairmax_linear;
994 }
995
996 insert_start = hit5->genomicend - querylength5;
997 #ifdef DEBUG
998 printf("plus/plus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
999 i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
1000 print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5->circularalias,hit5);
1001 if (j >= 0 && j < nhits3) {
1002 printf(" j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
1003 hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
1004 }
1005 printf("\n");
1006 #endif
1007
1008 while (j >= 0 &&
1009 hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax > insert_start) {
1010 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1011 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1012 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
1013 j--;
1014 }
1015 j++; /* Finish backup */
1016
1017 while (j < nhits3 &&
1018 hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax <= insert_start) {
1019 debug(printf(" advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1020 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1021 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
1022 j++;
1023 }
1024
1025 noverlap = 0;
1026 while (noverlap++ < MAX_OVERLAP && j < nhits3 && hits3[j]->genomicstart + querylength3 <= pairmax + insert_start) {
1027 debug(printf(" overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
1028 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1029 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]->circularalias,hits3[j]));
1030 hit3 = hits3[j];
1031
1032 if (hit5->effective_chrnum != hit3->effective_chrnum) {
1033 debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1034
1035 } else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1036 /* Could potentially miss an alignment if the two ends overlap */
1037 debug(printf(" => double splice translocations"));
1038
1039 } else if (hit5->distant_splice_p == true || hit3->distant_splice_p == true) {
1040 debug(printf(" => conc_transloc effchr %d (chrnum5 %d, chrnum3 %d)",
1041 hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
1042 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT_TRANSLOCATIONS,
1043 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1044 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1045 #if 0
1046 queryuc_ptr_5,queryuc_ptr_3,
1047 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1048 #endif
1049 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1050 *conc_transloc = Hitlist_push(*conc_transloc,hitlistpool,(void *) stage3pair);
1051 nfound++;
1052 }
1053
1054 } else if (SENSE_INCONSISTENT_P(hit5->sensedir_for_concordance,hit3->sensedir_for_concordance)) {
1055 /* Use sensedir_for_concordance here and not sensedir */
1056 debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
1057
1058 } else if (hit3->genomicend < hit5->genomicstart) {
1059 debug(printf(" => scramble because end3 %llu < start5 %llu\n",
1060 (unsigned long long) hit3->genomicend,(unsigned long long) hit5->genomicstart));
1061 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1062 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1063 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1064 #if 0
1065 queryuc_ptr_5,queryuc_ptr_3,
1066 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1067 #endif
1068 listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1069 *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1070 nfound++;
1071 }
1072
1073 } else {
1074 /* Unexpected? */
1075 debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
1076 hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
1077 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
1078 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1079 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1080 #if 0
1081 queryuc_ptr_5,queryuc_ptr_3,
1082 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1083 #endif
1084 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1085
1086 if ((score = (stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims) +
1087 (stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims)) < *concordant_score) {
1088 *concordant_score = score;
1089 assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
1090 assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
1091 debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
1092 *concordant_score,stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims,
1093 stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims));
1094 }
1095 *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
1096 nfound++;
1097 }
1098 }
1099 debug(printf("\n"));
1100
1101 j++;
1102 }
1103 j--; /* Finish advance */
1104
1105 i++;
1106 }
1107 }
1108
1109 return nfound;
1110 }
1111
1112
1113 static int
do_distant_minus_minus(int * concordant_score,List_T * hitpairs,List_T * conc_transloc,List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)1114 do_distant_minus_minus (int *concordant_score, List_T *hitpairs,
1115 List_T *conc_transloc, List_T *samechr,
1116 T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
1117 int querylength5, int querylength3,
1118 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1119 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1120 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1121 #if 0
1122 char *queryuc_ptr_5, char *queryuc_ptr_3,
1123 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1124 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1125 #endif
1126 Listpool_T listpool, Hitlistpool_T hitlistpool) {
1127
1128 int nfound = 0, noverlap;
1129 int i, j;
1130 Stage3pair_T stage3pair;
1131 T hit5, hit3;
1132 int score;
1133 Univcoord_T insert_start;
1134 Chrpos_T pairmax;
1135
1136 if (nhits3 > 0 && nhits5 > 0) {
1137 i = j = 0;
1138 while (i < nhits3) {
1139 hit3 = hits3[i];
1140 if (circularp[hit3->effective_chrnum] == true) {
1141 pairmax = pairmax_circular;
1142 } else {
1143 pairmax = pairmax_linear;
1144 }
1145 insert_start = hit3->genomicstart - querylength3;
1146 #ifdef DEBUG
1147 printf("minus/minus: i=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
1148 i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
1149 print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3->circularalias,hit3);
1150 if (j >= 0 && j < nhits5) {
1151 printf(" j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
1152 hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
1153 }
1154 printf("\n");
1155 #endif
1156
1157 while (j >= 0 &&
1158 hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax > insert_start) {
1159 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1160 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1161 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
1162 j--;
1163 }
1164 j++; /* Finish backup */
1165
1166 while (j < nhits5 &&
1167 hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax <= insert_start) {
1168 debug(printf(" advance: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p\n",
1169 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1170 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
1171 j++;
1172 }
1173
1174 noverlap = 0;
1175 while (noverlap++ < MAX_OVERLAP && j < nhits5 && hits5[j]->genomicend + querylength5 <= pairmax + insert_start) {
1176 debug(printf(" overlap: j=%d/%d #%d:%u..%u %s %s circularalias:%d %p",
1177 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1178 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]->circularalias,hits5[j]));
1179 hit5 = hits5[j];
1180
1181 /* Do want to see pairs previously seen */
1182 if (hit3->effective_chrnum != hit5->effective_chrnum) {
1183 debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1184
1185 } else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1186 /* Could potentially miss an alignment if the two ends overlap */
1187 debug(printf(" => double splice translocations"));
1188
1189 } else if (hit5->distant_splice_p == true || hit3->distant_splice_p == true) {
1190 debug(printf(" => conc_transloc effchr %d (chrnum5 %d, chrnum3 %d)",
1191 hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
1192 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT_TRANSLOCATIONS,
1193 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1194 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1195 #if 0
1196 queryuc_ptr_5,queryuc_ptr_3,
1197 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1198 #endif
1199 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1200 *conc_transloc = Hitlist_push(*conc_transloc,hitlistpool,(void *) stage3pair);
1201 nfound++;
1202 }
1203
1204 } else if (SENSE_INCONSISTENT_P(hit3->sensedir_for_concordance,hit5->sensedir_for_concordance)) {
1205 /* Use sensedir_for_concordance here and not sensedir */
1206 debug(printf(" => sense inconsistent: %d | %d = %d",hit5->sensedir_for_concordance,hit3->sensedir_for_concordance,hit5->sensedir_for_concordance|hit3->sensedir_for_concordance));
1207
1208 } else if (hit5->genomicstart < hit3->genomicend) {
1209 debug(printf(" => scramble because start5 %llu < end3 %llu\n",
1210 (unsigned long long) hit5->genomicstart,(unsigned long long) hit3->genomicend));
1211 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1212 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1213 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1214 #if 0
1215 queryuc_ptr_5,queryuc_ptr_3,
1216 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1217 #endif
1218 listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1219 *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1220 nfound++;
1221 }
1222
1223 } else {
1224 /* Unexpected? */
1225 debug(printf(" => concordant effchr %d (chrnum5 %d, chrnum3 %d)",
1226 hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
1227 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/CONCORDANT,
1228 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1229 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1230 #if 0
1231 queryuc_ptr_5,queryuc_ptr_3,
1232 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1233 #endif
1234 listpool,/*expect_concordant_p*/true,/*transcriptome_guided_p*/false)) != NULL) {
1235
1236 if ((score = (stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims) +
1237 (stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims)) < *concordant_score) {
1238 *concordant_score = score;
1239 assert(stage3pair->hit5->refalt_nmatches_plus_spliced_trims <= stage3pair->hit5->querylength);
1240 assert(stage3pair->hit3->refalt_nmatches_plus_spliced_trims <= stage3pair->hit3->querylength);
1241 debug(printf(" => Updating concordant_score to be %d = %d + %d\n",
1242 *concordant_score,stage3pair->hit5->querylength - stage3pair->hit5->refalt_nmatches_plus_spliced_trims,
1243 stage3pair->hit3->querylength - stage3pair->hit3->refalt_nmatches_plus_spliced_trims));
1244 }
1245 *hitpairs = Hitlist_push(*hitpairs,hitlistpool,(void *) stage3pair);
1246 nfound++;
1247 }
1248 }
1249 debug(printf("\n"));
1250
1251 j++;
1252 }
1253 j--; /* Finish advance */
1254
1255 i++;
1256 }
1257 }
1258
1259 return nfound;
1260 }
1261
1262
1263 static int
do_distant_plus_minus(List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)1264 do_distant_plus_minus (List_T *samechr,
1265 T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
1266 int querylength5, int querylength3,
1267 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1268 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1269 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1270 #if 0
1271 char *queryuc_ptr_5, char *queryuc_ptr_3,
1272 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1273 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1274 #endif
1275 Listpool_T listpool, Hitlistpool_T hitlistpool) {
1276
1277 int nfound = 0, noverlap;
1278 int i, j;
1279 Stage3pair_T stage3pair;
1280 T hit5, hit3;
1281 Univcoord_T insert_start;
1282 Chrpos_T pairmax;
1283
1284 if (nhits5 > 0 && nhits3 > 0) {
1285 i = j = 0;
1286 while (i < nhits5) {
1287 hit5 = hits5[i];
1288 if (circularp[hit5->effective_chrnum] == true) {
1289 pairmax = pairmax_circular;
1290 } else {
1291 pairmax = pairmax_linear;
1292 }
1293 insert_start = hit5->genomicend - querylength5;
1294 #ifdef DEBUG
1295 printf("plus/minus: i=%d/%d #%d:%u..%u %s %s %p",
1296 i,nhits5,hit5->effective_chrnum,hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
1297 print_sense(hit5->sensedir_for_concordance),Method_string(hit5->method),hit5);
1298 if (j >= 0 && j < nhits3) {
1299 printf(" j=%d/%d #%d:%u..%u %p",j,nhits3,hits3[j]->effective_chrnum,
1300 hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,hits3[j]);
1301 }
1302 printf("\n");
1303 #endif
1304
1305 while (j >= 0 &&
1306 hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax > insert_start) {
1307 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s %p\n",
1308 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1309 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]));
1310 j--;
1311 }
1312 j++; /* Finish backup */
1313
1314 while (j < nhits3 &&
1315 hits3[j]->genomicstart + querylength3 /* for scramble: */ + pairmax <= insert_start) {
1316 debug(printf(" advance: j=%d/%d #%d:%u..%u %s %s %p\n",
1317 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1318 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]));
1319 j++;
1320 }
1321
1322 noverlap = 0;
1323 while (noverlap++ < MAX_OVERLAP && j < nhits3 && hits3[j]->genomicstart + querylength3 <= pairmax + insert_start) {
1324 debug(printf(" overlap: j=%d/%d #%d:%u..%u %s %s %p",
1325 j,nhits3,hits3[j]->effective_chrnum,hits3[j]->genomicstart - hits3[j]->chroffset,hits3[j]->genomicend - hits3[j]->chroffset,
1326 print_sense(hits3[j]->sensedir_for_concordance),Method_string(hits3[j]->method),hits3[j]));
1327 hit3 = hits3[j];
1328
1329 if (hit5->effective_chrnum != hit3->effective_chrnum) {
1330 debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1331
1332 } else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1333 debug(printf(" => double splice translocations"));
1334
1335 } else if (SENSE_INCONSISTENT_FOR_INVERSION_P(hit5->sensedir_for_concordance,hit3->sensedir_for_concordance)) {
1336 /* Use sensedir_for_concordance here and not sensedir */
1337 debug(printf(" => sense inconsistent for inversion"));
1338 #if 0
1339 } else if (hits3[j]->genomicstart + querylength3 <= insert_start) {
1340 debug(printf(" => scramble"));
1341 if (nsamechr <= maxpairedpaths &&
1342 (stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1343 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1344 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1345 #if 0
1346 queryuc_ptr_5,queryuc_ptr_3,
1347 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1348 #endif
1349 listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1350 *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1351 nfound++;
1352 }
1353 #endif
1354 } else {
1355 debug(printf(" => inversion effchr %d (chrnum5 %d, chrnum3 %d)",
1356 hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
1357 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_INVERSION,
1358 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1359 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1360 #if 0
1361 queryuc_ptr_5,queryuc_ptr_3,
1362 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1363 #endif
1364 listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1365 *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1366 nfound++;
1367 }
1368 }
1369 debug(printf("\n"));
1370
1371 j++;
1372 }
1373 j--; /* Finish advance */
1374
1375 i++;
1376 }
1377 }
1378
1379 return nfound;
1380 }
1381
1382
1383 static int
do_distant_minus_plus(List_T * samechr,T * hits5,int nhits5,T * hits3,int nhits3,int genestrand,int sensedir,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool)1384 do_distant_minus_plus (List_T *samechr,
1385 T *hits5, int nhits5, T *hits3, int nhits3, int genestrand, int sensedir,
1386 int querylength5, int querylength3,
1387 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1388 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1389 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1390 #if 0
1391 char *queryuc_ptr_5, char *queryuc_ptr_3,
1392 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1393 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1394 #endif
1395 Listpool_T listpool, Hitlistpool_T hitlistpool) {
1396
1397 int nfound = 0, noverlap;
1398 int i, j;
1399 Stage3pair_T stage3pair;
1400 T hit5, hit3;
1401 Univcoord_T insert_start;
1402 Chrpos_T pairmax;
1403
1404 if (nhits3 > 0 && nhits5 > 0) {
1405 i = j = 0;
1406 while (i < nhits3) {
1407 hit3 = hits3[i];
1408 if (circularp[hit3->effective_chrnum] == true) {
1409 pairmax = pairmax_circular;
1410 } else {
1411 pairmax = pairmax_linear;
1412 }
1413 insert_start = hit3->genomicstart - querylength3;
1414 #ifdef DEBUG
1415 printf("minus/plus: i=%d/%d #%d:%u..%u %s %s %p",
1416 i,nhits3,hit3->effective_chrnum,hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,
1417 print_sense(hit3->sensedir_for_concordance),Method_string(hit3->method),hit3);
1418 if (j >= 0 && j < nhits5) {
1419 printf(" j=%d/%d #%d:%u..%u %p",j,nhits5,hits5[j]->effective_chrnum,
1420 hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,hits5[j]);
1421 }
1422 printf("\n");
1423 #endif
1424
1425 while (j >= 0 &&
1426 hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax > insert_start) {
1427 debug(printf(" backup: j=%d/%d #%d:%u..%u %s %s %p\n",
1428 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1429 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]));
1430 j--;
1431 }
1432 j++; /* Finish backup */
1433
1434 while (j < nhits5 &&
1435 hits5[j]->genomicend + querylength5 /* for scramble: */ + pairmax <= insert_start) {
1436 debug(printf(" advance: j=%d/%d #%d:%u..%u %s %s %p\n",
1437 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1438 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]));
1439 j++;
1440 }
1441
1442 noverlap = 0;
1443 while (noverlap++ < MAX_OVERLAP && j < nhits5 && hits5[j]->genomicend + querylength5 <= pairmax + insert_start) {
1444 debug(printf(" overlap: j=%d/%d #%d:%u..%u %s %s %p",
1445 j,nhits5,hits5[j]->effective_chrnum,hits5[j]->genomicstart - hits5[j]->chroffset,hits5[j]->genomicend - hits5[j]->chroffset,
1446 print_sense(hits5[j]->sensedir_for_concordance),Method_string(hits5[j]->method),hits5[j]));
1447 hit5 = hits5[j];
1448
1449 if (hit3->effective_chrnum != hit5->effective_chrnum) {
1450 debug(printf(" => diff chrs %d and %d",hit5->effective_chrnum,hit3->effective_chrnum));
1451
1452 } else if (hit5->distant_splice_p == true && hit3->distant_splice_p == true /* && hit5->other_chrnum != hit3->other_chrnum */) {
1453 debug(printf(" => double splice translocations"));
1454
1455 } else if (SENSE_INCONSISTENT_FOR_INVERSION_P(hit3->sensedir_for_concordance,hit5->sensedir_for_concordance)) {
1456 /* Use sensedir_for_concordance here and not sensedir */
1457 debug(printf(" => sense inconsistent for inversion"));
1458 #if 0
1459 } else if (hits5[j]->genomicend + querylength5 <= insert_start) {
1460 debug(printf(" => scramble"));
1461 if ((*nsamechr) <= maxpairedpaths &&
1462 (stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_SCRAMBLE,
1463 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1464 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1465 #if 0
1466 queryuc_ptr_5,queryuc_ptr_3,
1467 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1468 #endif
1469 listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1470 *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1471 nfound++;
1472 }
1473 #endif
1474 } else {
1475 debug(printf(" => inversion effchr %d (chrnum5 %d, chrnum3 %d)",
1476 hit3->effective_chrnum,hit5->chrnum,hit3->chrnum));
1477 if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,sensedir,/*pairtype*/PAIRED_INVERSION,
1478 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1479 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1480 #if 0
1481 queryuc_ptr_5,queryuc_ptr_3,
1482 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1483 #endif
1484 listpool,/*expect_concordant_p*/false,/*transcriptome_guided_p*/false)) != NULL) {
1485 *samechr = Hitlist_push(*samechr,hitlistpool,(void *) stage3pair);
1486 nfound++;
1487 }
1488 }
1489 debug(printf("\n"));
1490
1491 j++;
1492 }
1493 j--; /* Finish advance */
1494
1495 i++;
1496 }
1497 }
1498
1499 return nfound;
1500 }
1501
1502
1503
1504 List_T
Concordance_pair_up_distant(bool * abort_pairing_p,int * concordant_score,List_T * samechr,List_T * conc_transloc,List_T hitpairs,List_T hitlist5_gplus,List_T hitlist5_gminus,List_T hitlist3_gplus,List_T hitlist3_gminus,Ladder_T ladder5_plus,Ladder_T ladder5_minus,Ladder_T ladder3_plus,Ladder_T ladder3_minus,int querylength5,int querylength3,int * mismatch_positions_alloc_5,int * mismatch_positions_alloc_3,Compress_T query5_compress_fwd,Compress_T query5_compress_rev,Compress_T query3_compress_fwd,Compress_T query3_compress_rev,Listpool_T listpool,Hitlistpool_T hitlistpool,int maxpairedpaths,int genestrand,int sensedir)1505 Concordance_pair_up_distant (bool *abort_pairing_p, int *concordant_score,
1506 List_T *samechr, List_T *conc_transloc, List_T hitpairs,
1507
1508 List_T hitlist5_gplus, List_T hitlist5_gminus,
1509 List_T hitlist3_gplus, List_T hitlist3_gminus,
1510
1511 Ladder_T ladder5_plus, Ladder_T ladder5_minus,
1512 Ladder_T ladder3_plus, Ladder_T ladder3_minus,
1513 int querylength5, int querylength3,
1514
1515 int *mismatch_positions_alloc_5, int *mismatch_positions_alloc_3,
1516 Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
1517 Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
1518 #if 0
1519 char *queryuc_ptr_5, char *queryuc_ptr_3,
1520 Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
1521 Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool,
1522 #endif
1523 Listpool_T listpool, Hitlistpool_T hitlistpool,
1524 int maxpairedpaths, int genestrand, int sensedir) {
1525 int nfound = 0, n;
1526 int max_frontier_score, frontier_score, score5, score3;
1527 int cutoff_level, level;
1528 Ladder_T newladder5_plus, newladder5_minus, newladder3_plus, newladder3_minus;
1529
1530 T *hits5, *hits3;
1531 int nhits5, nhits3;
1532
1533
1534 debug(printf("Starting Concordance_pair_up_distant\n"));
1535
1536 #if 0
1537 /* Rely instead on Ladder_cutoff, in case there are a few good hits */
1538 if (0 && List_length(hitlist5_gplus) > MAX_HITS) {
1539 Stage3end_gc(hitlist5_gplus);
1540 newladder5_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
1541 } else {
1542 newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
1543 Ladder_merge(ladder5_plus,newladder5_plus);
1544 }
1545
1546 if (0 && List_length(hitlist5_gminus) > MAX_HITS) {
1547 Stage3end_gc(hitlist5_gminus);
1548 newladder5_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/true);
1549 } else {
1550 newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
1551 Ladder_merge(ladder5_minus,newladder5_minus);
1552 }
1553
1554 if (0 && List_length(hitlist3_gplus) > MAX_HITS) {
1555 Stage3end_gc(hitlist3_gplus);
1556 newladder3_plus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
1557 } else {
1558 newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
1559 Ladder_merge(ladder3_plus,newladder3_plus);
1560 }
1561
1562 if (0 && List_length(hitlist3_gminus) > MAX_HITS) {
1563 Stage3end_gc(hitlist3_gminus);
1564 newladder3_minus = Ladder_new((List_T) NULL,hitlistpool,/*end5p*/false);
1565 } else {
1566 newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
1567 Ladder_merge(ladder3_minus,newladder3_minus);
1568 }
1569
1570 #else
1571 newladder5_plus = Ladder_new(hitlist5_gplus,hitlistpool,/*end5p*/true);
1572 newladder5_minus = Ladder_new(hitlist5_gminus,hitlistpool,/*end5p*/true);
1573 newladder3_plus = Ladder_new(hitlist3_gplus,hitlistpool,/*end5p*/false);
1574 newladder3_minus = Ladder_new(hitlist3_gminus,hitlistpool,/*end5p*/false);
1575
1576 Ladder_merge(ladder5_plus,newladder5_plus,hitlistpool);
1577 Ladder_merge(ladder5_minus,newladder5_minus,hitlistpool);
1578 Ladder_merge(ladder3_plus,newladder3_plus,hitlistpool);
1579 Ladder_merge(ladder3_minus,newladder3_minus,hitlistpool);
1580 #endif
1581
1582
1583 cutoff_level = Ladder_cutoff(newladder5_plus) + Ladder_cutoff(ladder3_plus);
1584 if ((level = Ladder_cutoff(newladder5_minus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
1585 cutoff_level = level;
1586 }
1587 if ((level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(newladder3_plus)) > cutoff_level) {
1588 cutoff_level = level;
1589 }
1590 if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(newladder3_minus)) > cutoff_level) {
1591 cutoff_level = level;
1592 }
1593 if ((level = Ladder_cutoff(ladder5_plus) + Ladder_cutoff(ladder3_minus)) > cutoff_level) {
1594 cutoff_level = level;
1595 }
1596 if ((level = Ladder_cutoff(ladder5_minus) + Ladder_cutoff(ladder3_plus)) > cutoff_level) {
1597 cutoff_level = level;
1598 }
1599 max_frontier_score = cutoff_level;
1600
1601 frontier_score = 0;
1602 while (frontier_score <= cutoff_level) {
1603 debug(printf("frontier_score = %d\n",frontier_score));
1604 for (score5 = 0; score5 <= frontier_score; score5++) {
1605 score3 = frontier_score - score5;
1606 debug(printf("score5 = %d, score3 = %d\n",score5,score3));
1607
1608 /* New hits 5 vs all hits 3 */
1609 /* plus/plus: hits5_plus against hits3_plus (really on minus) */
1610 if (score5 <= Ladder_cutoff(newladder5_plus) && score3 <= Ladder_cutoff(ladder3_plus)) {
1611 hits5 = Ladder_hits_for_score(&nhits5,newladder5_plus,hitlistpool,score5);
1612 hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
1613 debug(printf("at score %d+%d, nnewhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
1614 if ((n = do_distant_plus_plus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1615 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1616 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1617 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1618 #if 0
1619 queryuc_ptr_5,queryuc_ptr_3,
1620 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1621 #endif
1622 listpool,hitlistpool)) > 0) {
1623 if (nfound == 0) {
1624 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1625 cutoff_level = max_frontier_score;
1626 }
1627 }
1628 if ((nfound += n) > maxpairedpaths) {
1629 debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1630 *abort_pairing_p = true;
1631 }
1632 }
1633 }
1634
1635 /* minus/minus: hits3_minus (really on plus) against hits5_minus */
1636 if (score5 <= Ladder_cutoff(newladder5_minus) && score3 <= Ladder_cutoff(ladder3_minus)) {
1637 hits5 = Ladder_hits_for_score(&nhits5,newladder5_minus,hitlistpool,score5);
1638 hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
1639 debug(printf("at score %d+%d, nnewhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
1640 if ((n = do_distant_minus_minus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1641 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1642 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1643 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1644 #if 0
1645 queryuc_ptr_5,queryuc_ptr_3,
1646 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1647 #endif
1648 listpool,hitlistpool)) > 0) {
1649 if (nfound == 0) {
1650 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1651 cutoff_level = max_frontier_score;
1652 }
1653 }
1654 if ((nfound += n) > maxpairedpaths) {
1655 debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1656 *abort_pairing_p = true;
1657 }
1658 }
1659 }
1660
1661
1662 /* All hits 5 vs new hits 3 */
1663 /* plus/plus: hits5_plus against hits3_plus (really on minus) */
1664 if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(newladder3_plus)) {
1665 hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
1666 hits3 = Ladder_hits_for_score(&nhits3,newladder3_plus,hitlistpool,score3);
1667 debug(printf("at score %d+%d, nnewhits5_plus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
1668 if ((n = do_distant_plus_plus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1669 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1670 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1671 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1672 #if 0
1673 queryuc_ptr_5,queryuc_ptr_3,
1674 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1675 #endif
1676 listpool,hitlistpool)) > 0) {
1677
1678 if (nfound == 0) {
1679 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1680 cutoff_level = max_frontier_score;
1681 }
1682 }
1683 if ((nfound += n) > maxpairedpaths) {
1684 debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1685 *abort_pairing_p = true;
1686 }
1687 }
1688 }
1689
1690 /* minus/minus: hits3_minus (really on plus) against hits5_minus */
1691 if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(newladder3_minus)) {
1692 hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
1693 hits3 = Ladder_hits_for_score(&nhits3,newladder3_minus,hitlistpool,score3);
1694 debug(printf("at score %d+%d, nnewhits5_minus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
1695 if ((n = do_distant_minus_minus(&(*concordant_score),&hitpairs,&(*conc_transloc),&(*samechr),
1696 hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1697 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1698 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1699 #if 0
1700 queryuc_ptr_5,queryuc_ptr_3,
1701 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1702 #endif
1703 listpool,hitlistpool)) > 0) {
1704 if (nfound == 0) {
1705 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1706 cutoff_level = max_frontier_score;
1707 }
1708 }
1709 if ((nfound += n) > maxpairedpaths) {
1710 debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1711 *abort_pairing_p = true;
1712 }
1713 }
1714 }
1715
1716
1717 /* All hits5 vs all hits3 */
1718 /* plus/minus (inversions): hits5_plus against hits3_minus */
1719 if (score5 <= Ladder_cutoff(ladder5_plus) && score3 <= Ladder_cutoff(ladder3_minus)) {
1720 hits5 = Ladder_hits_for_score(&nhits5,ladder5_plus,hitlistpool,score5);
1721 hits3 = Ladder_hits_for_score(&nhits3,ladder3_minus,hitlistpool,score3);
1722 debug(printf("at score %d+%d, nhits5_plus = %d, nhits3_minus = %d\n",score5,score3,nhits5,nhits3));
1723 if ((n = do_distant_plus_minus(&(*samechr),hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1724 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1725 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1726 #if 0
1727 queryuc_ptr_5,queryuc_ptr_3,
1728 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1729 #endif
1730 listpool,hitlistpool)) > 0) {
1731 if (nfound == 0) {
1732 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1733 cutoff_level = max_frontier_score;
1734 }
1735 }
1736 if ((nfound += n) > maxpairedpaths) {
1737 debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1738 *abort_pairing_p = true;
1739 }
1740 }
1741 }
1742
1743 /* minus/plus (inversions): hits3_plus against hits5_minus */
1744 if (score5 <= Ladder_cutoff(ladder5_minus) && score3 <= Ladder_cutoff(ladder3_plus)) {
1745 hits5 = Ladder_hits_for_score(&nhits5,ladder5_minus,hitlistpool,score5);
1746 hits3 = Ladder_hits_for_score(&nhits3,ladder3_plus,hitlistpool,score3);
1747 debug(printf("at score %d+%d, nhits5_minus = %d, nhits3_plus = %d\n",score5,score3,nhits5,nhits3));
1748 if ((n = do_distant_minus_plus(&(*samechr),hits5,nhits5,hits3,nhits3,genestrand,sensedir,querylength5,querylength3,
1749 mismatch_positions_alloc_5,mismatch_positions_alloc_3,
1750 query5_compress_fwd,query5_compress_rev,query3_compress_fwd,query3_compress_rev,
1751 #if 0
1752 queryuc_ptr_5,queryuc_ptr_3,
1753 pairpool,dynprogL,dynprogM,dynprogR,oligoindices_minor,diagpool,cellpool,
1754 #endif
1755 listpool,hitlistpool)) > 0) {
1756 if (nfound == 0) {
1757 if ((cutoff_level = frontier_score + subopt_levels) > max_frontier_score) {
1758 cutoff_level = max_frontier_score;
1759 }
1760 }
1761 if ((nfound += n) > maxpairedpaths) {
1762 debug(printf(" -- %d found paths exceeds %d",nfound,maxpairedpaths));
1763 *abort_pairing_p = true;
1764 }
1765 }
1766 }
1767 }
1768
1769 frontier_score++;
1770 }
1771
1772
1773 Ladder_free(&newladder5_plus);
1774 Ladder_free(&newladder5_minus);
1775 Ladder_free(&newladder3_plus);
1776 Ladder_free(&newladder3_minus);
1777
1778 Ladder_gc_duplicates(ladder5_plus);
1779 Ladder_gc_duplicates(ladder5_minus);
1780 Ladder_gc_duplicates(ladder3_plus);
1781 Ladder_gc_duplicates(ladder3_minus);
1782
1783 debug(printf("Finished with Concordance_pair_up_distant: %d concordant, %d samechr, %d conc_transloc\n",
1784 List_length(hitpairs),List_length(*samechr),List_length(*conc_transloc)));
1785
1786 return hitpairs;
1787 }
1788
1789
1790
1791 #if 0
1792 static int
1793 path_ascending_cmp (const void *a, const void *b) {
1794 List_T x = * (List_T *) a;
1795 List_T y = * (List_T *) b;
1796 Univdiag_T diagonalx, diagonaly;
1797 Univcoord_T path_start_x, path_start_y, path_end_x, path_end_y;
1798
1799 diagonalx = List_head(x);
1800 diagonaly = List_head(y);
1801 path_start_x = diagonalx->univdiagonal;
1802 path_start_y = diagonaly->univdiagonal;
1803
1804 if (path_start_x < path_start_y) {
1805 return -1;
1806 } else if (path_start_y < path_start_x) {
1807 return +1;
1808 } else {
1809 diagonalx = (Univdiag_T) List_last_value(x);
1810 diagonaly = (Univdiag_T) List_last_value(y);
1811 path_end_x = diagonalx->univdiagonal;
1812 path_end_y = diagonaly->univdiagonal;
1813 if (path_end_x < path_end_y) {
1814 return -1;
1815 } else if (path_end_y < path_end_x) {
1816 return +1;
1817 } else {
1818 return 0;
1819 }
1820 }
1821 }
1822 #endif
1823
1824
1825 #if 0
1826 static int
1827 hit_ascending_cmp (const void *a, const void *b) {
1828 T x = * (T *) a;
1829 T y = * (T *) b;
1830
1831 if (x->high < y->high) {
1832 return -1;
1833 } else if (y->high < x->high) {
1834 return +1;
1835 } else if (x->low < y->low) {
1836 return -1;
1837 } else if (y->low < x->low) {
1838 return +1;
1839 } else {
1840 return 0;
1841 }
1842 }
1843 #endif
1844
1845
1846 void
Concordance_setup(int subopt_levels_in,Chrpos_T pairmax_transcriptome_in,Chrpos_T pairmax_linear_in,Chrpos_T pairmax_circular_in,Chrpos_T expected_pairlength_in,Chrpos_T pairlength_deviation_in,bool * circularp_in,bool merge_samechr_p_in)1847 Concordance_setup (int subopt_levels_in, Chrpos_T pairmax_transcriptome_in,
1848 Chrpos_T pairmax_linear_in, Chrpos_T pairmax_circular_in,
1849 Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in,
1850 bool *circularp_in, bool merge_samechr_p_in) {
1851 subopt_levels = subopt_levels_in;
1852
1853 pairmax_transcriptome = pairmax_transcriptome_in;
1854 pairmax_linear = pairmax_linear_in;
1855 pairmax_circular = pairmax_circular_in;
1856
1857 expected_pairlength = expected_pairlength_in;
1858 pairlength_deviation = pairlength_deviation_in;
1859 adjacent_pairlength = expected_pairlength + pairlength_deviation;
1860
1861 circularp = circularp_in;
1862 merge_samechr_p = merge_samechr_p_in;
1863
1864 return;
1865 }
1866