1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "runtime.H"
19 
20 #include "system.H"
21 #include "sequence.H"
22 
23 #include "sqStore.H"
24 #include "sqCache.H"
25 #include "ovStore.H"
26 
27 #include "edlib.H"
28 
29 #include "overlapAlign-globalData.H"
30 #include "overlapAlign-threadData.H"
31 #include "overlapAlign-computation.H"
32 
33 
34 
35 double   localMatch     =  1;
36 double   localMismatch  = -1;
37 double   localGap       = -1;
38 
39 double
getScore(EdlibAlignResult result,int32 pp)40 getScore(EdlibAlignResult result, int32 pp) {
41 
42   if ((pp < 0) ||
43       (pp >= result.alignmentLength))
44     return(localMatch);
45 
46   switch (result.alignment[pp]) {
47     case EDLIB_EDOP_MATCH:     return(localMatch);     break;
48     case EDLIB_EDOP_INSERT:    return(localGap);       break;
49     case EDLIB_EDOP_DELETE:    return(localGap);       break;
50     case EDLIB_EDOP_MISMATCH:  return(localMismatch);  break;
51     default:                   return(0);              break;
52   }
53 }
54 
55 
56 bool
testAlignment(char * aRead,int32 & abgn,int32 & aend,int32 alen,uint32 Aid,char * bRead,int32 & bbgn,int32 & bend,int32 blen,uint32 Bid,double maxAlignErate,double maxAcceptErate,double & erate)57 testAlignment(char   *aRead,  int32  &abgn,  int32  &aend,  int32         alen,   uint32 Aid,
58               char   *bRead,  int32  &bbgn,  int32  &bend,  int32         blen,   uint32 Bid,
59               double  maxAlignErate,
60               double  maxAcceptErate,
61               double &erate) {
62 
63   assert(abgn >= 0);
64   assert(aend <= alen);
65   assert(bbgn >= 0);
66   assert(bend <= blen);
67 
68   erate = 1.0;   //  Set the default return value, 100% error.
69 
70   //  Compute an alignment with the alignment path.
71 
72   EdlibAlignResult result = edlibAlign(aRead + abgn, aend - abgn,
73                                        bRead + bbgn, bend - bbgn,
74                                        edlibNewAlignConfig((int32)ceil(1.1 * maxAlignErate * ((aend - abgn) + (bend - bbgn)) / 2.0),
75                                                            EDLIB_MODE_HW,
76                                                            EDLIB_TASK_PATH));
77 
78   //  If no result, free it and return that the alignment failed.
79 
80   if (result.numLocations == 0) {
81     edlibFreeAlignResult(result);
82     return(false);
83   }
84 
85   //  Compute the (approximate) length of the alignment, for EDLIB_TASK_LOC
86   //result.alignmentLength = ((aend - abgn) + (result.endLocations[0] + 1 - result.startLocations[0]) + (result.editDistance)) / 2;
87 
88   //  Compute the global error rate.  If it's terrible, return that the alignment failed.
89 
90   erate = (double)result.editDistance / result.alignmentLength;
91 
92   if (erate > maxAcceptErate) {
93     edlibFreeAlignResult(result);
94     return(false);
95   }
96 
97   //  Good quality.  Save the positions.
98   //  The A positions don't change (yet).
99 
100   bend = bbgn + result.endLocations[0] + 1;    //  Edlib returns 0-based positions, add one to end to get space-based.
101   bbgn = bbgn + result.startLocations[0];
102 
103   edlibFreeAlignResult(result);
104 
105   return(true);
106 }
107 
108 
109 
110 
111 
112 
113 
114 int32
extend5(char * aRead,int32 & abgn,int32 & aend,int32 alen,uint32 Aid,char * bRead,int32 & bbgn,int32 & bend,int32 blen,uint32 Bid,double maxAlignErate,double maxAcceptErate,double & erate)115 extend5(char   *aRead,  int32  &abgn,  int32  &aend,  int32         alen,   uint32 Aid,
116         char   *bRead,  int32  &bbgn,  int32  &bend,  int32         blen,   uint32 Bid,
117         double  maxAlignErate,
118         double  maxAcceptErate,
119         double &erate) {
120 
121   assert(abgn >= 0);
122   assert(aend <= alen);
123   assert(bbgn >= 0);
124   assert(bend <= blen);
125 
126   erate = 1.0;   //  Set the default return value, 100% error.
127 
128   //  Compute an alignment with the alignment path.
129 
130   EdlibAlignResult result = edlibAlign(aRead + abgn, aend - abgn,   //  Query
131                                        bRead + bbgn, bend - bbgn,   //  Target
132                                        edlibNewAlignConfig((int32)ceil(1.1 * maxAlignErate * ((aend - abgn) + (bend - bbgn)) / 2.0),
133                                                            EDLIB_MODE_HW,
134                                                            EDLIB_TASK_PATH));
135 
136   //  If no result, free it and return that the alignment failed.
137 
138   if (result.numLocations == 0) {
139     edlibFreeAlignResult(result);
140     return(-1);
141   }
142 
143 #if 0
144   char *aAln = new char [result.alignmentLength + 1];
145   char *bAln = new char [result.alignmentLength + 1];
146 
147   edlibAlignmentToStrings(result,
148                           aRead + abgn, aend - abgn,   //  Query
149                           bRead + bbgn, bend - bbgn,   //  Target
150                           aAln,
151                           bAln);
152 
153   fprintf(stderr, "A %s\n", aAln);
154   fprintf(stderr, "B %s\n", bAln);
155 
156   delete [] aAln;
157   delete [] bAln;
158 #endif
159 
160 
161   //  Compute the (approximate) length of the alignment, for EDLIB_TASK_LOC
162   //result.alignmentLength = ((aend - abgn) + (result.endLocations[0] + 1 - result.startLocations[0]) + (result.editDistance)) / 2;
163 
164   //  Compute the global error rate.  If it's terrible, return that the alignment failed.
165 
166   erate = (double)result.editDistance / result.alignmentLength;
167 
168   //if (erate > maxAcceptErate) {
169   //  edlibFreeAlignResult(result);
170   //  return(-1);
171   //}
172 
173   //  Good quality.  Save the positions.
174   //  The A positions don't change (yet).
175 
176   bend = bbgn + result.endLocations[0] + 1;    //  Edlib returns 0-based positions, add one to end to get space-based.
177   bbgn = bbgn + result.startLocations[0];
178 
179   //  Analyze the alignment in detail.  Trim back bad ends.
180   //
181   //  We're trying to extend the 5' end of the A sequence.
182   //  Scan the alignment backwards to check if we enter a region of bad alignment.
183 
184   int32    localWindow    = 50;
185   double   localScore     = localMatch * localWindow;
186 
187   double   localThreshold = 0.50;
188 
189   int32  abgnN = aend;
190   int32  bbgnN = bend;
191   int32  ii    = result.alignmentLength - 1;   //  Window start (left most position of window)
192 
193   //  Scan towards the 5' end until we hit a region of local garbage.
194 
195   for (; ii >= 0; ii--) {
196     localScore += getScore(result, ii);
197     localScore -= getScore(result, ii + localWindow);
198 
199     abgnN -= (result.alignment[ii] != EDLIB_EDOP_DELETE);   //  Move left if not gap in B (delete in B)?
200     bbgnN -= (result.alignment[ii] != EDLIB_EDOP_INSERT);   //  Move left if not gap in A (insert in B)?
201 
202     assert(abgnN >= 0);
203     assert(bbgnN >= 0);
204     //fprintf(stderr, "NEW %d %d after align %d\n", abgnN, bbgnN, result.alignment[ii]);
205 
206     //fprintf(stderr, "extend5      at ii %d = %f   A %d-%d  B %d-%d\n",
207     //        ii, localScore / localWindow, abgnN, aend, bbgnN, bend);
208 
209     if (localScore / localWindow < localThreshold) {
210       //fprintf(stderr, "extend5 fail at ii %d = %f   A %d-%d  B %d-%d\n",
211       //        ii, localScore / localWindow, abgnN, aend, bbgnN, bend);
212       break;
213     }
214   }
215 
216   //  If the score is good, the whole extension is good.
217 
218   if (localScore / localWindow >= localThreshold) {
219     edlibFreeAlignResult(result);
220     return(0);
221   }
222 
223   //  Otherwise, scan the window to find the maximal score and trim to there.
224 
225   double  maxWindowScore = localScore;
226   int32   ww             = ii;
227 
228   for (int32 kk=0; kk<localWindow; kk++) {
229     localScore -= getScore(result, ii + kk);
230     localScore += getScore(result, ii + kk + localWindow);
231 
232     if (maxWindowScore < localScore) {
233       maxWindowScore = localScore;
234       ww             = ii + kk;
235     }
236   }
237 
238   //  Now that we know where we should terminate the alignment, back up to that spot,
239   //  adjusting the coordinates.
240 
241   for (int32 kk=ii; kk<=ww; kk++) {
242     abgnN += (result.alignment[kk] != EDLIB_EDOP_DELETE);   //  Delete in B?
243     bbgnN += (result.alignment[kk] != EDLIB_EDOP_INSERT);   //  Insert in B?
244   }
245 
246   //fprintf(stderr, "extend5 fail at ii %d = %f   abgn %d  bbgn %d\n",
247   //        ii, localScore / localWindow, abgnN, bbgnN);
248 
249   abgn = abgnN;
250   bbgn = bbgnN;
251 
252   edlibFreeAlignResult(result);
253   return(1);
254 }
255 
256 
257 
258 
259 
260 
261 
262 
263 
264 
265 
266 int32
extend3(char * aRead,int32 & abgn,int32 & aend,int32 alen,uint32 Aid,char * bRead,int32 & bbgn,int32 & bend,int32 blen,uint32 Bid,double maxAlignErate,double maxAcceptErate,double & erate)267 extend3(char   *aRead,  int32  &abgn,  int32  &aend,  int32         alen,   uint32 Aid,
268         char   *bRead,  int32  &bbgn,  int32  &bend,  int32         blen,   uint32 Bid,
269         double  maxAlignErate,
270         double  maxAcceptErate,
271         double &erate) {
272 
273   assert(abgn >= 0);
274   assert(aend <= alen);
275   assert(bbgn >= 0);
276   assert(bend <= blen);
277 
278   erate = 1.0;   //  Set the default return value, 100% error.
279 
280   //  Compute an alignment with the alignment path.
281 
282   EdlibAlignResult result = edlibAlign(aRead + abgn, aend - abgn,
283                                        bRead + bbgn, bend - bbgn,
284                                        edlibNewAlignConfig((int32)ceil(1.1 * maxAlignErate * ((aend - abgn) + (bend - bbgn)) / 2.0),
285                                                            EDLIB_MODE_HW,
286                                                            EDLIB_TASK_PATH));
287 
288   //  If no result, free it and return that the alignment failed.
289 
290   if (result.numLocations == 0) {
291     edlibFreeAlignResult(result);
292     return(-1);
293   }
294 
295 #if 0
296   char *aAln = new char [result.alignmentLength + 1];
297   char *bAln = new char [result.alignmentLength + 1];
298 
299   edlibAlignmentToStrings(result,
300                           aRead + abgn, aend - abgn,   //  Query
301                           bRead + bbgn, bend - bbgn,   //  Target
302                           aAln,
303                           bAln);
304 
305   fprintf(stderr, "A %s\n", aAln);
306   fprintf(stderr, "B %s\n", bAln);
307 
308   delete [] aAln;
309   delete [] bAln;
310 #endif
311 
312   //  Compute the (approximate) length of the alignment, for EDLIB_TASK_LOC
313   //result.alignmentLength = ((aend - abgn) + (result.endLocations[0] + 1 - result.startLocations[0]) + (result.editDistance)) / 2;
314 
315   //  Compute the global error rate.  If it's terrible, return that the alignment failed.
316 
317   erate = (double)result.editDistance / result.alignmentLength;
318 
319   //if (erate > maxAcceptErate) {
320   //  edlibFreeAlignResult(result);
321   //  return(-1);
322   //}
323 
324   //  Good quality.  Save the positions.
325   //  The A positions don't change (yet).
326 
327   bend = bbgn + result.endLocations[0] + 1;    //  Edlib returns 0-based positions, add one to end to get space-based.
328   bbgn = bbgn + result.startLocations[0];
329 
330   //  Analyze the alignment in detail.  Trim back bad ends.
331   //
332   //  We're trying to extend the 5' end of the A sequence.
333   //  Scan the alignment backwards to check if we enter a region of bad alignment.
334 
335   int32    localWindow    = 50;
336   double   localScore     = localMatch * localWindow;
337 
338   double   localThreshold = 0.50;
339 
340   int32  aendN = abgn;
341   int32  bendN = bbgn;
342   int32  ii    = 0;        //  Window end (right most position of window)
343 
344   //  Scan towards the 3' end until we hit a region of local garbage.
345 
346   for (; ii < result.alignmentLength; ii++) {
347     localScore += getScore(result, ii);
348     localScore -= getScore(result, ii - localWindow);
349 
350     aendN += (result.alignment[ii] != EDLIB_EDOP_DELETE);   //  Delete in B?
351     bendN += (result.alignment[ii] != EDLIB_EDOP_INSERT);   //  Insert in B?
352 
353     assert(aendN <= alen);
354     assert(bendN <= blen);
355 
356     if (localScore / localWindow < localThreshold) {
357       //fprintf(stderr, "extend3 fail at ii %d = %f\n", ii, localScore / localWindow);
358       break;
359     }
360   }
361 
362   //  If the score is good, the whole extension is good.
363 
364   if (localScore / localWindow >= localThreshold) {
365     edlibFreeAlignResult(result);
366     //fprintf(stderr, "extend3 all good!\n");
367     return(0);
368   }
369 
370   //  Otherwise, scan the window to find the maximal score and trim to there.
371 
372   double  maxWindowScore = localScore;
373   int32   ww             = ii;
374 
375   for (int32 kk=0; kk<localWindow; kk++) {
376     localScore -= getScore(result, ii - kk);
377     localScore += getScore(result, ii - kk - localWindow);
378 
379     if (maxWindowScore < localScore) {
380       maxWindowScore = localScore;
381       ww             = ii - kk;
382     }
383   }
384 
385   //  Now that we know where we should terminate the alignment, back up to that spot,
386   //  adjusting the coordinates.
387 
388   for (int32 kk=ii; kk > ww; kk--) {
389     aendN -= (result.alignment[kk] != EDLIB_EDOP_DELETE);   //  Delete in B?
390     bendN -= (result.alignment[kk] != EDLIB_EDOP_INSERT);   //  Insert in B?
391   }
392 
393   aend = aendN;
394   bend = bendN;
395 
396   edlibFreeAlignResult(result);
397   return(1);
398 }
399 
400 
401 
402 
403 
404 
405 
406 
407 
408 
409 
410 
411 
412 
413 
414 //  Compute an alignment between A and B,
415 //    requring all of A to be in the alignment,
416 //    returning new end points on B, allowing free gaps on the ends.
417 //  The editDist and alignLen of the alignment are returned.
418 //
419 bool
computeAlignment(char * aRead,int32 abgn,int32 aend,int32 UNUSED (alen),char const * Alabel,uint32 Aid,char * bRead,int32 & bbgn,int32 & bend,int32 blen,char const * Blabel,uint32 Bid,double maxErate,int32 & editDist,int32 & alignLen,uint32 verbose)420 computeAlignment(char  *aRead,  int32   abgn,  int32   aend,  int32  UNUSED(alen),  char const *Alabel,  uint32 Aid,
421                  char  *bRead,  int32  &bbgn,  int32  &bend,  int32         blen,   char const *Blabel,  uint32 Bid,
422                  double  maxErate,
423                  int32  &editDist,
424                  int32  &alignLen,
425                  uint32  verbose) {
426   bool  success = false;
427 
428   editDist = 0;
429   alignLen = 0;
430 
431   if (verbose > 0) {
432     fprintf(stderr, "computeAlignment()--         align %s %6u %6d-%-6d  %.2f%% error\n", Alabel, Aid, abgn, aend, 100.0 * maxErate);
433     fprintf(stderr, "computeAlignment()--            vs %s %6u %6d-%-6d\n",               Blabel, Bid, bbgn, bend);
434   }
435 
436   EdlibAlignResult result = edlibAlign(aRead + abgn, aend - abgn,
437                                        bRead + bbgn, bend - bbgn,
438                                        edlibNewAlignConfig((int32)ceil(1.1 * maxErate * ((aend - abgn) + (bend - bbgn)) / 2.0),
439                                                            EDLIB_MODE_HW,
440                                                            EDLIB_TASK_LOC));
441 
442   //  If there is a result, compute the (approximate) length of the alignment.
443   //  Edlib mode TASK_LOC doesn't populate this field.
444 
445   if (result.numLocations > 0)
446     result.alignmentLength = ((aend - abgn) + (result.endLocations[0] + 1 - result.startLocations[0]) + (result.editDistance)) / 2;
447 
448   //  Save the result if it is of acceptable quality.
449 
450   //fprintf(stderr, "XX numLocations %d editDist %d maxErate %f alignLength %d\n",
451   //        result.numLocations, result.editDistance, maxErate, result.alignmentLength);
452 
453   if ((result.numLocations > 0) &&
454       (result.editDistance <= maxErate * result.alignmentLength)) {
455     success  = true;
456 
457     bend     = bbgn + result.endLocations[0] + 1;    //  Edlib returns 0-based positions, add one to end to get space-based.
458     bbgn     = bbgn + result.startLocations[0];
459 
460     editDist = result.editDistance;
461     alignLen = result.alignmentLength;
462 
463     if (verbose > 0) {
464       fprintf(stderr, "computeAlignment()--         PASS  %s %6u %6d-%-6d editDist %5d alignLen %6d error %6.4f\n",
465               Blabel, Bid, bbgn, bend,
466               result.editDistance,
467               result.alignmentLength,
468               result.editDistance / (double)result.alignmentLength);
469     }
470   }
471 
472   else {
473     if ((verbose > 0) && (result.numLocations > 0)) {
474       fprintf(stderr, "computeAlignment()--         FAIL  %s %6u %6d-%-6d editDist %5d alignLen %6d error %6.4f\n",
475               Blabel, Bid, bbgn + result.endLocations[0] + 1, bbgn + result.startLocations[0],
476               result.editDistance,
477               result.alignmentLength,
478               result.editDistance / (double)result.alignmentLength);
479     }
480 
481     if ((verbose > 0) && (result.numLocations == 0)) {
482       fprintf(stderr, "computeAlignment()--         FAIL  %s %6u %6d-%-6d no alignment\n",
483               Blabel, Bid, bbgn, bend);
484     }
485   }
486 
487   edlibFreeAlignResult(result);
488 
489   return(success);
490 }
491 
492 
493 
494 
495 //  Our goal is to convert an alignment-free overlap with imprecise edges into
496 //  an alignment-based overlap with precise edges.
497 //
498 //  The strategy is to, for each end of the overlap, extend the alignment to the end
499 //  of the sequence with the shorter hang, then compute the alignment of
500 //  that to the other read allowing free gaps.
501 //
502 //                { alignment-free overlap }
503 //      ---(a5----{-----a3)----------------}------
504 //           (b5--{---b3)------------------}-------------------
505 //
506 //  Since the 5' hang on the B read is smaller, we use b5 to compute the other values.
507 //    b5 = known from alignment-free overlap
508 //    b3 = max(4*b5, 2*maxRepeatLength)
509 //    a5 = b5 * (1 + maxErate) + SLOP
510 //    a3 = b3 * (1 + maxErate) + SLOP
511 //
512 //  For b3: this is our anchor in assumed good sequence.  it needs to be larger than
513 //          a repeat, and should be larger than the unknown sequence
514 //
515 //  For a5: we need to allow space for all of b5 to align, plus any errors.  The
516 //          SLOP adjustment allows for shifting of the imprecise overlap edge.
517 //
518 //  Similar for a3.
519 //
520 //  Output of the alignment will be updated coordinates for a5 and a3,
521 //  which we use to update this end of the overlap.
522 //
523 void
computeOverlapAlignment(uint32 ovlid,uint32 minOverlapLength,double maxErate,uint32 overlapSlop,uint32 maxRepeat)524 maComputation::computeOverlapAlignment(uint32       ovlid,
525                                        uint32       minOverlapLength,
526                                        double       maxErate,
527                                        uint32       overlapSlop,
528                                        uint32       maxRepeat) {
529   ovOverlap *ovl    = &_overlaps[ovlid];   //  Convenience pointer to the overlap.
530   ovOverlap  ori    =  _overlaps[ovlid];   //  Copy of the original overlap.
531 
532   int32   alen      = _readData[_aID].trimmedLength;
533   int32   abgn      = (int32)       ovl->dat.ovl.ahg5;
534   int32   aend      = (int32)alen - ovl->dat.ovl.ahg3;
535 
536   int32   blen      = _readData[_bID].trimmedLength;
537   int32   bbgn      = (int32)       ovl->dat.ovl.bhg5;
538   int32   bend      = (int32)blen - ovl->dat.ovl.bhg3;
539 
540   int32   alignLen  = 0;
541   int32   editDist  = INT32_MAX;
542 
543   EdlibAlignResult  result = { 0, NULL, NULL, 0, NULL, 0, 0 };
544 
545   if (_verboseAlign > 0) {
546     fprintf(stderr, "computeOverlapAlignment()-- A %8u %6d-%-6d %d\n", _aID, abgn, aend, alen);
547     fprintf(stderr, "computeOverlapAlignment()-- B %8u %6d-%-6d %d\n", _bID, bbgn, bend, blen);
548   }
549 
550   //  A    ------------{------...
551   //  B          ------{------...
552   if (ovl->dat.ovl.bhg5 < ovl->dat.ovl.ahg5) {
553     int32   ahg5 = ovl->dat.ovl.ahg5;
554     int32   ahg3 = ovl->dat.ovl.ahg3;
555     int32   bhg5 = ovl->dat.ovl.bhg5;
556     int32   bhg3 = ovl->dat.ovl.bhg3;
557 
558     int32   b5   = bhg5;
559     int32   b3   = std::max(4 * bhg5, (int32)(2 * maxRepeat));
560 
561     int32   a5   = b5 * (1 + maxErate) + overlapSlop;
562     int32   a3   = b3 * (1 + maxErate) + overlapSlop;
563 
564     int32   bbgn = std::max(0,    bhg5 - b5);    //  Now zero.
565     int32   bend = std::min(blen, bhg5 + b3);
566 
567     int32   abgn = std::max(0,    ahg5 - a5);
568     int32   aend = std::min(alen, ahg5 + a3);
569 
570     if (_verboseAlign > 0)
571       fprintf(stderr, "computeOverlapAlignment()-- bhg5:  B %d-%d onto A %d-%d\n", bbgn, bend, abgn, aend);
572 
573     if (computeAlignment(_bRead, bbgn, bend, blen, "B", _bID,    //  Align all of sequence B into
574                          _aRead, abgn, aend, alen, "A", _aID,    //  sequence A with free ends.
575                          maxErate,
576                          editDist,
577                          alignLen, _verboseAlign) == true) {
578       if (_verboseAlign > 0)
579         fprintf(stderr, "computeOverlapAlignment()--        B %d-%d onto A %d-%d\n", bbgn, bend, abgn, aend);
580 
581       ovl->dat.ovl.ahg5 = abgn;
582       ovl->dat.ovl.bhg5 = bbgn;
583 
584       if ((bend == blen) ||                      //  Update the other end if the B read is contained, or
585           (alen - ovl->dat.ovl.ahg3 < aend)) {   //  if the alignment extends past our existing end.
586         ovl->dat.ovl.ahg3 = alen - aend;
587         ovl->dat.ovl.bhg3 = blen - bend;
588       }
589     }
590 
591     else {
592     }
593   }
594 
595   //  A          ------{------...
596   //  B    ------------{------...
597   else {
598     int32   ahg5 = ovl->dat.ovl.ahg5;
599     int32   ahg3 = ovl->dat.ovl.ahg3;
600     int32   bhg5 = ovl->dat.ovl.bhg5;
601     int32   bhg3 = ovl->dat.ovl.bhg3;
602 
603     int32   a5   = ahg5;
604     int32   a3   = std::max(4 * ahg5, (int32)(2 * maxRepeat));
605 
606     int32   b5   = a5 * (1 + maxErate) + overlapSlop;
607     int32   b3   = a3 * (1 + maxErate) + overlapSlop;
608 
609     int32   bbgn = std::max(0,    bhg5 - b5);
610     int32   bend = std::min(blen, bhg5 + b3);
611 
612     int32   abgn = std::max(0,    ahg5 - a5);    //  Now zero.
613     int32   aend = std::min(alen, ahg5 + a3);
614 
615     if (_verboseAlign > 0)
616       fprintf(stderr, "computeOverlapAlignment()-- ahg5:  A %d-%d onto B %d-%d\n", abgn, aend, bbgn, bend);
617 
618     if (computeAlignment(_aRead, abgn, aend, alen, "A", _aID,    //  Align all of sequence A into
619                          _bRead, bbgn, bend, blen, "B", _bID,    //  sequence B with free ends.
620                          maxErate,
621                          editDist,
622                          alignLen, _verboseAlign) == true) {
623       if (_verboseAlign > 0)
624         fprintf(stderr, "computeOverlapAlignment()--        A %d-%d onto B %d-%d\n", abgn, aend, bbgn, bend);
625 
626       ovl->dat.ovl.ahg5 = abgn;
627       ovl->dat.ovl.bhg5 = bbgn;
628 
629       if ((aend == alen) ||                      //  Update the other end if the A read is contained, or
630           (blen - ovl->dat.ovl.bhg3 < bend)) {   //  if the alignment extends past our existing end.
631         ovl->dat.ovl.ahg3 = alen - aend;
632         ovl->dat.ovl.bhg3 = blen - bend;
633       }
634     }
635 
636     else {
637     }
638   }
639 
640 
641 
642 
643   //  A    ...------}------------
644   //  B    ...------}------
645   if (ovl->dat.ovl.bhg3 < ovl->dat.ovl.ahg3) {
646     int32   ahg5 = ovl->dat.ovl.ahg5;
647     int32   ahg3 = ovl->dat.ovl.ahg3;
648     int32   bhg5 = ovl->dat.ovl.bhg5;
649     int32   bhg3 = ovl->dat.ovl.bhg3;
650 
651     int32   b5   = std::max(4 * bhg3, (int32)(2 * maxRepeat));
652     int32   b3   = bhg3;
653 
654     int32   a5   = b5 * (1 + maxErate) + overlapSlop;
655     int32   a3   = b3 * (1 + maxErate) + overlapSlop;
656 
657     int32   bbgn = std::max(0,    blen - bhg3 - b5);
658     int32   bend = std::min(blen, blen - bhg3 + b3);    //  Now blen.
659 
660     int32   abgn = std::max(0,    alen - ahg3 - a5);
661     int32   aend = std::min(alen, alen - ahg3 + a3);
662 
663     if (_verboseAlign > 0)
664       fprintf(stderr, "computeOverlapAlignment()-- bhg3:  B %d-%d onto A %d-%d\n", bbgn, bend, abgn, aend);
665 
666     if (computeAlignment(_bRead, bbgn, bend, blen, "B", _bID,    //  Align all of sequence B into
667                          _aRead, abgn, aend, alen, "A", _aID,    //  sequence A with free ends.
668                          maxErate,
669                          editDist,
670                          alignLen, _verboseAlign) == true) {
671       if (_verboseAlign > 0)
672         fprintf(stderr, "computeOverlapAlignment()--        B %d-%d onto A %d-%d\n", bbgn, bend, abgn, aend);
673 
674       if ((bbgn == 0) ||                  //  Update the other end if the B read is contained, or
675           (abgn < ovl->dat.ovl.ahg5)) {   //  if the alignment extends past our existing end.
676         ovl->dat.ovl.ahg5 = abgn;
677         ovl->dat.ovl.bhg5 = bbgn;
678       }
679 
680       ovl->dat.ovl.ahg3 = alen - aend;
681       ovl->dat.ovl.bhg3 = blen - bend;
682     }
683 
684     else {
685     }
686   }
687 
688   //  A    ...------}------
689   //  B    ...------}------------
690   else {
691     int32   ahg5 = ovl->dat.ovl.ahg5;
692     int32   ahg3 = ovl->dat.ovl.ahg3;
693     int32   bhg5 = ovl->dat.ovl.bhg5;
694     int32   bhg3 = ovl->dat.ovl.bhg3;
695 
696     int32   a5   = std::max(4 * ahg3, (int32)(2 * maxRepeat));
697     int32   a3   = ahg3;
698 
699     int32   b5   = a5 * (1 + maxErate) + overlapSlop;
700     int32   b3   = a3 * (1 + maxErate) + overlapSlop;
701 
702     int32   bbgn = std::max(0,    blen - bhg3 - b5);
703     int32   bend = std::min(blen, blen - bhg3 + b3);
704 
705     int32   abgn = std::max(0,    alen - ahg3 - a5);    //  Now alen.
706     int32   aend = std::min(alen, alen - ahg3 + a3);
707 
708     if (_verboseAlign > 0)
709       fprintf(stderr, "computeOverlapAlignment()-- ahg3:  A %d-%d onto B %d-%d\n", abgn, aend, bbgn, bend);
710 
711     if (computeAlignment(_aRead, abgn, aend, alen, "A", _aID,    //  Align all of sequence A into
712                          _bRead, bbgn, bend, blen, "B", _bID,    //  sequence B with free ends.
713                          maxErate,
714                          editDist,
715                          alignLen, _verboseAlign) == true) {
716       if (_verboseAlign > 0)
717         fprintf(stderr, "computeOverlapAlignment()--        A %d-%d onto B %d-%d\n", abgn, aend, bbgn, bend);
718 
719       if ((abgn == 0) ||                  //  Update the other end if the A read is contained, or
720           (bbgn < ovl->dat.ovl.bhg5)) {   //  if the alignment extends past our existing end.
721         ovl->dat.ovl.ahg5 = abgn;
722         ovl->dat.ovl.bhg5 = bbgn;
723       }
724 
725       ovl->dat.ovl.ahg3 = alen - aend;
726       ovl->dat.ovl.bhg3 = blen - bend;
727     }
728 
729     else {
730     }
731   }
732 
733   //
734   //  Compute the final alignment, then copy it to our output buffer if it is
735   //  of good quality.
736   //
737 
738   if (1) {
739     int32   abgn      = (int32)       ovl->dat.ovl.ahg5;
740     int32   aend      = (int32)alen - ovl->dat.ovl.ahg3;
741 
742     int32   bbgn      = (int32)       ovl->dat.ovl.bhg5;
743     int32   bend      = (int32)blen - ovl->dat.ovl.bhg3;
744 
745     if (_verboseAlign > 0)
746       fprintf(stderr, "computeOverlapAlignment()-- final:   A %d-%d vs B %d-%d\n", abgn, aend, bbgn, bend);
747 
748     EdlibAlignResult result = edlibAlign(_aRead + abgn, aend - abgn,
749                                          _bRead + bbgn, bend - bbgn,
750                                          edlibNewAlignConfig((int32)ceil(1.1 * maxErate * ((aend - abgn) + (bend - bbgn)) / 2.0),
751                                                              EDLIB_MODE_NW,
752                                                              EDLIB_TASK_PATH));
753 
754     //  Decide, based on the edit distance and alignment length, if we should
755     //  retain or discard the overlap.
756 
757     if ((result.alignmentLength < minOverlapLength) ||                  //  Alignment is too short, or
758         (result.editDistance > maxErate * result.alignmentLength)) {    //               too noisy.
759       ovl->dat.ovl.forOBT = false;
760       ovl->dat.ovl.forDUP = false;
761       ovl->dat.ovl.forUTG = false;
762 
763       ovl->evalue(AS_MAX_EVALUE);
764     }
765 
766     else {
767       ovl->dat.ovl.forOBT = false;                                //  Flag as useful for contigging
768       ovl->dat.ovl.forDUP = false;                                //  if it is a dovetail overlap.
769       ovl->dat.ovl.forUTG = (ovl->overlapIsDovetail() == true);
770 
771       ovl->erate(editDist / (double)alignLen);
772 
773       _alignsA  [ovlid] = new char [alen + 1];                    //  Allocate space for the alignment output.
774       _alignsB  [ovlid] = new char [alen + 1];
775 
776       for (uint32 ii=0; ii<alen; ii++) {
777         _alignsA[ovlid][ii] = '-';
778         _alignsB[ovlid][ii] = '-';
779       }
780 
781       _alignsA[ovlid][alen] = 0;
782       _alignsB[ovlid][alen] = 0;
783 
784       char *aaln = new char [result.alignmentLength + 1];         //  Convert the alignment to a string.
785       char *baln = new char [result.alignmentLength + 1];
786 
787       memset(aaln, 0, sizeof(char) * (result.alignmentLength + 1));
788       memset(baln, 0, sizeof(char) * (result.alignmentLength + 1));
789 
790       edlibAlignmentToStrings(result.alignment,                   //  Alignment
791                               result.alignmentLength,             //    and length
792                               result.startLocations[0],           //  tgtStart (_bRead)
793                               result.endLocations[0]+1,           //  tgtEnd
794                               0,                                  //  qryStart (_aRead)
795                               alen,                               //  qryEnd
796                               _bRead + bbgn,                      //  tgt sequence (_bRead)
797                               _aRead + abgn,                      //  qry sequence (_aRead)
798                               baln,                               //  output tgt alignment string
799                               aaln);                              //  output qry alignment string
800 
801       //  Dump
802 
803       //fprintf(stdout, "B %u    abgn %d aend %d  ahg5 %d ahg5 %d alen %d\n", _bID, abgn, aend, (int32)ovl->dat.ovl.ahg5, (int32)ovl->dat.ovl.ahg3, alen);
804       //fprintf(stdout, "aaln %s\n", aaln);
805       //fprintf(stdout, "baln %s\n", baln);
806 
807       //  Copy it into the output space.
808 
809       uint32 pp = 0;   //  Position in the emitted alignment string.
810       uint32 aa = 0;   //  Position in sequence A.
811 
812       for (uint32 ii=0; ii<ovl->dat.ovl.ahg5; ii++, pp++) {
813         _alignsA[ovlid][pp] = _aRead[aa];
814         _alignsB[ovlid][pp] = '-';
815         aa++;
816       }
817 
818       for (uint32 ii=0; ii<result.alignmentLength; ii++) {
819         if (aaln[ii] != '-') {
820           _alignsA[ovlid][pp] = aaln[ii];
821           _alignsB[ovlid][pp] = baln[ii];
822           aa++;
823           pp++;
824         }
825       }
826 
827       for (uint32 ii=0; ii<ovl->dat.ovl.ahg3; ii++, pp++) {
828         _alignsA[ovlid][pp] = _aRead[aa];
829         _alignsB[ovlid][pp] = '-';
830         aa++;
831       }
832 
833       assert(pp == alen);     //  If correct, we should have walked over the
834       assert(aa == alen);     //  full trimmed read with both indices.
835 
836       delete [] aaln;
837       delete [] baln;
838 
839       _alignsA[ovlid][alen] = 0;
840       _alignsB[ovlid][alen] = 0;
841     }
842 
843     //  Trash the alignment results.
844 
845     edlibFreeAlignResult(result);
846   }
847 
848   //  More logging.
849 
850   if (_verboseAlign > 0) {
851     fprintf(stderr, "computeOverlapAlignment()-- A %7u %6d-%-6d -> %6d-%-6d\n",       _aID, ori.a_bgn(), ori.a_end(), ovl->a_bgn(), ovl->a_end());
852     fprintf(stderr, "computeOverlapAlignment()-- B %7u %6d-%-6d -> %6d-%-6d %5.4f\n", _bID, ori.b_bgn(), ori.b_end(), ovl->b_bgn(), ovl->b_end(), ovl->erate());
853   }
854 }
855 
856 
857