1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "prefixEditDistance.H"
19 
20 #include "Binomial_Bound.H"
21 
22 
prefixEditDistance(bool doingPartialOverlaps_,double maxErate_,double maxAlignErate_)23 prefixEditDistance::prefixEditDistance(bool doingPartialOverlaps_, double maxErate_, double maxAlignErate_) {
24   maxErate             = maxErate_;
25   doingPartialOverlaps = doingPartialOverlaps_;
26 
27   MAX_ERRORS             = (1 + (int)ceil(maxErate * AS_MAX_READLEN));
28   MIN_BRANCH_END_DIST    = 20;
29   MIN_BRANCH_TAIL_SLOPE  = ((maxErate > 0.06) ? 1.0 : 0.20);
30 
31   Left_Delta_Len  = 0;
32   Left_Delta      = new int  [MAX_ERRORS];
33 
34   Right_Delta_Len = 0;
35   Right_Delta     = new int  [MAX_ERRORS];
36 
37   allocated  = 3 * MAX_ERRORS * sizeof(int);
38 
39   Delta_Stack = new int  [MAX_ERRORS];
40 
41   Edit_Space_Lazy = new int *  [MAX_ERRORS];
42   Edit_Array_Lazy = new int *  [MAX_ERRORS];
43 
44   memset(Edit_Space_Lazy, 0, sizeof(int *) * MAX_ERRORS);
45   memset(Edit_Array_Lazy, 0, sizeof(int *) * MAX_ERRORS);
46 
47   allocated += MAX_ERRORS * sizeof (int);
48   allocated += MAX_ERRORS * sizeof (int);
49 
50   //
51 
52   Edit_Match_Limit_Allocation = new int32 [MAX_ERRORS + 1];
53   Edit_Match_Limit = Edit_Match_Limit_Allocation;
54 
55   Initialize_Match_Limit(Edit_Match_Limit_Allocation, maxAlignErate_, MAX_ERRORS);
56 
57 
58   for (int32 i=0; i <= AS_MAX_READLEN; i++) {
59     //Error_Bound[i] = (int32) (i * maxErate + 0.0000000000001);
60     Error_Bound[i] = (int32)ceil(i * maxErate);
61   }
62 
63 
64   //  Value to add for a match in finding branch points.
65   //
66   //  ALH: Note that maxErate also affects what overlaps get found
67   //
68   //  ALH: Scoring seems to be unusual: given an alignment
69   //  of length l with k mismatches, the score seems to be
70   //  computed as l + k * error value and NOT (l-k)*match+k*error
71   //
72   //  I.e. letting x := DEFAULT_BRANCH_MATCH_VAL,
73   //  the max mismatch fraction p to give a non-negative score
74   //  would be p = x/(1-x); conversely, to compute x for a
75   //  goal p, we have x = p/(1+p).  E.g.
76   //
77   //  for p=0.06, x = .06 / (1.06) = .0566038
78   //  for p=0.35, x = .35 / (1.35) = .259259
79   //  for p=0.2,  x = .20 / (1.20) = .166667
80   //  for p=0.15, x = .15 / (1.15) = .130435
81   //
82   //  Value was for 6% vs 35% error discrimination.
83   //  Converting to integers didn't make it faster.
84   //  Corresponding error value is this value minus 1.0
85 
86   Branch_Match_Value = maxErate / (1 + maxErate);
87   Branch_Error_Value = Branch_Match_Value - 1.0;
88 };
89 
90 
91 
~prefixEditDistance()92 prefixEditDistance::~prefixEditDistance() {
93   delete [] Left_Delta;
94   delete [] Right_Delta;
95 
96   delete [] Delta_Stack;
97 
98   for (uint32 i=0; i<MAX_ERRORS; i++)
99     if (Edit_Space_Lazy[i])
100       delete [] Edit_Space_Lazy[i];
101 
102   delete [] Edit_Space_Lazy;
103   delete [] Edit_Array_Lazy;
104 
105   delete [] Edit_Match_Limit_Allocation;
106 };
107 
108