1
2 /******************************************************************************
3 *
4 * This file is part of canu, a software program that assembles whole-genome
5 * sequencing reads into contigs.
6 *
7 * This software is based on:
8 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9 * the 'kmer package' r1994 (http://kmer.sourceforge.net)
10 *
11 * Except as indicated otherwise, this is a 'United States Government Work',
12 * and is released in the public domain.
13 *
14 * File 'README.licenses' in the root directory of this distribution
15 * contains full conditions and disclaimers.
16 */
17
18 #include "prefixEditDistance.H"
19
20 #include "Binomial_Bound.H"
21
22
prefixEditDistance(bool doingPartialOverlaps_,double maxErate_,double maxAlignErate_)23 prefixEditDistance::prefixEditDistance(bool doingPartialOverlaps_, double maxErate_, double maxAlignErate_) {
24 maxErate = maxErate_;
25 doingPartialOverlaps = doingPartialOverlaps_;
26
27 MAX_ERRORS = (1 + (int)ceil(maxErate * AS_MAX_READLEN));
28 MIN_BRANCH_END_DIST = 20;
29 MIN_BRANCH_TAIL_SLOPE = ((maxErate > 0.06) ? 1.0 : 0.20);
30
31 Left_Delta_Len = 0;
32 Left_Delta = new int [MAX_ERRORS];
33
34 Right_Delta_Len = 0;
35 Right_Delta = new int [MAX_ERRORS];
36
37 allocated = 3 * MAX_ERRORS * sizeof(int);
38
39 Delta_Stack = new int [MAX_ERRORS];
40
41 Edit_Space_Lazy = new int * [MAX_ERRORS];
42 Edit_Array_Lazy = new int * [MAX_ERRORS];
43
44 memset(Edit_Space_Lazy, 0, sizeof(int *) * MAX_ERRORS);
45 memset(Edit_Array_Lazy, 0, sizeof(int *) * MAX_ERRORS);
46
47 allocated += MAX_ERRORS * sizeof (int);
48 allocated += MAX_ERRORS * sizeof (int);
49
50 //
51
52 Edit_Match_Limit_Allocation = new int32 [MAX_ERRORS + 1];
53 Edit_Match_Limit = Edit_Match_Limit_Allocation;
54
55 Initialize_Match_Limit(Edit_Match_Limit_Allocation, maxAlignErate_, MAX_ERRORS);
56
57
58 for (int32 i=0; i <= AS_MAX_READLEN; i++) {
59 //Error_Bound[i] = (int32) (i * maxErate + 0.0000000000001);
60 Error_Bound[i] = (int32)ceil(i * maxErate);
61 }
62
63
64 // Value to add for a match in finding branch points.
65 //
66 // ALH: Note that maxErate also affects what overlaps get found
67 //
68 // ALH: Scoring seems to be unusual: given an alignment
69 // of length l with k mismatches, the score seems to be
70 // computed as l + k * error value and NOT (l-k)*match+k*error
71 //
72 // I.e. letting x := DEFAULT_BRANCH_MATCH_VAL,
73 // the max mismatch fraction p to give a non-negative score
74 // would be p = x/(1-x); conversely, to compute x for a
75 // goal p, we have x = p/(1+p). E.g.
76 //
77 // for p=0.06, x = .06 / (1.06) = .0566038
78 // for p=0.35, x = .35 / (1.35) = .259259
79 // for p=0.2, x = .20 / (1.20) = .166667
80 // for p=0.15, x = .15 / (1.15) = .130435
81 //
82 // Value was for 6% vs 35% error discrimination.
83 // Converting to integers didn't make it faster.
84 // Corresponding error value is this value minus 1.0
85
86 Branch_Match_Value = maxErate / (1 + maxErate);
87 Branch_Error_Value = Branch_Match_Value - 1.0;
88 };
89
90
91
~prefixEditDistance()92 prefixEditDistance::~prefixEditDistance() {
93 delete [] Left_Delta;
94 delete [] Right_Delta;
95
96 delete [] Delta_Stack;
97
98 for (uint32 i=0; i<MAX_ERRORS; i++)
99 if (Edit_Space_Lazy[i])
100 delete [] Edit_Space_Lazy[i];
101
102 delete [] Edit_Space_Lazy;
103 delete [] Edit_Array_Lazy;
104
105 delete [] Edit_Match_Limit_Allocation;
106 };
107
108