1 #ifndef VCFLIB_COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
2 #define VCFLIB_COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
3
4 #include <stdint.h>
5 #include <string>
6 #include <vector>
7
8 namespace vcflib {
9
10 namespace StripedSmithWaterman {
11
12 struct Alignment {
13 uint16_t sw_score; // The best alignment score
14 uint16_t sw_score_next_best; // The next best alignment score
15 int32_t ref_begin; // Reference begin position of the best alignment
16 int32_t ref_end; // Reference end position of the best alignment
17 int32_t query_begin; // Query begin position of the best alignment
18 int32_t query_end; // Query end position of the best alignment
19 int32_t ref_end_next_best; // Reference end position of the next best alignment
20 int32_t mismatches; // Number of mismatches of the alignment
21 std::string cigar_string; // Cigar string of the best alignment
22 std::vector<uint32_t> cigar; // Cigar stored in the BAM format
23 // high 28 bits: length
24 // low 4 bits: M/I/D/S/X (0/1/2/4/8);
Clearvcflib::StripedSmithWaterman::Alignment25 void Clear() {
26 sw_score = 0;
27 sw_score_next_best = 0;
28 ref_begin = 0;
29 ref_end = 0;
30 query_begin = 0;
31 query_end = 0;
32 ref_end_next_best = 0;
33 mismatches = 0;
34 cigar_string.clear();
35 cigar.clear();
36 };
37 };
38
39 struct Filter {
40 // NOTE: No matter the filter, those five fields of Alignment will be given anyway.
41 // sw_score; sw_score_next_best; ref_end; query_end; ref_end_next_best.
42 // NOTE: Only need score of alignments, please set 'report_begin_position'
43 // and 'report_cigar' false.
44
45 bool report_begin_position; // Give ref_begin and query_begin.
46 // If it is not set, ref_begin and query_begin are -1.
47 bool report_cigar; // Give cigar_string and cigar.
48 // report_begin_position is automatically TRUE.
49
50 // When *report_cigar* is true and alignment passes these two filters,
51 // cigar_string and cigar will be given.
52 uint16_t score_filter; // score >= score_filter
53 uint16_t distance_filter; // ((ref_end - ref_begin) < distance_filter) &&
54 // ((query_end - read_begin) < distance_filter)
55
Filtervcflib::StripedSmithWaterman::Filter56 Filter()
57 : report_begin_position(true)
58 , report_cigar(true)
59 , score_filter(0)
60 , distance_filter(32767)
61 {};
62
Filtervcflib::StripedSmithWaterman::Filter63 Filter(const bool& pos, const bool& cigar, const uint16_t& score, const uint16_t& dis)
64 : report_begin_position(pos)
65 , report_cigar(cigar)
66 , score_filter(score)
67 , distance_filter(dis)
68 {};
69 };
70
71 class Aligner {
72 public:
73 // =========
74 // @function Construct an Aligner on default values.
75 // The function will build the {A.C,G,T,N} aligner.
76 // If you target for other character aligners, then please
77 // use the other constructor and pass the corresponding matrix in.
78 // =========
79 Aligner(void);
80
81 // =========
82 // @function Construct an Aligner by assigning scores.
83 // The function will build the {A.C,G,T,N} aligner.
84 // If you target for other character aligners, then please
85 // use the other constructor and pass the corresponding matrix in.
86 // =========
87 Aligner(const uint8_t& match_score,
88 const uint8_t& mismatch_penalty,
89 const uint8_t& gap_opening_penalty,
90 const uint8_t& gap_extending_penalty);
91
92 // =========
93 // @function Construct an Aligner by the specific matrixs.
94 // =========
95 Aligner(const int8_t* score_matrix,
96 const int& score_matrix_size,
97 const int8_t* translation_matrix,
98 const int& translation_matrix_size);
99
100 ~Aligner(void);
101
102 // =========
103 // @function Build the reference sequence and thus make
104 // Align(const char* query, s_align* alignment) function;
105 // otherwise the reference should be given when aligning.
106 // [NOTICE] If there exists a sequence, that one will be deleted
107 // and replaced.
108 // @param seq The reference bases;
109 // [NOTICE] It is not necessary null terminated.
110 // @param length The length of bases will be be built.
111 // @return The length of the built bases.
112 // =========
113 int SetReferenceSequence(const char* seq, const int& length);
114
115 void CleanReferenceSequence(void);
116
117 // =========
118 // @function Set penalties for opening and extending gaps
119 // [NOTICE] The defaults are 3 and 1 respectively.
120 // =========
SetGapPenalty(const uint8_t & opening,const uint8_t & extending)121 void SetGapPenalty(const uint8_t& opening, const uint8_t& extending) {
122 gap_opening_penalty_ = opening;
123 gap_extending_penalty_ = extending;
124 };
125
126 // =========
127 // @function Align the query againt the reference that is set by
128 // SetReferenceSequence.
129 // @param query The query sequence.
130 // @param filter The filter for the alignment.
131 // @param alignment The container contains the result.
132 // @return True: succeed; false: fail.
133 // =========
134 bool Align(const char* query, const Filter& filter, Alignment* alignment) const;
135
136 // =========
137 // @function Align the query againt the reference.
138 // [NOTICE] The reference won't replace the reference
139 // set by SetReferenceSequence.
140 // @param query The query sequence.
141 // @param ref The reference sequence.
142 // [NOTICE] It is not necessary null terminated.
143 // @param ref_len The length of the reference sequence.
144 // @param filter The filter for the alignment.
145 // @param alignment The container contains the result.
146 // @return True: succeed; false: fail.
147 // =========
148 bool Align(const char* query, const char* ref, const int& ref_len,
149 const Filter& filter, Alignment* alignment) const;
150
151 // @function Clear up all containers and thus the aligner is disabled.
152 // To rebuild the aligner please use Build functions.
153 void Clear(void);
154
155 // =========
156 // @function Rebuild the aligner's ability on default values.
157 // [NOTICE] If the aligner is not cleaned, rebuilding will fail.
158 // @return True: succeed; false: fail.
159 // =========
160 bool ReBuild(void);
161
162 // =========
163 // @function Rebuild the aligner's ability by the specific matrixs.
164 // [NOTICE] If the aligner is not cleaned, rebuilding will fail.
165 // @return True: succeed; false: fail.
166 // =========
167 bool ReBuild(
168 const uint8_t& match_score,
169 const uint8_t& mismatch_penalty,
170 const uint8_t& gap_opening_penalty,
171 const uint8_t& gap_extending_penalty);
172
173 // =========
174 // @function Construct an Aligner by the specific matrixs.
175 // [NOTICE] If the aligner is not cleaned, rebuilding will fail.
176 // @return True: succeed; false: fail.
177 // =========
178 bool ReBuild(
179 const int8_t* score_matrix,
180 const int& score_matrix_size,
181 const int8_t* translation_matrix,
182 const int& translation_matrix_size);
183
184 private:
185 int8_t* score_matrix_;
186 int score_matrix_size_;
187 int8_t* translation_matrix_;
188
189 uint8_t match_score_; // default: 2
190 uint8_t mismatch_penalty_; // default: 2
191 uint8_t gap_opening_penalty_; // default: 3
192 uint8_t gap_extending_penalty_; // default: 1
193
194 int8_t* translated_reference_;
195 int32_t reference_length_;
196
197 int TranslateBase(const char* bases, const int& length, int8_t* translated) const;
198 void SetAllDefault(void);
199 void BuildDefaultMatrix(void);
200 void ClearMatrices(void);
201
202 Aligner& operator= (const Aligner&);
203 Aligner (const Aligner&);
204 }; // class Aligner
205
206
207 // ================
208 // inline functions
209 // ================
CleanReferenceSequence(void)210 inline void Aligner::CleanReferenceSequence(void) {
211 if (reference_length_ == 0) return;
212
213 // delete the current buffer
214 if (reference_length_ > 1) delete [] translated_reference_;
215 else delete translated_reference_;
216
217 reference_length_ = 0;
218 }
219 } // namespace StripedSmithWaterman
220
221 }
222
223 #endif // VCFLIB_COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
224