1 #ifndef VCFLIB_COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
2 #define VCFLIB_COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
3 
4 #include <stdint.h>
5 #include <string>
6 #include <vector>
7 
8 namespace vcflib {
9 
10 namespace StripedSmithWaterman {
11 
12 struct Alignment {
13   uint16_t sw_score;           // The best alignment score
14   uint16_t sw_score_next_best; // The next best alignment score
15   int32_t  ref_begin;          // Reference begin position of the best alignment
16   int32_t  ref_end;            // Reference end position of the best alignment
17   int32_t  query_begin;        // Query begin position of the best alignment
18   int32_t  query_end;          // Query end position of the best alignment
19   int32_t  ref_end_next_best;  // Reference end position of the next best alignment
20   int32_t  mismatches;         // Number of mismatches of the alignment
21   std::string cigar_string;    // Cigar string of the best alignment
22   std::vector<uint32_t> cigar; // Cigar stored in the BAM format
23                                //   high 28 bits: length
24 			       //   low 4 bits: M/I/D/S/X (0/1/2/4/8);
Clearvcflib::StripedSmithWaterman::Alignment25   void Clear() {
26     sw_score           = 0;
27     sw_score_next_best = 0;
28     ref_begin          = 0;
29     ref_end            = 0;
30     query_begin        = 0;
31     query_end          = 0;
32     ref_end_next_best  = 0;
33     mismatches         = 0;
34     cigar_string.clear();
35     cigar.clear();
36   };
37 };
38 
39 struct Filter {
40   // NOTE: No matter the filter, those five fields of Alignment will be given anyway.
41   //       sw_score; sw_score_next_best; ref_end; query_end; ref_end_next_best.
42   // NOTE: Only need score of alignments, please set 'report_begin_position'
43   //       and 'report_cigar' false.
44 
45   bool report_begin_position;    // Give ref_begin and query_begin.
46                                  //   If it is not set, ref_begin and query_begin are -1.
47   bool report_cigar;             // Give cigar_string and cigar.
48                                  //   report_begin_position is automatically TRUE.
49 
50   // When *report_cigar* is true and alignment passes these two filters,
51   //   cigar_string and cigar will be given.
52   uint16_t score_filter;         // score >= score_filter
53   uint16_t distance_filter;      // ((ref_end - ref_begin) < distance_filter) &&
54                                  // ((query_end - read_begin) < distance_filter)
55 
Filtervcflib::StripedSmithWaterman::Filter56   Filter()
57     : report_begin_position(true)
58     , report_cigar(true)
59     , score_filter(0)
60     , distance_filter(32767)
61   {};
62 
Filtervcflib::StripedSmithWaterman::Filter63   Filter(const bool& pos, const bool& cigar, const uint16_t& score, const uint16_t& dis)
64     : report_begin_position(pos)
65     , report_cigar(cigar)
66     , score_filter(score)
67     , distance_filter(dis)
68     {};
69 };
70 
71 class Aligner {
72  public:
73   // =========
74   // @function Construct an Aligner on default values.
75   //             The function will build the {A.C,G,T,N} aligner.
76   //             If you target for other character aligners, then please
77   //             use the other constructor and pass the corresponding matrix in.
78   // =========
79   Aligner(void);
80 
81   // =========
82   // @function Construct an Aligner by assigning scores.
83   //             The function will build the {A.C,G,T,N} aligner.
84   //             If you target for other character aligners, then please
85   //             use the other constructor and pass the corresponding matrix in.
86   // =========
87   Aligner(const uint8_t& match_score,
88           const uint8_t& mismatch_penalty,
89 	  const uint8_t& gap_opening_penalty,
90 	  const uint8_t& gap_extending_penalty);
91 
92   // =========
93   // @function Construct an Aligner by the specific matrixs.
94   // =========
95   Aligner(const int8_t* score_matrix,
96           const int&    score_matrix_size,
97           const int8_t* translation_matrix,
98 	  const int&    translation_matrix_size);
99 
100   ~Aligner(void);
101 
102   // =========
103   // @function Build the reference sequence and thus make
104   //             Align(const char* query, s_align* alignment) function;
105   //             otherwise the reference should be given when aligning.
106   //           [NOTICE] If there exists a sequence, that one will be deleted
107   //                    and replaced.
108   // @param    seq    The reference bases;
109   //                  [NOTICE] It is not necessary null terminated.
110   // @param    length The length of bases will be be built.
111   // @return   The length of the built bases.
112   // =========
113   int SetReferenceSequence(const char* seq, const int& length);
114 
115   void CleanReferenceSequence(void);
116 
117   // =========
118   // @function Set penalties for opening and extending gaps
119   //           [NOTICE] The defaults are 3 and 1 respectively.
120   // =========
SetGapPenalty(const uint8_t & opening,const uint8_t & extending)121   void SetGapPenalty(const uint8_t& opening, const uint8_t& extending) {
122     gap_opening_penalty_ = opening;
123     gap_extending_penalty_ = extending;
124   };
125 
126   // =========
127   // @function Align the query againt the reference that is set by
128   //             SetReferenceSequence.
129   // @param    query     The query sequence.
130   // @param    filter    The filter for the alignment.
131   // @param    alignment The container contains the result.
132   // @return   True: succeed; false: fail.
133   // =========
134   bool Align(const char* query, const Filter& filter, Alignment* alignment) const;
135 
136   // =========
137   // @function Align the query againt the reference.
138   //           [NOTICE] The reference won't replace the reference
139   //                      set by SetReferenceSequence.
140   // @param    query     The query sequence.
141   // @param    ref       The reference sequence.
142   //                     [NOTICE] It is not necessary null terminated.
143   // @param    ref_len   The length of the reference sequence.
144   // @param    filter    The filter for the alignment.
145   // @param    alignment The container contains the result.
146   // @return   True: succeed; false: fail.
147   // =========
148   bool Align(const char* query, const char* ref, const int& ref_len,
149              const Filter& filter, Alignment* alignment) const;
150 
151   // @function Clear up all containers and thus the aligner is disabled.
152   //             To rebuild the aligner please use Build functions.
153   void Clear(void);
154 
155   // =========
156   // @function Rebuild the aligner's ability on default values.
157   //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
158   // @return   True: succeed; false: fail.
159   // =========
160   bool ReBuild(void);
161 
162   // =========
163   // @function Rebuild the aligner's ability by the specific matrixs.
164   //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
165   // @return   True: succeed; false: fail.
166   // =========
167   bool ReBuild(
168           const uint8_t& match_score,
169           const uint8_t& mismatch_penalty,
170 	  const uint8_t& gap_opening_penalty,
171 	  const uint8_t& gap_extending_penalty);
172 
173   // =========
174   // @function Construct an Aligner by the specific matrixs.
175   //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
176   // @return   True: succeed; false: fail.
177   // =========
178   bool ReBuild(
179           const int8_t* score_matrix,
180           const int&    score_matrix_size,
181           const int8_t* translation_matrix,
182 	  const int&    translation_matrix_size);
183 
184  private:
185   int8_t* score_matrix_;
186   int     score_matrix_size_;
187   int8_t* translation_matrix_;
188 
189   uint8_t match_score_;           // default: 2
190   uint8_t mismatch_penalty_;      // default: 2
191   uint8_t gap_opening_penalty_;   // default: 3
192   uint8_t gap_extending_penalty_; // default: 1
193 
194   int8_t* translated_reference_;
195   int32_t reference_length_;
196 
197   int TranslateBase(const char* bases, const int& length, int8_t* translated) const;
198   void SetAllDefault(void);
199   void BuildDefaultMatrix(void);
200   void ClearMatrices(void);
201 
202   Aligner& operator= (const Aligner&);
203   Aligner (const Aligner&);
204 }; // class Aligner
205 
206 
207 // ================
208 // inline functions
209 // ================
CleanReferenceSequence(void)210 inline void Aligner::CleanReferenceSequence(void) {
211   if (reference_length_ == 0) return;
212 
213   // delete the current buffer
214   if (reference_length_ > 1) delete [] translated_reference_;
215   else delete translated_reference_;
216 
217   reference_length_ = 0;
218 }
219 } // namespace StripedSmithWaterman
220 
221 }
222 
223 #endif // VCFLIB_COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
224