1 /**** 2 DIAMOND protein aligner 3 Copyright (C) 2013-2017 Benjamin Buchfink <buchfink@gmail.com> 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation, either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. 17 ****/ 18 19 #ifndef PACKED_TRANSCRIPT_H_ 20 #define PACKED_TRANSCRIPT_H_ 21 22 #include "../util/binary_buffer.h" 23 #include "../basic/value.h" 24 #include "diagonal_segment.h" 25 #include "sequence.h" 26 27 typedef enum { op_match = 0, op_insertion = 1, op_deletion = 2, op_substitution = 3, op_frameshift_forward = 4, op_frameshift_reverse = 5 } Edit_operation; 28 29 struct Reversed {}; 30 31 struct Packed_operation 32 { 33 enum { OP_BITS = 2, COUNT_BITS = 8 - OP_BITS, MAX_COUNT = (1 << COUNT_BITS) - 1 }; Packed_operationPacked_operation34 Packed_operation(uint8_t code): 35 code (code) 36 { } Packed_operationPacked_operation37 Packed_operation(Edit_operation op, unsigned count): 38 code ((op<<COUNT_BITS) | count) 39 { } Packed_operationPacked_operation40 Packed_operation(Edit_operation op, Letter v) : 41 code((op << COUNT_BITS) | (int)v) 42 { } uint8_tPacked_operation43 operator uint8_t() const 44 { return code; } opPacked_operation45 Edit_operation op() const 46 { 47 uint8_t o = code >> COUNT_BITS; 48 switch (o) { 49 case op_substitution: 50 switch (letter()) { 51 case (Letter)AMINO_ACID_COUNT: 52 return op_frameshift_reverse; 53 case (Letter)(AMINO_ACID_COUNT + 1) : 54 return op_frameshift_forward; 55 default: 56 return op_substitution; 57 } 58 default: 59 return (Edit_operation)o; 60 } 61 } countPacked_operation62 unsigned count() const 63 { 64 switch (op()) { 65 case op_match: 66 case op_insertion: 67 return code & MAX_COUNT; 68 default: 69 return 1; 70 } 71 } letterPacked_operation72 Letter letter() const 73 { 74 return code&MAX_COUNT; 75 } terminatorPacked_operation76 static Packed_operation terminator() 77 { return Packed_operation(op_match, 0u); } frameshift_forwardPacked_operation78 static Packed_operation frameshift_forward() 79 { 80 return Packed_operation(op_substitution, (unsigned)AMINO_ACID_COUNT + 1); 81 } frameshift_reversePacked_operation82 static Packed_operation frameshift_reverse() 83 { 84 return Packed_operation(op_substitution, (unsigned)AMINO_ACID_COUNT); 85 } 86 uint8_t code; 87 }; 88 89 struct Combined_operation 90 { 91 Edit_operation op; 92 unsigned count; 93 Letter letter; 94 }; 95 96 struct Packed_transcript 97 { 98 99 struct Const_iterator 100 { Const_iteratorPacked_transcript::Const_iterator101 Const_iterator(const Packed_operation *op): 102 ptr_ (op) 103 { gather(); } goodPacked_transcript::Const_iterator104 bool good() const 105 { return *ptr_ != Packed_operation::terminator(); } 106 Const_iterator& operator++() 107 { ++ptr_; gather(); return *this; } 108 const Combined_operation& operator*() const 109 { return op_; } 110 const Combined_operation* operator->() const 111 { return &op_; } 112 private: gatherPacked_transcript::Const_iterator113 void gather() 114 { 115 if(!good()) 116 return; 117 op_.op = ptr_->op(); 118 if(op_.op == op_deletion || op_.op == op_substitution || op_.op == op_frameshift_forward || op_.op==op_frameshift_reverse) { 119 op_.letter = ptr_->letter(); 120 op_.count = 1; 121 } else { 122 op_.count = 0; 123 do { 124 op_.count += ptr_->count(); 125 ++ptr_; 126 } while(good() && ptr_->op() == op_.op); 127 --ptr_; 128 } 129 } 130 const Packed_operation *ptr_; 131 Combined_operation op_; 132 }; 133 readPacked_transcript134 void read(BinaryBuffer::Iterator &it) 135 { 136 data_.clear(); 137 uint8_t code; 138 do { 139 it >> code; 140 data_.push_back(code); 141 } while (code != Packed_operation::terminator()); 142 } 143 beginPacked_transcript144 Const_iterator begin() const 145 { return Const_iterator (data_.data()); } 146 dataPacked_transcript147 const vector<Packed_operation>& data() const 148 { return data_; } 149 ptrPacked_transcript150 const Packed_operation* ptr() const 151 { 152 return &data_[0]; 153 } 154 push_backPacked_transcript155 void push_back(Edit_operation op) 156 { 157 if (op == op_frameshift_forward) 158 data_.push_back(Packed_operation::frameshift_forward()); 159 else if (op == op_frameshift_reverse) 160 data_.push_back(Packed_operation::frameshift_reverse()); 161 else if (data_.empty() || data_.back().op() != op || (data_.back().op() == op && data_.back().count() == Packed_operation::MAX_COUNT)) 162 data_.push_back(Packed_operation(op, 1u)); 163 else 164 ++data_.back().code; 165 } 166 push_backPacked_transcript167 void push_back(Edit_operation op, Letter l) 168 { 169 data_.push_back(Packed_operation(op, l)); 170 } 171 push_backPacked_transcript172 void push_back(Edit_operation op, unsigned count) 173 { 174 while (count > 0) { 175 const unsigned n = std::min(count, (unsigned)Packed_operation::MAX_COUNT); 176 data_.push_back(Packed_operation(op, n)); 177 count -= n; 178 } 179 } 180 181 void reverse(size_t begin = 0) 182 { 183 std::reverse(data_.begin() + begin, data_.end()); 184 } 185 raw_lengthPacked_transcript186 size_t raw_length() const 187 { 188 return data_.size(); 189 } 190 push_terminatorPacked_transcript191 void push_terminator() 192 { 193 data_.push_back(Packed_operation::terminator()); 194 } 195 clearPacked_transcript196 void clear() 197 { 198 data_.clear(); 199 } 200 push_backPacked_transcript201 void push_back(const Sequence &s, const Reversed&) 202 { 203 const int l = (int)s.length(); 204 for (int i = l - 1; i >= 0; --i) 205 push_back(op_deletion, s[i]); 206 } 207 push_backPacked_transcript208 void push_back(const Sequence &s) 209 { 210 const int l = (int)s.length(); 211 for (int i = 0; i < l; ++i) 212 push_back(op_deletion, s[i]); 213 } 214 reservePacked_transcript215 void reserve(size_t n) 216 { 217 data_.reserve(n); 218 } 219 220 private: 221 222 vector<Packed_operation> data_; 223 224 friend struct Hsp; 225 226 }; 227 228 #endif /* PACKED_TRANSCRIPT_H_ */ 229