1 /* $Id: gnomon_seq.cpp 626288 2021-02-25 14:08:59Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software / database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software / database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Alexandre Souvorov
27 *
28 * File Description:
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33
34 #include "gnomon_seq.hpp"
35
36 BEGIN_NCBI_SCOPE
37 BEGIN_SCOPE(gnomon)
38
39 const EResidue k_toMinus[5] = { enT, enG, enC, enA, enN };
40 const char *const k_aa_table = "KNKNXTTTTTRSRSXIIMIXXXXXXQHQHXPPPPPRRRRRLLLLLXXXXXEDEDXAAAAAGGGGGVVVVVXXXXX*Y*YXSSSSS*CWCXLFLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
41
Convert(const CResidueVec & src,CEResidueVec & dst)42 void Convert(const CResidueVec& src, CEResidueVec& dst)
43 {
44 int len = src.size();
45 dst.clear();
46 dst.reserve(len);
47 for(int i = 0; i < len; ++i)
48 dst.push_back( fromACGT(src[i]) );
49 }
50
Convert(const CResidueVec & src,CDoubleStrandSeq & dst)51 void Convert(const CResidueVec& src, CDoubleStrandSeq& dst)
52 {
53 Convert(src,dst[ePlus]);
54 ReverseComplement(dst[ePlus],dst[eMinus]);
55 }
56
Convert(const CEResidueVec & src,CResidueVec & dst)57 void Convert(const CEResidueVec& src, CResidueVec& dst)
58 {
59 int len = src.size();
60 dst.clear();
61 dst.reserve(len);
62 for(int i = 0; i < len; ++i)
63 dst.push_back( toACGT(src[i]) );
64 }
65
ReverseComplement(const CEResidueVec & src,CEResidueVec & dst)66 void ReverseComplement(const CEResidueVec& src, CEResidueVec& dst)
67 {
68 int len = src.size();
69 dst.clear();
70 dst.reserve(len);
71 for(int i = len-1; i >= 0; --i)
72 dst.push_back(k_toMinus[(int)src[i]]);
73 }
74
75 const TResidue codons[4][4] = {"ATG", "TAA", "TAG", "TGA"};
76 const TResidue rev_codons[4][4] = {"CAT", "TTA", "CTA", "TCA"};
77 static const EResidue s_ecodons0 [3] = { enA, enT, enG };
78 static const EResidue s_ecodons1 [3] = { enT, enA, enA };
79 static const EResidue s_ecodons2 [3] = { enT, enA, enG };
80 static const EResidue s_ecodons3 [3] = { enT, enG, enA };
81 static const EResidue s_ecodons0r[3] = { enC, enA, enT };
82 static const EResidue s_ecodons1r[3] = { enT, enT, enA };
83 static const EResidue s_ecodons2r[3] = { enC, enT, enA };
84 static const EResidue s_ecodons3r[3] = { enT, enC, enA };
85 const EResidue *ecodons[4] = {s_ecodons0, s_ecodons1, s_ecodons2, s_ecodons3};
86 const EResidue *rev_ecodons[4] = {s_ecodons0r, s_ecodons1r, s_ecodons2r, s_ecodons3r};
87
88 template <typename Res>
89 class res_traits {
90 public:
_fromACGT(TResidue x)91 static Res _fromACGT(TResidue x)
92 { return x; }
_codons(int i)93 static const Res* _codons(int i) { return codons[i]; }
_rev_codons(int i)94 static const Res* _rev_codons(int i) { return rev_codons[i]; }
95 };
96
97 template<>
98 class res_traits<EResidue> {
99 public:
_fromACGT(TResidue x)100 static EResidue _fromACGT(TResidue x)
101 { return fromACGT(x); }
_codons(int i)102 static const EResidue* _codons(int i) { return ecodons[i]; }
_rev_codons(int i)103 static const EResidue* _rev_codons(int i) { return rev_ecodons[i]; }
104 };
105
106 template <class Res>
IsStartCodon(const Res * seq,int strand)107 bool IsStartCodon(const Res * seq, int strand) // seq points to A for both strands
108 {
109 const Res * start_codon;
110 if(strand == ePlus)
111 start_codon = res_traits<Res>::_codons(0);
112 else {
113 start_codon = res_traits<Res>::_rev_codons(0);
114 seq -= 2;
115 }
116 return equal(start_codon,start_codon+3,seq);
117 }
118
119 template bool IsStartCodon<EResidue>(const EResidue * seq, int strand);
120 template bool IsStartCodon<TResidue>(const TResidue * seq, int strand);
121
122 template <class Res>
IsStopCodon(const Res * seq,int strand)123 bool IsStopCodon(const Res * seq, int strand) // seq points to T for both strands
124 {
125 if(strand == ePlus) {
126 if (*seq != res_traits<Res>::_codons(1)[0]) // T
127 return false;
128 ++seq;
129 for (int i = 1; i <= 3; ++i)
130 if(equal(res_traits<Res>::_codons(i)+1,res_traits<Res>::_codons(i)+3,seq))
131 return true;
132 return false;
133 } else {
134 if (*seq != res_traits<Res>::_rev_codons(1)[2]) // A
135 return false;
136 seq -= 2;
137 for (int i = 1; i <= 3; ++i)
138 if(equal(res_traits<Res>::_rev_codons(i)+1,res_traits<Res>::_rev_codons(i)+3,seq))
139 return true;
140 return false;
141 }
142 }
143 template bool IsStopCodon<EResidue>(const EResidue * seq, int strand);
144 template bool IsStopCodon<TResidue>(const TResidue * seq, int strand);
145
146
FindAllCodonInstances(TIVec positions[],const EResidue codon[],const CEResidueVec & mrna,TSignedSeqRange search_region,int fixed_frame)147 void FindAllCodonInstances(TIVec positions[], const EResidue codon[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
148 {
149 for (CEResidueVec::const_iterator pos = mrna.begin()+search_region.GetFrom(); (pos = search(pos,mrna.end(),codon,codon+3)) < mrna.begin()+search_region.GetTo(); ++pos) {
150 int l = pos-mrna.begin();
151 int frame = l%3;
152 if (fixed_frame==-1 || fixed_frame==frame)
153 positions[frame].push_back(l);
154 }
155 }
156
FindAllStarts(TIVec starts[],const CEResidueVec & mrna,TSignedSeqRange search_region,int fixed_frame)157 void FindAllStarts(TIVec starts[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
158 {
159 FindAllCodonInstances(starts, ecodons[0], mrna, search_region, fixed_frame);
160 }
161
FindAllStops(TIVec stops[],const CEResidueVec & mrna,TSignedSeqRange search_region,int fixed_frame)162 void FindAllStops(TIVec stops[], const CEResidueVec& mrna, TSignedSeqRange search_region, int fixed_frame)
163 {
164 for (int i=1; i <=3; ++i)
165 FindAllCodonInstances(stops, ecodons[i], mrna, search_region, fixed_frame);
166 for (int f = 0; f < 3; ++f)
167 sort(stops[f].begin(), stops[f].end());
168 }
169
170
Partial5pCodonIsStop(const CEResidueVec & seq_strand,int start,int frame)171 bool Partial5pCodonIsStop(const CEResidueVec& seq_strand, int start, int frame) {
172 if(frame == 0) // no partial codon
173 return false;
174
175 int codon_start = start+frame-3;
176 if(codon_start >= 0 && IsStopCodon(&seq_strand[codon_start]))
177 return true;
178
179 return false;
180 }
181
FindStartsStops(const CGeneModel & model,const CEResidueVec & contig_seq,const CEResidueVec & mrna,const CAlignMap & mrnamap,TIVec starts[3],TIVec stops[3],int & frame,bool obeystart)182 void FindStartsStops(const CGeneModel& model, const CEResidueVec& contig_seq, const CEResidueVec& mrna, const CAlignMap& mrnamap, TIVec starts[3], TIVec stops[3], int& frame, bool obeystart)
183 {
184 int left_cds_limit = -1;
185 int reading_frame_start = mrna.size();
186 int reading_frame_stop = mrna.size();
187 int right_cds_limit = mrna.size();
188 frame = -1;
189 EStrand strand = model.Strand();
190
191 if (!model.ReadingFrame().Empty()) {
192 // left_cds_limit = mrnamap.MapOrigToEdited(model.GetCdsInfo().MaxCdsLimits().GetFrom());
193 // _ASSERT(left_cds_limit >= 0 || model.GetCdsInfo().MaxCdsLimits().GetFrom() == TSignedSeqRange::GetWholeFrom());
194 // right_cds_limit = mrnamap.MapOrigToEdited(model.GetCdsInfo().MaxCdsLimits().GetTo());
195 // _ASSERT(right_cds_limit >= 0 || model.GetCdsInfo().MaxCdsLimits().GetTo() == TSignedSeqRange::GetWholeTo());
196
197 // if(strand == eMinus) {
198 // std::swap(left_cds_limit,right_cds_limit);
199 // }
200 // if(right_cds_limit < 0) right_cds_limit = mrna.size();
201
202 TSignedSeqRange rf = mrnamap.MapRangeOrigToEdited(model.ReadingFrame(),true);
203 reading_frame_start = rf.GetFrom();
204 _ASSERT(reading_frame_start >= 0);
205 reading_frame_stop = rf.GetTo();
206 _ASSERT(reading_frame_stop >= 0);
207
208 if (reading_frame_start == 0 && IsStartCodon(&mrna[reading_frame_start]) && reading_frame_start+3 < reading_frame_stop)
209 reading_frame_start += 3;
210
211 _ASSERT( -1 <= left_cds_limit && left_cds_limit <= reading_frame_start );
212 _ASSERT( 0 <= reading_frame_start && reading_frame_start <= reading_frame_stop && reading_frame_stop < int(mrna.size()) );
213 _ASSERT( reading_frame_stop <= right_cds_limit && right_cds_limit <= int(mrna.size()) );
214
215 frame = reading_frame_start%3;
216
217 if (left_cds_limit<0) {
218 if (reading_frame_start >= 3) {
219 FindAllStops(stops,mrna,TSignedSeqRange(0,reading_frame_start),frame); // 5' inframe stops
220 }
221
222 if (stops[frame].size()>0)
223 left_cds_limit = stops[frame].back()+3; // earliest start of CDS
224 } else {
225 FindAllStops(stops,mrna,TSignedSeqRange(0,left_cds_limit),frame);
226 }
227
228 reading_frame_start = reading_frame_stop-5; // allow starts inside reading frame if not protein
229 if(model.GetCdsInfo().ProtReadingFrame().NotEmpty()) {
230 TSignedSeqRange protrf = mrnamap.MapRangeOrigToEdited(model.GetCdsInfo().ProtReadingFrame(),true);
231 reading_frame_start = min(protrf.GetFrom(),reading_frame_start);
232 }
233 }
234
235 if (left_cds_limit<0) { // don't know frame or no upstream stop
236 int model_start = mrnamap.MapEditedToOrig(0);
237
238 if(Include(model.GetCdsInfo().ProtReadingFrame(),model_start) && reading_frame_start < 3) {
239 starts[0].push_back(-3); // proteins are scored no matter what
240 } else {
241 if(strand == eMinus)
242 model_start = contig_seq.size()-1-model_start;
243 for (int i = 0; i<3; ++i) {
244 if (frame == -1 || frame == i) {
245
246 if(Partial5pCodonIsStop(contig_seq,model_start,i))
247 stops[i].push_back(i-3); // stop to limit MaxCds
248 else
249 starts[i].push_back(i-3); // fake start(s) for open cds
250
251
252 /*
253 if (UpstreamStartOrSplice(contig_seq,model_start,i))
254 starts[i].push_back(i-3);
255 else
256 stops[i].push_back(i-3); // bogus stop to limit MaxCds
257 */
258 }
259 }
260 }
261
262 left_cds_limit = 0;
263 }
264
265 if(obeystart && model.HasStart()) {
266 TSignedSeqRange start = mrnamap.MapRangeOrigToEdited(model.GetCdsInfo().Start(),false);
267 starts[frame].push_back(start.GetFrom());
268 } else if(reading_frame_start-left_cds_limit >= 3) {
269 FindAllStarts(starts,mrna,TSignedSeqRange(left_cds_limit,reading_frame_start-1),frame); // 5' starts or all starts
270 }
271
272 if (frame==-1) {
273 FindAllStops(stops,mrna,TSignedSeqRange(0,mrna.size()-1),frame); // all stops
274 } else if (right_cds_limit - reading_frame_stop >= 3) {
275 FindAllStops(stops,mrna,TSignedSeqRange(reading_frame_stop+1,right_cds_limit),frame); // inframe 3' stops
276 }
277
278 if (int(mrna.size()) <= right_cds_limit) { // fake stops for partials
279 stops[mrna.size()%3].push_back(mrna.size());
280 stops[(mrna.size()-1)%3].push_back(mrna.size()-1);
281 stops[(mrna.size()-2)%3].push_back(mrna.size()-2);
282 }
283
284 }
285
FindUpstreamStop(const vector<int> & stops,int start,int & stop)286 bool FindUpstreamStop(const vector<int>& stops, int start, int& stop)
287 {
288 vector<int>::const_iterator it_stop = lower_bound(stops.begin(),stops.end(),start);
289
290 if(it_stop != stops.begin()) {
291 stop = *(--it_stop);
292 return true;
293 } else
294 return false;
295 }
296
297 /*
298 TInDels CAlignMap::GetAllCorrections() const {
299
300 TInDels indels = GetInDels(false);
301 for(int range = 0; range < (int)m_orig_ranges.size(); ++range) {
302 const string& mism = m_edited_ranges[range].GetMismatch();
303 if(!mism.empty()) {
304 int len = m_orig_ranges[range].GetTo()-m_orig_ranges[range].GetFrom()+1;
305 _ASSERT(len == (int)mism.size());
306 indels.push_back(CInDelInfo(m_orig_ranges[range].GetFrom(), len, CInDelInfo::eMism, mism));
307 }
308 }
309 sort(indels.begin(), indels.end());
310
311 return indels;
312 }
313 */
314
PushInDel(TInDels & indels,bool fs_only,TSignedSeqPos p,int len,CInDelInfo::EType type,const string & seq="")315 void PushInDel(TInDels& indels, bool fs_only, TSignedSeqPos p, int len, CInDelInfo::EType type, const string& seq = "") {
316 if(!fs_only || len%3 != 0)
317 indels.push_back(CInDelInfo(p, len, type, seq));
318 }
319
320 /*
321 TInDels CAlignMap::GetInDels(bool fs_only) const {
322 TInDels indels;
323
324 if(m_orig_ranges.front().GetTypeFrom() != eGgap) {
325 if(m_orig_ranges.front().GetExtraFrom() > 0) { // everything starts from insertion
326 int len = m_orig_ranges.front().GetExtraFrom();
327 TSignedSeqPos p = m_orig_ranges.front().GetFrom()-len;
328 PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
329 }
330 if(m_edited_ranges.front().GetExtraFrom() > 0) { // everything starts from deletion
331 int len = m_edited_ranges.front().GetExtraFrom();
332 string seq = m_edited_ranges.front().GetExtraSeqFrom();
333 TSignedSeqPos p = m_orig_ranges.front().GetFrom();
334 PushInDel(indels, fs_only, p, len,CInDelInfo::eDel, seq);
335 }
336 }
337
338 for(unsigned int range = 1; range < m_orig_ranges.size(); ++range) {
339 if(m_orig_ranges[range].GetTypeFrom() == eGgap)
340 continue;
341
342 if(fs_only) {
343 int len = m_orig_ranges[range].GetExtraFrom()-m_edited_ranges[range].GetExtraFrom();
344 if(m_orig_ranges[range].GetTypeFrom() != eInDel)
345 len += m_orig_ranges[range-1].GetExtraTo()-m_edited_ranges[range-1].GetExtraTo();
346 if(len%3 == 0)
347 continue;
348 }
349
350 if(m_orig_ranges[range-1].GetTypeTo() != eInDel) {
351 if(m_orig_ranges[range-1].GetExtraTo() > 0) {
352 int len = m_orig_ranges[range-1].GetExtraTo();
353 TSignedSeqPos p = m_orig_ranges[range-1].GetTo()+1;
354 PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
355 }
356 if(m_edited_ranges[range-1].GetExtraTo() > 0) {
357 int len = m_edited_ranges[range-1].GetExtraTo();
358 string seq = m_edited_ranges[range-1].GetExtraSeqTo();
359 TSignedSeqPos p = m_orig_ranges[range-1].GetTo()+m_orig_ranges[range-1].GetExtraTo()+1; // if there is insertion+deletion (mismatch) this is correct position
360 PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
361 }
362 }
363
364 if(m_orig_ranges[range].GetExtraFrom() > 0) { // insertion on the left side
365 int len = m_orig_ranges[range].GetExtraFrom();
366 TSignedSeqPos p = m_orig_ranges[range].GetFrom()-len;
367 PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
368 }
369 if(m_edited_ranges[range].GetExtraFrom() > 0) { // deletion on the left side
370 int len = m_edited_ranges[range].GetExtraFrom();
371 string seq = m_edited_ranges[range].GetExtraSeqFrom();
372 TSignedSeqPos p = m_orig_ranges[range].GetFrom(); // if there is insertion+deletion (mismatch) this is correct position
373 PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
374 }
375 }
376
377 if(m_orig_ranges.back().GetTypeTo() != eGgap) {
378 if(m_orig_ranges.back().GetExtraTo() > 0) { // everything ends by insertion
379 int len = m_orig_ranges.back().GetExtraTo();
380 TSignedSeqPos p = m_orig_ranges.back().GetTo()+1;
381 PushInDel(indels, fs_only, p, len, CInDelInfo::eIns);
382 }
383 if(m_edited_ranges.back().GetExtraTo() > 0) { // everything ends by deletion
384 int len = m_edited_ranges.back().GetExtraTo();
385 string seq = m_edited_ranges.back().GetExtraSeqTo();
386 TSignedSeqPos p = m_orig_ranges.back().GetTo()+1;
387 PushInDel(indels, fs_only, p, len, CInDelInfo::eDel, seq);
388 }
389 }
390
391 return indels;
392 }
393 */
394
InsertOneToOneRange(TSignedSeqPos orig_start,TSignedSeqPos edited_start,int len,const string & mism,int left_orige,int left_edite,int right_orige,int right_edite,EEdgeType left_type,EEdgeType right_type,const string & left_edit_extra_seq,const string & right_edit_extra_seq)395 void CAlignMap::InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, int len, const string& mism, int left_orige, int left_edite, int right_orige, int right_edite, EEdgeType left_type, EEdgeType right_type, const string& left_edit_extra_seq, const string& right_edit_extra_seq)
396 {
397 _ASSERT(len > 0);
398 _ASSERT(m_orig_ranges.empty() || (orig_start > m_orig_ranges.back().GetTo() && edited_start > m_edited_ranges.back().GetTo()));
399
400 CAlignMap::SMapRangeEdge orig_from(orig_start, left_orige, left_type);
401 CAlignMap::SMapRangeEdge orig_to(orig_start+len-1, right_orige, right_type);
402 m_orig_ranges.push_back(SMapRange(orig_from, orig_to, kEmptyStr));
403
404 CAlignMap::SMapRangeEdge edited_from(edited_start, left_edite, left_type, left_edit_extra_seq);
405 _ASSERT((int)left_edit_extra_seq.length() == 0 || (int)left_edit_extra_seq.length() == left_edite);
406 CAlignMap::SMapRangeEdge edited_to(edited_start+len-1, right_edite, right_type, right_edit_extra_seq);
407 _ASSERT((int)right_edit_extra_seq.length() == 0 || (int)right_edit_extra_seq.length() == right_edite);
408 m_edited_ranges.push_back(SMapRange(edited_from, edited_to, mism));
409 }
410
InsertIndelRangesForInterval(TSignedSeqPos orig_a,TSignedSeqPos orig_b,TSignedSeqPos edit_a,TInDels::const_iterator fsi_begin,TInDels::const_iterator fsi_end,EEdgeType type_a,EEdgeType type_b,const string & gseq_a,const string & gseq_b)411 TSignedSeqPos CAlignMap::InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin,
412 TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string& gseq_a, const string& gseq_b)
413 {
414 TInDels::const_iterator fsi = fsi_begin;
415 for( ;fsi != fsi_end && fsi->Loc() < orig_a; ++fsi ) {
416 _ASSERT( !fsi->IntersectingWith(orig_a,orig_b) );
417 }
418
419 int left_orige = 0;
420 int left_edite = gseq_a.length();
421 string left_edit_extra_seq = gseq_a;
422 string mism;
423
424 for( ;fsi != fsi_end && fsi->Loc() == orig_a && !fsi->IsMismatch(); ++fsi ) { // first left end
425 if(fsi->IsInsertion()) {
426 _ASSERT(type_a != eBoundary);
427 orig_a += fsi->Len();
428 left_orige += fsi->Len();
429 } else {
430 edit_a += fsi->Len();
431 left_edite += fsi->Len();
432 left_edit_extra_seq += fsi->GetInDelV();
433 }
434 }
435 for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(int)mism.size(); ++fsi)
436 mism += fsi->GetInDelV();
437
438 while(fsi != fsi_end && fsi->InDelEnd() <= orig_b+1) {
439 int len = (mism.empty() ? fsi->Loc()-orig_a : mism.size());
440 _ASSERT(len > 0 && orig_a+len-1 <= orig_b);
441
442 int bb = orig_a+len;
443 int right_orige = 0;
444 int right_edite = 0;
445 string right_edit_extra_seq;
446 for( ;fsi != fsi_end && fsi->Loc() == bb && !fsi->IsMismatch(); ++fsi ) { // right end
447 if (fsi->IsInsertion()) {
448 right_orige += fsi->Len();
449 bb += fsi->Len();
450 } else {
451 right_edite += fsi->Len();
452 right_edit_extra_seq += fsi->GetInDelV();
453 }
454 }
455
456 int next_orig_a = orig_a+len+right_orige;
457 EEdgeType tb = eInDel;
458 if(next_orig_a > orig_b) {
459 right_edit_extra_seq += gseq_b;
460 right_edite += gseq_b.length();
461 tb = type_b;
462 }
463 InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, right_orige, right_edite, type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
464
465 orig_a = next_orig_a;
466 edit_a += len+right_edite;
467 type_a = eInDel;
468 left_orige = right_orige;
469 left_edite = right_edite;
470 left_edit_extra_seq = right_edit_extra_seq;
471 mism.clear();
472 for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(int)mism.size(); ++fsi)
473 mism += fsi->GetInDelV();
474 }
475
476 if(!mism.empty()) {
477 int len = mism.size();
478 string right_edit_extra_seq;
479 EEdgeType tb = eInDel;
480 if(orig_a+len > orig_b) {
481 right_edit_extra_seq = gseq_b;
482 tb = type_b;
483 }
484 InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, 0, gseq_b.length(), type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
485 orig_a += len;
486 edit_a += len;
487 type_a = eInDel;
488 left_orige = 0;
489 left_edite = 0;
490 left_edit_extra_seq.clear();
491 mism.clear();
492 }
493
494 if(orig_a <= orig_b) {
495 int len = orig_b-orig_a+1;
496 _ASSERT(len > 0);
497 InsertOneToOneRange(orig_a, edit_a, len, mism, left_orige, left_edite, 0, gseq_b.length(), type_a, type_b, left_edit_extra_seq, gseq_b);
498 edit_a += len;
499 }
500
501 return edit_a;
502 }
503
CAlignMap(const CGeneModel::TExons & exons,const vector<TSignedSeqRange> & transcript_exons,const TInDels & indels,EStrand orientation,int target_len)504 CAlignMap::CAlignMap(const CGeneModel::TExons& exons, const vector<TSignedSeqRange>& transcript_exons, const TInDels& indels, EStrand orientation, int target_len ) : m_orientation(orientation), m_target_len(target_len) {
505
506 #ifdef _DEBUG
507 _ASSERT(transcript_exons.size() == exons.size());
508 _ASSERT(transcript_exons.size() == 1 || (orientation == ePlus && transcript_exons.front().GetFrom() < transcript_exons.back().GetFrom()) ||
509 (orientation == eMinus && transcript_exons.front().GetFrom() > transcript_exons.back().GetFrom()));
510 int diff = 0;
511 for(unsigned int i = 0; i < exons.size(); ++i) {
512 int exonlen = (exons[i].Limits().Empty()) ? exons[i].m_seq.length() : exons[i].Limits().GetLength();
513 diff += exonlen-(transcript_exons[i].GetTo()-transcript_exons[i].GetFrom()+1);
514 }
515 ITERATE(TInDels, f, indels) {
516 if(!f->IsMismatch())
517 diff += (f->IsDeletion()) ? f->Len() : -f->Len();
518 }
519 _ASSERT(diff == 0);
520 #endif
521
522 m_orig_ranges.reserve(exons.size()+indels.size());
523 m_edited_ranges.reserve(exons.size()+indels.size());
524
525 TSignedSeqPos estart = (m_orientation == ePlus) ? transcript_exons.front().GetFrom() : transcript_exons.back().GetFrom();
526 for(unsigned int i = 0; i < exons.size(); ++i) {
527 if(exons[i].Limits().Empty()) {
528 _ASSERT(i == 0 || exons[i-1].Limits().NotEmpty());
529 _ASSERT(i == exons.size()-1 || exons[i+1].Limits().NotEmpty());
530 } else {
531 EEdgeType type_a = exons[i].m_fsplice ? eSplice : eBoundary;
532 EEdgeType type_b = exons[i].m_ssplice ? eSplice : eBoundary;
533 string gseq_a;
534 string gseq_b;
535 if(i > 0 && exons[i-1].Limits().Empty()) { // prev exon is Ggap
536 type_a = eGgap;
537 gseq_a = exons[i-1].m_seq;
538 estart += gseq_a.length();
539 }
540 if(i < exons.size()-1 && exons[i+1].Limits().Empty()) { // next exon is Ggap
541 type_b = eGgap;
542 gseq_b = exons[i+1].m_seq;
543 }
544 if(m_orientation == eMinus) {
545 ReverseComplement(gseq_a.begin(),gseq_a.end());
546 ReverseComplement(gseq_b.begin(),gseq_b.end());
547 }
548 estart = InsertIndelRangesForInterval(exons[i].GetFrom(), exons[i].GetTo(), estart, indels.begin(), indels.end(), type_a, type_b, gseq_a, gseq_b);
549 }
550
551 if(i != exons.size()-1) {
552 if(m_orientation == ePlus) {
553 estart += transcript_exons[i+1].GetFrom()-transcript_exons[i].GetTo()-1;
554 } else {
555 estart += transcript_exons[i].GetFrom()-transcript_exons[i+1].GetTo()-1;
556 }
557 }
558 }
559 }
560
CAlignMap(const CGeneModel::TExons & exons,const TInDels & indels,EStrand strand,TSignedSeqRange lim,int holelen,int polyalen)561 CAlignMap::CAlignMap(const CGeneModel::TExons& exons, const TInDels& indels, EStrand strand, TSignedSeqRange lim, int holelen, int polyalen) : m_orientation(strand) {
562
563 TInDels::const_iterator fsi_begin = indels.begin();
564 TInDels::const_iterator fsi_end = indels.end();
565
566 m_orig_ranges.reserve(exons.size()+(fsi_end-fsi_begin));
567 m_edited_ranges.reserve(exons.size()+(fsi_end-fsi_begin));
568
569 TSignedSeqPos estart = 0;
570 for(unsigned int i = 0; i < exons.size(); ++i) {
571 if(exons[i].Limits().Empty()) {
572 _ASSERT(i == 0 || exons[i-1].Limits().NotEmpty());
573 _ASSERT(i == exons.size()-1 || exons[i+1].Limits().NotEmpty());
574 } else {
575 TSignedSeqPos start = exons[i].GetFrom();
576 TSignedSeqPos stop = exons[i].GetTo();
577 EEdgeType type_a = exons[i].m_fsplice ? eSplice : eBoundary;
578 EEdgeType type_b = exons[i].m_ssplice ? eSplice : eBoundary;
579 string gseq_a;
580 string gseq_b;
581 if(i > 0 && exons[i-1].Limits().Empty()) { // prev exon is Ggap
582 type_a = eGgap;
583 gseq_a = exons[i-1].m_seq;
584 estart += gseq_a.length();
585 }
586 if(i < exons.size()-1 && exons[i+1].Limits().Empty()) { // next exon is Ggap
587 type_b = eGgap;
588 gseq_b = exons[i+1].m_seq;
589 }
590 if(m_orientation == eMinus) {
591 ReverseComplement(gseq_a.begin(),gseq_a.end());
592 ReverseComplement(gseq_b.begin(),gseq_b.end());
593 }
594
595 if(stop < lim.GetFrom()) continue;
596 if(lim.GetTo() < start) break;
597
598 if(lim.GetFrom() >= start) {
599 start = lim.GetFrom();
600 type_a = eBoundary;
601 }
602 if(lim.GetTo() <= stop) {
603 stop = lim.GetTo();
604 type_b = eBoundary;
605 }
606
607 estart = InsertIndelRangesForInterval(start, stop, estart, fsi_begin, fsi_end, type_a, type_b, gseq_a, gseq_b);
608 if(i != exons.size()-1 && (!exons[i+1].m_fsplice || !exons[i].m_ssplice))
609 estart += holelen;
610 }
611 }
612
613 if(!m_edited_ranges.empty())
614 m_target_len = m_edited_ranges.back().GetExtendedTo()+1+polyalen;
615
616 _ASSERT(m_edited_ranges.size() == m_orig_ranges.size());
617 }
618
619 template <class Vec>
EditedSequence(const Vec & original_sequence,Vec & edited_sequence,bool includeholes) const620 void CAlignMap::EditedSequence(const Vec& original_sequence, Vec& edited_sequence, bool includeholes) const
621 {
622 edited_sequence.clear();
623
624 string s;
625 if(includeholes) {
626 int l = (m_orientation == ePlus) ? m_edited_ranges.front().GetFrom() : CAlignMap::TargetLen()-m_edited_ranges.back().GetTo()-1;
627 s.insert(s.end(), l, 'N');
628 } else {
629 s = m_edited_ranges.front().GetExtraSeqFrom();
630 }
631 ITERATE(string, i, s)
632 edited_sequence.push_back(res_traits<typename Vec::value_type>::_fromACGT(*i));
633
634 for(int range = 0; range < (int)m_orig_ranges.size(); ++range) {
635 string seq = m_edited_ranges[range].GetMismatch();
636
637 if(seq.empty()) {
638 int a = m_orig_ranges[range].GetFrom();
639 int b = m_orig_ranges[range].GetTo()+1;
640 edited_sequence.insert(edited_sequence.end(),original_sequence.begin()+a, original_sequence.begin()+b);
641 }
642
643 if(range < (int)m_orig_ranges.size()-1) {
644 if(m_edited_ranges[range].GetTypeTo() == eBoundary) {
645 if(includeholes) {
646 int l = m_edited_ranges[range+1].GetFrom()-m_edited_ranges[range].GetTo()-1; // missed part
647 seq.insert(seq.end(), l, 'N');
648 }
649 } else if(m_edited_ranges[range].GetTypeTo() == eSplice) {
650 seq += m_edited_ranges[range].GetExtraSeqTo() + m_edited_ranges[range+1].GetExtraSeqFrom(); // indels from two exon ends
651 } else {
652 seq += m_edited_ranges[range].GetExtraSeqTo(); // indel inside exon or Ggap
653 }
654 } else {
655 if(includeholes) {
656 int l = (m_orientation == ePlus) ? CAlignMap::TargetLen()-m_edited_ranges.back().GetTo()-1 : m_edited_ranges.front().GetFrom();
657 seq.insert(seq.end(), l, 'N');
658 } else {
659 seq += m_edited_ranges.back().GetExtraSeqTo();
660 }
661 }
662 ITERATE(string, i, seq)
663 edited_sequence.push_back(res_traits<typename Vec::value_type>::_fromACGT(*i));
664 }
665
666 if(m_orientation == eMinus)
667 ReverseComplement(edited_sequence.begin(), edited_sequence.end());
668 }
669
FindLowerRange(const vector<CAlignMap::SMapRange> & a,TSignedSeqPos p)670 int CAlignMap::FindLowerRange(const vector<CAlignMap::SMapRange>& a, TSignedSeqPos p) {
671 int num = lower_bound(a.begin(), a.end(), CAlignMap::SMapRange(p+1, p+1, kEmptyStr))-a.begin()-1;
672 return num;
673 }
674
675 template
676 void CAlignMap::EditedSequence<CResidueVec>(const CResidueVec& original_sequence, CResidueVec& edited_sequence, bool includeholes) const;
677 template
678 void CAlignMap::EditedSequence<CEResidueVec>(const CEResidueVec& original_sequence, CEResidueVec& edited_sequence, bool includeholes) const;
679 template
680 void CAlignMap::EditedSequence<string>(const string& original_sequence, string& edited_sequence, bool includeholes) const;
681
ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const682 TSignedSeqRange CAlignMap::ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const {
683
684 if(m_orientation == eMinus) {
685 int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
686 TSignedSeqPos left = edited_range.GetTo();
687 TSignedSeqPos right = edited_range.GetFrom();
688
689 if(left == TSignedSeqRange::GetWholeTo()) {
690 left = TSignedSeqRange::GetWholeFrom();
691 } else {
692 left = offset-left;
693 }
694
695 if(right == TSignedSeqRange::GetWholeFrom()) {
696 right = TSignedSeqRange::GetWholeTo();
697 } else {
698 right = offset-right;
699 }
700
701 edited_range = TSignedSeqRange(left, right);
702 }
703
704 _ASSERT(edited_range.NotEmpty() && Include(TSignedSeqRange(m_edited_ranges.front().GetExtendedFrom(),m_edited_ranges.back().GetExtendedTo()), edited_range));
705
706 TSignedSeqPos a = edited_range.GetFrom();
707 int numa = FindLowerRange(m_edited_ranges, a);
708
709 if(numa < 0 || a > m_edited_ranges[numa].GetTo()) { // a was insertion on the genome, moved to first projectable point
710 ++numa;
711 if((int)m_edited_ranges.size() == numa)
712 return TSignedSeqRange::GetEmpty();
713 a = m_edited_ranges[numa].GetFrom();
714 }
715
716 TSignedSeqPos b = edited_range.GetTo();
717 int numb = FindLowerRange(m_edited_ranges, b);
718
719 if(b > m_edited_ranges[numb].GetTo()) { // a was insertion on the genome, moved to first projectable point
720 b = m_edited_ranges[numb].GetTo();
721 }
722
723 if(m_orientation == eMinus) {
724 int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
725 TSignedSeqPos left = b;
726 TSignedSeqPos right = a;
727
728 if(left == TSignedSeqRange::GetWholeTo()) {
729 left = TSignedSeqRange::GetWholeFrom();
730 } else {
731 left = offset-left;
732 }
733
734 if(right == TSignedSeqRange::GetWholeFrom()) {
735 right = TSignedSeqRange::GetWholeTo();
736 } else {
737 right = offset-right;
738 }
739
740 a = left;
741 b = right;
742 }
743
744 return TSignedSeqRange(a, b);
745 }
746
747
ShrinkToRealPoints(TSignedSeqRange orig_range,bool snap_to_codons) const748 TSignedSeqRange CAlignMap::ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons) const {
749
750 _ASSERT(orig_range.NotEmpty() && Include(TSignedSeqRange(m_orig_ranges.front().GetExtendedFrom(),m_orig_ranges.back().GetExtendedTo()), orig_range));
751
752 TSignedSeqPos a = orig_range.GetFrom();
753 int numa = FindLowerRange(m_orig_ranges, a);
754
755 if(numa < 0 || a > m_orig_ranges[numa].GetTo()) { // a was insertion on the genome, moved to first projectable point
756 ++numa;
757 if((int)m_orig_ranges.size() == numa)
758 return TSignedSeqRange::GetEmpty();
759 a = m_orig_ranges[numa].GetFrom();
760 }
761 if(snap_to_codons) {
762 bool snapped = false;
763 while(!snapped) {
764 TSignedSeqPos tp = m_edited_ranges[numa].GetFrom()+a-m_orig_ranges[numa].GetFrom();
765 if(m_orientation == eMinus)
766 tp = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-tp;
767 if((m_orientation == ePlus && tp%3 == 0) || (m_orientation == eMinus && tp%3 == 2)) {
768 snapped = true;
769 } else { // not a codon boundary
770 if(a < m_orig_ranges[numa].GetTo()) { // can move in this interval
771 ++a;
772 } else { // moved to next interval
773 ++numa;
774 if((int)m_orig_ranges.size() == numa)
775 return TSignedSeqRange::GetEmpty();
776 a = m_orig_ranges[numa].GetFrom();
777 }
778 }
779 }
780 }
781
782
783 TSignedSeqPos b = orig_range.GetTo();
784 int numb = FindLowerRange(m_orig_ranges, b);
785
786 if(b > m_orig_ranges[numb].GetTo()) { // a was insertion on the genome, moved to first projectable point
787 b = m_orig_ranges[numb].GetTo();
788 }
789 if(snap_to_codons) {
790 bool snapped = false;
791 while(!snapped) {
792 TSignedSeqPos tp = m_edited_ranges[numb].GetFrom()+b-m_orig_ranges[numb].GetFrom();
793 if(m_orientation == eMinus)
794 tp = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-tp;
795 if((m_orientation == ePlus && tp%3 == 2) || (m_orientation == eMinus && tp%3==0)) {
796 snapped = true;
797 } else { // not a codon boundary
798 if(b > m_orig_ranges[numb].GetFrom()) { // can move in this interval
799 --b;
800 } else { // moved to next interval
801 --numb;
802 if(numb < 0)
803 return TSignedSeqRange::GetEmpty();
804 b = m_orig_ranges[numb].GetTo();
805 }
806 }
807 }
808 }
809
810 return TSignedSeqRange(a, b);
811 }
812
FShiftedMove(TSignedSeqPos orig_pos,int len) const813 TSignedSeqPos CAlignMap::FShiftedMove(TSignedSeqPos orig_pos, int len) const {
814 orig_pos = MapOrigToEdited(orig_pos);
815 if(orig_pos < 0)
816 return orig_pos;
817 if(m_orientation == ePlus)
818 orig_pos += len;
819 else
820 orig_pos -= len;
821 orig_pos = MapEditedToOrig(orig_pos);
822 return orig_pos;
823 }
824
825
MapAtoB(const vector<CAlignMap::SMapRange> & a,const vector<CAlignMap::SMapRange> & b,TSignedSeqPos p,ERangeEnd move_mode)826 TSignedSeqPos CAlignMap::MapAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqPos p, ERangeEnd move_mode) {
827 if(p < a.front().GetExtendedFrom() || p > a.back().GetExtendedTo()) return -1;
828
829 if(p < a.front().GetFrom()) {
830 if(move_mode == eLeftEnd && b.front().GetTypeFrom() != eGgap) {
831 return b.front().GetExtendedFrom();
832 } else {
833 return -1;
834 }
835 }
836
837 if(p > a.back().GetTo()) {
838 if(move_mode == eRightEnd && b.back().GetTypeTo() != eGgap) {
839 return b.back().GetExtendedTo();
840 } else {
841 return -1;
842 }
843 }
844
845 int num = FindLowerRange(a, p); // range a[num] exists and its start <= p
846 // if a[num+1] exists all points are > p
847 if(p > a[num].GetTo()) { // between ranges (insertion or intron in a), num+1 exists
848 if(a[num].GetTypeTo() == eGgap)
849 return -1;
850
851 switch(move_mode) {
852 case eLeftEnd:
853 return b[num+1].GetExtendedFrom();
854 case eRightEnd:
855 return b[num].GetExtendedTo();
856 default:
857 return -1;
858 }
859 } else if(p == a[num].GetTo()) {
860 if(move_mode == eRightEnd && b[num].GetTypeTo() != eGgap) {
861 return b[num].GetExtendedTo();
862 } else if(p == a[num].GetFrom() && move_mode == eLeftEnd && b[num].GetTypeFrom() != eGgap) { // one base interval
863 return b[num].GetExtendedFrom();
864 } else {
865 return b[num].GetTo();
866 }
867 } else if(p == a[num].GetFrom()) {
868 if(move_mode == eLeftEnd && b[num].GetTypeFrom() != eGgap) {
869 return b[num].GetExtendedFrom();
870 } else {
871 return b[num].GetFrom();
872 }
873 } else {
874 return b[num].GetFrom()+p-a[num].GetFrom();
875 }
876 }
877
MapOrigToEdited(TSignedSeqPos orig_pos) const878 TSignedSeqPos CAlignMap::MapOrigToEdited(TSignedSeqPos orig_pos) const {
879 TSignedSeqPos p = MapAtoB(m_orig_ranges, m_edited_ranges, orig_pos, eSinglePoint);
880
881 if(m_orientation == eMinus && p >= 0) {
882 p = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-p;
883 }
884 return p;
885 }
886
MapEditedToOrig(TSignedSeqPos edited_pos) const887 TSignedSeqPos CAlignMap::MapEditedToOrig(TSignedSeqPos edited_pos) const {
888 if(m_orientation == eMinus) {
889 edited_pos = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo()-edited_pos;
890 }
891
892 return MapAtoB(m_edited_ranges, m_orig_ranges, edited_pos, eSinglePoint);
893 }
894
895
MapRangeAtoB(const vector<CAlignMap::SMapRange> & a,const vector<CAlignMap::SMapRange> & b,TSignedSeqRange r,ERangeEnd lend,ERangeEnd rend)896 TSignedSeqRange CAlignMap::MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend) {
897
898 if(r.Empty()) return TSignedSeqRange::GetEmpty();
899
900 TSignedSeqPos left;
901 if(r.GetFrom() == TSignedSeqRange::GetWholeFrom()) {
902 left = TSignedSeqRange::GetWholeFrom();
903 } else {
904 left = MapAtoB(a, b, r.GetFrom(), lend);
905 if(left < 0)
906 return TSignedSeqRange::GetEmpty();
907 }
908 TSignedSeqPos right;
909 if(r.GetTo() == TSignedSeqRange::GetWholeTo()) {
910 right = TSignedSeqRange::GetWholeTo();
911 } else {
912 right = MapAtoB(a, b, r.GetTo(), rend);
913 if(right < 0)
914 return TSignedSeqRange::GetEmpty();
915 }
916
917 _ASSERT(right >= left);
918
919 return TSignedSeqRange(left, right);
920 }
921
MapRangeOrigToEdited(TSignedSeqRange orig_range,ERangeEnd lend,ERangeEnd rend) const922 TSignedSeqRange CAlignMap::MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const {
923
924 if(orig_range.Empty()) return TSignedSeqRange::GetEmpty();
925
926 TSignedSeqRange er = MapRangeAtoB(m_orig_ranges, m_edited_ranges, orig_range, lend, rend);
927
928 if(er.Empty() || m_orientation == ePlus)
929 return er;
930
931 int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
932 TSignedSeqPos left = er.GetTo();
933 TSignedSeqPos right = er.GetFrom();
934
935 if(left == TSignedSeqRange::GetWholeTo()) {
936 left = TSignedSeqRange::GetWholeFrom();
937 } else {
938 left = offset-left;
939 }
940
941 if(right == TSignedSeqRange::GetWholeFrom()) {
942 right = TSignedSeqRange::GetWholeTo();
943 } else {
944 right = offset-right;
945 }
946
947 return TSignedSeqRange(left, right);
948 }
949
MapRangeEditedToOrig(TSignedSeqRange edited_range,bool withextras) const950 TSignedSeqRange CAlignMap::MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras) const {
951
952 if(edited_range.Empty()) return TSignedSeqRange::GetEmpty();
953
954 if(m_orientation == eMinus) {
955 int offset = m_edited_ranges.front().GetExtendedFrom()+m_edited_ranges.back().GetExtendedTo();
956 TSignedSeqPos left = edited_range.GetTo();
957 TSignedSeqPos right = edited_range.GetFrom();
958
959 if(left == TSignedSeqRange::GetWholeTo()) {
960 left = TSignedSeqRange::GetWholeFrom();
961 } else {
962 left = offset-left;
963 }
964
965 if(right == TSignedSeqRange::GetWholeFrom()) {
966 right = TSignedSeqRange::GetWholeTo();
967 } else {
968 right = offset-right;
969 }
970
971 edited_range = TSignedSeqRange(left, right);
972 }
973
974 return MapRangeAtoB(m_edited_ranges, m_orig_ranges, edited_range, withextras);
975 }
976
FShiftedLen(TSignedSeqRange ab,ERangeEnd lend,ERangeEnd rend) const977 int CAlignMap::FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const {
978
979 int len = MapRangeOrigToEdited(ab, lend, rend).GetLength();
980
981 for(int i = 1; i < (int)m_edited_ranges.size(); ++i) {
982 if(m_edited_ranges[i].GetTypeFrom() == eBoundary && Include(ab,m_orig_ranges[i].GetFrom()))
983 len -= m_edited_ranges[i].GetFrom()-m_edited_ranges[i-1].GetTo()-1;
984 }
985
986 return len;
987 }
988
FShiftedLen(TSignedSeqRange ab,bool withextras) const989 int CAlignMap::FShiftedLen(TSignedSeqRange ab, bool withextras) const {
990
991 int len = MapRangeOrigToEdited(ab, withextras).GetLength();
992
993 for(int i = 1; i < (int)m_edited_ranges.size(); ++i) {
994 if(m_edited_ranges[i].GetTypeFrom() == eBoundary && Include(ab,m_orig_ranges[i-1].GetTo()) && Include(ab,m_orig_ranges[i].GetFrom()))
995 len -= m_edited_ranges[i].GetFrom()-m_edited_ranges[i-1].GetTo()-1;
996 }
997
998 return len;
999 }
1000
1001
1002
1003 END_SCOPE(gnomon)
1004 END_NCBI_SCOPE
1005