1 /* $Id: loc_edit.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 * functions for editing and working with locations
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <objtools/edit/loc_edit.hpp>
35 #include <objtools/edit/cds_fix.hpp>
36 #include <objects/seq/Seq_descr.hpp>
37 #include <objects/seq/Seqdesc.hpp>
38 #include <objects/seq/Delta_ext.hpp>
39 #include <objects/seq/Delta_seq.hpp>
40 #include <objects/seq/Seq_ext.hpp>
41 #include <objects/seq/Seq_inst.hpp>
42 #include <objects/seqfeat/Code_break.hpp>
43 #include <objects/seqfeat/Cdregion.hpp>
44 #include <objects/seqfeat/RNA_ref.hpp>
45 #include <objects/seqfeat/Trna_ext.hpp>
46 #include <objects/seqloc/Packed_seqint.hpp>
47 #include <objects/seqloc/Packed_seqpnt.hpp>
48 #include <objects/seqloc/Seq_bond.hpp>
49 #include <objects/seqloc/Seq_interval.hpp>
50 #include <objects/seqloc/Seq_loc_equiv.hpp>
51 #include <objects/seqloc/Seq_loc_mix.hpp>
52 #include <objects/seqloc/Seq_point.hpp>
53 #include <objects/general/Int_fuzz.hpp>
54 #include <objects/misc/sequence_util_macros.hpp>
55 #include <objmgr/bioseq_handle.hpp>
56 #include <util/sequtil/sequtil_convert.hpp>
57 #include <objmgr/util/sequence.hpp>
58 #include <objmgr/seq_vector.hpp>
59
60 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)61 BEGIN_SCOPE(objects)
62 BEGIN_SCOPE(edit)
63
64 string PrintBestSeqId(const CSeq_id& sid, CScope& scope)
65 {
66 string best_id(kEmptyStr);
67
68 // Best seq_id
69 CSeq_id_Handle sid_hl = sequence::GetId(sid, scope, sequence::eGetId_Best);
70 if (sid_hl) {
71 CConstRef <CSeq_id> new_id = sid_hl.GetSeqId();
72 if (new_id) {
73 best_id = sid_hl.GetSeqId()->AsFastaString();
74 }
75 }
76 else best_id = sid.AsFastaString();
77
78 return best_id;
79 };
80
81 static const string strand_symbol[] = {"", "", "c","b", "r"};
PrintSeqIntUseBestID(const CSeq_interval & seq_int,CScope & scope,bool range_only)82 string PrintSeqIntUseBestID(const CSeq_interval& seq_int, CScope& scope, bool range_only)
83 {
84 string location(kEmptyStr);
85
86 // Best seq_id
87 if (!range_only) {
88 location = PrintBestSeqId(seq_int.GetId(), scope) + ":";
89 }
90
91 // strand
92 ENa_strand
93 this_strand
94 = (seq_int.CanGetStrand()) ? seq_int.GetStrand(): eNa_strand_unknown;
95 location += strand_symbol[(int)this_strand];
96 int from, to;
97 string lab_from(kEmptyStr), lab_to(kEmptyStr);
98 if (eNa_strand_minus == this_strand
99 || eNa_strand_both_rev == this_strand) {
100 to = seq_int.GetFrom();
101 from = seq_int.GetTo();
102 if (seq_int.CanGetFuzz_to()) {
103 const CInt_fuzz& f_from = seq_int.GetFuzz_to();
104 f_from.GetLabel(&lab_from, from, false);
105 }
106 else lab_from = NStr::IntToString(++from);
107 if (seq_int.CanGetFuzz_from()) {
108 const CInt_fuzz& f_to = seq_int.GetFuzz_from();
109 f_to.GetLabel(&lab_to, to);
110 }
111 else lab_to = NStr::IntToString(++to);
112 }
113 else {
114 to = seq_int.GetTo();
115 from = seq_int.GetFrom();
116 if (seq_int.CanGetFuzz_from()) {
117 const CInt_fuzz& f_from = seq_int.GetFuzz_from();
118 f_from.GetLabel(&lab_from, from, false);
119 }
120 else lab_from = NStr::IntToString(++from);
121 if (seq_int.CanGetFuzz_to()) {
122 const CInt_fuzz& f_to = seq_int.GetFuzz_to();
123 f_to.GetLabel(&lab_to, to);
124 }
125 else lab_to = NStr::IntToString(++to);
126 }
127 location += lab_from + "-" + lab_to;
128 return location;
129 };
130
PrintPntAndPntsUseBestID(const CSeq_loc & seq_loc,CScope & scope,bool range_only)131 string PrintPntAndPntsUseBestID(const CSeq_loc& seq_loc, CScope& scope, bool range_only)
132 {
133 string location(kEmptyStr);
134
135 // Best seq_id
136 if (!range_only) {
137 if (seq_loc.IsPnt()) {
138 location = PrintBestSeqId(seq_loc.GetPnt().GetId(), scope) + ":";
139 }
140 else if (seq_loc.IsPacked_pnt()) {
141 location = PrintBestSeqId(seq_loc.GetPacked_pnt().GetId(), scope) + ":";
142 }
143 }
144
145 if (!location.empty()) {
146 string strtmp;
147 seq_loc.GetLabel(&strtmp);
148 location += strtmp.substr(strtmp.find(":")+1);
149 }
150 return location;
151 }
152
SeqLocPrintUseBestID(const CSeq_loc & seq_loc,CScope & scope,bool range_only)153 string SeqLocPrintUseBestID(const CSeq_loc& seq_loc, CScope& scope, bool range_only)
154 {
155 string location(kEmptyStr);
156 if (seq_loc.IsInt()) {
157 location = PrintSeqIntUseBestID(seq_loc.GetInt(), scope, range_only);
158 }
159 else if (seq_loc.IsMix() || seq_loc.IsEquiv()) {
160 location = "(";
161 const list <CRef <CSeq_loc> >* seq_loc_ls;
162 if (seq_loc.IsMix()) {
163 seq_loc_ls = &(seq_loc.GetMix().Get());
164 }
165 else {
166 seq_loc_ls = &(seq_loc.GetEquiv().Get());
167 }
168 ITERATE (list <CRef <CSeq_loc> >, it, *seq_loc_ls) {
169 if (it == seq_loc.GetMix().Get().begin()) {
170 location += SeqLocPrintUseBestID(**it, scope, range_only);
171 }
172 else location += SeqLocPrintUseBestID(**it, scope, true);
173 location += ", ";
174 }
175 if (!location.empty()) {
176 location = location.substr(0, location.size()-2);
177 }
178 location += ")";
179 }
180 else if (seq_loc.IsPacked_int()) {
181 location = "(";
182 ITERATE (list <CRef <CSeq_interval> >, it, seq_loc.GetPacked_int().Get()) {
183 if (it == seq_loc.GetPacked_int().Get().begin()) {
184 location += PrintSeqIntUseBestID(**it, scope, range_only);
185 }
186 else {
187 location += PrintSeqIntUseBestID(**it, scope, true);
188 }
189 location += ", ";
190 }
191 if (!location.empty()) {
192 location = location.substr(0, location.size()-2);
193 }
194 location += ")";
195 }
196 else if (seq_loc.IsPnt() || seq_loc.IsPacked_pnt()) {
197 location = PrintPntAndPntsUseBestID(seq_loc, scope, range_only);
198 }
199 else if (seq_loc.IsBond()) {
200 CSeq_loc tmp_loc;
201 tmp_loc.SetBond().Assign(seq_loc.GetBond().GetA());
202 location = PrintPntAndPntsUseBestID(tmp_loc, scope, range_only);
203 if (seq_loc.GetBond().CanGetB()) {
204 tmp_loc.SetBond().Assign(seq_loc.GetBond().GetB());
205 location
206 += "=" + PrintPntAndPntsUseBestID(tmp_loc, scope, range_only);
207 }
208 }
209 else {
210 seq_loc.GetLabel(&location);
211 }
212 return location;
213 }
214
215
Is5AtEndOfSeq(const CSeq_loc & loc,CBioseq_Handle bsh)216 bool CLocationEditPolicy::Is5AtEndOfSeq(const CSeq_loc& loc, CBioseq_Handle bsh)
217 {
218 bool rval = false;
219
220 ENa_strand strand = loc.GetStrand();
221 if (strand == eNa_strand_minus) {
222 if (bsh && loc.GetStart(eExtreme_Biological) == bsh.GetInst_Length() - 1) {
223 rval = true;
224 }
225 } else {
226 if (loc.GetStart(eExtreme_Biological) == 0) {
227 rval = true;
228 }
229 }
230 return rval;
231 }
232
233
Is3AtEndOfSeq(const CSeq_loc & loc,CBioseq_Handle bsh)234 bool CLocationEditPolicy::Is3AtEndOfSeq(const CSeq_loc& loc, CBioseq_Handle bsh)
235 {
236 bool rval = false;
237 ENa_strand strand = loc.GetStrand();
238
239 if (strand == eNa_strand_minus) {
240 if (loc.GetStop(eExtreme_Biological) == 0) {
241 rval = true;
242 }
243 } else {
244 if (bsh && loc.GetStop(eExtreme_Biological) == bsh.GetInst_Length() - 1) {
245 rval = true;
246 }
247 }
248 return rval;
249 }
250
251
Is5AtEndOfSeq(const CSeq_loc & loc,CScope & scope,bool & confident)252 bool CLocationEditPolicy::Is5AtEndOfSeq(const CSeq_loc& loc, CScope& scope, bool& confident)
253 {
254 bool rval = false;
255 confident = true;
256 CSeq_loc_CI first_l(loc);
257 if (!first_l.IsSetStrand() || first_l.GetStrand() != eNa_strand_minus) {
258 // positive strand, just need to know if it's at zero
259 rval = (first_l.GetRange().GetFrom() == 0);
260 } else {
261 // negative strand
262 try {
263 CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
264 rval = (first_l.GetRange().GetTo() == bsh.GetBioseqLength() - 1);
265 } catch (CException&) {
266 confident = false;
267 }
268 }
269 return rval;
270 }
271
272
Is3AtEndOfSeq(const CSeq_loc & loc,CScope & scope,bool & confident)273 bool CLocationEditPolicy::Is3AtEndOfSeq(const CSeq_loc& loc, CScope& scope, bool& confident)
274 {
275 bool rval = false;
276 confident = true;
277 CSeq_loc_CI last_l(loc);
278 size_t num_intervals = last_l.GetSize();
279 last_l.SetPos(num_intervals - 1);
280 if (!last_l.IsSetStrand() || last_l.GetStrand() != eNa_strand_minus) {
281 // positive strand
282 try {
283 CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
284 if (bsh) {
285 rval = (last_l.GetRange().GetTo() == bsh.GetBioseqLength() - 1);
286 } else {
287 confident = false;
288 }
289 } catch (CException&) {
290 confident = false;
291 }
292 } else {
293 // negative strand, just need to know if it's at zero
294 rval = (last_l.GetRange().GetFrom() == 0);
295 }
296 return rval;
297 }
298
299
Interpret5Policy(const CSeq_feat & orig_feat,CScope & scope,bool & do_set_5_partial,bool & do_clear_5_partial) const300 bool CLocationEditPolicy::Interpret5Policy
301 (const CSeq_feat& orig_feat,
302 CScope& scope,
303 bool& do_set_5_partial,
304 bool& do_clear_5_partial) const
305 {
306 do_set_5_partial = false;
307 do_clear_5_partial = false;
308 const CSeq_loc& loc = orig_feat.GetLocation();
309
310 switch (m_PartialPolicy5) {
311 case ePartialPolicy_eNoChange:
312 // do nothing
313 break;
314 case ePartialPolicy_eSet:
315 if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
316 do_set_5_partial = true;
317 } else if (m_Extend5) {
318 bool confident = false;
319 bool at_5 = Is5AtEndOfSeq(loc, scope, confident);
320 if (confident && !at_5) {
321 do_set_5_partial = true;
322 }
323 }
324 break;
325 case ePartialPolicy_eSetAtEnd:
326 if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
327 bool confident = false;
328 if (Is5AtEndOfSeq(loc, scope, confident) && confident) {
329 do_set_5_partial = true;
330 }
331 }
332 break;
333 case ePartialPolicy_eSetForBadEnd:
334 if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)
335 && orig_feat.GetData().IsCdregion()) {
336 string transl_prot;
337 bool confident = true;
338 try {
339 CSeqTranslator::Translate(orig_feat, scope, transl_prot,
340 false, // do not include stop codons
341 false); // do not remove trailing X/B/Z
342
343 } catch ( const CException& ) {
344 confident = false;
345 }
346 if (confident && !NStr::StartsWith(transl_prot, "M", NStr::eNocase)) {
347 do_set_5_partial = true;
348 }
349 }
350 break;
351 case ePartialPolicy_eSetForFrame:
352 if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)
353 && orig_feat.GetData().IsCdregion()
354 && orig_feat.GetData().GetCdregion().IsSetFrame()
355 && orig_feat.GetData().GetCdregion().GetFrame() != CCdregion::eFrame_not_set
356 && orig_feat.GetData().GetCdregion().GetFrame() != CCdregion::eFrame_one) {
357 do_set_5_partial = true;
358 }
359 break;
360 case ePartialPolicy_eClear:
361 if (orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
362 do_clear_5_partial = true;
363 }
364 break;
365 case ePartialPolicy_eClearNotAtEnd:
366 if (orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
367 bool confident = false;
368 if (!Is5AtEndOfSeq(loc, scope, confident) && confident) {
369 do_clear_5_partial = true;
370 }
371 }
372 break;
373 case ePartialPolicy_eClearForGoodEnd:
374 if (orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)
375 && orig_feat.GetData().IsCdregion()
376 && (!orig_feat.GetData().GetCdregion().IsSetFrame() ||
377 orig_feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_not_set ||
378 orig_feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_one)) {
379 string transl_prot;
380 bool confident = true;
381 try {
382 CSeqTranslator::Translate(orig_feat, scope, transl_prot,
383 false, // do not include stop codons
384 false); // do not remove trailing X/B/Z
385
386 } catch ( const CException& ) {
387 confident = false;
388 }
389 if (confident && NStr::StartsWith(transl_prot, "M", NStr::eNocase)) {
390 do_clear_5_partial = true;
391 }
392 }
393 break;
394 }
395 return do_set_5_partial || do_clear_5_partial;
396 }
397
398
Interpret3Policy(const CSeq_feat & orig_feat,CScope & scope,bool & do_set_3_partial,bool & do_clear_3_partial) const399 bool CLocationEditPolicy::Interpret3Policy
400 (const CSeq_feat& orig_feat,
401 CScope& scope,
402 bool& do_set_3_partial,
403 bool& do_clear_3_partial) const
404 {
405 do_set_3_partial = false;
406 do_clear_3_partial = false;
407 const CSeq_loc& loc = orig_feat.GetLocation();
408
409 switch (m_PartialPolicy3) {
410 case ePartialPolicy_eNoChange:
411 // do nothing
412 break;
413 case ePartialPolicy_eSet:
414 if (!loc.IsPartialStop(eExtreme_Biological)) {
415 do_set_3_partial = true;
416 } else if (m_Extend3) {
417 bool confident = false;
418 bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
419 if (confident && !at_3) {
420 do_set_3_partial = true;
421 }
422 }
423 break;
424 case ePartialPolicy_eSetAtEnd:
425 if (!loc.IsPartialStop(eExtreme_Biological)) {
426 bool confident = false;
427 bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
428 if (at_3 && confident) {
429 do_set_3_partial = true;
430 }
431 }
432 break;
433 case ePartialPolicy_eSetForBadEnd:
434 if (!loc.IsPartialStop(eExtreme_Biological)
435 && orig_feat.GetData().IsCdregion()) {
436 string transl_prot;
437 bool confident = true;
438 try {
439 CSeqTranslator::Translate(orig_feat, scope, transl_prot,
440 true, // include stop codons
441 false); // do not remove trailing X/B/Z
442
443 } catch ( const CException& ) {
444 confident = false;
445 }
446 if (confident && !NStr::EndsWith(transl_prot, "*", NStr::eNocase)) {
447 do_set_3_partial = true;
448 }
449 }
450 break;
451 case ePartialPolicy_eSetForFrame:
452 // not allowed for 3' end
453 break;
454 case ePartialPolicy_eClear:
455 if (loc.IsPartialStop(eExtreme_Biological)) {
456 do_clear_3_partial = true;
457 }
458 break;
459 case ePartialPolicy_eClearNotAtEnd:
460 if (loc.IsPartialStop(eExtreme_Biological)) {
461 bool confident = false;
462 bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
463 if (!at_3 && confident) {
464 do_clear_3_partial = true;
465 }
466 }
467 break;
468 case ePartialPolicy_eClearForGoodEnd:
469 if (loc.IsPartialStop(eExtreme_Biological)
470 && orig_feat.GetData().IsCdregion()) {
471 string transl_prot;
472 bool confident = true;
473 try {
474 CSeqTranslator::Translate(orig_feat, scope, transl_prot,
475 true, // include stop codons
476 false); // do not remove trailing X/B/Z
477
478 } catch ( const CException& ) {
479 confident = false;
480 }
481 if (confident && NStr::EndsWith(transl_prot, "*", NStr::eNocase)) {
482 do_clear_3_partial = true;
483 }
484 }
485 break;
486 }
487 return do_set_3_partial || do_clear_3_partial;
488 }
489
490
SeqLocExtend5(const CSeq_loc & loc,TSeqPos pos,CScope * scope)491 CRef<CSeq_loc> SeqLocExtend5(const CSeq_loc& loc, TSeqPos pos, CScope* scope)
492 {
493 CSeq_loc_CI first_l(loc);
494 CConstRef<CSeq_loc> first_loc = first_l.GetRangeAsSeq_loc();
495
496 TSeqPos loc_start = first_loc->GetStart(eExtreme_Biological);
497 bool partial_start = first_loc->IsPartialStart(eExtreme_Biological);
498 ENa_strand strand = first_loc->IsSetStrand() ? first_loc->GetStrand() : eNa_strand_plus;
499 CRef<CSeq_loc> new_loc(NULL);
500
501 CRef<CSeq_id> id(new CSeq_id());
502 id->Assign(first_l.GetSeq_id());
503
504 if (pos < loc_start && strand != eNa_strand_minus) {
505 CRef<CSeq_loc> add(new CSeq_loc(*id, pos, loc_start - 1, strand));
506 add->SetPartialStart(partial_start, eExtreme_Positional);
507 new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
508 } else if (pos > loc_start && strand == eNa_strand_minus) {
509 CRef<CSeq_loc> add(new CSeq_loc(*id, loc_start + 1, pos, strand));
510 add->SetPartialStop(partial_start, eExtreme_Positional);
511 new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
512 }
513 return new_loc;
514 }
515
516
SeqLocExtend3(const CSeq_loc & loc,TSeqPos pos,CScope * scope)517 CRef<CSeq_loc> SeqLocExtend3(const CSeq_loc& loc, TSeqPos pos, CScope* scope)
518 {
519 CSeq_loc_CI last_l(loc);
520 size_t num_intervals = last_l.GetSize();
521 last_l.SetPos(num_intervals - 1);
522 CConstRef<CSeq_loc> last_loc = last_l.GetRangeAsSeq_loc();
523
524 TSeqPos loc_stop = last_loc->GetStop(eExtreme_Biological);
525 bool partial_stop = last_loc->IsPartialStop(eExtreme_Biological);
526 ENa_strand strand = last_loc->IsSetStrand() ? last_loc->GetStrand() : eNa_strand_plus;
527 CRef<CSeq_loc> new_loc(NULL);
528
529 CRef<CSeq_id> id(new CSeq_id());
530 id->Assign(last_l.GetSeq_id());
531
532 if (pos > loc_stop && strand != eNa_strand_minus) {
533 CRef<CSeq_loc> add(new CSeq_loc(*id, loc_stop + 1, pos, strand));
534 add->SetPartialStop(partial_stop, eExtreme_Positional);
535 new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
536 } else if (pos < loc_stop && strand == eNa_strand_minus) {
537 CRef<CSeq_loc> add(new CSeq_loc(*id, pos, loc_stop - 1, strand));
538 add->SetPartialStart(partial_stop, eExtreme_Positional);
539 new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
540 }
541 return new_loc;
542 }
543
544
545
SeqLocExtend(const CSeq_loc & loc,size_t pos,CScope * scope)546 CRef<CSeq_loc> SeqLocExtend(const CSeq_loc& loc, size_t pos, CScope* scope)
547 {
548 TSeqPos loc_start = loc.GetStart(eExtreme_Positional);
549 TSeqPos loc_stop = loc.GetStop(eExtreme_Positional);
550 bool partial_start = loc.IsPartialStart(eExtreme_Positional);
551 bool partial_stop = loc.IsPartialStop(eExtreme_Positional);
552 ENa_strand strand = loc.GetStrand();
553 CRef<CSeq_loc> new_loc(NULL);
554
555 if (pos < loc_start) {
556 CRef<CSeq_id> id(new CSeq_id());
557 id->Assign(*(loc.GetId()));
558 CRef<CSeq_loc> add(
559 new CSeq_loc(*id, static_cast<TSeqPos>(pos), loc_start - 1, strand));
560 add->SetPartialStart(partial_start, eExtreme_Positional);
561 new_loc = sequence::Seq_loc_Add(
562 loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
563 } else if (pos > loc_stop) {
564 CRef<CSeq_id> id(new CSeq_id());
565 id->Assign(*(loc.GetId()));
566 CRef<CSeq_loc> add(new CSeq_loc(
567 *id, loc_stop + 1, static_cast<TSeqPos>(pos), strand));
568 add->SetPartialStop(partial_stop, eExtreme_Positional);
569 new_loc = sequence::Seq_loc_Add(
570 loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
571 }
572 return new_loc;
573 }
574
575
ApplyPolicyToFeature(CSeq_feat & feat,CScope & scope) const576 bool CLocationEditPolicy::ApplyPolicyToFeature(CSeq_feat& feat, CScope& scope) const
577 {
578 if (m_PartialPolicy5 == ePartialPolicy_eNoChange
579 && m_PartialPolicy3 == ePartialPolicy_eNoChange
580 && m_MergePolicy == eMergePolicy_NoChange) {
581 return false;
582 }
583
584 bool any_change = false;
585
586 // make changes to 5' end
587 bool do_set_5_partial = false;
588 bool do_clear_5_partial = false;
589 any_change |= Interpret5Policy(feat, scope, do_set_5_partial, do_clear_5_partial);
590 if (do_set_5_partial) {
591 feat.SetLocation().SetPartialStart(true, eExtreme_Biological);
592 if (m_Extend5) {
593 // extend end
594 Extend5(feat, scope);
595 }
596 } else if (do_clear_5_partial) {
597 feat.SetLocation().SetPartialStart(false, eExtreme_Biological);
598 }
599
600 // make changes to 3' end
601 bool do_set_3_partial = false;
602 bool do_clear_3_partial = false;
603 any_change |= Interpret3Policy(feat, scope, do_set_3_partial, do_clear_3_partial);
604 if (do_set_3_partial) {
605 feat.SetLocation().SetPartialStop(true, eExtreme_Biological);
606 if (m_Extend3) {
607 // extend end
608 Extend3(feat, scope);
609 }
610 } else if (do_clear_3_partial) {
611 feat.SetLocation().SetPartialStop(false, eExtreme_Biological);
612 }
613
614 // make merge changes
615 switch (m_MergePolicy) {
616 case CLocationEditPolicy::eMergePolicy_Join:
617 {
618 // remove NULLS between, if present
619 bool changed = false;
620 CRef<CSeq_loc> new_loc = ConvertToJoin(feat.GetLocation(), changed);
621 if (changed) {
622 feat.SetLocation().Assign(*new_loc);
623 any_change = true;
624 }
625 }
626 break;
627 case CLocationEditPolicy::eMergePolicy_Order:
628 {
629 // add NULLS between if not present
630 bool changed = false;
631 CRef<CSeq_loc> new_loc = ConvertToOrder(feat.GetLocation(), changed);
632 if (changed) {
633 feat.SetLocation().Assign(*new_loc);
634 any_change = true;
635 }
636 }
637 break;
638 case CLocationEditPolicy::eMergePolicy_SingleInterval:
639 {
640 CRef<CSeq_loc> new_loc = sequence::Seq_loc_Merge(feat.GetLocation(), CSeq_loc::fMerge_SingleRange, &scope);
641 if (sequence::Compare(*new_loc, feat.GetLocation(), &scope, sequence::fCompareOverlapping) != sequence::eSame) {
642 feat.SetLocation().Assign(*new_loc);
643 any_change = true;
644 }
645 }
646 break;
647 case CLocationEditPolicy::eMergePolicy_NoChange:
648 break;
649 }
650
651 any_change |= feature::AdjustFeaturePartialFlagForLocation(feat);
652
653 return any_change;
654 }
655
656
HasNulls(const CSeq_loc & orig_loc)657 bool CLocationEditPolicy::HasNulls(const CSeq_loc& orig_loc)
658 {
659 if (orig_loc.Which() == CSeq_loc::e_Mix) {
660 ITERATE(CSeq_loc_mix::Tdata, it, orig_loc.GetMix().Get()) {
661 if ((*it)->IsNull()) {
662 return true;
663 }
664 }
665 }
666 return false;
667 }
668
669
ConvertToJoin(const CSeq_loc & orig_loc,bool & changed)670 CRef<CSeq_loc> CLocationEditPolicy::ConvertToJoin(const CSeq_loc& orig_loc, bool &changed)
671 {
672 changed = false;
673 CRef<CSeq_loc> new_loc(new CSeq_loc());
674 if (!HasNulls(orig_loc)) {
675 new_loc->Assign(orig_loc);
676 } else {
677 CSeq_loc_CI ci(orig_loc);
678 new_loc->SetMix();
679 while (ci) {
680 CConstRef<CSeq_loc> subloc = ci.GetRangeAsSeq_loc();
681 if (subloc && !subloc->IsNull()) {
682 CRef<CSeq_loc> add(new CSeq_loc());
683 add->Assign(*subloc);
684 new_loc->SetMix().Set().push_back(add);
685 }
686 ++ci;
687 }
688 changed = true;
689 }
690 return new_loc;
691 }
692
693
ConvertToOrder(const CSeq_loc & orig_loc,bool & changed)694 CRef<CSeq_loc> CLocationEditPolicy::ConvertToOrder(const CSeq_loc& orig_loc, bool &changed)
695 {
696 changed = false;
697 CRef<CSeq_loc> new_loc(new CSeq_loc());
698 if (HasNulls(orig_loc)) {
699 new_loc->Assign(orig_loc);
700 return new_loc;
701 }
702 switch(orig_loc.Which()) {
703 case CSeq_loc::e_not_set:
704 case CSeq_loc::e_Null:
705 case CSeq_loc::e_Empty:
706 case CSeq_loc::e_Whole:
707 case CSeq_loc::e_Int:
708 case CSeq_loc::e_Pnt:
709 case CSeq_loc::e_Bond:
710 case CSeq_loc::e_Feat:
711 case CSeq_loc::e_Equiv:
712 new_loc->Assign(orig_loc);
713 break;
714 case CSeq_loc::e_Packed_int:
715 case CSeq_loc::e_Packed_pnt:
716 case CSeq_loc::e_Mix:
717 {
718 new_loc->SetMix();
719 CSeq_loc_CI ci(orig_loc);
720 CRef<CSeq_loc> first(new CSeq_loc());
721 first->Assign(*(ci.GetRangeAsSeq_loc()));
722 new_loc->SetMix().Set().push_back(first);
723 ++ci;
724 while (ci) {
725 CRef<CSeq_loc> null_loc(new CSeq_loc());
726 null_loc->SetNull();
727 new_loc->SetMix().Set().push_back(null_loc);
728 CRef<CSeq_loc> add(new CSeq_loc());
729 add->Assign(*(ci.GetRangeAsSeq_loc()));
730 new_loc->SetMix().Set().push_back(add);
731 ++ci;
732 }
733 changed = true;
734 }
735 break;
736
737 }
738 return new_loc;
739 }
740
AdjustFrameFor5Extension(CSeq_feat & feat,size_t diff)741 void AdjustFrameFor5Extension(CSeq_feat& feat, size_t diff)
742 {
743 // adjust frame to maintain consistency
744 if (diff % 3 > 0 && feat.GetData().IsCdregion()) {
745 int orig_frame = 1;
746 if (feat.GetData().GetCdregion().IsSetFrame()) {
747 if (feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_two) {
748 orig_frame = 2;
749 } else if (feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_three) {
750 orig_frame = 3;
751 }
752 }
753 CCdregion::EFrame new_frame = CCdregion::eFrame_not_set;
754 switch ((orig_frame + diff % 3) % 3) {
755 case 1:
756 new_frame = CCdregion::eFrame_not_set;
757 break;
758 case 2:
759 new_frame = CCdregion::eFrame_two;
760 break;
761 case 0:
762 new_frame = CCdregion::eFrame_three;
763 break;
764 }
765 feat.SetData().SetCdregion().SetFrame(new_frame);
766 }
767 }
768
769
Extend5(CSeq_feat & feat,CScope & scope)770 bool CLocationEditPolicy::Extend5(CSeq_feat& feat, CScope& scope)
771 {
772 bool extend = false;
773
774 bool confident = false;
775 if (!Is5AtEndOfSeq(feat.GetLocation(), scope, confident) && confident) {
776 int diff = 0;
777 CSeq_loc_CI first_l(feat.GetLocation());
778 if (first_l.IsSetStrand() && first_l.GetStrand() == eNa_strand_minus) {
779 CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
780 diff = bsh.GetInst().GetLength() - first_l.GetRange().GetTo() - 1;
781 CRef<CSeq_loc> new_loc = SeqLocExtend5(feat.GetLocation(), bsh.GetInst_Length() - 1, &scope);
782 if (new_loc) {
783 feat.SetLocation().Assign(*new_loc);
784 extend = true;
785 } else {
786 diff = 0;
787 }
788 } else {
789 diff = first_l.GetRange().GetFrom();
790 CRef<CSeq_loc> new_loc = SeqLocExtend5(feat.GetLocation(), 0, &scope);
791 if (new_loc) {
792 feat.SetLocation().Assign(*new_loc);
793 extend = true;
794 } else {
795 diff = 0;
796 }
797 }
798
799 // adjust frame to maintain consistency
800 AdjustFrameFor5Extension(feat, diff);
801 }
802 return extend;
803 }
804
Extend3(CSeq_feat & feat,CScope & scope)805 bool CLocationEditPolicy::Extend3(CSeq_feat& feat, CScope& scope)
806 {
807 bool extend = false;
808
809 bool confident = false;
810 if (!Is3AtEndOfSeq(feat.GetLocation(), scope, confident) && confident) {
811 CSeq_loc_CI last_l(feat.GetLocation());
812 size_t num_intervals = last_l.GetSize();
813 last_l.SetPos(num_intervals - 1);
814
815 ENa_strand strand = last_l.GetStrand();
816 if (strand == eNa_strand_minus) {
817 CRef<CSeq_loc> new_loc = SeqLocExtend3(feat.GetLocation(), 0, &scope);
818 if (new_loc) {
819 feat.SetLocation().Assign(*new_loc);
820 extend = true;
821 }
822 } else {
823 CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
824 CRef<CSeq_loc> new_loc = SeqLocExtend3(feat.GetLocation(), bsh.GetInst_Length() - 1, &scope);
825 if (new_loc) {
826 feat.SetLocation().Assign(*new_loc);
827 extend = true;
828 }
829 }
830 }
831 return extend;
832 }
833
834
ApplyPolicyToFeature(const CLocationEditPolicy & policy,const CSeq_feat & orig_feat,CScope & scope,bool adjust_gene,bool retranslate_cds)835 bool ApplyPolicyToFeature(const CLocationEditPolicy& policy, const CSeq_feat& orig_feat,
836 CScope& scope, bool adjust_gene, bool retranslate_cds)
837 {
838 CRef<CSeq_feat> new_feat(new CSeq_feat());
839 new_feat->Assign(orig_feat);
840
841 bool any_change = policy.ApplyPolicyToFeature(*new_feat, scope);
842 if (any_change) {
843 CSeq_feat_Handle fh = scope.GetSeq_featHandle(orig_feat);
844 // This is necessary, to make sure that we are in "editing mode"
845 const CSeq_annot_Handle& annot_handle = fh.GetAnnot();
846 CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
847 CSeq_feat_EditHandle feh(fh);
848
849 // adjust gene feature
850 if (adjust_gene) {
851 CConstRef<CSeq_feat> old_gene = sequence::GetOverlappingGene(orig_feat.GetLocation(), scope);
852 if (old_gene) {
853 TSeqPos feat_start = new_feat->GetLocation().GetStart(eExtreme_Biological);
854 TSeqPos feat_stop = new_feat->GetLocation().GetStop(eExtreme_Biological);
855 CRef<CSeq_feat> new_gene(new CSeq_feat());
856 new_gene->Assign(*old_gene);
857 bool gene_change = false;
858 // adjust ends of gene to match ends of feature
859 CRef<CSeq_loc> new_loc = SeqLocExtend5(new_gene->GetLocation(), feat_start, &scope);
860 if (new_loc) {
861 new_gene->SetLocation().Assign(*new_loc);
862 gene_change = true;
863 }
864 new_loc = SeqLocExtend3(new_gene->GetLocation(), feat_stop, &scope);
865 if (new_loc) {
866 new_gene->SetLocation().Assign(*new_loc);
867 gene_change = true;
868 }
869 if (gene_change) {
870 CSeq_feat_Handle gh = scope.GetSeq_featHandle(*old_gene);
871 // This is necessary, to make sure that we are in "editing mode"
872 const CSeq_annot_Handle& ah = gh.GetAnnot();
873 CSeq_entry_EditHandle egh = ah.GetParentEntry().GetEditHandle();
874 CSeq_feat_EditHandle geh(gh);
875 geh.Replace(*new_gene);
876 }
877 }
878 }
879 feh.Replace(*new_feat);
880
881 // retranslate or resynch if coding region
882 if (new_feat->IsSetProduct() && new_feat->GetData().IsCdregion()) {
883 if (!retranslate_cds || !feature::RetranslateCDS(*new_feat, scope)) {
884 CSeq_loc_CI l(new_feat->GetLocation());
885 feature::AdjustForCDSPartials(*new_feat, scope);
886 }
887 }
888 }
889 return any_change;
890 }
891
892
ReverseComplementLocation(CSeq_interval & interval,CScope & scope)893 void ReverseComplementLocation(CSeq_interval& interval, CScope& scope)
894 {
895 // flip strand
896 interval.FlipStrand();
897 if (interval.IsSetId()) {
898 CBioseq_Handle bsh = scope.GetBioseqHandle(interval.GetId());
899 if (bsh) {
900 if (interval.IsSetFrom()) {
901 interval.SetFrom(bsh.GetInst_Length() - interval.GetFrom() - 1);
902 }
903 if (interval.IsSetTo()) {
904 interval.SetTo(bsh.GetInst_Length() - interval.GetTo() - 1);
905 }
906
907 // reverse from and to
908 if (interval.IsSetFrom()) {
909 TSeqPos from = interval.GetFrom();
910 if (interval.IsSetTo()) {
911 interval.SetFrom(interval.GetTo());
912 } else {
913 interval.ResetFrom();
914 }
915 interval.SetTo(from);
916 } else if (interval.IsSetTo()) {
917 interval.SetFrom(interval.GetTo());
918 interval.ResetTo();
919 }
920
921 if (interval.IsSetFuzz_from()) {
922 interval.SetFuzz_from().Negate(bsh.GetInst_Length());
923 }
924 if (interval.IsSetFuzz_to()) {
925 interval.SetFuzz_to().Negate(bsh.GetInst_Length());
926 }
927
928 // swap fuzz
929 if (interval.IsSetFuzz_from()) {
930 CRef<CInt_fuzz> swap(new CInt_fuzz());
931 swap->Assign(interval.GetFuzz_from());
932 if (interval.IsSetFuzz_to()) {
933 interval.SetFuzz_from().Assign(interval.GetFuzz_to());
934 } else {
935 interval.ResetFuzz_from();
936 }
937 interval.SetFuzz_to(*swap);
938 } else if (interval.IsSetFuzz_to()) {
939 interval.SetFuzz_from().Assign(interval.GetFuzz_to());
940 interval.ResetFuzz_to();
941 }
942 }
943 }
944 }
945
946
ReverseComplementLocation(CSeq_point & pnt,CScope & scope)947 void ReverseComplementLocation(CSeq_point& pnt, CScope& scope)
948 {
949 // flip strand
950 pnt.FlipStrand();
951 if (pnt.IsSetId()) {
952 CBioseq_Handle bsh = scope.GetBioseqHandle(pnt.GetId());
953 if (bsh) {
954 if (pnt.IsSetPoint()) {
955 pnt.SetPoint(bsh.GetInst_Length() - pnt.GetPoint() - 1);
956 }
957 if (pnt.IsSetFuzz()) {
958 pnt.SetFuzz().Negate(bsh.GetInst_Length());
959 }
960 }
961 }
962 }
963
964
ReverseComplementLocation(CPacked_seqpnt & ppnt,CScope & scope)965 void ReverseComplementLocation(CPacked_seqpnt& ppnt, CScope& scope)
966 {
967 // flip strand
968 ppnt.FlipStrand();
969 CBioseq_Handle bsh = scope.GetBioseqHandle(ppnt.GetId());
970 if (bsh) {
971 // flip fuzz
972 if (ppnt.IsSetFuzz()) {
973 ppnt.SetFuzz().Negate(bsh.GetInst_Length());
974 }
975 //complement points
976 if (ppnt.IsSetPoints()) {
977 vector<int> new_pnts;
978 ITERATE(CPacked_seqpnt::TPoints, it, ppnt.SetPoints()) {
979 new_pnts.push_back(bsh.GetInst_Length() - *it - 1);
980 }
981 ppnt.ResetPoints();
982 ITERATE(vector<int>, it, new_pnts) {
983 ppnt.SetPoints().push_back(*it);
984 }
985 }
986 }
987
988 }
989
990
ReverseComplementLocation(CSeq_loc & loc,CScope & scope)991 void ReverseComplementLocation(CSeq_loc& loc, CScope& scope)
992 {
993 switch (loc.Which()) {
994 case CSeq_loc::e_Empty:
995 case CSeq_loc::e_Whole:
996 case CSeq_loc::e_Feat:
997 case CSeq_loc::e_Null:
998 case CSeq_loc::e_not_set:
999 // do nothing
1000 break;
1001 case CSeq_loc::e_Int:
1002 ReverseComplementLocation(loc.SetInt(), scope);
1003 loc.InvalidateCache();
1004 break;
1005 case CSeq_loc::e_Pnt:
1006 ReverseComplementLocation(loc.SetPnt(), scope);
1007 loc.InvalidateCache();
1008 break;
1009 case CSeq_loc::e_Bond:
1010 if (loc.GetBond().IsSetA()) {
1011 ReverseComplementLocation(loc.SetBond().SetA(), scope);
1012 }
1013 if (loc.GetBond().IsSetB()) {
1014 ReverseComplementLocation(loc.SetBond().SetB(), scope);
1015 }
1016 loc.InvalidateCache();
1017 break;
1018 case CSeq_loc::e_Mix:
1019 // revcomp individual elements
1020 NON_CONST_ITERATE(CSeq_loc_mix::Tdata, it, loc.SetMix().Set()) {
1021 ReverseComplementLocation(**it, scope);
1022 }
1023 loc.InvalidateCache();
1024 break;
1025 case CSeq_loc::e_Equiv:
1026 // revcomp individual elements
1027 NON_CONST_ITERATE(CSeq_loc_equiv::Tdata, it, loc.SetEquiv().Set()) {
1028 ReverseComplementLocation(**it, scope);
1029 }
1030 loc.InvalidateCache();
1031 break;
1032 case CSeq_loc::e_Packed_int:
1033 // revcomp individual elements
1034 NON_CONST_ITERATE(CPacked_seqint::Tdata, it, loc.SetPacked_int().Set()) {
1035 ReverseComplementLocation(**it, scope);
1036 }
1037 loc.InvalidateCache();
1038 break;
1039 case CSeq_loc::e_Packed_pnt:
1040 ReverseComplementLocation(loc.SetPacked_pnt(), scope);
1041 loc.InvalidateCache();
1042 break;
1043
1044 }
1045 }
1046
1047
ReverseComplementCDRegion(CCdregion & cdr,CScope & scope)1048 void ReverseComplementCDRegion(CCdregion& cdr, CScope& scope)
1049 {
1050 if (cdr.IsSetCode_break()) {
1051 NON_CONST_ITERATE(CCdregion::TCode_break, it, cdr.SetCode_break()) {
1052 if ((*it)->IsSetLoc()) {
1053 ReverseComplementLocation((*it)->SetLoc(), scope);
1054 }
1055 }
1056 }
1057 }
1058
1059
ReverseComplementTrna(CTrna_ext & trna,CScope & scope)1060 void ReverseComplementTrna(CTrna_ext& trna, CScope& scope)
1061 {
1062 if (trna.IsSetAnticodon()) {
1063 ReverseComplementLocation(trna.SetAnticodon(), scope);
1064 }
1065 }
1066
1067
ReverseComplementFeature(CSeq_feat & feat,CScope & scope)1068 void ReverseComplementFeature(CSeq_feat& feat, CScope& scope)
1069 {
1070 if (feat.IsSetLocation()) {
1071 ReverseComplementLocation(feat.SetLocation(), scope);
1072 }
1073 if (feat.IsSetData()) {
1074 switch (feat.GetData().GetSubtype()) {
1075 case CSeqFeatData::eSubtype_cdregion:
1076 ReverseComplementCDRegion(feat.SetData().SetCdregion(), scope);
1077 break;
1078 case CSeqFeatData::eSubtype_tRNA:
1079 ReverseComplementTrna(feat.SetData().SetRna().SetExt().SetTRNA(), scope);
1080 break;
1081 default:
1082 break;
1083 }
1084 }
1085 }
1086
1087
OkToAdjustLoc(const CSeq_interval & interval,const CSeq_id * seqid)1088 bool OkToAdjustLoc(const CSeq_interval& interval, const CSeq_id* seqid)
1089 {
1090 bool rval = true;
1091 if (seqid) {
1092 if (!interval.IsSetId() || interval.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1093 rval = false;
1094 }
1095 }
1096 return rval;
1097 }
1098
1099
OkToAdjustLoc(const CSeq_point & pnt,const CSeq_id * seqid)1100 bool OkToAdjustLoc(const CSeq_point& pnt, const CSeq_id* seqid)
1101 {
1102 bool rval = true;
1103 if (seqid) {
1104 if (!pnt.IsSetId() || pnt.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1105 rval = false;
1106 }
1107 }
1108 return rval;
1109 }
1110
1111
OkToAdjustLoc(const CPacked_seqpnt & pack,const CSeq_id * seqid)1112 bool OkToAdjustLoc(const CPacked_seqpnt& pack, const CSeq_id* seqid)
1113 {
1114 bool rval = true;
1115 if (seqid) {
1116 if (!pack.IsSetId() || pack.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1117 rval = false;
1118 }
1119 }
1120 return rval;
1121 }
1122
1123
NormalizeLoc(CSeq_loc & loc)1124 void NormalizeLoc(CSeq_loc& loc)
1125 {
1126 switch (loc.Which()) {
1127 case CSeq_loc::e_Equiv:
1128 {{
1129 CSeq_loc::TEquiv::Tdata::iterator it = loc.SetEquiv().Set().begin();
1130 while (it != loc.SetEquiv().Set().end()) {
1131 NormalizeLoc(**it);
1132 if (loc.Which() == CSeq_loc::e_not_set) {
1133 it = loc.SetEquiv().Set().erase(it);
1134 } else {
1135 ++it;
1136 }
1137 }
1138
1139 // if only one, make regular loc
1140 if (loc.GetEquiv().Get().size() == 1) {
1141 CRef<CSeq_loc> sub(new CSeq_loc());
1142 sub->Assign(*(loc.GetEquiv().Get().front()));
1143 loc.Assign(*sub);
1144 } else if (loc.GetEquiv().Get().size() == 0) {
1145 // no sub intervals, reset
1146 loc.Reset();
1147 }
1148 }}
1149 break;
1150 case CSeq_loc::e_Mix:
1151 {{
1152 CSeq_loc::TMix::Tdata::iterator it = loc.SetMix().Set().begin();
1153 while (it != loc.SetMix().Set().end()) {
1154 NormalizeLoc(**it);
1155 if (loc.Which() == CSeq_loc::e_not_set) {
1156 it = loc.SetMix().Set().erase(it);
1157 } else {
1158 ++it;
1159 }
1160 }
1161
1162 // if only one, make regular loc
1163 if (loc.GetMix().Get().size() == 1) {
1164 CRef<CSeq_loc> sub(new CSeq_loc());
1165 sub->Assign(*(loc.GetMix().Get().front()));
1166 loc.Assign(*sub);
1167 } else if (loc.GetMix().Get().size() == 0) {
1168 // no sub intervals, reset
1169 loc.Reset();
1170 }
1171 }}
1172 break;
1173 case CSeq_loc::e_Packed_int:
1174 if (loc.GetPacked_int().Get().size() == 0) {
1175 loc.Reset();
1176 } else if (loc.GetPacked_int().Get().size() == 1) {
1177 CRef<CSeq_interval> sub(new CSeq_interval());
1178 sub->Assign(*(loc.GetPacked_int().Get().front()));
1179 loc.SetInt().Assign(*sub);
1180 }
1181 break;
1182 case CSeq_loc::e_Packed_pnt:
1183 if (loc.GetPacked_pnt().GetPoints().size() == 0) {
1184 loc.Reset();
1185 } else if (loc.GetPacked_pnt().GetPoints().size() == 1) {
1186 CRef<CSeq_point> sub(new CSeq_point());
1187 if (loc.GetPacked_pnt().IsSetStrand()) {
1188 sub->SetStrand(loc.GetPacked_pnt().GetStrand());
1189 }
1190 if (loc.GetPacked_pnt().IsSetId()) {
1191 sub->SetId().Assign(loc.GetPacked_pnt().GetId());
1192 }
1193 if (loc.GetPacked_pnt().IsSetFuzz()) {
1194 sub->SetFuzz().Assign(loc.GetPacked_pnt().GetFuzz());
1195 }
1196 sub->SetPoint(loc.GetPacked_pnt().GetPoints()[0]);
1197 loc.SetPnt().Assign(*sub);
1198 }
1199 break;
1200 default:
1201 // do nothing
1202 break;
1203 }
1204 }
1205
1206
SeqLocAdjustForTrim(CSeq_interval & interval,TSeqPos cut_from,TSeqPos cut_to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1207 void SeqLocAdjustForTrim(CSeq_interval& interval,
1208 TSeqPos cut_from, TSeqPos cut_to,
1209 const CSeq_id* seqid,
1210 bool& bCompleteCut,
1211 TSeqPos& trim5,
1212 bool& bAdjusted)
1213 {
1214 if (!OkToAdjustLoc(interval, seqid)) {
1215 return;
1216 }
1217
1218 // These are required fields
1219 if ( !(interval.CanGetFrom() && interval.CanGetTo()) )
1220 {
1221 return;
1222 }
1223
1224 // Feature location
1225 TSeqPos feat_from = interval.GetFrom();
1226 TSeqPos feat_to = interval.GetTo();
1227
1228 // Size of the cut
1229 TSeqPos cut_size = cut_to - cut_from + 1;
1230
1231 // Case 1: feature is located completely before the cut
1232 if (feat_to < cut_from)
1233 {
1234 // Nothing needs to be done - cut does not affect feature
1235 return;
1236 }
1237
1238 // Case 2: feature is completely within the cut
1239 if (feat_from >= cut_from && feat_to <= cut_to)
1240 {
1241 // Feature should be deleted
1242 bCompleteCut = true;
1243 trim5 += feat_from - feat_to + 1;
1244 return;
1245 }
1246
1247 // Case 3: feature is completely past the cut
1248 if (feat_from > cut_to)
1249 {
1250 // Shift the feature by the cut_size
1251 feat_from -= cut_size;
1252 feat_to -= cut_size;
1253 interval.SetFrom(feat_from);
1254 interval.SetTo(feat_to);
1255 bAdjusted = true;
1256 return;
1257 }
1258
1259 /***************************************************************************
1260 * Cases below are partial overlapping cases
1261 ***************************************************************************/
1262 // Case 4: Cut is completely inside the feature
1263 // OR
1264 // Cut is to the "left" side of the feature (i.e., feat_from is
1265 // inside the cut)
1266 // OR
1267 // Cut is to the "right" side of the feature (i.e., feat_to is
1268 // inside the cut)
1269 if (feat_to > cut_to) {
1270 // Left side cut or cut is completely inside feature
1271 feat_to -= cut_size;
1272 }
1273 else {
1274 // Right side cut
1275 if (interval.IsSetStrand() && interval.GetStrand() == eNa_strand_minus) {
1276 TSeqPos diff = cut_from - 1 - feat_to;
1277 trim5 += diff;
1278 }
1279 feat_to = cut_from - 1;
1280 }
1281
1282 // Take care of the feat_from from the left side cut case
1283 if (feat_from >= cut_from) {
1284 if (!interval.IsSetStrand() || interval.GetStrand() != eNa_strand_minus) {
1285 TSeqPos diff = cut_to + 1 - feat_from;
1286 trim5 += diff;
1287 }
1288 feat_from = cut_to + 1;
1289 feat_from -= cut_size;
1290 }
1291
1292 interval.SetFrom(feat_from);
1293 interval.SetTo(feat_to);
1294 bAdjusted = true;
1295 }
1296
1297
SeqLocAdjustForTrim(CPacked_seqint & packint,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1298 void SeqLocAdjustForTrim(CPacked_seqint& packint,
1299 TSeqPos from, TSeqPos to,
1300 const CSeq_id* seqid,
1301 bool& bCompleteCut,
1302 TSeqPos& trim5,
1303 bool& bAdjusted)
1304 {
1305 if (packint.IsSet()) {
1306 bool from5 = true;
1307 // Process each interval in the list
1308 CPacked_seqint::Tdata::iterator it;
1309 for (it = packint.Set().begin();
1310 it != packint.Set().end(); )
1311 {
1312 bool bDeleted = false;
1313 TSeqPos this_trim = 0;
1314 SeqLocAdjustForTrim(**it, from, to, seqid,
1315 bDeleted, this_trim, bAdjusted);
1316
1317 if (from5) {
1318 trim5 += this_trim;
1319 }
1320 // Should interval be deleted from list?
1321 if (bDeleted) {
1322 it = packint.Set().erase(it);
1323 }
1324 else {
1325 from5 = false;
1326 ++it;
1327 }
1328 }
1329 if (packint.Get().empty()) {
1330 packint.Reset();
1331 }
1332 }
1333 if (!packint.IsSet()) {
1334 bCompleteCut = true;
1335 }
1336 }
1337
1338
SeqLocAdjustForTrim(CSeq_loc_mix & mix,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1339 void SeqLocAdjustForTrim(CSeq_loc_mix& mix,
1340 TSeqPos from, TSeqPos to,
1341 const CSeq_id* seqid,
1342 bool& bCompleteCut,
1343 TSeqPos& trim5,
1344 bool& bAdjusted)
1345 {
1346 if (mix.IsSet()) {
1347 bool from5 = true;
1348 // Process each seqloc in the list
1349 CSeq_loc_mix::Tdata::iterator it;
1350 for (it = mix.Set().begin();
1351 it != mix.Set().end(); )
1352 {
1353 bool bDeleted = false;
1354 TSeqPos this_trim = 0;
1355 SeqLocAdjustForTrim(**it, from, to, seqid, bDeleted, this_trim, bAdjusted);
1356
1357 if (from5) {
1358 trim5 += this_trim;
1359 }
1360 // Should seqloc be deleted from list?
1361 if (bDeleted) {
1362 it = mix.Set().erase(it);
1363 }
1364 else {
1365 from5 = false;
1366 ++it;
1367 }
1368 }
1369 }
1370 if (!mix.IsSet() || mix.Set().empty()) {
1371 bCompleteCut = true;
1372 }
1373 }
1374
1375
SeqLocAdjustForTrim(CSeq_point & pnt,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1376 void SeqLocAdjustForTrim(CSeq_point& pnt,
1377 TSeqPos from, TSeqPos to,
1378 const CSeq_id* seqid,
1379 bool& bCompleteCut,
1380 TSeqPos& trim5,
1381 bool& bAdjusted)
1382 {
1383 if (!OkToAdjustLoc(pnt, seqid)) {
1384 return;
1385 }
1386
1387 if (to < pnt.GetPoint()) {
1388 auto diff = to - from + 1;
1389 pnt.SetPoint(pnt.GetPoint() - diff);
1390 bAdjusted = true;
1391 } else if (from < pnt.GetPoint()) {
1392 bCompleteCut = true;
1393 trim5 += 1;
1394 }
1395 }
1396
1397
SeqLocAdjustForTrim(CPacked_seqpnt & pack,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1398 void SeqLocAdjustForTrim(CPacked_seqpnt& pack,
1399 TSeqPos from, TSeqPos to,
1400 const CSeq_id* seqid,
1401 bool& bCompleteCut, TSeqPos& trim5, bool& bAdjusted)
1402 {
1403 if (!OkToAdjustLoc(pack, seqid)) {
1404 return;
1405 }
1406
1407 if (pack.IsSetPoints()) {
1408 bool from5 = true;
1409 auto it = pack.SetPoints().begin();
1410 while (it != pack.SetPoints().end()) {
1411 if (to < *it) {
1412 auto diff = to - from + 1;
1413 *it -= diff;
1414 it++;
1415 bAdjusted = true;
1416 from5 = false;
1417 } else if (from < *it) {
1418 it = pack.SetPoints().erase(it);
1419 bAdjusted = true;
1420 if (from5) {
1421 trim5 += 1;
1422 }
1423 } else {
1424 it++;
1425 from5 = false;
1426 }
1427 }
1428 }
1429 if (pack.SetPoints().empty()) {
1430 bCompleteCut = true;
1431 }
1432 }
1433
1434
SeqLocAdjustForTrim(CSeq_bond & bond,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1435 void SeqLocAdjustForTrim(CSeq_bond& bond,
1436 TSeqPos from, TSeqPos to,
1437 const CSeq_id* seqid,
1438 bool& bCompleteCut,
1439 TSeqPos& trim5,
1440 bool& bAdjusted)
1441 {
1442 bool cutA = false, cutB = false;
1443 if (bond.IsSetA()) {
1444 SeqLocAdjustForTrim(bond.SetA(), from, to, seqid, cutA, trim5, bAdjusted);
1445 } else {
1446 cutA = true;
1447 }
1448
1449 if (bond.IsSetB()) {
1450 SeqLocAdjustForTrim(bond.SetB(), from, to, seqid, cutB, trim5, bAdjusted);
1451 } else {
1452 cutB = true;
1453 }
1454 if (cutA && cutB) {
1455 bCompleteCut = true;
1456 }
1457 }
1458
1459
SeqLocAdjustForTrim(CSeq_loc_equiv & equiv,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1460 void SeqLocAdjustForTrim(CSeq_loc_equiv& equiv,
1461 TSeqPos from, TSeqPos to,
1462 const CSeq_id* seqid,
1463 bool& bCompleteCut,
1464 TSeqPos& trim5,
1465 bool& bAdjusted)
1466 {
1467 TSeqPos max_trim5 = 0;
1468 CSeq_loc_equiv::Tdata::iterator it = equiv.Set().begin();
1469 while (it != equiv.Set().end()) {
1470 bool cut = false;
1471 TSeqPos this_trim5 = 0;
1472 SeqLocAdjustForTrim(**it, from, to, seqid, cut, this_trim5, bAdjusted);
1473 if (this_trim5 > max_trim5) {
1474 max_trim5 = this_trim5;
1475 }
1476 if (cut) {
1477 it = equiv.Set().erase(it);
1478 } else {
1479 it++;
1480 }
1481 }
1482 if (equiv.Set().empty()) {
1483 bCompleteCut = true;
1484 }
1485 trim5 = max_trim5;
1486 }
1487
1488
SeqLocAdjustForTrim(CSeq_loc & loc,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1489 void SeqLocAdjustForTrim(CSeq_loc& loc,
1490 TSeqPos from, TSeqPos to,
1491 const CSeq_id* seqid,
1492 bool& bCompleteCut,
1493 TSeqPos& trim5, bool& bAdjusted)
1494 {
1495 // Given a seqloc and a range, cut the seqloc
1496
1497 switch(loc.Which())
1498 {
1499 // Single interval
1500 case CSeq_loc::e_Int:
1501 SeqLocAdjustForTrim(loc.SetInt(), from, to, seqid,
1502 bCompleteCut, trim5, bAdjusted);
1503 break;
1504
1505 // Multiple intervals
1506 case CSeq_loc::e_Packed_int:
1507 SeqLocAdjustForTrim(loc.SetPacked_int(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1508 break;
1509
1510 // Multiple seqlocs
1511 case CSeq_loc::e_Mix:
1512 SeqLocAdjustForTrim(loc.SetMix(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1513 break;
1514 case CSeq_loc::e_Pnt:
1515 SeqLocAdjustForTrim(loc.SetPnt(), from, to , seqid, bCompleteCut, trim5, bAdjusted);
1516 break;
1517 case CSeq_loc::e_Packed_pnt:
1518 SeqLocAdjustForTrim(loc.SetPacked_pnt(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1519 break;
1520 case CSeq_loc::e_Bond:
1521 SeqLocAdjustForTrim(loc.SetBond(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1522 break;
1523 case CSeq_loc::e_Equiv:
1524 SeqLocAdjustForTrim(loc.SetEquiv(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1525 break;
1526 case CSeq_loc::e_Empty:
1527 case CSeq_loc::e_Null:
1528 case CSeq_loc::e_not_set:
1529 case CSeq_loc::e_Whole:
1530 case CSeq_loc::e_Feat:
1531 // no adjustment needeed
1532 break;
1533 }
1534 if (!bCompleteCut) {
1535 NormalizeLoc(loc);
1536 }
1537 }
1538
1539
SeqLocAdjustForInsert(CSeq_interval & interval,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1540 void SeqLocAdjustForInsert(CSeq_interval& interval,
1541 TSeqPos insert_from, TSeqPos insert_to,
1542 const CSeq_id* seqid)
1543 {
1544 if (!OkToAdjustLoc(interval, seqid)) {
1545 return;
1546 }
1547
1548 // These are required fields
1549 if ( !(interval.CanGetFrom() && interval.CanGetTo()) )
1550 {
1551 return;
1552 }
1553
1554 // Feature location
1555 TSeqPos feat_from = interval.GetFrom();
1556 TSeqPos feat_to = interval.GetTo();
1557
1558 // Size of the insert
1559 TSeqPos insert_size = insert_to - insert_from + 1;
1560
1561 // Case 1: feature is located before the insert
1562 if (feat_to < insert_from)
1563 {
1564 // Nothing needs to be done - cut does not affect feature
1565 return;
1566 }
1567
1568 // Case 2: feature is located after the insert
1569 if (feat_from > insert_from) {
1570 feat_from += insert_size;
1571 feat_to += insert_size;
1572 interval.SetFrom(feat_from);
1573 interval.SetTo(feat_to);
1574 return;
1575 }
1576
1577 // Case 3: insert occurs within interval
1578 if (feat_from <= insert_from && feat_to >= insert_from)
1579 {
1580 feat_to += insert_size;
1581 interval.SetTo(feat_to);
1582 return;
1583 }
1584 }
1585
1586
SeqLocAdjustForInsert(CPacked_seqint & packint,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1587 void SeqLocAdjustForInsert(CPacked_seqint& packint,
1588 TSeqPos insert_from, TSeqPos insert_to,
1589 const CSeq_id* seqid)
1590 {
1591 if (packint.IsSet()) {
1592 // Process each interval in the list
1593 CPacked_seqint::Tdata::iterator it;
1594 for (it = packint.Set().begin();
1595 it != packint.Set().end(); it++)
1596 {
1597 SeqLocAdjustForInsert(**it, insert_from, insert_to, seqid);
1598 }
1599 }
1600 }
1601
1602
SeqLocAdjustForInsert(CSeq_loc_mix & mix,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1603 void SeqLocAdjustForInsert(CSeq_loc_mix& mix,
1604 TSeqPos insert_from, TSeqPos insert_to,
1605 const CSeq_id* seqid)
1606 {
1607 if (mix.IsSet()) {
1608 // Process each seqloc in the list
1609 CSeq_loc_mix::Tdata::iterator it;
1610 for (it = mix.Set().begin();
1611 it != mix.Set().end(); it++)
1612 {
1613 SeqLocAdjustForInsert(**it, insert_from, insert_to, seqid);
1614 }
1615 }
1616 }
1617
1618
SeqLocAdjustForInsert(CSeq_point & pnt,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1619 void SeqLocAdjustForInsert(CSeq_point& pnt,
1620 TSeqPos insert_from, TSeqPos insert_to,
1621 const CSeq_id* seqid)
1622 {
1623 if (!OkToAdjustLoc(pnt, seqid)) {
1624 return;
1625 }
1626 if (!pnt.IsSetPoint()) {
1627 return;
1628 }
1629
1630 if (insert_from < pnt.GetPoint()) {
1631 auto diff = insert_to - insert_from + 1;
1632 pnt.SetPoint(pnt.GetPoint() + diff);
1633 }
1634 }
1635
1636
SeqLocAdjustForInsert(CPacked_seqpnt & packpnt,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1637 void SeqLocAdjustForInsert(CPacked_seqpnt& packpnt,
1638 TSeqPos from, TSeqPos to,
1639 const CSeq_id* seqid)
1640 {
1641 if (!OkToAdjustLoc(packpnt, seqid)) {
1642 return;
1643 }
1644
1645 auto it = packpnt.SetPoints().begin();
1646 while (it != packpnt.SetPoints().end()) {
1647 if (from < *it) {
1648 auto diff = to - from + 1;
1649 *it += diff;
1650 }
1651 it++;
1652 }
1653 }
1654
1655
SeqLocAdjustForInsert(CSeq_bond & bond,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1656 void SeqLocAdjustForInsert(CSeq_bond& bond,
1657 TSeqPos from, TSeqPos to,
1658 const CSeq_id* seqid)
1659 {
1660 if (bond.IsSetA()) {
1661 SeqLocAdjustForInsert(bond.SetA(), from, to, seqid);
1662 }
1663
1664 if (bond.IsSetB()) {
1665 SeqLocAdjustForInsert(bond.SetB(), from, to, seqid);
1666 }
1667 }
1668
1669
SeqLocAdjustForInsert(CSeq_loc_equiv & equiv,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1670 void SeqLocAdjustForInsert(CSeq_loc_equiv& equiv,
1671 TSeqPos from, TSeqPos to,
1672 const CSeq_id* seqid)
1673 {
1674 CSeq_loc_equiv::Tdata::iterator it = equiv.Set().begin();
1675 while (it != equiv.Set().end()) {
1676 SeqLocAdjustForInsert(**it, from, to, seqid);
1677 it++;
1678 }
1679 }
1680
1681
SeqLocAdjustForInsert(CSeq_loc & loc,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1682 void SeqLocAdjustForInsert(CSeq_loc& loc,
1683 TSeqPos from, TSeqPos to,
1684 const CSeq_id* seqid)
1685 {
1686 // Given a seqloc and a range, insert into the seqloc
1687
1688 switch(loc.Which())
1689 {
1690 // Single interval
1691 case CSeq_loc::e_Int:
1692 SeqLocAdjustForInsert(loc.SetInt(), from, to, seqid);
1693 break;
1694
1695 // Multiple intervals
1696 case CSeq_loc::e_Packed_int:
1697 SeqLocAdjustForInsert(loc.SetPacked_int(), from, to, seqid);
1698 break;
1699
1700 // Multiple seqlocs
1701 case CSeq_loc::e_Mix:
1702 SeqLocAdjustForInsert(loc.SetMix(), from, to, seqid);
1703 break;
1704 case CSeq_loc::e_Pnt:
1705 SeqLocAdjustForInsert(loc.SetPnt(), from, to, seqid);
1706 break;
1707
1708 case CSeq_loc::e_Packed_pnt:
1709 SeqLocAdjustForInsert(loc.SetPacked_pnt(), from, to, seqid);
1710 break;
1711 case CSeq_loc::e_Bond:
1712 SeqLocAdjustForInsert(loc.SetBond(), from, to, seqid);
1713 break;
1714 case CSeq_loc::e_Equiv:
1715 SeqLocAdjustForInsert(loc.SetEquiv(), from, to, seqid);
1716 break;
1717 case CSeq_loc::e_Empty:
1718 case CSeq_loc::e_Null:
1719 case CSeq_loc::e_not_set:
1720 case CSeq_loc::e_Whole:
1721 case CSeq_loc::e_Feat:
1722 // no adjustment needeed
1723 break;
1724 }
1725 }
1726
1727
SplitLocationForGap(CSeq_interval & before,TSeqPos start,TSeqPos stop,const CSeq_id * seqid,bool & cut,unsigned int options)1728 CRef<CSeq_interval> SplitLocationForGap(CSeq_interval& before,
1729 TSeqPos start, TSeqPos stop,
1730 const CSeq_id* seqid, bool& cut,
1731 unsigned int options)
1732 {
1733 cut = false;
1734 if (!OkToAdjustLoc(before, seqid)) {
1735 return CRef<CSeq_interval>(NULL);
1736 }
1737 // These are required fields
1738 if (!(before.CanGetFrom() && before.CanGetTo()))
1739 {
1740 return CRef<CSeq_interval>(NULL);
1741 }
1742
1743 // Feature location
1744 TSeqPos feat_from = before.GetFrom();
1745 TSeqPos feat_to = before.GetTo();
1746 CRef<CSeq_interval> after(NULL);
1747 if (feat_to < start) {
1748 // gap completely after location
1749 return after;
1750 }
1751
1752 if (feat_from > stop && !(options & eSplitLocOption_split_in_intron)) {
1753 // if gap completely before location, but not splitting in introns,
1754 // no change
1755 return after;
1756 }
1757
1758 if (feat_from < start && feat_to > stop) {
1759 // gap entirely in inteval
1760 if (!(options & eSplitLocOption_split_in_exon)) {
1761 return after;
1762 }
1763 }
1764
1765 if (feat_to > stop) {
1766 after.Reset(new CSeq_interval());
1767 after->Assign(before);
1768 if (stop + 1 > feat_from) {
1769 after->SetFrom(stop + 1);
1770 if (options & eSplitLocOption_make_partial) {
1771 after->SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);
1772 }
1773 }
1774 }
1775 if (feat_from < start) {
1776 before.SetTo(start - 1);
1777 if (options & eSplitLocOption_make_partial) {
1778 before.SetFuzz_to().SetLim(CInt_fuzz::eLim_gt);
1779 }
1780 } else {
1781 cut = true;
1782 }
1783 return after;
1784 }
1785
1786
SplitLocationForGap(CSeq_loc::TPacked_int & before_intervals,CSeq_loc::TPacked_int & after_intervals,TSeqPos start,TSeqPos stop,const CSeq_id * seqid,unsigned int options)1787 void SplitLocationForGap(CSeq_loc::TPacked_int& before_intervals,
1788 CSeq_loc::TPacked_int& after_intervals,
1789 TSeqPos start, TSeqPos stop,
1790 const CSeq_id* seqid, unsigned int options)
1791 {
1792 if (before_intervals.IsSet()) {
1793 if (before_intervals.IsReverseStrand()) {
1794 reverse(before_intervals.Set().begin(), before_intervals.Set().end());
1795 }
1796
1797 auto it = before_intervals.Set().begin();
1798 while (it != before_intervals.Set().end()) {
1799 CSeq_interval& sub_interval = **it;
1800
1801 TSeqPos int_from = sub_interval.GetFrom();
1802 if (int_from > stop && after_intervals.IsSet() && !after_intervals.Get().empty()) {
1803 after_intervals.Set().push_back(Ref(&sub_interval));
1804 it = before_intervals.Set().erase(it);
1805 }
1806 else {
1807 bool cut = false;
1808 CRef<CSeq_interval> after = SplitLocationForGap(sub_interval, start, stop, seqid, cut, options);
1809
1810 // Should interval be deleted from list?
1811 if (cut) {
1812 it = before_intervals.Set().erase(it);
1813 }
1814 else {
1815 ++it;
1816 }
1817 if (after) {
1818 after_intervals.Set().push_back(after);
1819 }
1820 }
1821 }
1822
1823 if (after_intervals.IsReverseStrand()) {
1824 reverse(after_intervals.Set().begin(), after_intervals.Set().end());
1825 }
1826 if (before_intervals.IsReverseStrand()) {
1827 reverse(before_intervals.Set().begin(), before_intervals.Set().end());
1828 }
1829 }
1830 }
1831
1832
SplitLocationForGap(CSeq_loc & loc1,CSeq_loc & loc2,size_t start,size_t stop,const CSeq_id * seqid,unsigned int options)1833 void SplitLocationForGap(CSeq_loc& loc1, CSeq_loc& loc2,
1834 size_t start, size_t stop,
1835 const CSeq_id* seqid, unsigned int options)
1836 {
1837 // Given a seqloc and a range, place the portion of the location before the range
1838 // into loc1 and the remainder of the location into loc2
1839
1840 switch(loc1.Which())
1841 {
1842 // Single interval
1843 case CSeq_loc::e_Int:
1844 {{
1845 bool cut = false;
1846 CRef<CSeq_interval> after = SplitLocationForGap(loc1.SetInt(),
1847 static_cast<TSeqPos>(start), static_cast<TSeqPos>(stop),
1848 seqid, cut, options);
1849 if (cut) {
1850 loc1.Reset();
1851 }
1852 if (after) {
1853 if (loc2.Which() == CSeq_loc::e_not_set) {
1854 loc2.SetInt(*after);
1855 } else {
1856 CRef<CSeq_loc> add(new CSeq_loc());
1857 add->SetInt(*after);
1858 loc2.Add(*add);
1859 }
1860 }
1861 }}
1862 break;
1863 // Single point
1864 case CSeq_loc::e_Pnt:
1865 if (OkToAdjustLoc(loc1.GetPnt(), seqid)) {
1866 if (stop < loc1.GetPnt().GetPoint()) {
1867 if (loc2.Which() == CSeq_loc::e_not_set) {
1868 loc2.SetPnt().Assign(loc1.GetPnt());
1869 } else {
1870 loc2.Add(loc1);
1871 }
1872 loc1.Reset();
1873 }
1874 }
1875 break;
1876
1877 // Multiple intervals
1878 case CSeq_loc::e_Packed_int:
1879 {{
1880 CSeq_loc::TPacked_int& before_intervals = loc1.SetPacked_int();
1881 CRef<CSeq_loc::TPacked_int> after_intervals(new CSeq_loc::TPacked_int);
1882 SplitLocationForGap(before_intervals, *after_intervals,
1883 static_cast<TSeqPos>(start), static_cast<TSeqPos>(stop),
1884 seqid, options);
1885
1886 if (before_intervals.Set().empty()) {
1887 loc1.Reset();
1888 }
1889 if (!after_intervals->Set().empty()) {
1890 if (loc2.Which() == CSeq_loc::e_not_set) {
1891 loc2.SetPacked_int().Assign(*after_intervals);
1892 } else {
1893 CRef<CSeq_loc> add(new CSeq_loc());
1894 add->SetPacked_int().Assign(*after_intervals);
1895 loc2.Add(*add);
1896 }
1897 }
1898 }}
1899 break;
1900
1901 // Multiple seqlocs
1902 case CSeq_loc::e_Mix:
1903 {{
1904 CSeq_loc_mix& before_mix = loc1.SetMix();
1905 CRef<CSeq_loc_mix> after_mix(new CSeq_loc_mix);
1906 if (before_mix.IsSet()) {
1907 if (before_mix.IsReverseStrand()) {
1908 reverse(before_mix.Set().begin(), before_mix.Set().end());
1909 }
1910 auto it = before_mix.Set().begin();
1911 while (it != before_mix.Set().end()) {
1912 CSeq_loc& sub_loc = **it;
1913
1914 TSeqPos from = sub_loc.GetStart(eExtreme_Positional);
1915 if (from > stop && after_mix->IsSet() && !after_mix->Get().empty()) {
1916 after_mix->Set().push_back(Ref(&sub_loc));
1917 it = before_mix.Set().erase(it);
1918 }
1919 else {
1920 CRef<CSeq_loc> after(new CSeq_loc());
1921 SplitLocationForGap(**it, *after, start, stop, seqid, options);
1922 // Should seqloc be deleted from list?
1923 if ((*it)->Which() == CSeq_loc::e_not_set) {
1924 it = before_mix.Set().erase(it);
1925 }
1926 else {
1927 ++it;
1928 }
1929 if (after->Which() != CSeq_loc::e_not_set) {
1930 after_mix->Set().push_back(after);
1931 }
1932 }
1933 }
1934
1935 if (after_mix->IsReverseStrand()) {
1936 reverse(after_mix->Set().begin(), after_mix->Set().end());
1937 }
1938 if (before_mix.IsReverseStrand()) {
1939 reverse(before_mix.Set().begin(), before_mix.Set().end());
1940 }
1941
1942 // Update the original list
1943 if (before_mix.Set().empty()) {
1944 loc1.Reset();
1945 }
1946 if (!after_mix->Set().empty()) {
1947 if (loc2.Which() == CSeq_loc::e_not_set) {
1948 loc2.SetMix().Assign(*after_mix);
1949 }
1950 else {
1951 CRef<CSeq_loc> add(new CSeq_loc());
1952 add->SetMix().Assign(*after_mix);
1953 loc2.Add(*add);
1954 }
1955 }
1956 }
1957 }}
1958 break;
1959 case CSeq_loc::e_Equiv:
1960 {{
1961 CSeq_loc_equiv& before_equiv = loc1.SetEquiv();
1962 CRef<CSeq_loc_equiv> after_equiv(new CSeq_loc_equiv);
1963 if (before_equiv.IsSet()) {
1964 // Process each seqloc in the list
1965 CSeq_loc_equiv::Tdata::iterator it;
1966 for (it = before_equiv.Set().begin();
1967 it != before_equiv.Set().end(); )
1968 {
1969 CRef<CSeq_loc> after(new CSeq_loc());
1970 SplitLocationForGap(**it, *after, start, stop, seqid, options);
1971 // Should seqloc be deleted from list?
1972 if ((*it)->Which() == CSeq_loc::e_not_set) {
1973 it = before_equiv.Set().erase(it);
1974 }
1975 else {
1976 ++it;
1977 }
1978 if (after->Which() != CSeq_loc::e_not_set) {
1979 after_equiv->Set().push_back(after);
1980 }
1981 }
1982
1983 // Update the original list
1984 if (before_equiv.Set().empty()) {
1985 loc1.Reset();
1986 }
1987 if (!after_equiv->Set().empty()) {
1988 if (loc2.Which() == CSeq_loc::e_not_set) {
1989 loc2.SetMix().Assign(*after_equiv);
1990 } else {
1991 CRef<CSeq_loc> add(new CSeq_loc());
1992 add->SetMix().Assign(*after_equiv);
1993 loc2.Add(*add);
1994 }
1995 }
1996 }
1997 }}
1998 break;
1999 case CSeq_loc::e_Packed_pnt:
2000 if (OkToAdjustLoc(loc1.GetPacked_pnt(), seqid)) {
2001 CPacked_seqpnt::TPoints& before_points = loc1.SetPacked_pnt().SetPoints();
2002 CPacked_seqpnt::TPoints after_points;
2003 CPacked_seqpnt::TPoints::iterator it = loc1.SetPacked_pnt().SetPoints().begin();
2004 while (it != loc1.SetPacked_pnt().SetPoints().end()) {
2005 if (stop < *it) {
2006 after_points.push_back(*it);
2007 }
2008 if (start >= *it) {
2009 it = before_points.erase(it);
2010 } else {
2011 it++;
2012 }
2013 }
2014 if (!after_points.empty()) {
2015 CRef<CPacked_seqpnt> after(new CPacked_seqpnt());
2016 after->Assign(loc1.GetPacked_pnt());
2017 after->SetPoints().assign(after_points.begin(), after_points.end());
2018 if (loc2.Which() == CSeq_loc::e_not_set) {
2019 loc2.SetPacked_pnt().Assign(*after);
2020 } else {
2021 CRef<CSeq_loc> add(new CSeq_loc());
2022 add->SetPacked_pnt().Assign(*after);
2023 loc2.Add(*add);
2024 }
2025 }
2026 if (before_points.empty()) {
2027 loc1.Reset();
2028 }
2029 }
2030 break;
2031 case CSeq_loc::e_Empty:
2032 case CSeq_loc::e_Null:
2033 case CSeq_loc::e_not_set:
2034 case CSeq_loc::e_Whole:
2035 case CSeq_loc::e_Feat:
2036 case CSeq_loc::e_Bond:
2037 // no adjustment needeed
2038 break;
2039 }
2040 NormalizeLoc(loc1);
2041 NormalizeLoc(loc2);
2042 }
2043
2044
CdregionAdjustForTrim(CCdregion & cdr,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2045 void CdregionAdjustForTrim(CCdregion& cdr,
2046 TSeqPos from, TSeqPos to,
2047 const CSeq_id* seqid)
2048 {
2049 CCdregion::TCode_break::iterator it = cdr.SetCode_break().begin();
2050 while (it != cdr.SetCode_break().end()) {
2051 if ((*it)->IsSetLoc()) {
2052 bool cut = false;
2053 bool adjusted = false;
2054 TSeqPos trim5 = 0;
2055 SeqLocAdjustForTrim((*it)->SetLoc(), from, to, seqid, cut, trim5, adjusted);
2056 if (cut) {
2057 it = cdr.SetCode_break().erase(it);
2058 } else {
2059 it++;
2060 }
2061 } else {
2062 it++;
2063 }
2064 }
2065 if (cdr.GetCode_break().empty()) {
2066 cdr.ResetCode_break();
2067 }
2068 }
2069
2070
TrnaAdjustForTrim(CTrna_ext & trna,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2071 void TrnaAdjustForTrim(CTrna_ext& trna,
2072 TSeqPos from, TSeqPos to,
2073 const CSeq_id* seqid)
2074 {
2075 if (trna.IsSetAnticodon()) {
2076 bool cut = false;
2077 bool trimmed = false;
2078 TSeqPos trim5 = 0;
2079 SeqLocAdjustForTrim(trna.SetAnticodon(), from, to, seqid, cut, trim5, trimmed);
2080 if (cut) {
2081 trna.ResetAnticodon();
2082 }
2083 }
2084 }
2085
2086
FeatureAdjustForTrim(CSeq_feat & feat,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,bool & bTrimmed)2087 void FeatureAdjustForTrim(CSeq_feat& feat,
2088 TSeqPos from, TSeqPos to,
2089 const CSeq_id* seqid,
2090 bool& bCompleteCut,
2091 bool& bTrimmed)
2092 {
2093 TSeqPos trim5 = 0;
2094 SeqLocAdjustForTrim (feat.SetLocation(), from, to, seqid, bCompleteCut, trim5, bTrimmed);
2095 if (bCompleteCut) {
2096 return;
2097 }
2098
2099 if (feat.IsSetData()) {
2100 switch (feat.GetData().GetSubtype()) {
2101 case CSeqFeatData::eSubtype_cdregion:
2102 CdregionAdjustForTrim(feat.SetData().SetCdregion(), from, to, seqid);
2103 break;
2104 case CSeqFeatData::eSubtype_tRNA:
2105 TrnaAdjustForTrim(feat.SetData().SetRna().SetExt().SetTRNA(), from, to, seqid);
2106 break;
2107 default:
2108 break;
2109 }
2110 }
2111 }
2112
2113
CdregionAdjustForInsert(CCdregion & cdr,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2114 void CdregionAdjustForInsert(CCdregion& cdr,
2115 TSeqPos from, TSeqPos to,
2116 const CSeq_id* seqid)
2117 {
2118 CCdregion::TCode_break::iterator it = cdr.SetCode_break().begin();
2119 while (it != cdr.SetCode_break().end()) {
2120 if ((*it)->IsSetLoc()) {
2121 SeqLocAdjustForInsert((*it)->SetLoc(), from, to, seqid);
2122 }
2123 it++;
2124 }
2125 if (cdr.GetCode_break().empty()) {
2126 cdr.ResetCode_break();
2127 }
2128 }
2129
2130
TrnaAdjustForInsert(CTrna_ext & trna,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2131 void TrnaAdjustForInsert(CTrna_ext& trna,
2132 TSeqPos from, TSeqPos to,
2133 const CSeq_id* seqid)
2134 {
2135 if (trna.IsSetAnticodon()) {
2136 SeqLocAdjustForInsert(trna.SetAnticodon(), from, to, seqid);
2137 }
2138 }
2139
2140
FeatureAdjustForInsert(CSeq_feat & feat,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2141 void FeatureAdjustForInsert(CSeq_feat& feat,
2142 TSeqPos from, TSeqPos to,
2143 const CSeq_id* seqid)
2144 {
2145 SeqLocAdjustForInsert (feat.SetLocation(), from, to, seqid);
2146
2147 if (feat.IsSetData()) {
2148 switch (feat.GetData().GetSubtype()) {
2149 case CSeqFeatData::eSubtype_cdregion:
2150 CdregionAdjustForInsert(feat.SetData().SetCdregion(), from, to, seqid);
2151 break;
2152 case CSeqFeatData::eSubtype_tRNA:
2153 TrnaAdjustForInsert(feat.SetData().SetRna().SetExt().SetTRNA(), from, to, seqid);
2154 break;
2155 default:
2156 break;
2157 }
2158 }
2159 }
2160
2161
s_PPntComparePlus(const TSeqPos & p1,const TSeqPos & p2)2162 bool s_PPntComparePlus(const TSeqPos& p1, const TSeqPos& p2)
2163 {
2164 return (p1 < p2);
2165 }
2166
2167
s_PPntCompareMinus(const TSeqPos & p1,const TSeqPos & p2)2168 bool s_PPntCompareMinus(const TSeqPos& p1, const TSeqPos& p2)
2169 {
2170 return (p1 > p2);
2171 }
2172
2173
CorrectIntervalOrder(CPacked_seqpnt & ppnt)2174 bool CorrectIntervalOrder(CPacked_seqpnt& ppnt)
2175 {
2176 bool rval = false;
2177 if (!ppnt.IsSetPoints()) {
2178 // nothing to do
2179 } else if (!ppnt.IsSetStrand() || ppnt.GetStrand() == eNa_strand_plus || ppnt.GetStrand() == eNa_strand_unknown) {
2180 if (!seq_mac_is_sorted(ppnt.GetPoints().begin(), ppnt.GetPoints().end(), s_PPntComparePlus)) {
2181 stable_sort(ppnt.SetPoints().begin(), ppnt.SetPoints().end(), s_PPntComparePlus);
2182 rval = true;
2183 }
2184 } else if (ppnt.IsSetStrand() && ppnt.GetStrand() == eNa_strand_minus) {
2185 if (!seq_mac_is_sorted(ppnt.GetPoints().begin(), ppnt.GetPoints().end(), s_PPntCompareMinus)) {
2186 stable_sort(ppnt.SetPoints().begin(), ppnt.SetPoints().end(), s_PPntCompareMinus);
2187 rval = true;
2188 }
2189 }
2190 return rval;
2191 }
2192
2193
s_StrandsConsistent(const CSeq_interval & a,const CSeq_interval & b)2194 bool s_StrandsConsistent(const CSeq_interval& a, const CSeq_interval& b)
2195 {
2196 if (a.IsSetStrand() && a.GetStrand() == eNa_strand_minus) {
2197 if (!b.IsSetStrand() || b.GetStrand() != eNa_strand_minus) {
2198 return false;
2199 } else {
2200 return true;
2201 }
2202 } else if (b.IsSetStrand() && b.GetStrand() == eNa_strand_minus) {
2203 return false;
2204 } else {
2205 return true;
2206 }
2207 }
2208
2209
CorrectIntervalOrder(CPacked_seqint & pint)2210 bool CorrectIntervalOrder(CPacked_seqint& pint)
2211 {
2212 if (pint.Get().size() < 2) {
2213 return false;
2214 }
2215 bool any_change = false;
2216
2217 bool this_change = true;
2218 while (this_change) {
2219 this_change = false;
2220
2221 // can only swap elements if they have the same strand and Seq-id
2222 CPacked_seqint::Tdata::iterator a = pint.Set().begin();
2223 CPacked_seqint::Tdata::iterator b = a;
2224 b++;
2225 while (b != pint.Set().end()) {
2226 if ((*a)->IsSetId() && (*b)->IsSetId() &&
2227 (*a)->GetId().Equals((*b)->GetId()) &&
2228 (*a)->IsSetFrom() && (*a)->IsSetTo() && (*a)->GetFrom() < (*a)->GetTo() &&
2229 (*b)->IsSetFrom() && (*b)->IsSetTo() && (*b)->GetFrom() < (*b)->GetTo() &&
2230 s_StrandsConsistent(**a, **b)) {
2231 if ((*a)->IsSetStrand() && (*a)->GetStrand() == eNa_strand_minus) {
2232 if ((*b)->GetTo() > (*a)->GetFrom()) {
2233 CRef<CSeq_interval> swp(a->GetPointer());
2234 a->Reset(b->GetPointer());
2235 b->Reset(swp.GetPointer());
2236 this_change = true;
2237 any_change = true;
2238 }
2239 } else {
2240 if ((*b)->GetTo() < (*a)->GetFrom()) {
2241 CRef<CSeq_interval> swp(a->GetPointer());
2242 a->Reset(b->GetPointer());
2243 b->Reset(swp.GetPointer());
2244 this_change = true;
2245 any_change = true;
2246 }
2247 }
2248 }
2249 ++a;
2250 ++b;
2251 }
2252 }
2253 return any_change;
2254 }
2255
2256
OneIdOneStrand(const CSeq_loc & loc,const CSeq_id ** id,ENa_strand & strand)2257 bool OneIdOneStrand(const CSeq_loc& loc, const CSeq_id** id, ENa_strand& strand)
2258 {
2259 try {
2260 CSeq_loc_CI li(loc);
2261 *id = &(li.GetSeq_id());
2262 strand = li.IsSetStrand() ? li.GetStrand() : eNa_strand_plus;
2263 if (strand == eNa_strand_unknown) {
2264 strand = eNa_strand_plus;
2265 }
2266 if (strand != eNa_strand_plus && strand != eNa_strand_minus) {
2267 return false;
2268 }
2269 ++li;
2270 while (li) {
2271 if (!li.GetSeq_id().Equals(**id)) {
2272 return false;
2273 }
2274 ENa_strand this_strand = li.IsSetStrand() ? li.GetStrand() : eNa_strand_plus;
2275 if (this_strand == eNa_strand_unknown) {
2276 this_strand = eNa_strand_plus;
2277 }
2278 if (this_strand != strand) {
2279 return false;
2280 }
2281 ++li;
2282 }
2283 return true;
2284 } catch (CException&) {
2285 return false;
2286 }
2287 }
2288
2289
CorrectIntervalOrder(CSeq_loc::TMix::Tdata & mix)2290 bool CorrectIntervalOrder(CSeq_loc::TMix::Tdata& mix)
2291 {
2292 bool any_change = false;
2293 NON_CONST_ITERATE(CSeq_loc::TMix::Tdata, it, mix) {
2294 any_change |= CorrectIntervalOrder(**it);
2295 }
2296 if (mix.size() < 2) {
2297 return any_change;
2298 }
2299 bool this_change = true;
2300 while (this_change) {
2301 this_change = false;
2302
2303 // can only swap elements if they have the same strand and Seq-id
2304 CSeq_loc::TMix::Tdata::iterator a = mix.begin();
2305 CSeq_loc::TMix::Tdata::iterator b = a;
2306 b++;
2307 while (b != mix.end()) {
2308 try {
2309 const CSeq_id* a_id;
2310 const CSeq_id* b_id;
2311 ENa_strand a_strand;
2312 ENa_strand b_strand;
2313 if (OneIdOneStrand(**a, &a_id, a_strand) &&
2314 OneIdOneStrand(**b, &b_id, b_strand) &&
2315 a_id->Equals(*b_id) &&
2316 a_strand == b_strand) {
2317 if (a_strand == eNa_strand_plus) {
2318 if ((*a)->GetStart(eExtreme_Biological) > (*b)->GetStop(eExtreme_Biological)) {
2319 CRef<CSeq_loc> swp(a->GetPointer());
2320 a->Reset(b->GetPointer());
2321 b->Reset(swp.GetPointer());
2322 this_change = true;
2323 any_change = true;
2324 }
2325 } else if (a_strand == eNa_strand_minus) {
2326 if ((*a)->GetStart(eExtreme_Biological) < (*b)->GetStop(eExtreme_Biological)) {
2327 CRef<CSeq_loc> swp(a->GetPointer());
2328 a->Reset(b->GetPointer());
2329 b->Reset(swp.GetPointer());
2330 this_change = true;
2331 any_change = true;
2332 }
2333 }
2334 }
2335 } catch (CException&) {
2336 // not just one id
2337 }
2338 ++a;
2339 ++b;
2340 }
2341 }
2342 return any_change;
2343 }
2344
2345
CorrectIntervalOrder(CSeq_loc & loc)2346 bool CorrectIntervalOrder(CSeq_loc& loc)
2347 {
2348 bool any_change = false;
2349 switch (loc.Which()) {
2350 case CSeq_loc::e_Bond:
2351 case CSeq_loc::e_Empty:
2352 case CSeq_loc::e_Equiv:
2353 case CSeq_loc::e_Feat:
2354 case CSeq_loc::e_Int:
2355 case CSeq_loc::e_not_set:
2356 case CSeq_loc::e_Null:
2357 case CSeq_loc::e_Pnt:
2358 case CSeq_loc::e_Whole:
2359 // nothing to do
2360 break;
2361 case CSeq_loc::e_Mix:
2362 any_change = CorrectIntervalOrder(loc.SetMix().Set());
2363 break;
2364 case CSeq_loc::e_Packed_int:
2365 any_change = CorrectIntervalOrder(loc.SetPacked_int());
2366 break;
2367 case CSeq_loc::e_Packed_pnt:
2368 any_change = CorrectIntervalOrder(loc.SetPacked_pnt());
2369 break;
2370 }
2371 return any_change;
2372 }
2373
2374
IsExtendableLeft(TSeqPos left,const CBioseq & seq,CScope * scope,TSeqPos & extend_len)2375 bool IsExtendableLeft(TSeqPos left, const CBioseq& seq, CScope* scope, TSeqPos& extend_len)
2376 {
2377 bool rval = false;
2378 if (left < 3) {
2379 rval = true;
2380 extend_len = left;
2381 } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
2382 seq.GetInst().GetRepr() == CSeq_inst::eRepr_delta &&
2383 seq.GetInst().IsSetExt() &&
2384 seq.GetInst().GetExt().IsDelta()) {
2385 TSeqPos offset = 0;
2386 TSeqPos last_gap_stop = 0;
2387 ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
2388 if ((*it)->IsLiteral()) {
2389 offset += (*it)->GetLiteral().GetLength();
2390 if (!(*it)->GetLiteral().IsSetSeq_data()) {
2391 last_gap_stop = offset;
2392 } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
2393 last_gap_stop = offset;
2394 }
2395 } else if ((*it)->IsLoc()) {
2396 offset += sequence::GetLength((*it)->GetLoc(), scope);
2397 }
2398 if (offset > left) {
2399 break;
2400 }
2401 }
2402 if (left >= last_gap_stop && left - last_gap_stop <= 3) {
2403 rval = true;
2404 extend_len = left - last_gap_stop;
2405 }
2406 }
2407 return rval;
2408 }
2409
2410
IsExtendableRight(TSeqPos right,const CBioseq & seq,CScope * scope,TSeqPos & extend_len)2411 bool IsExtendableRight(TSeqPos right, const CBioseq& seq, CScope* scope, TSeqPos& extend_len)
2412 {
2413 bool rval = false;
2414 if (right > seq.GetLength() - 5) {
2415 rval = true;
2416 extend_len = seq.GetLength() - right - 1;
2417 } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
2418 seq.GetInst().GetRepr() == CSeq_inst::eRepr_delta &&
2419 seq.GetInst().IsSetExt() &&
2420 seq.GetInst().GetExt().IsDelta()) {
2421 TSeqPos offset = 0;
2422 TSeqPos next_gap_start = 0;
2423 ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
2424 if ((*it)->IsLiteral()) {
2425 if (!(*it)->GetLiteral().IsSetSeq_data()) {
2426 next_gap_start = offset;
2427 } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
2428 next_gap_start = offset;
2429 }
2430 offset += (*it)->GetLiteral().GetLength();
2431 } else if ((*it)->IsLoc()) {
2432 offset += sequence::GetLength((*it)->GetLoc(), scope);
2433 }
2434 if (offset > right + 4) {
2435 break;
2436 }
2437 }
2438 if (next_gap_start > right && next_gap_start - right - 1 <= 3) {
2439 rval = true;
2440 extend_len = next_gap_start - right - 1;
2441 }
2442 }
2443 return rval;
2444 }
2445
2446
AdjustFeatureEnd5(CSeq_feat & cds,vector<CRef<CSeq_feat>> related_features,CScope & scope)2447 bool AdjustFeatureEnd5(CSeq_feat& cds, vector<CRef<CSeq_feat> > related_features, CScope& scope)
2448 {
2449 if (!cds.GetLocation().IsPartialStart(eExtreme_Biological)) {
2450 return false;
2451 }
2452
2453 bool rval = false;
2454
2455 CSeq_loc_CI first_l(cds.GetLocation());
2456 CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
2457 CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2458
2459 TSeqPos start = cds.GetLocation().GetStart(eExtreme_Biological);
2460 TSeqPos new_start = start;
2461 TSeqPos extend_len = 0;
2462 bool extendable = false;
2463
2464 if (!first_l.IsSetStrand() || first_l.GetStrand() != eNa_strand_minus) {
2465 // positive strand
2466 if (start > 0) {
2467 extendable = IsExtendableLeft(start, *seq, &scope, extend_len);
2468 new_start = start - extend_len;
2469 }
2470 } else {
2471 if (start < seq->GetInst().GetLength() - 1) {
2472 extendable = IsExtendableRight(start, *seq, &scope, extend_len);
2473 new_start = start + extend_len;
2474 }
2475 }
2476 if (extendable) {
2477 CRef<CSeq_loc> cds_loc = SeqLocExtend5(cds.GetLocation(), new_start, &scope);
2478 if (cds_loc) {
2479 for (auto it : related_features) {
2480 if (it->GetLocation().GetStart(eExtreme_Biological) == start) {
2481 CRef<CSeq_loc> related_loc = SeqLocExtend5(it->GetLocation(), new_start, &scope);
2482 if (related_loc) {
2483 it->SetLocation().Assign(*related_loc);
2484 if (it->IsSetData() && it->GetData().IsCdregion()) {
2485 AdjustFrameFor5Extension(cds, extend_len);
2486 }
2487 }
2488 }
2489 }
2490 cds.SetLocation().Assign(*cds_loc);
2491 if (cds.IsSetData() && cds.GetData().IsCdregion()) {
2492 AdjustFrameFor5Extension(cds, extend_len);
2493 }
2494
2495 rval = true;
2496 }
2497 }
2498
2499 return rval;
2500 }
2501
2502
AdjustFeatureEnd3(CSeq_feat & cds,vector<CRef<CSeq_feat>> related_features,CScope & scope)2503 bool AdjustFeatureEnd3(CSeq_feat& cds, vector<CRef<CSeq_feat> > related_features, CScope& scope)
2504 {
2505 if (!cds.GetLocation().IsPartialStop(eExtreme_Biological)) {
2506 return false;
2507 }
2508
2509 bool rval = false;
2510
2511 CSeq_loc_CI last_l(cds.GetLocation());
2512 size_t num_intervals = last_l.GetSize();
2513 last_l.SetPos(num_intervals - 1);
2514
2515 CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
2516 CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2517
2518 TSeqPos stop = cds.GetLocation().GetStop(eExtreme_Biological);
2519 TSeqPos new_stop = stop;
2520 TSeqPos extend_len = 0;
2521 bool extendable = false;
2522
2523 if (!last_l.IsSetStrand() || last_l.GetStrand() != eNa_strand_minus) {
2524 // positive strand
2525 if (stop < seq->GetInst().GetLength() - 1) {
2526 extendable = IsExtendableRight(stop, *seq, &scope, extend_len);
2527 new_stop = stop + extend_len;
2528 }
2529 } else {
2530 if (stop > 0) {
2531 extendable = IsExtendableLeft(stop, *seq, &scope, extend_len);
2532 new_stop = stop - extend_len;
2533 }
2534 }
2535 if (extendable) {
2536 CRef<CSeq_loc> cds_loc = SeqLocExtend3(cds.GetLocation(), new_stop, &scope);
2537 if (cds_loc) {
2538 for (auto it : related_features) {
2539 if (it->GetLocation().GetStop(eExtreme_Biological) == stop) {
2540 CRef<CSeq_loc> related_loc = SeqLocExtend3(it->GetLocation(), new_stop, &scope);
2541 if (related_loc) {
2542 it->SetLocation().Assign(*related_loc);
2543 }
2544 }
2545 }
2546 cds.SetLocation().Assign(*cds_loc);
2547 rval = true;
2548 }
2549 }
2550
2551 return rval;
2552 }
2553
2554
IsExtendable(const CSeq_feat & cds,CScope & scope)2555 bool IsExtendable(const CSeq_feat& cds, CScope& scope)
2556 {
2557 if (cds.GetLocation().IsPartialStart(eExtreme_Positional)) {
2558 CSeq_loc_CI first_l(cds.GetLocation());
2559 CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
2560 CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2561 TSeqPos extend_len = 0;
2562 TSeqPos start = first_l.GetRange().GetFrom();
2563 if (IsExtendableLeft(start, *seq, &scope, extend_len) && extend_len > 0) {
2564 return true;
2565 }
2566 }
2567 if (cds.GetLocation().IsPartialStop(eExtreme_Positional)) {
2568 CSeq_loc_CI last_l(cds.GetLocation());
2569 size_t num_intervals = last_l.GetSize();
2570 last_l.SetPos(num_intervals - 1);
2571 CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
2572 CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2573
2574 TSeqPos stop = cds.GetLocation().GetStop(eExtreme_Positional);
2575 TSeqPos extend_len = 0;
2576
2577 if (IsExtendableRight(stop, *seq, &scope, extend_len) && extend_len > 0) {
2578 return true;
2579 }
2580 }
2581 return false;
2582 }
2583
2584
ExtendPartialFeatureEnds(CBioseq_Handle bsh)2585 bool ExtendPartialFeatureEnds(CBioseq_Handle bsh)
2586 {
2587 bool any_change = false;
2588 CFeat_CI f(bsh);
2589 CRef<feature::CFeatTree> tr(new feature::CFeatTree(f));
2590 while (f) {
2591 if (f->GetData().IsCdregion()) {
2592 CMappedFeat gene = tr->GetBestGene(*f);
2593 CMappedFeat mRNA = tr->GetParent(*f, CSeqFeatData::eSubtype_mRNA);
2594 vector<CRef<CSeq_feat> > related_features;
2595 if (gene) {
2596 CRef<CSeq_feat> new_gene(new CSeq_feat());
2597 new_gene->Assign(*(gene.GetOriginalSeq_feat()));
2598 related_features.push_back(new_gene);
2599 }
2600 if (mRNA) {
2601 CRef<CSeq_feat> new_mRNA(new CSeq_feat());
2602 new_mRNA->Assign(*(mRNA.GetOriginalSeq_feat()));
2603 related_features.push_back(new_mRNA);
2604 }
2605 CRef<CSeq_feat> new_cds(new CSeq_feat());
2606 new_cds->Assign(*(f->GetOriginalSeq_feat()));
2607
2608 const bool adjusted_5prime = AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope());
2609 const bool adjusted_3prime = AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope());
2610
2611 if (adjusted_5prime || adjusted_3prime) {
2612 feature::RetranslateCDS(*new_cds, bsh.GetScope());
2613 CSeq_feat_EditHandle feh(*f);
2614 feh.Replace(*new_cds);
2615 if (gene) {
2616 CSeq_feat_EditHandle geh(gene);
2617 geh.Replace(*(related_features.front()));
2618 }
2619 if (mRNA) {
2620 CSeq_feat_EditHandle meh(mRNA);
2621 meh.Replace(*(related_features.back()));
2622 }
2623 any_change = true;
2624 }
2625 }
2626 ++f;
2627 }
2628 return any_change;
2629 }
2630
2631
2632 END_SCOPE(edit)
2633 END_SCOPE(objects)
2634 END_NCBI_SCOPE
2635
2636