1 /*  $Id: loc_edit.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 *   functions for editing and working with locations
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <objtools/edit/loc_edit.hpp>
35 #include <objtools/edit/cds_fix.hpp>
36 #include <objects/seq/Seq_descr.hpp>
37 #include <objects/seq/Seqdesc.hpp>
38 #include <objects/seq/Delta_ext.hpp>
39 #include <objects/seq/Delta_seq.hpp>
40 #include <objects/seq/Seq_ext.hpp>
41 #include <objects/seq/Seq_inst.hpp>
42 #include <objects/seqfeat/Code_break.hpp>
43 #include <objects/seqfeat/Cdregion.hpp>
44 #include <objects/seqfeat/RNA_ref.hpp>
45 #include <objects/seqfeat/Trna_ext.hpp>
46 #include <objects/seqloc/Packed_seqint.hpp>
47 #include <objects/seqloc/Packed_seqpnt.hpp>
48 #include <objects/seqloc/Seq_bond.hpp>
49 #include <objects/seqloc/Seq_interval.hpp>
50 #include <objects/seqloc/Seq_loc_equiv.hpp>
51 #include <objects/seqloc/Seq_loc_mix.hpp>
52 #include <objects/seqloc/Seq_point.hpp>
53 #include <objects/general/Int_fuzz.hpp>
54 #include <objects/misc/sequence_util_macros.hpp>
55 #include <objmgr/bioseq_handle.hpp>
56 #include <util/sequtil/sequtil_convert.hpp>
57 #include <objmgr/util/sequence.hpp>
58 #include <objmgr/seq_vector.hpp>
59 
60 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)61 BEGIN_SCOPE(objects)
62 BEGIN_SCOPE(edit)
63 
64 string PrintBestSeqId(const CSeq_id& sid, CScope& scope)
65 {
66    string best_id(kEmptyStr);
67 
68    // Best seq_id
69    CSeq_id_Handle sid_hl = sequence::GetId(sid, scope, sequence::eGetId_Best);
70    if (sid_hl) {
71       CConstRef <CSeq_id> new_id = sid_hl.GetSeqId();
72       if (new_id) {
73         best_id = sid_hl.GetSeqId()->AsFastaString();
74       }
75    }
76    else best_id = sid.AsFastaString();
77 
78    return best_id;
79 };
80 
81 static const string strand_symbol[] = {"", "", "c","b", "r"};
PrintSeqIntUseBestID(const CSeq_interval & seq_int,CScope & scope,bool range_only)82 string PrintSeqIntUseBestID(const CSeq_interval& seq_int, CScope& scope, bool range_only)
83 {
84     string location(kEmptyStr);
85 
86     // Best seq_id
87     if (!range_only) {
88       location = PrintBestSeqId(seq_int.GetId(), scope) + ":";
89     }
90 
91     // strand
92     ENa_strand
93       this_strand
94          = (seq_int.CanGetStrand()) ? seq_int.GetStrand(): eNa_strand_unknown;
95     location += strand_symbol[(int)this_strand];
96     int from, to;
97     string lab_from(kEmptyStr), lab_to(kEmptyStr);
98     if (eNa_strand_minus == this_strand
99                                || eNa_strand_both_rev ==  this_strand) {
100            to = seq_int.GetFrom();
101            from = seq_int.GetTo();
102            if (seq_int.CanGetFuzz_to()) {
103                const CInt_fuzz& f_from = seq_int.GetFuzz_to();
104                f_from.GetLabel(&lab_from, from, false);
105            }
106            else lab_from = NStr::IntToString(++from);
107            if (seq_int.CanGetFuzz_from()) {
108                const CInt_fuzz& f_to = seq_int.GetFuzz_from();
109                f_to.GetLabel(&lab_to, to);
110            }
111            else lab_to = NStr::IntToString(++to);
112     }
113     else {
114            to = seq_int.GetTo();
115            from = seq_int.GetFrom();
116            if (seq_int.CanGetFuzz_from()) {
117                 const CInt_fuzz& f_from = seq_int.GetFuzz_from();
118                 f_from.GetLabel(&lab_from, from, false);
119            }
120            else lab_from = NStr::IntToString(++from);
121            if (seq_int.CanGetFuzz_to()) {
122                const CInt_fuzz& f_to = seq_int.GetFuzz_to();
123                f_to.GetLabel(&lab_to, to);
124            }
125            else lab_to = NStr::IntToString(++to);
126     }
127     location += lab_from + "-" + lab_to;
128     return location;
129 };
130 
PrintPntAndPntsUseBestID(const CSeq_loc & seq_loc,CScope & scope,bool range_only)131 string PrintPntAndPntsUseBestID(const CSeq_loc& seq_loc, CScope& scope, bool range_only)
132 {
133   string location(kEmptyStr);
134 
135    // Best seq_id
136   if (!range_only) {
137      if (seq_loc.IsPnt()) {
138        location = PrintBestSeqId(seq_loc.GetPnt().GetId(), scope) + ":";
139      }
140      else if (seq_loc.IsPacked_pnt()) {
141        location = PrintBestSeqId(seq_loc.GetPacked_pnt().GetId(), scope) + ":";
142      }
143   }
144 
145   if (!location.empty()) {
146      string strtmp;
147      seq_loc.GetLabel(&strtmp);
148      location += strtmp.substr(strtmp.find(":")+1);
149   }
150   return location;
151 }
152 
SeqLocPrintUseBestID(const CSeq_loc & seq_loc,CScope & scope,bool range_only)153 string SeqLocPrintUseBestID(const CSeq_loc& seq_loc, CScope& scope, bool range_only)
154 {
155   string location(kEmptyStr);
156   if (seq_loc.IsInt()) {
157      location = PrintSeqIntUseBestID(seq_loc.GetInt(), scope, range_only);
158   }
159   else if (seq_loc.IsMix() || seq_loc.IsEquiv()) {
160      location = "(";
161      const list <CRef <CSeq_loc> >* seq_loc_ls;
162      if (seq_loc.IsMix()) {
163         seq_loc_ls = &(seq_loc.GetMix().Get());
164      }
165      else {
166         seq_loc_ls = &(seq_loc.GetEquiv().Get());
167      }
168      ITERATE (list <CRef <CSeq_loc> >, it, *seq_loc_ls) {
169         if (it == seq_loc.GetMix().Get().begin()) {
170            location += SeqLocPrintUseBestID(**it, scope, range_only);
171         }
172         else location += SeqLocPrintUseBestID(**it, scope, true);
173         location += ", ";
174      }
175      if (!location.empty()) {
176         location = location.substr(0, location.size()-2);
177      }
178      location += ")";
179   }
180   else if (seq_loc.IsPacked_int()) {
181      location = "(";
182      ITERATE (list <CRef <CSeq_interval> >, it, seq_loc.GetPacked_int().Get()) {
183         if (it == seq_loc.GetPacked_int().Get().begin()) {
184            location += PrintSeqIntUseBestID(**it, scope, range_only);
185         }
186         else {
187             location += PrintSeqIntUseBestID(**it, scope, true);
188         }
189         location += ", ";
190      }
191      if (!location.empty()) {
192         location = location.substr(0, location.size()-2);
193      }
194      location += ")";
195   }
196   else if (seq_loc.IsPnt() || seq_loc.IsPacked_pnt()) {
197      location = PrintPntAndPntsUseBestID(seq_loc, scope, range_only);
198   }
199   else if (seq_loc.IsBond()) {
200      CSeq_loc tmp_loc;
201      tmp_loc.SetBond().Assign(seq_loc.GetBond().GetA());
202      location = PrintPntAndPntsUseBestID(tmp_loc, scope, range_only);
203      if (seq_loc.GetBond().CanGetB()) {
204        tmp_loc.SetBond().Assign(seq_loc.GetBond().GetB());
205        location
206           += "=" + PrintPntAndPntsUseBestID(tmp_loc, scope, range_only);
207      }
208   }
209   else {
210     seq_loc.GetLabel(&location);
211   }
212   return location;
213 }
214 
215 
Is5AtEndOfSeq(const CSeq_loc & loc,CBioseq_Handle bsh)216 bool CLocationEditPolicy::Is5AtEndOfSeq(const CSeq_loc& loc, CBioseq_Handle bsh)
217 {
218     bool rval = false;
219 
220     ENa_strand strand = loc.GetStrand();
221     if (strand == eNa_strand_minus) {
222         if (bsh && loc.GetStart(eExtreme_Biological) == bsh.GetInst_Length() - 1) {
223             rval = true;
224         }
225     } else {
226         if (loc.GetStart(eExtreme_Biological) == 0) {
227             rval = true;
228         }
229     }
230     return rval;
231 }
232 
233 
Is3AtEndOfSeq(const CSeq_loc & loc,CBioseq_Handle bsh)234 bool CLocationEditPolicy::Is3AtEndOfSeq(const CSeq_loc& loc, CBioseq_Handle bsh)
235 {
236     bool rval = false;
237     ENa_strand strand = loc.GetStrand();
238 
239     if (strand == eNa_strand_minus) {
240         if (loc.GetStop(eExtreme_Biological) == 0) {
241             rval = true;
242         }
243     } else {
244         if (bsh && loc.GetStop(eExtreme_Biological) == bsh.GetInst_Length() - 1) {
245             rval = true;
246         }
247     }
248     return rval;
249 }
250 
251 
Is5AtEndOfSeq(const CSeq_loc & loc,CScope & scope,bool & confident)252 bool CLocationEditPolicy::Is5AtEndOfSeq(const CSeq_loc& loc, CScope& scope, bool& confident)
253 {
254     bool rval = false;
255     confident = true;
256     CSeq_loc_CI first_l(loc);
257     if (!first_l.IsSetStrand() || first_l.GetStrand() != eNa_strand_minus) {
258         // positive strand, just need to know if it's at zero
259         rval = (first_l.GetRange().GetFrom() == 0);
260     } else {
261         // negative strand
262         try {
263             CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
264             rval = (first_l.GetRange().GetTo() == bsh.GetBioseqLength() - 1);
265         } catch (CException&) {
266             confident = false;
267         }
268     }
269     return rval;
270 }
271 
272 
Is3AtEndOfSeq(const CSeq_loc & loc,CScope & scope,bool & confident)273 bool CLocationEditPolicy::Is3AtEndOfSeq(const CSeq_loc& loc, CScope& scope, bool& confident)
274 {
275     bool rval = false;
276     confident = true;
277     CSeq_loc_CI last_l(loc);
278     size_t num_intervals = last_l.GetSize();
279     last_l.SetPos(num_intervals - 1);
280     if (!last_l.IsSetStrand() || last_l.GetStrand() != eNa_strand_minus) {
281         // positive strand
282         try {
283             CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
284             if (bsh) {
285                 rval = (last_l.GetRange().GetTo() == bsh.GetBioseqLength() - 1);
286             } else {
287                 confident = false;
288             }
289         } catch (CException&) {
290             confident = false;
291         }
292     } else {
293         // negative strand, just need to know if it's at zero
294         rval = (last_l.GetRange().GetFrom() == 0);
295     }
296     return rval;
297 }
298 
299 
Interpret5Policy(const CSeq_feat & orig_feat,CScope & scope,bool & do_set_5_partial,bool & do_clear_5_partial) const300 bool CLocationEditPolicy::Interpret5Policy
301 (const CSeq_feat& orig_feat,
302  CScope& scope,
303  bool& do_set_5_partial,
304  bool& do_clear_5_partial) const
305 {
306     do_set_5_partial = false;
307     do_clear_5_partial = false;
308     const CSeq_loc& loc = orig_feat.GetLocation();
309 
310     switch (m_PartialPolicy5) {
311         case ePartialPolicy_eNoChange:
312             // do nothing
313             break;
314         case ePartialPolicy_eSet:
315             if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
316                 do_set_5_partial = true;
317             } else if (m_Extend5) {
318                 bool confident = false;
319                 bool at_5 = Is5AtEndOfSeq(loc, scope, confident);
320                 if (confident && !at_5) {
321                     do_set_5_partial = true;
322                 }
323             }
324             break;
325         case ePartialPolicy_eSetAtEnd:
326             if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
327                 bool confident = false;
328                 if (Is5AtEndOfSeq(loc, scope, confident) && confident) {
329                     do_set_5_partial = true;
330                 }
331             }
332             break;
333         case ePartialPolicy_eSetForBadEnd:
334             if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)
335                 && orig_feat.GetData().IsCdregion()) {
336                 string transl_prot;
337                 bool confident = true;
338                 try {
339                     CSeqTranslator::Translate(orig_feat, scope, transl_prot,
340                                                 false,   // do not include stop codons
341                                                 false);  // do not remove trailing X/B/Z
342 
343                 } catch ( const CException& ) {
344                     confident = false;
345                 }
346                 if (confident && !NStr::StartsWith(transl_prot, "M", NStr::eNocase)) {
347                     do_set_5_partial = true;
348                 }
349             }
350             break;
351         case ePartialPolicy_eSetForFrame:
352             if (!orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)
353                 && orig_feat.GetData().IsCdregion()
354                 && orig_feat.GetData().GetCdregion().IsSetFrame()
355                 && orig_feat.GetData().GetCdregion().GetFrame() != CCdregion::eFrame_not_set
356                 && orig_feat.GetData().GetCdregion().GetFrame() != CCdregion::eFrame_one) {
357                 do_set_5_partial = true;
358             }
359             break;
360         case ePartialPolicy_eClear:
361             if (orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
362                 do_clear_5_partial = true;
363             }
364             break;
365         case ePartialPolicy_eClearNotAtEnd:
366             if (orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)) {
367                 bool confident = false;
368                 if (!Is5AtEndOfSeq(loc, scope, confident) && confident) {
369                     do_clear_5_partial = true;
370                 }
371             }
372             break;
373         case ePartialPolicy_eClearForGoodEnd:
374             if (orig_feat.GetLocation().IsPartialStart(eExtreme_Biological)
375                 && orig_feat.GetData().IsCdregion()
376                 && (!orig_feat.GetData().GetCdregion().IsSetFrame() ||
377                     orig_feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_not_set ||
378                     orig_feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_one)) {
379                 string transl_prot;
380                 bool confident = true;
381                 try {
382                     CSeqTranslator::Translate(orig_feat, scope, transl_prot,
383                                                 false,   // do not include stop codons
384                                                 false);  // do not remove trailing X/B/Z
385 
386                 } catch ( const CException& ) {
387                     confident = false;
388                 }
389                 if (confident && NStr::StartsWith(transl_prot, "M", NStr::eNocase)) {
390                     do_clear_5_partial = true;
391                 }
392             }
393             break;
394     }
395     return do_set_5_partial || do_clear_5_partial;
396 }
397 
398 
Interpret3Policy(const CSeq_feat & orig_feat,CScope & scope,bool & do_set_3_partial,bool & do_clear_3_partial) const399 bool CLocationEditPolicy::Interpret3Policy
400 (const CSeq_feat& orig_feat,
401  CScope& scope,
402  bool& do_set_3_partial,
403  bool& do_clear_3_partial) const
404 {
405     do_set_3_partial = false;
406     do_clear_3_partial = false;
407     const CSeq_loc& loc = orig_feat.GetLocation();
408 
409     switch (m_PartialPolicy3) {
410         case ePartialPolicy_eNoChange:
411             // do nothing
412             break;
413         case ePartialPolicy_eSet:
414             if (!loc.IsPartialStop(eExtreme_Biological)) {
415                 do_set_3_partial = true;
416             } else if (m_Extend3) {
417                 bool confident = false;
418                 bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
419                 if (confident && !at_3) {
420                     do_set_3_partial = true;
421                 }
422             }
423             break;
424         case ePartialPolicy_eSetAtEnd:
425             if (!loc.IsPartialStop(eExtreme_Biological)) {
426                 bool confident = false;
427                 bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
428                 if (at_3 && confident) {
429                     do_set_3_partial = true;
430                 }
431             }
432             break;
433         case ePartialPolicy_eSetForBadEnd:
434             if (!loc.IsPartialStop(eExtreme_Biological)
435                 && orig_feat.GetData().IsCdregion()) {
436                 string transl_prot;
437                 bool confident = true;
438                 try {
439                     CSeqTranslator::Translate(orig_feat, scope, transl_prot,
440                                                 true,   // include stop codons
441                                                 false);  // do not remove trailing X/B/Z
442 
443                 } catch ( const CException& ) {
444                     confident = false;
445                 }
446                 if (confident && !NStr::EndsWith(transl_prot, "*", NStr::eNocase)) {
447                     do_set_3_partial = true;
448                 }
449             }
450             break;
451         case ePartialPolicy_eSetForFrame:
452             // not allowed for 3' end
453             break;
454         case ePartialPolicy_eClear:
455             if (loc.IsPartialStop(eExtreme_Biological)) {
456                 do_clear_3_partial = true;
457             }
458             break;
459         case ePartialPolicy_eClearNotAtEnd:
460             if (loc.IsPartialStop(eExtreme_Biological)) {
461                 bool confident = false;
462                 bool at_3 = Is3AtEndOfSeq(loc, scope, confident);
463                 if (!at_3 && confident) {
464                     do_clear_3_partial = true;
465                 }
466             }
467             break;
468         case ePartialPolicy_eClearForGoodEnd:
469             if (loc.IsPartialStop(eExtreme_Biological)
470                 && orig_feat.GetData().IsCdregion()) {
471                 string transl_prot;
472                 bool confident = true;
473                 try {
474                     CSeqTranslator::Translate(orig_feat, scope, transl_prot,
475                                                 true,   // include stop codons
476                                                 false);  // do not remove trailing X/B/Z
477 
478                 } catch ( const CException& ) {
479                     confident = false;
480                 }
481                 if (confident && NStr::EndsWith(transl_prot, "*", NStr::eNocase)) {
482                     do_clear_3_partial = true;
483                 }
484             }
485             break;
486     }
487     return do_set_3_partial || do_clear_3_partial;
488 }
489 
490 
SeqLocExtend5(const CSeq_loc & loc,TSeqPos pos,CScope * scope)491 CRef<CSeq_loc> SeqLocExtend5(const CSeq_loc& loc, TSeqPos pos, CScope* scope)
492 {
493     CSeq_loc_CI first_l(loc);
494     CConstRef<CSeq_loc> first_loc = first_l.GetRangeAsSeq_loc();
495 
496     TSeqPos loc_start = first_loc->GetStart(eExtreme_Biological);
497     bool partial_start = first_loc->IsPartialStart(eExtreme_Biological);
498     ENa_strand strand = first_loc->IsSetStrand() ? first_loc->GetStrand() : eNa_strand_plus;
499     CRef<CSeq_loc> new_loc(NULL);
500 
501     CRef<CSeq_id> id(new CSeq_id());
502     id->Assign(first_l.GetSeq_id());
503 
504     if (pos < loc_start && strand != eNa_strand_minus) {
505         CRef<CSeq_loc> add(new CSeq_loc(*id, pos, loc_start - 1, strand));
506         add->SetPartialStart(partial_start, eExtreme_Positional);
507         new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
508     } else if (pos > loc_start && strand == eNa_strand_minus) {
509         CRef<CSeq_loc> add(new CSeq_loc(*id, loc_start + 1, pos, strand));
510         add->SetPartialStop(partial_start, eExtreme_Positional);
511         new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
512     }
513     return new_loc;
514 }
515 
516 
SeqLocExtend3(const CSeq_loc & loc,TSeqPos pos,CScope * scope)517 CRef<CSeq_loc> SeqLocExtend3(const CSeq_loc& loc, TSeqPos pos, CScope* scope)
518 {
519     CSeq_loc_CI last_l(loc);
520     size_t num_intervals = last_l.GetSize();
521     last_l.SetPos(num_intervals - 1);
522     CConstRef<CSeq_loc> last_loc = last_l.GetRangeAsSeq_loc();
523 
524     TSeqPos loc_stop = last_loc->GetStop(eExtreme_Biological);
525     bool partial_stop = last_loc->IsPartialStop(eExtreme_Biological);
526     ENa_strand strand = last_loc->IsSetStrand() ? last_loc->GetStrand() : eNa_strand_plus;
527     CRef<CSeq_loc> new_loc(NULL);
528 
529     CRef<CSeq_id> id(new CSeq_id());
530     id->Assign(last_l.GetSeq_id());
531 
532     if (pos > loc_stop && strand != eNa_strand_minus) {
533         CRef<CSeq_loc> add(new CSeq_loc(*id, loc_stop + 1, pos, strand));
534         add->SetPartialStop(partial_stop, eExtreme_Positional);
535         new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
536     } else if (pos < loc_stop && strand == eNa_strand_minus) {
537         CRef<CSeq_loc> add(new CSeq_loc(*id, pos, loc_stop - 1, strand));
538         add->SetPartialStart(partial_stop, eExtreme_Positional);
539         new_loc = sequence::Seq_loc_Add(loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
540     }
541     return new_loc;
542 }
543 
544 
545 
SeqLocExtend(const CSeq_loc & loc,size_t pos,CScope * scope)546 CRef<CSeq_loc> SeqLocExtend(const CSeq_loc& loc, size_t pos, CScope* scope)
547 {
548     TSeqPos loc_start = loc.GetStart(eExtreme_Positional);
549     TSeqPos loc_stop = loc.GetStop(eExtreme_Positional);
550     bool partial_start = loc.IsPartialStart(eExtreme_Positional);
551     bool partial_stop = loc.IsPartialStop(eExtreme_Positional);
552     ENa_strand strand = loc.GetStrand();
553     CRef<CSeq_loc> new_loc(NULL);
554 
555     if (pos < loc_start) {
556         CRef<CSeq_id> id(new CSeq_id());
557         id->Assign(*(loc.GetId()));
558         CRef<CSeq_loc> add(
559             new CSeq_loc(*id, static_cast<TSeqPos>(pos), loc_start - 1, strand));
560         add->SetPartialStart(partial_start, eExtreme_Positional);
561         new_loc = sequence::Seq_loc_Add(
562             loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
563     } else if (pos > loc_stop) {
564         CRef<CSeq_id> id(new CSeq_id());
565         id->Assign(*(loc.GetId()));
566         CRef<CSeq_loc> add(new CSeq_loc(
567             *id, loc_stop + 1, static_cast<TSeqPos>(pos), strand));
568         add->SetPartialStop(partial_stop, eExtreme_Positional);
569         new_loc = sequence::Seq_loc_Add(
570             loc, *add, CSeq_loc::fSort | CSeq_loc::fMerge_AbuttingOnly, scope);
571     }
572     return new_loc;
573 }
574 
575 
ApplyPolicyToFeature(CSeq_feat & feat,CScope & scope) const576 bool CLocationEditPolicy::ApplyPolicyToFeature(CSeq_feat& feat, CScope& scope) const
577 {
578     if (m_PartialPolicy5 == ePartialPolicy_eNoChange
579         && m_PartialPolicy3 == ePartialPolicy_eNoChange
580         && m_MergePolicy == eMergePolicy_NoChange) {
581         return false;
582     }
583 
584     bool any_change = false;
585 
586     // make changes to 5' end
587     bool do_set_5_partial = false;
588     bool do_clear_5_partial = false;
589     any_change |= Interpret5Policy(feat, scope, do_set_5_partial, do_clear_5_partial);
590     if (do_set_5_partial) {
591         feat.SetLocation().SetPartialStart(true, eExtreme_Biological);
592         if (m_Extend5) {
593             // extend end
594             Extend5(feat, scope);
595         }
596     } else if (do_clear_5_partial) {
597         feat.SetLocation().SetPartialStart(false, eExtreme_Biological);
598     }
599 
600     // make changes to 3' end
601     bool do_set_3_partial = false;
602     bool do_clear_3_partial = false;
603     any_change |= Interpret3Policy(feat, scope, do_set_3_partial, do_clear_3_partial);
604     if (do_set_3_partial) {
605         feat.SetLocation().SetPartialStop(true, eExtreme_Biological);
606         if (m_Extend3) {
607             // extend end
608             Extend3(feat, scope);
609         }
610     } else if (do_clear_3_partial) {
611         feat.SetLocation().SetPartialStop(false, eExtreme_Biological);
612     }
613 
614     // make merge changes
615     switch (m_MergePolicy) {
616         case CLocationEditPolicy::eMergePolicy_Join:
617             {
618                 // remove NULLS between, if present
619                 bool changed = false;
620                 CRef<CSeq_loc> new_loc = ConvertToJoin(feat.GetLocation(), changed);
621                 if (changed) {
622                     feat.SetLocation().Assign(*new_loc);
623                     any_change = true;
624                 }
625             }
626             break;
627         case CLocationEditPolicy::eMergePolicy_Order:
628             {
629                 // add NULLS between if not present
630                 bool changed = false;
631                 CRef<CSeq_loc> new_loc = ConvertToOrder(feat.GetLocation(), changed);
632                 if (changed) {
633                     feat.SetLocation().Assign(*new_loc);
634                     any_change = true;
635                 }
636             }
637             break;
638         case CLocationEditPolicy::eMergePolicy_SingleInterval:
639             {
640                 CRef<CSeq_loc> new_loc = sequence::Seq_loc_Merge(feat.GetLocation(), CSeq_loc::fMerge_SingleRange, &scope);
641                 if (sequence::Compare(*new_loc, feat.GetLocation(), &scope, sequence::fCompareOverlapping) != sequence::eSame) {
642                     feat.SetLocation().Assign(*new_loc);
643                     any_change = true;
644                 }
645             }
646             break;
647         case CLocationEditPolicy::eMergePolicy_NoChange:
648             break;
649     }
650 
651     any_change |= feature::AdjustFeaturePartialFlagForLocation(feat);
652 
653     return any_change;
654 }
655 
656 
HasNulls(const CSeq_loc & orig_loc)657 bool CLocationEditPolicy::HasNulls(const CSeq_loc& orig_loc)
658 {
659     if (orig_loc.Which() == CSeq_loc::e_Mix) {
660         ITERATE(CSeq_loc_mix::Tdata, it, orig_loc.GetMix().Get()) {
661             if ((*it)->IsNull()) {
662                 return true;
663             }
664         }
665     }
666     return false;
667 }
668 
669 
ConvertToJoin(const CSeq_loc & orig_loc,bool & changed)670 CRef<CSeq_loc> CLocationEditPolicy::ConvertToJoin(const CSeq_loc& orig_loc, bool &changed)
671 {
672     changed = false;
673     CRef<CSeq_loc> new_loc(new CSeq_loc());
674     if (!HasNulls(orig_loc)) {
675         new_loc->Assign(orig_loc);
676     } else {
677         CSeq_loc_CI ci(orig_loc);
678         new_loc->SetMix();
679         while (ci) {
680             CConstRef<CSeq_loc> subloc = ci.GetRangeAsSeq_loc();
681             if (subloc && !subloc->IsNull()) {
682                 CRef<CSeq_loc> add(new CSeq_loc());
683                 add->Assign(*subloc);
684                 new_loc->SetMix().Set().push_back(add);
685             }
686             ++ci;
687         }
688         changed = true;
689     }
690     return new_loc;
691 }
692 
693 
ConvertToOrder(const CSeq_loc & orig_loc,bool & changed)694 CRef<CSeq_loc> CLocationEditPolicy::ConvertToOrder(const CSeq_loc& orig_loc, bool &changed)
695 {
696     changed = false;
697     CRef<CSeq_loc> new_loc(new CSeq_loc());
698     if (HasNulls(orig_loc)) {
699         new_loc->Assign(orig_loc);
700         return new_loc;
701     }
702     switch(orig_loc.Which()) {
703         case CSeq_loc::e_not_set:
704         case CSeq_loc::e_Null:
705         case CSeq_loc::e_Empty:
706         case CSeq_loc::e_Whole:
707         case CSeq_loc::e_Int:
708         case CSeq_loc::e_Pnt:
709         case CSeq_loc::e_Bond:
710         case CSeq_loc::e_Feat:
711         case CSeq_loc::e_Equiv:
712             new_loc->Assign(orig_loc);
713             break;
714         case CSeq_loc::e_Packed_int:
715         case CSeq_loc::e_Packed_pnt:
716         case CSeq_loc::e_Mix:
717             {
718                 new_loc->SetMix();
719                 CSeq_loc_CI ci(orig_loc);
720                 CRef<CSeq_loc> first(new CSeq_loc());
721                 first->Assign(*(ci.GetRangeAsSeq_loc()));
722                 new_loc->SetMix().Set().push_back(first);
723                 ++ci;
724                 while (ci) {
725                     CRef<CSeq_loc> null_loc(new CSeq_loc());
726                     null_loc->SetNull();
727                     new_loc->SetMix().Set().push_back(null_loc);
728                     CRef<CSeq_loc> add(new CSeq_loc());
729                     add->Assign(*(ci.GetRangeAsSeq_loc()));
730                     new_loc->SetMix().Set().push_back(add);
731                     ++ci;
732                 }
733                 changed = true;
734             }
735             break;
736 
737     }
738     return new_loc;
739 }
740 
AdjustFrameFor5Extension(CSeq_feat & feat,size_t diff)741 void AdjustFrameFor5Extension(CSeq_feat& feat, size_t diff)
742 {
743     // adjust frame to maintain consistency
744     if (diff % 3 > 0 && feat.GetData().IsCdregion()) {
745         int orig_frame = 1;
746         if (feat.GetData().GetCdregion().IsSetFrame()) {
747             if (feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_two) {
748                 orig_frame = 2;
749             } else if (feat.GetData().GetCdregion().GetFrame() == CCdregion::eFrame_three) {
750                 orig_frame = 3;
751             }
752         }
753         CCdregion::EFrame new_frame = CCdregion::eFrame_not_set;
754         switch ((orig_frame + diff % 3) % 3) {
755             case 1:
756                 new_frame = CCdregion::eFrame_not_set;
757                 break;
758             case 2:
759                 new_frame = CCdregion::eFrame_two;
760                 break;
761             case 0:
762                 new_frame = CCdregion::eFrame_three;
763                 break;
764         }
765         feat.SetData().SetCdregion().SetFrame(new_frame);
766     }
767 }
768 
769 
Extend5(CSeq_feat & feat,CScope & scope)770 bool CLocationEditPolicy::Extend5(CSeq_feat& feat, CScope& scope)
771 {
772     bool extend = false;
773 
774     bool confident = false;
775     if (!Is5AtEndOfSeq(feat.GetLocation(), scope, confident) && confident) {
776         int diff = 0;
777         CSeq_loc_CI first_l(feat.GetLocation());
778         if (first_l.IsSetStrand() && first_l.GetStrand() == eNa_strand_minus) {
779             CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
780             diff = bsh.GetInst().GetLength() - first_l.GetRange().GetTo() - 1;
781             CRef<CSeq_loc> new_loc = SeqLocExtend5(feat.GetLocation(), bsh.GetInst_Length() - 1, &scope);
782             if (new_loc) {
783                 feat.SetLocation().Assign(*new_loc);
784                 extend = true;
785             } else {
786                 diff = 0;
787             }
788         } else {
789             diff = first_l.GetRange().GetFrom();
790             CRef<CSeq_loc> new_loc = SeqLocExtend5(feat.GetLocation(), 0, &scope);
791             if (new_loc) {
792                 feat.SetLocation().Assign(*new_loc);
793                 extend = true;
794             } else {
795                 diff = 0;
796             }
797         }
798 
799         // adjust frame to maintain consistency
800         AdjustFrameFor5Extension(feat, diff);
801     }
802     return extend;
803 }
804 
Extend3(CSeq_feat & feat,CScope & scope)805 bool CLocationEditPolicy::Extend3(CSeq_feat& feat, CScope& scope)
806 {
807     bool extend = false;
808 
809     bool confident = false;
810     if (!Is3AtEndOfSeq(feat.GetLocation(), scope, confident) && confident) {
811         CSeq_loc_CI last_l(feat.GetLocation());
812         size_t num_intervals = last_l.GetSize();
813         last_l.SetPos(num_intervals - 1);
814 
815         ENa_strand strand = last_l.GetStrand();
816         if (strand == eNa_strand_minus) {
817             CRef<CSeq_loc> new_loc = SeqLocExtend3(feat.GetLocation(), 0, &scope);
818             if (new_loc) {
819                 feat.SetLocation().Assign(*new_loc);
820                 extend = true;
821             }
822         } else {
823             CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
824             CRef<CSeq_loc> new_loc = SeqLocExtend3(feat.GetLocation(), bsh.GetInst_Length() - 1, &scope);
825             if (new_loc) {
826                 feat.SetLocation().Assign(*new_loc);
827                 extend = true;
828             }
829         }
830     }
831     return extend;
832 }
833 
834 
ApplyPolicyToFeature(const CLocationEditPolicy & policy,const CSeq_feat & orig_feat,CScope & scope,bool adjust_gene,bool retranslate_cds)835 bool ApplyPolicyToFeature(const CLocationEditPolicy& policy, const CSeq_feat& orig_feat,
836     CScope& scope, bool adjust_gene, bool retranslate_cds)
837 {
838     CRef<CSeq_feat> new_feat(new CSeq_feat());
839     new_feat->Assign(orig_feat);
840 
841     bool any_change = policy.ApplyPolicyToFeature(*new_feat, scope);
842     if (any_change) {
843         CSeq_feat_Handle fh = scope.GetSeq_featHandle(orig_feat);
844         // This is necessary, to make sure that we are in "editing mode"
845         const CSeq_annot_Handle& annot_handle = fh.GetAnnot();
846         CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
847         CSeq_feat_EditHandle feh(fh);
848 
849         // adjust gene feature
850         if (adjust_gene) {
851             CConstRef<CSeq_feat> old_gene = sequence::GetOverlappingGene(orig_feat.GetLocation(), scope);
852             if (old_gene) {
853                 TSeqPos feat_start = new_feat->GetLocation().GetStart(eExtreme_Biological);
854                 TSeqPos feat_stop = new_feat->GetLocation().GetStop(eExtreme_Biological);
855                 CRef<CSeq_feat> new_gene(new CSeq_feat());
856                 new_gene->Assign(*old_gene);
857                 bool gene_change = false;
858                 // adjust ends of gene to match ends of feature
859                 CRef<CSeq_loc> new_loc = SeqLocExtend5(new_gene->GetLocation(), feat_start, &scope);
860                 if (new_loc) {
861                     new_gene->SetLocation().Assign(*new_loc);
862                     gene_change = true;
863                 }
864                 new_loc = SeqLocExtend3(new_gene->GetLocation(), feat_stop, &scope);
865                 if (new_loc) {
866                     new_gene->SetLocation().Assign(*new_loc);
867                     gene_change = true;
868                 }
869                 if (gene_change) {
870                     CSeq_feat_Handle gh = scope.GetSeq_featHandle(*old_gene);
871                     // This is necessary, to make sure that we are in "editing mode"
872                     const CSeq_annot_Handle& ah = gh.GetAnnot();
873                     CSeq_entry_EditHandle egh = ah.GetParentEntry().GetEditHandle();
874                     CSeq_feat_EditHandle geh(gh);
875                     geh.Replace(*new_gene);
876                 }
877             }
878         }
879         feh.Replace(*new_feat);
880 
881         // retranslate or resynch if coding region
882         if (new_feat->IsSetProduct() && new_feat->GetData().IsCdregion()) {
883             if (!retranslate_cds || !feature::RetranslateCDS(*new_feat, scope)) {
884                 CSeq_loc_CI l(new_feat->GetLocation());
885                 feature::AdjustForCDSPartials(*new_feat, scope);
886             }
887         }
888     }
889     return any_change;
890 }
891 
892 
ReverseComplementLocation(CSeq_interval & interval,CScope & scope)893 void ReverseComplementLocation(CSeq_interval& interval, CScope& scope)
894 {
895     // flip strand
896     interval.FlipStrand();
897     if (interval.IsSetId()) {
898         CBioseq_Handle bsh = scope.GetBioseqHandle(interval.GetId());
899         if (bsh) {
900             if (interval.IsSetFrom()) {
901                 interval.SetFrom(bsh.GetInst_Length() - interval.GetFrom() - 1);
902             }
903             if (interval.IsSetTo()) {
904                 interval.SetTo(bsh.GetInst_Length() - interval.GetTo() - 1);
905             }
906 
907             // reverse from and to
908             if (interval.IsSetFrom()) {
909                 TSeqPos from = interval.GetFrom();
910                 if (interval.IsSetTo()) {
911                     interval.SetFrom(interval.GetTo());
912                 } else {
913                     interval.ResetFrom();
914                 }
915                 interval.SetTo(from);
916             } else if (interval.IsSetTo()) {
917                 interval.SetFrom(interval.GetTo());
918                 interval.ResetTo();
919             }
920 
921             if (interval.IsSetFuzz_from()) {
922                 interval.SetFuzz_from().Negate(bsh.GetInst_Length());
923             }
924             if (interval.IsSetFuzz_to()) {
925                 interval.SetFuzz_to().Negate(bsh.GetInst_Length());
926             }
927 
928             // swap fuzz
929             if (interval.IsSetFuzz_from()) {
930                 CRef<CInt_fuzz> swap(new CInt_fuzz());
931                 swap->Assign(interval.GetFuzz_from());
932                 if (interval.IsSetFuzz_to()) {
933                     interval.SetFuzz_from().Assign(interval.GetFuzz_to());
934                 } else {
935                     interval.ResetFuzz_from();
936                 }
937                 interval.SetFuzz_to(*swap);
938             } else if (interval.IsSetFuzz_to()) {
939                 interval.SetFuzz_from().Assign(interval.GetFuzz_to());
940                 interval.ResetFuzz_to();
941             }
942         }
943     }
944 }
945 
946 
ReverseComplementLocation(CSeq_point & pnt,CScope & scope)947 void ReverseComplementLocation(CSeq_point& pnt, CScope& scope)
948 {
949     // flip strand
950     pnt.FlipStrand();
951     if (pnt.IsSetId()) {
952         CBioseq_Handle bsh = scope.GetBioseqHandle(pnt.GetId());
953         if (bsh) {
954             if (pnt.IsSetPoint()) {
955                 pnt.SetPoint(bsh.GetInst_Length() - pnt.GetPoint() - 1);
956             }
957             if (pnt.IsSetFuzz()) {
958                 pnt.SetFuzz().Negate(bsh.GetInst_Length());
959             }
960         }
961     }
962 }
963 
964 
ReverseComplementLocation(CPacked_seqpnt & ppnt,CScope & scope)965 void ReverseComplementLocation(CPacked_seqpnt& ppnt, CScope& scope)
966 {
967     // flip strand
968     ppnt.FlipStrand();
969     CBioseq_Handle bsh = scope.GetBioseqHandle(ppnt.GetId());
970     if (bsh) {
971         // flip fuzz
972         if (ppnt.IsSetFuzz()) {
973             ppnt.SetFuzz().Negate(bsh.GetInst_Length());
974         }
975         //complement points
976         if (ppnt.IsSetPoints()) {
977             vector<int> new_pnts;
978             ITERATE(CPacked_seqpnt::TPoints, it, ppnt.SetPoints()) {
979                 new_pnts.push_back(bsh.GetInst_Length() - *it - 1);
980             }
981             ppnt.ResetPoints();
982             ITERATE(vector<int>, it, new_pnts) {
983                 ppnt.SetPoints().push_back(*it);
984             }
985         }
986     }
987 
988 }
989 
990 
ReverseComplementLocation(CSeq_loc & loc,CScope & scope)991 void ReverseComplementLocation(CSeq_loc& loc, CScope& scope)
992 {
993     switch (loc.Which()) {
994         case CSeq_loc::e_Empty:
995         case CSeq_loc::e_Whole:
996         case CSeq_loc::e_Feat:
997         case CSeq_loc::e_Null:
998         case CSeq_loc::e_not_set:
999             // do nothing
1000             break;
1001         case CSeq_loc::e_Int:
1002             ReverseComplementLocation(loc.SetInt(), scope);
1003             loc.InvalidateCache();
1004             break;
1005         case CSeq_loc::e_Pnt:
1006             ReverseComplementLocation(loc.SetPnt(), scope);
1007             loc.InvalidateCache();
1008             break;
1009         case CSeq_loc::e_Bond:
1010             if (loc.GetBond().IsSetA()) {
1011                 ReverseComplementLocation(loc.SetBond().SetA(), scope);
1012             }
1013             if (loc.GetBond().IsSetB()) {
1014                 ReverseComplementLocation(loc.SetBond().SetB(), scope);
1015             }
1016             loc.InvalidateCache();
1017             break;
1018         case CSeq_loc::e_Mix:
1019             // revcomp individual elements
1020             NON_CONST_ITERATE(CSeq_loc_mix::Tdata, it, loc.SetMix().Set()) {
1021                 ReverseComplementLocation(**it, scope);
1022             }
1023             loc.InvalidateCache();
1024             break;
1025         case CSeq_loc::e_Equiv:
1026             // revcomp individual elements
1027             NON_CONST_ITERATE(CSeq_loc_equiv::Tdata, it, loc.SetEquiv().Set()) {
1028                 ReverseComplementLocation(**it, scope);
1029             }
1030             loc.InvalidateCache();
1031             break;
1032         case CSeq_loc::e_Packed_int:
1033             // revcomp individual elements
1034             NON_CONST_ITERATE(CPacked_seqint::Tdata, it, loc.SetPacked_int().Set()) {
1035                 ReverseComplementLocation(**it, scope);
1036             }
1037             loc.InvalidateCache();
1038             break;
1039         case CSeq_loc::e_Packed_pnt:
1040             ReverseComplementLocation(loc.SetPacked_pnt(), scope);
1041             loc.InvalidateCache();
1042             break;
1043 
1044     }
1045 }
1046 
1047 
ReverseComplementCDRegion(CCdregion & cdr,CScope & scope)1048 void ReverseComplementCDRegion(CCdregion& cdr, CScope& scope)
1049 {
1050     if (cdr.IsSetCode_break()) {
1051         NON_CONST_ITERATE(CCdregion::TCode_break, it, cdr.SetCode_break()) {
1052             if ((*it)->IsSetLoc()) {
1053                 ReverseComplementLocation((*it)->SetLoc(), scope);
1054             }
1055         }
1056     }
1057 }
1058 
1059 
ReverseComplementTrna(CTrna_ext & trna,CScope & scope)1060 void ReverseComplementTrna(CTrna_ext& trna, CScope& scope)
1061 {
1062     if (trna.IsSetAnticodon()) {
1063         ReverseComplementLocation(trna.SetAnticodon(), scope);
1064     }
1065 }
1066 
1067 
ReverseComplementFeature(CSeq_feat & feat,CScope & scope)1068 void ReverseComplementFeature(CSeq_feat& feat, CScope& scope)
1069 {
1070     if (feat.IsSetLocation()) {
1071         ReverseComplementLocation(feat.SetLocation(), scope);
1072     }
1073     if (feat.IsSetData()) {
1074         switch (feat.GetData().GetSubtype()) {
1075             case CSeqFeatData::eSubtype_cdregion:
1076                 ReverseComplementCDRegion(feat.SetData().SetCdregion(), scope);
1077                 break;
1078             case CSeqFeatData::eSubtype_tRNA:
1079                 ReverseComplementTrna(feat.SetData().SetRna().SetExt().SetTRNA(), scope);
1080                 break;
1081             default:
1082                 break;
1083         }
1084     }
1085 }
1086 
1087 
OkToAdjustLoc(const CSeq_interval & interval,const CSeq_id * seqid)1088 bool OkToAdjustLoc(const CSeq_interval& interval, const CSeq_id* seqid)
1089 {
1090     bool rval = true;
1091     if (seqid) {
1092         if (!interval.IsSetId() || interval.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1093             rval = false;
1094         }
1095     }
1096     return rval;
1097 }
1098 
1099 
OkToAdjustLoc(const CSeq_point & pnt,const CSeq_id * seqid)1100 bool OkToAdjustLoc(const CSeq_point& pnt, const CSeq_id* seqid)
1101 {
1102     bool rval = true;
1103     if (seqid) {
1104         if (!pnt.IsSetId() || pnt.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1105             rval = false;
1106         }
1107     }
1108     return rval;
1109 }
1110 
1111 
OkToAdjustLoc(const CPacked_seqpnt & pack,const CSeq_id * seqid)1112 bool OkToAdjustLoc(const CPacked_seqpnt& pack, const CSeq_id* seqid)
1113 {
1114     bool rval = true;
1115     if (seqid) {
1116         if (!pack.IsSetId() || pack.GetId().Compare(*seqid) != CSeq_id::e_YES) {
1117             rval = false;
1118         }
1119     }
1120     return rval;
1121 }
1122 
1123 
NormalizeLoc(CSeq_loc & loc)1124 void NormalizeLoc(CSeq_loc& loc)
1125 {
1126     switch (loc.Which()) {
1127         case CSeq_loc::e_Equiv:
1128             {{
1129                 CSeq_loc::TEquiv::Tdata::iterator it = loc.SetEquiv().Set().begin();
1130                 while (it != loc.SetEquiv().Set().end()) {
1131                     NormalizeLoc(**it);
1132                     if (loc.Which() == CSeq_loc::e_not_set) {
1133                         it = loc.SetEquiv().Set().erase(it);
1134                     } else {
1135                         ++it;
1136                     }
1137                 }
1138 
1139                 // if only one, make regular loc
1140                 if (loc.GetEquiv().Get().size() == 1) {
1141                     CRef<CSeq_loc> sub(new CSeq_loc());
1142                     sub->Assign(*(loc.GetEquiv().Get().front()));
1143                     loc.Assign(*sub);
1144                 } else if (loc.GetEquiv().Get().size() == 0) {
1145                     // no sub intervals, reset
1146                     loc.Reset();
1147                 }
1148             }}
1149             break;
1150         case CSeq_loc::e_Mix:
1151             {{
1152                 CSeq_loc::TMix::Tdata::iterator it = loc.SetMix().Set().begin();
1153                 while (it != loc.SetMix().Set().end()) {
1154                     NormalizeLoc(**it);
1155                     if (loc.Which() == CSeq_loc::e_not_set) {
1156                         it = loc.SetMix().Set().erase(it);
1157                     } else {
1158                         ++it;
1159                     }
1160                 }
1161 
1162                 // if only one, make regular loc
1163                 if (loc.GetMix().Get().size() == 1) {
1164                     CRef<CSeq_loc> sub(new CSeq_loc());
1165                     sub->Assign(*(loc.GetMix().Get().front()));
1166                     loc.Assign(*sub);
1167                 } else if (loc.GetMix().Get().size() == 0) {
1168                     // no sub intervals, reset
1169                     loc.Reset();
1170                 }
1171             }}
1172             break;
1173         case CSeq_loc::e_Packed_int:
1174             if (loc.GetPacked_int().Get().size() == 0) {
1175                 loc.Reset();
1176             } else if (loc.GetPacked_int().Get().size() == 1) {
1177                 CRef<CSeq_interval> sub(new CSeq_interval());
1178                 sub->Assign(*(loc.GetPacked_int().Get().front()));
1179                 loc.SetInt().Assign(*sub);
1180             }
1181             break;
1182         case CSeq_loc::e_Packed_pnt:
1183             if (loc.GetPacked_pnt().GetPoints().size() == 0) {
1184                 loc.Reset();
1185             } else if (loc.GetPacked_pnt().GetPoints().size() == 1) {
1186                 CRef<CSeq_point> sub(new CSeq_point());
1187                 if (loc.GetPacked_pnt().IsSetStrand()) {
1188                     sub->SetStrand(loc.GetPacked_pnt().GetStrand());
1189                 }
1190                 if (loc.GetPacked_pnt().IsSetId()) {
1191                     sub->SetId().Assign(loc.GetPacked_pnt().GetId());
1192                 }
1193                 if (loc.GetPacked_pnt().IsSetFuzz()) {
1194                     sub->SetFuzz().Assign(loc.GetPacked_pnt().GetFuzz());
1195                 }
1196                 sub->SetPoint(loc.GetPacked_pnt().GetPoints()[0]);
1197                 loc.SetPnt().Assign(*sub);
1198             }
1199             break;
1200         default:
1201             // do nothing
1202             break;
1203     }
1204 }
1205 
1206 
SeqLocAdjustForTrim(CSeq_interval & interval,TSeqPos cut_from,TSeqPos cut_to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1207 void SeqLocAdjustForTrim(CSeq_interval& interval,
1208                          TSeqPos cut_from, TSeqPos cut_to,
1209                          const CSeq_id* seqid,
1210                          bool& bCompleteCut,
1211                          TSeqPos& trim5,
1212                          bool& bAdjusted)
1213 {
1214     if (!OkToAdjustLoc(interval, seqid)) {
1215         return;
1216     }
1217 
1218     // These are required fields
1219     if ( !(interval.CanGetFrom() && interval.CanGetTo()) )
1220     {
1221         return;
1222     }
1223 
1224     // Feature location
1225     TSeqPos feat_from = interval.GetFrom();
1226     TSeqPos feat_to = interval.GetTo();
1227 
1228     // Size of the cut
1229     TSeqPos cut_size = cut_to - cut_from + 1;
1230 
1231     // Case 1: feature is located completely before the cut
1232     if (feat_to < cut_from)
1233     {
1234         // Nothing needs to be done - cut does not affect feature
1235         return;
1236     }
1237 
1238     // Case 2: feature is completely within the cut
1239     if (feat_from >= cut_from && feat_to <= cut_to)
1240     {
1241         // Feature should be deleted
1242         bCompleteCut = true;
1243         trim5 += feat_from - feat_to + 1;
1244         return;
1245     }
1246 
1247     // Case 3: feature is completely past the cut
1248     if (feat_from > cut_to)
1249     {
1250         // Shift the feature by the cut_size
1251         feat_from -= cut_size;
1252         feat_to -= cut_size;
1253         interval.SetFrom(feat_from);
1254         interval.SetTo(feat_to);
1255         bAdjusted = true;
1256         return;
1257     }
1258 
1259     /***************************************************************************
1260      * Cases below are partial overlapping cases
1261     ***************************************************************************/
1262     // Case 4: Cut is completely inside the feature
1263     //         OR
1264     //         Cut is to the "left" side of the feature (i.e., feat_from is
1265     //         inside the cut)
1266     //         OR
1267     //         Cut is to the "right" side of the feature (i.e., feat_to is
1268     //         inside the cut)
1269     if (feat_to > cut_to) {
1270         // Left side cut or cut is completely inside feature
1271         feat_to -= cut_size;
1272     }
1273     else {
1274         // Right side cut
1275         if (interval.IsSetStrand() && interval.GetStrand() == eNa_strand_minus) {
1276             TSeqPos diff = cut_from - 1 - feat_to;
1277             trim5 += diff;
1278         }
1279         feat_to = cut_from - 1;
1280     }
1281 
1282     // Take care of the feat_from from the left side cut case
1283     if (feat_from >= cut_from) {
1284         if (!interval.IsSetStrand() || interval.GetStrand() != eNa_strand_minus) {
1285             TSeqPos diff = cut_to + 1 - feat_from;
1286             trim5 += diff;
1287         }
1288         feat_from = cut_to + 1;
1289         feat_from -= cut_size;
1290     }
1291 
1292     interval.SetFrom(feat_from);
1293     interval.SetTo(feat_to);
1294     bAdjusted = true;
1295 }
1296 
1297 
SeqLocAdjustForTrim(CPacked_seqint & packint,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1298 void SeqLocAdjustForTrim(CPacked_seqint& packint,
1299                 TSeqPos from, TSeqPos to,
1300                 const CSeq_id* seqid,
1301                 bool& bCompleteCut,
1302                 TSeqPos& trim5,
1303                 bool& bAdjusted)
1304 {
1305     if (packint.IsSet()) {
1306         bool from5 = true;
1307         // Process each interval in the list
1308         CPacked_seqint::Tdata::iterator it;
1309         for (it = packint.Set().begin();
1310                 it != packint.Set().end(); )
1311         {
1312             bool bDeleted = false;
1313             TSeqPos this_trim = 0;
1314             SeqLocAdjustForTrim(**it, from, to, seqid,
1315                                 bDeleted, this_trim, bAdjusted);
1316 
1317             if (from5) {
1318                 trim5 += this_trim;
1319             }
1320             // Should interval be deleted from list?
1321             if (bDeleted) {
1322                 it = packint.Set().erase(it);
1323             }
1324             else {
1325                 from5 = false;
1326                 ++it;
1327             }
1328         }
1329         if (packint.Get().empty()) {
1330             packint.Reset();
1331         }
1332     }
1333     if (!packint.IsSet()) {
1334         bCompleteCut = true;
1335     }
1336 }
1337 
1338 
SeqLocAdjustForTrim(CSeq_loc_mix & mix,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1339 void SeqLocAdjustForTrim(CSeq_loc_mix& mix,
1340                 TSeqPos from, TSeqPos to,
1341                 const CSeq_id* seqid,
1342                 bool& bCompleteCut,
1343                 TSeqPos& trim5,
1344                 bool& bAdjusted)
1345 {
1346     if (mix.IsSet()) {
1347         bool from5 = true;
1348         // Process each seqloc in the list
1349         CSeq_loc_mix::Tdata::iterator it;
1350         for (it = mix.Set().begin();
1351                 it != mix.Set().end(); )
1352         {
1353             bool bDeleted = false;
1354             TSeqPos this_trim = 0;
1355             SeqLocAdjustForTrim(**it, from, to, seqid, bDeleted, this_trim, bAdjusted);
1356 
1357             if (from5) {
1358                 trim5 += this_trim;
1359             }
1360             // Should seqloc be deleted from list?
1361             if (bDeleted) {
1362                 it = mix.Set().erase(it);
1363             }
1364             else {
1365                 from5 = false;
1366                 ++it;
1367             }
1368         }
1369     }
1370     if (!mix.IsSet() || mix.Set().empty()) {
1371         bCompleteCut = true;
1372     }
1373 }
1374 
1375 
SeqLocAdjustForTrim(CSeq_point & pnt,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1376 void SeqLocAdjustForTrim(CSeq_point& pnt,
1377                 TSeqPos from, TSeqPos to,
1378                 const CSeq_id* seqid,
1379                 bool& bCompleteCut,
1380                 TSeqPos& trim5,
1381                 bool& bAdjusted)
1382 {
1383     if (!OkToAdjustLoc(pnt, seqid)) {
1384         return;
1385     }
1386 
1387     if (to < pnt.GetPoint()) {
1388         auto diff = to - from + 1;
1389         pnt.SetPoint(pnt.GetPoint() - diff);
1390         bAdjusted = true;
1391     } else if (from < pnt.GetPoint()) {
1392         bCompleteCut = true;
1393         trim5 += 1;
1394     }
1395 }
1396 
1397 
SeqLocAdjustForTrim(CPacked_seqpnt & pack,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1398 void SeqLocAdjustForTrim(CPacked_seqpnt& pack,
1399                 TSeqPos from, TSeqPos to,
1400                 const CSeq_id* seqid,
1401                 bool& bCompleteCut, TSeqPos& trim5, bool& bAdjusted)
1402 {
1403     if (!OkToAdjustLoc(pack, seqid)) {
1404         return;
1405     }
1406 
1407     if (pack.IsSetPoints()) {
1408         bool from5 = true;
1409         auto it = pack.SetPoints().begin();
1410         while (it != pack.SetPoints().end()) {
1411             if (to < *it) {
1412                 auto diff = to - from + 1;
1413                 *it -= diff;
1414                 it++;
1415                 bAdjusted = true;
1416                 from5 = false;
1417             } else if (from < *it) {
1418                 it = pack.SetPoints().erase(it);
1419                 bAdjusted = true;
1420                 if (from5) {
1421                     trim5 += 1;
1422                 }
1423             } else {
1424                 it++;
1425                 from5 = false;
1426             }
1427         }
1428     }
1429     if (pack.SetPoints().empty()) {
1430         bCompleteCut = true;
1431     }
1432 }
1433 
1434 
SeqLocAdjustForTrim(CSeq_bond & bond,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1435 void SeqLocAdjustForTrim(CSeq_bond& bond,
1436                 TSeqPos from, TSeqPos to,
1437                 const CSeq_id* seqid,
1438                 bool& bCompleteCut,
1439                 TSeqPos& trim5,
1440                 bool& bAdjusted)
1441 {
1442     bool cutA = false, cutB = false;
1443     if (bond.IsSetA()) {
1444         SeqLocAdjustForTrim(bond.SetA(), from, to, seqid, cutA, trim5, bAdjusted);
1445     } else {
1446         cutA = true;
1447     }
1448 
1449     if (bond.IsSetB()) {
1450         SeqLocAdjustForTrim(bond.SetB(), from, to, seqid, cutB, trim5, bAdjusted);
1451     } else {
1452         cutB = true;
1453     }
1454     if (cutA && cutB) {
1455         bCompleteCut = true;
1456     }
1457 }
1458 
1459 
SeqLocAdjustForTrim(CSeq_loc_equiv & equiv,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1460 void SeqLocAdjustForTrim(CSeq_loc_equiv& equiv,
1461                 TSeqPos from, TSeqPos to,
1462                 const CSeq_id* seqid,
1463                 bool& bCompleteCut,
1464                 TSeqPos& trim5,
1465                 bool& bAdjusted)
1466 {
1467     TSeqPos max_trim5 = 0;
1468     CSeq_loc_equiv::Tdata::iterator it = equiv.Set().begin();
1469     while (it != equiv.Set().end()) {
1470         bool cut = false;
1471         TSeqPos this_trim5 = 0;
1472         SeqLocAdjustForTrim(**it, from, to, seqid, cut, this_trim5, bAdjusted);
1473         if (this_trim5 > max_trim5) {
1474             max_trim5 = this_trim5;
1475         }
1476         if (cut) {
1477             it = equiv.Set().erase(it);
1478         } else {
1479             it++;
1480         }
1481     }
1482     if (equiv.Set().empty()) {
1483         bCompleteCut = true;
1484     }
1485     trim5 = max_trim5;
1486 }
1487 
1488 
SeqLocAdjustForTrim(CSeq_loc & loc,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,TSeqPos & trim5,bool & bAdjusted)1489 void SeqLocAdjustForTrim(CSeq_loc& loc,
1490                 TSeqPos from, TSeqPos to,
1491                 const CSeq_id* seqid,
1492                 bool& bCompleteCut,
1493                 TSeqPos& trim5, bool& bAdjusted)
1494 {
1495     // Given a seqloc and a range, cut the seqloc
1496 
1497     switch(loc.Which())
1498     {
1499         // Single interval
1500         case CSeq_loc::e_Int:
1501             SeqLocAdjustForTrim(loc.SetInt(), from, to, seqid,
1502                                 bCompleteCut, trim5, bAdjusted);
1503             break;
1504 
1505         // Multiple intervals
1506         case CSeq_loc::e_Packed_int:
1507             SeqLocAdjustForTrim(loc.SetPacked_int(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1508             break;
1509 
1510         // Multiple seqlocs
1511         case CSeq_loc::e_Mix:
1512             SeqLocAdjustForTrim(loc.SetMix(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1513             break;
1514         case CSeq_loc::e_Pnt:
1515             SeqLocAdjustForTrim(loc.SetPnt(), from, to , seqid, bCompleteCut, trim5, bAdjusted);
1516             break;
1517         case CSeq_loc::e_Packed_pnt:
1518             SeqLocAdjustForTrim(loc.SetPacked_pnt(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1519             break;
1520         case CSeq_loc::e_Bond:
1521             SeqLocAdjustForTrim(loc.SetBond(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1522             break;
1523         case CSeq_loc::e_Equiv:
1524             SeqLocAdjustForTrim(loc.SetEquiv(), from, to, seqid, bCompleteCut, trim5, bAdjusted);
1525             break;
1526         case CSeq_loc::e_Empty:
1527         case CSeq_loc::e_Null:
1528         case CSeq_loc::e_not_set:
1529         case CSeq_loc::e_Whole:
1530         case CSeq_loc::e_Feat:
1531             // no adjustment needeed
1532             break;
1533     }
1534     if (!bCompleteCut) {
1535         NormalizeLoc(loc);
1536     }
1537 }
1538 
1539 
SeqLocAdjustForInsert(CSeq_interval & interval,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1540 void SeqLocAdjustForInsert(CSeq_interval& interval,
1541                          TSeqPos insert_from, TSeqPos insert_to,
1542                          const CSeq_id* seqid)
1543 {
1544     if (!OkToAdjustLoc(interval, seqid)) {
1545         return;
1546     }
1547 
1548     // These are required fields
1549     if ( !(interval.CanGetFrom() && interval.CanGetTo()) )
1550     {
1551         return;
1552     }
1553 
1554     // Feature location
1555     TSeqPos feat_from = interval.GetFrom();
1556     TSeqPos feat_to = interval.GetTo();
1557 
1558     // Size of the insert
1559     TSeqPos insert_size = insert_to - insert_from + 1;
1560 
1561     // Case 1: feature is located before the insert
1562     if (feat_to < insert_from)
1563     {
1564         // Nothing needs to be done - cut does not affect feature
1565         return;
1566     }
1567 
1568     // Case 2: feature is located after the insert
1569     if (feat_from > insert_from) {
1570         feat_from += insert_size;
1571         feat_to += insert_size;
1572         interval.SetFrom(feat_from);
1573         interval.SetTo(feat_to);
1574         return;
1575     }
1576 
1577     // Case 3: insert occurs within interval
1578     if (feat_from <= insert_from && feat_to >= insert_from)
1579     {
1580         feat_to += insert_size;
1581         interval.SetTo(feat_to);
1582         return;
1583     }
1584 }
1585 
1586 
SeqLocAdjustForInsert(CPacked_seqint & packint,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1587 void SeqLocAdjustForInsert(CPacked_seqint& packint,
1588                          TSeqPos insert_from, TSeqPos insert_to,
1589                          const CSeq_id* seqid)
1590 {
1591     if (packint.IsSet()) {
1592         // Process each interval in the list
1593         CPacked_seqint::Tdata::iterator it;
1594         for (it = packint.Set().begin();
1595                 it != packint.Set().end(); it++)
1596         {
1597             SeqLocAdjustForInsert(**it, insert_from, insert_to, seqid);
1598         }
1599     }
1600 }
1601 
1602 
SeqLocAdjustForInsert(CSeq_loc_mix & mix,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1603 void SeqLocAdjustForInsert(CSeq_loc_mix& mix,
1604                          TSeqPos insert_from, TSeqPos insert_to,
1605                          const CSeq_id* seqid)
1606 {
1607     if (mix.IsSet()) {
1608         // Process each seqloc in the list
1609         CSeq_loc_mix::Tdata::iterator it;
1610         for (it = mix.Set().begin();
1611                 it != mix.Set().end(); it++)
1612         {
1613             SeqLocAdjustForInsert(**it, insert_from, insert_to, seqid);
1614         }
1615     }
1616 }
1617 
1618 
SeqLocAdjustForInsert(CSeq_point & pnt,TSeqPos insert_from,TSeqPos insert_to,const CSeq_id * seqid)1619 void SeqLocAdjustForInsert(CSeq_point& pnt,
1620                          TSeqPos insert_from, TSeqPos insert_to,
1621                          const CSeq_id* seqid)
1622 {
1623     if (!OkToAdjustLoc(pnt, seqid)) {
1624         return;
1625     }
1626     if (!pnt.IsSetPoint()) {
1627         return;
1628     }
1629 
1630     if (insert_from < pnt.GetPoint()) {
1631         auto diff = insert_to - insert_from + 1;
1632         pnt.SetPoint(pnt.GetPoint() + diff);
1633     }
1634 }
1635 
1636 
SeqLocAdjustForInsert(CPacked_seqpnt & packpnt,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1637 void SeqLocAdjustForInsert(CPacked_seqpnt& packpnt,
1638                 TSeqPos from, TSeqPos to,
1639                 const CSeq_id* seqid)
1640 {
1641     if (!OkToAdjustLoc(packpnt, seqid)) {
1642         return;
1643     }
1644 
1645     auto it = packpnt.SetPoints().begin();
1646     while (it != packpnt.SetPoints().end()) {
1647         if (from < *it) {
1648             auto diff = to - from + 1;
1649             *it += diff;
1650         }
1651         it++;
1652     }
1653 }
1654 
1655 
SeqLocAdjustForInsert(CSeq_bond & bond,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1656 void SeqLocAdjustForInsert(CSeq_bond& bond,
1657                 TSeqPos from, TSeqPos to,
1658                 const CSeq_id* seqid)
1659 {
1660     if (bond.IsSetA()) {
1661         SeqLocAdjustForInsert(bond.SetA(), from, to, seqid);
1662     }
1663 
1664     if (bond.IsSetB()) {
1665         SeqLocAdjustForInsert(bond.SetB(), from, to, seqid);
1666     }
1667 }
1668 
1669 
SeqLocAdjustForInsert(CSeq_loc_equiv & equiv,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1670 void SeqLocAdjustForInsert(CSeq_loc_equiv& equiv,
1671                 TSeqPos from, TSeqPos to,
1672                 const CSeq_id* seqid)
1673 {
1674     CSeq_loc_equiv::Tdata::iterator it = equiv.Set().begin();
1675     while (it != equiv.Set().end()) {
1676         SeqLocAdjustForInsert(**it, from, to, seqid);
1677         it++;
1678     }
1679 }
1680 
1681 
SeqLocAdjustForInsert(CSeq_loc & loc,TSeqPos from,TSeqPos to,const CSeq_id * seqid)1682 void SeqLocAdjustForInsert(CSeq_loc& loc,
1683                 TSeqPos from, TSeqPos to,
1684                 const CSeq_id* seqid)
1685 {
1686     // Given a seqloc and a range, insert into the seqloc
1687 
1688     switch(loc.Which())
1689     {
1690         // Single interval
1691         case CSeq_loc::e_Int:
1692             SeqLocAdjustForInsert(loc.SetInt(), from, to, seqid);
1693             break;
1694 
1695         // Multiple intervals
1696         case CSeq_loc::e_Packed_int:
1697             SeqLocAdjustForInsert(loc.SetPacked_int(), from, to, seqid);
1698             break;
1699 
1700         // Multiple seqlocs
1701         case CSeq_loc::e_Mix:
1702             SeqLocAdjustForInsert(loc.SetMix(), from, to, seqid);
1703             break;
1704         case CSeq_loc::e_Pnt:
1705             SeqLocAdjustForInsert(loc.SetPnt(), from, to, seqid);
1706             break;
1707 
1708         case CSeq_loc::e_Packed_pnt:
1709             SeqLocAdjustForInsert(loc.SetPacked_pnt(), from, to, seqid);
1710             break;
1711         case CSeq_loc::e_Bond:
1712             SeqLocAdjustForInsert(loc.SetBond(), from, to, seqid);
1713             break;
1714         case CSeq_loc::e_Equiv:
1715             SeqLocAdjustForInsert(loc.SetEquiv(), from, to, seqid);
1716             break;
1717         case CSeq_loc::e_Empty:
1718         case CSeq_loc::e_Null:
1719         case CSeq_loc::e_not_set:
1720         case CSeq_loc::e_Whole:
1721         case CSeq_loc::e_Feat:
1722             // no adjustment needeed
1723             break;
1724     }
1725 }
1726 
1727 
SplitLocationForGap(CSeq_interval & before,TSeqPos start,TSeqPos stop,const CSeq_id * seqid,bool & cut,unsigned int options)1728 CRef<CSeq_interval> SplitLocationForGap(CSeq_interval& before,
1729     TSeqPos start, TSeqPos stop,
1730     const CSeq_id* seqid, bool& cut,
1731     unsigned int options)
1732 {
1733     cut = false;
1734     if (!OkToAdjustLoc(before, seqid)) {
1735         return CRef<CSeq_interval>(NULL);
1736     }
1737     // These are required fields
1738     if (!(before.CanGetFrom() && before.CanGetTo()))
1739     {
1740         return CRef<CSeq_interval>(NULL);
1741     }
1742 
1743     // Feature location
1744     TSeqPos feat_from = before.GetFrom();
1745     TSeqPos feat_to = before.GetTo();
1746     CRef<CSeq_interval> after(NULL);
1747     if (feat_to < start) {
1748         // gap completely after location
1749         return after;
1750     }
1751 
1752     if (feat_from > stop && !(options & eSplitLocOption_split_in_intron)) {
1753         // if gap completely before location, but not splitting in introns,
1754         // no change
1755         return after;
1756     }
1757 
1758     if (feat_from < start && feat_to > stop) {
1759         // gap entirely in inteval
1760         if (!(options & eSplitLocOption_split_in_exon)) {
1761             return after;
1762         }
1763     }
1764 
1765     if (feat_to > stop) {
1766         after.Reset(new CSeq_interval());
1767         after->Assign(before);
1768         if (stop + 1 > feat_from) {
1769             after->SetFrom(stop + 1);
1770             if (options & eSplitLocOption_make_partial) {
1771                 after->SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);
1772             }
1773         }
1774     }
1775     if (feat_from < start) {
1776         before.SetTo(start - 1);
1777         if (options & eSplitLocOption_make_partial) {
1778             before.SetFuzz_to().SetLim(CInt_fuzz::eLim_gt);
1779         }
1780     } else {
1781         cut = true;
1782     }
1783     return after;
1784 }
1785 
1786 
SplitLocationForGap(CSeq_loc::TPacked_int & before_intervals,CSeq_loc::TPacked_int & after_intervals,TSeqPos start,TSeqPos stop,const CSeq_id * seqid,unsigned int options)1787 void SplitLocationForGap(CSeq_loc::TPacked_int& before_intervals,
1788                          CSeq_loc::TPacked_int& after_intervals,
1789                          TSeqPos start, TSeqPos stop,
1790                          const CSeq_id* seqid, unsigned int options)
1791 {
1792     if (before_intervals.IsSet()) {
1793         if (before_intervals.IsReverseStrand()) {
1794             reverse(before_intervals.Set().begin(), before_intervals.Set().end());
1795         }
1796 
1797         auto it = before_intervals.Set().begin();
1798         while (it != before_intervals.Set().end()) {
1799             CSeq_interval& sub_interval = **it;
1800 
1801             TSeqPos int_from = sub_interval.GetFrom();
1802             if (int_from > stop && after_intervals.IsSet() && !after_intervals.Get().empty()) {
1803                 after_intervals.Set().push_back(Ref(&sub_interval));
1804                 it = before_intervals.Set().erase(it);
1805             }
1806             else {
1807                 bool cut = false;
1808                 CRef<CSeq_interval> after = SplitLocationForGap(sub_interval, start, stop, seqid, cut, options);
1809 
1810                 // Should interval be deleted from list?
1811                 if (cut) {
1812                     it = before_intervals.Set().erase(it);
1813                 }
1814                 else {
1815                     ++it;
1816                 }
1817                 if (after) {
1818                     after_intervals.Set().push_back(after);
1819                 }
1820             }
1821         }
1822 
1823         if (after_intervals.IsReverseStrand()) {
1824             reverse(after_intervals.Set().begin(), after_intervals.Set().end());
1825         }
1826         if (before_intervals.IsReverseStrand()) {
1827             reverse(before_intervals.Set().begin(), before_intervals.Set().end());
1828         }
1829     }
1830 }
1831 
1832 
SplitLocationForGap(CSeq_loc & loc1,CSeq_loc & loc2,size_t start,size_t stop,const CSeq_id * seqid,unsigned int options)1833 void SplitLocationForGap(CSeq_loc& loc1, CSeq_loc& loc2,
1834                          size_t start, size_t stop,
1835                          const CSeq_id* seqid, unsigned int options)
1836 {
1837     // Given a seqloc and a range, place the portion of the location before the range
1838     // into loc1 and the remainder of the location into loc2
1839 
1840     switch(loc1.Which())
1841     {
1842         // Single interval
1843         case CSeq_loc::e_Int:
1844             {{
1845                 bool cut = false;
1846                 CRef<CSeq_interval> after = SplitLocationForGap(loc1.SetInt(),
1847                     static_cast<TSeqPos>(start), static_cast<TSeqPos>(stop),
1848                     seqid, cut, options);
1849                 if (cut) {
1850                     loc1.Reset();
1851                 }
1852                 if (after) {
1853                     if (loc2.Which() == CSeq_loc::e_not_set) {
1854                         loc2.SetInt(*after);
1855                     } else {
1856                         CRef<CSeq_loc> add(new CSeq_loc());
1857                         add->SetInt(*after);
1858                         loc2.Add(*add);
1859                     }
1860                 }
1861             }}
1862             break;
1863         // Single point
1864         case CSeq_loc::e_Pnt:
1865             if (OkToAdjustLoc(loc1.GetPnt(), seqid)) {
1866                 if (stop < loc1.GetPnt().GetPoint()) {
1867                     if (loc2.Which() == CSeq_loc::e_not_set) {
1868                         loc2.SetPnt().Assign(loc1.GetPnt());
1869                     } else {
1870                         loc2.Add(loc1);
1871                     }
1872                     loc1.Reset();
1873                 }
1874             }
1875             break;
1876 
1877         // Multiple intervals
1878         case CSeq_loc::e_Packed_int:
1879             {{
1880                 CSeq_loc::TPacked_int& before_intervals = loc1.SetPacked_int();
1881                 CRef<CSeq_loc::TPacked_int> after_intervals(new CSeq_loc::TPacked_int);
1882                 SplitLocationForGap(before_intervals, *after_intervals,
1883                     static_cast<TSeqPos>(start), static_cast<TSeqPos>(stop),
1884                     seqid, options);
1885 
1886                 if (before_intervals.Set().empty()) {
1887                     loc1.Reset();
1888                 }
1889                 if (!after_intervals->Set().empty()) {
1890                     if (loc2.Which() == CSeq_loc::e_not_set) {
1891                         loc2.SetPacked_int().Assign(*after_intervals);
1892                     } else {
1893                         CRef<CSeq_loc> add(new CSeq_loc());
1894                         add->SetPacked_int().Assign(*after_intervals);
1895                         loc2.Add(*add);
1896                     }
1897                 }
1898             }}
1899             break;
1900 
1901         // Multiple seqlocs
1902         case CSeq_loc::e_Mix:
1903         {{
1904             CSeq_loc_mix& before_mix = loc1.SetMix();
1905             CRef<CSeq_loc_mix> after_mix(new CSeq_loc_mix);
1906             if (before_mix.IsSet()) {
1907                 if (before_mix.IsReverseStrand()) {
1908                     reverse(before_mix.Set().begin(), before_mix.Set().end());
1909                 }
1910                 auto it = before_mix.Set().begin();
1911                 while (it != before_mix.Set().end()) {
1912                     CSeq_loc& sub_loc = **it;
1913 
1914                     TSeqPos from = sub_loc.GetStart(eExtreme_Positional);
1915                     if (from > stop && after_mix->IsSet() && !after_mix->Get().empty()) {
1916                         after_mix->Set().push_back(Ref(&sub_loc));
1917                         it = before_mix.Set().erase(it);
1918                     }
1919                     else {
1920                         CRef<CSeq_loc> after(new CSeq_loc());
1921                         SplitLocationForGap(**it, *after, start, stop, seqid, options);
1922                         // Should seqloc be deleted from list?
1923                         if ((*it)->Which() == CSeq_loc::e_not_set) {
1924                             it = before_mix.Set().erase(it);
1925                         }
1926                         else {
1927                             ++it;
1928                         }
1929                         if (after->Which() != CSeq_loc::e_not_set) {
1930                             after_mix->Set().push_back(after);
1931                         }
1932                     }
1933                 }
1934 
1935                 if (after_mix->IsReverseStrand()) {
1936                     reverse(after_mix->Set().begin(), after_mix->Set().end());
1937                 }
1938                 if (before_mix.IsReverseStrand()) {
1939                     reverse(before_mix.Set().begin(), before_mix.Set().end());
1940                 }
1941 
1942                 // Update the original list
1943                 if (before_mix.Set().empty()) {
1944                     loc1.Reset();
1945                 }
1946                 if (!after_mix->Set().empty()) {
1947                     if (loc2.Which() == CSeq_loc::e_not_set) {
1948                         loc2.SetMix().Assign(*after_mix);
1949                     }
1950                     else {
1951                         CRef<CSeq_loc> add(new CSeq_loc());
1952                         add->SetMix().Assign(*after_mix);
1953                         loc2.Add(*add);
1954                     }
1955                 }
1956             }
1957         }}
1958             break;
1959         case CSeq_loc::e_Equiv:
1960              {{
1961                 CSeq_loc_equiv& before_equiv = loc1.SetEquiv();
1962                 CRef<CSeq_loc_equiv> after_equiv(new CSeq_loc_equiv);
1963                  if (before_equiv.IsSet()) {
1964                     // Process each seqloc in the list
1965                     CSeq_loc_equiv::Tdata::iterator it;
1966                     for (it = before_equiv.Set().begin();
1967                          it != before_equiv.Set().end(); )
1968                     {
1969                         CRef<CSeq_loc> after(new CSeq_loc());
1970                         SplitLocationForGap(**it, *after, start, stop, seqid, options);
1971                         // Should seqloc be deleted from list?
1972                         if ((*it)->Which() == CSeq_loc::e_not_set) {
1973                             it = before_equiv.Set().erase(it);
1974                         }
1975                         else {
1976                             ++it;
1977                         }
1978                         if (after->Which() != CSeq_loc::e_not_set) {
1979                             after_equiv->Set().push_back(after);
1980                         }
1981                     }
1982 
1983                     // Update the original list
1984                     if (before_equiv.Set().empty()) {
1985                         loc1.Reset();
1986                     }
1987                     if (!after_equiv->Set().empty()) {
1988                         if (loc2.Which() == CSeq_loc::e_not_set) {
1989                             loc2.SetMix().Assign(*after_equiv);
1990                         } else {
1991                             CRef<CSeq_loc> add(new CSeq_loc());
1992                             add->SetMix().Assign(*after_equiv);
1993                             loc2.Add(*add);
1994                         }
1995                     }
1996                 }
1997             }}
1998             break;
1999         case CSeq_loc::e_Packed_pnt:
2000             if (OkToAdjustLoc(loc1.GetPacked_pnt(), seqid)) {
2001                 CPacked_seqpnt::TPoints& before_points = loc1.SetPacked_pnt().SetPoints();
2002                 CPacked_seqpnt::TPoints after_points;
2003                 CPacked_seqpnt::TPoints::iterator it = loc1.SetPacked_pnt().SetPoints().begin();
2004                 while (it != loc1.SetPacked_pnt().SetPoints().end()) {
2005                     if (stop < *it) {
2006                         after_points.push_back(*it);
2007                     }
2008                     if (start >= *it) {
2009                         it = before_points.erase(it);
2010                     } else {
2011                         it++;
2012                     }
2013                 }
2014                 if (!after_points.empty()) {
2015                     CRef<CPacked_seqpnt> after(new CPacked_seqpnt());
2016                     after->Assign(loc1.GetPacked_pnt());
2017                     after->SetPoints().assign(after_points.begin(), after_points.end());
2018                     if (loc2.Which() == CSeq_loc::e_not_set) {
2019                         loc2.SetPacked_pnt().Assign(*after);
2020                     } else {
2021                         CRef<CSeq_loc> add(new CSeq_loc());
2022                         add->SetPacked_pnt().Assign(*after);
2023                         loc2.Add(*add);
2024                     }
2025                 }
2026                 if (before_points.empty()) {
2027                     loc1.Reset();
2028                 }
2029             }
2030             break;
2031         case CSeq_loc::e_Empty:
2032         case CSeq_loc::e_Null:
2033         case CSeq_loc::e_not_set:
2034         case CSeq_loc::e_Whole:
2035         case CSeq_loc::e_Feat:
2036         case CSeq_loc::e_Bond:
2037             // no adjustment needeed
2038             break;
2039     }
2040     NormalizeLoc(loc1);
2041     NormalizeLoc(loc2);
2042 }
2043 
2044 
CdregionAdjustForTrim(CCdregion & cdr,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2045 void CdregionAdjustForTrim(CCdregion& cdr,
2046                             TSeqPos from, TSeqPos to,
2047                             const CSeq_id* seqid)
2048 {
2049     CCdregion::TCode_break::iterator it = cdr.SetCode_break().begin();
2050     while (it != cdr.SetCode_break().end()) {
2051         if ((*it)->IsSetLoc()) {
2052             bool cut = false;
2053             bool adjusted = false;
2054             TSeqPos trim5 = 0;
2055             SeqLocAdjustForTrim((*it)->SetLoc(), from, to, seqid, cut, trim5, adjusted);
2056             if (cut) {
2057                 it = cdr.SetCode_break().erase(it);
2058             } else {
2059                 it++;
2060             }
2061         } else {
2062             it++;
2063         }
2064     }
2065     if (cdr.GetCode_break().empty()) {
2066         cdr.ResetCode_break();
2067     }
2068 }
2069 
2070 
TrnaAdjustForTrim(CTrna_ext & trna,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2071 void TrnaAdjustForTrim(CTrna_ext& trna,
2072                         TSeqPos from, TSeqPos to,
2073                         const CSeq_id* seqid)
2074 {
2075     if (trna.IsSetAnticodon()) {
2076         bool cut = false;
2077         bool trimmed = false;
2078         TSeqPos trim5 = 0;
2079         SeqLocAdjustForTrim(trna.SetAnticodon(), from, to, seqid, cut, trim5, trimmed);
2080         if (cut) {
2081             trna.ResetAnticodon();
2082         }
2083     }
2084 }
2085 
2086 
FeatureAdjustForTrim(CSeq_feat & feat,TSeqPos from,TSeqPos to,const CSeq_id * seqid,bool & bCompleteCut,bool & bTrimmed)2087 void FeatureAdjustForTrim(CSeq_feat& feat,
2088                             TSeqPos from, TSeqPos to,
2089                             const CSeq_id* seqid,
2090                             bool& bCompleteCut,
2091                             bool& bTrimmed)
2092 {
2093     TSeqPos trim5 = 0;
2094     SeqLocAdjustForTrim (feat.SetLocation(), from, to, seqid, bCompleteCut, trim5, bTrimmed);
2095     if (bCompleteCut) {
2096         return;
2097     }
2098 
2099     if (feat.IsSetData()) {
2100         switch (feat.GetData().GetSubtype()) {
2101             case CSeqFeatData::eSubtype_cdregion:
2102                 CdregionAdjustForTrim(feat.SetData().SetCdregion(), from, to, seqid);
2103                 break;
2104             case CSeqFeatData::eSubtype_tRNA:
2105                 TrnaAdjustForTrim(feat.SetData().SetRna().SetExt().SetTRNA(), from, to, seqid);
2106                 break;
2107             default:
2108                 break;
2109         }
2110     }
2111 }
2112 
2113 
CdregionAdjustForInsert(CCdregion & cdr,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2114 void CdregionAdjustForInsert(CCdregion& cdr,
2115                             TSeqPos from, TSeqPos to,
2116                             const CSeq_id* seqid)
2117 {
2118     CCdregion::TCode_break::iterator it = cdr.SetCode_break().begin();
2119     while (it != cdr.SetCode_break().end()) {
2120         if ((*it)->IsSetLoc()) {
2121             SeqLocAdjustForInsert((*it)->SetLoc(), from, to, seqid);
2122         }
2123         it++;
2124     }
2125     if (cdr.GetCode_break().empty()) {
2126         cdr.ResetCode_break();
2127     }
2128 }
2129 
2130 
TrnaAdjustForInsert(CTrna_ext & trna,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2131 void TrnaAdjustForInsert(CTrna_ext& trna,
2132                         TSeqPos from, TSeqPos to,
2133                         const CSeq_id* seqid)
2134 {
2135     if (trna.IsSetAnticodon()) {
2136         SeqLocAdjustForInsert(trna.SetAnticodon(), from, to, seqid);
2137     }
2138 }
2139 
2140 
FeatureAdjustForInsert(CSeq_feat & feat,TSeqPos from,TSeqPos to,const CSeq_id * seqid)2141 void FeatureAdjustForInsert(CSeq_feat& feat,
2142                             TSeqPos from, TSeqPos to,
2143                             const CSeq_id* seqid)
2144 {
2145     SeqLocAdjustForInsert (feat.SetLocation(), from, to, seqid);
2146 
2147     if (feat.IsSetData()) {
2148         switch (feat.GetData().GetSubtype()) {
2149             case CSeqFeatData::eSubtype_cdregion:
2150                 CdregionAdjustForInsert(feat.SetData().SetCdregion(), from, to, seqid);
2151                 break;
2152             case CSeqFeatData::eSubtype_tRNA:
2153                 TrnaAdjustForInsert(feat.SetData().SetRna().SetExt().SetTRNA(), from, to, seqid);
2154                 break;
2155             default:
2156                 break;
2157         }
2158     }
2159 }
2160 
2161 
s_PPntComparePlus(const TSeqPos & p1,const TSeqPos & p2)2162 bool s_PPntComparePlus(const TSeqPos& p1, const TSeqPos& p2)
2163 {
2164     return (p1 < p2);
2165 }
2166 
2167 
s_PPntCompareMinus(const TSeqPos & p1,const TSeqPos & p2)2168 bool s_PPntCompareMinus(const TSeqPos& p1, const TSeqPos& p2)
2169 {
2170     return (p1 > p2);
2171 }
2172 
2173 
CorrectIntervalOrder(CPacked_seqpnt & ppnt)2174 bool CorrectIntervalOrder(CPacked_seqpnt& ppnt)
2175 {
2176     bool rval = false;
2177     if (!ppnt.IsSetPoints()) {
2178         // nothing to do
2179     } else if (!ppnt.IsSetStrand() || ppnt.GetStrand() == eNa_strand_plus || ppnt.GetStrand() == eNa_strand_unknown) {
2180         if (!seq_mac_is_sorted(ppnt.GetPoints().begin(), ppnt.GetPoints().end(), s_PPntComparePlus)) {
2181             stable_sort(ppnt.SetPoints().begin(), ppnt.SetPoints().end(), s_PPntComparePlus);
2182             rval = true;
2183         }
2184     } else if (ppnt.IsSetStrand() && ppnt.GetStrand() == eNa_strand_minus) {
2185         if (!seq_mac_is_sorted(ppnt.GetPoints().begin(), ppnt.GetPoints().end(), s_PPntCompareMinus)) {
2186             stable_sort(ppnt.SetPoints().begin(), ppnt.SetPoints().end(), s_PPntCompareMinus);
2187             rval = true;
2188         }
2189     }
2190     return rval;
2191 }
2192 
2193 
s_StrandsConsistent(const CSeq_interval & a,const CSeq_interval & b)2194 bool s_StrandsConsistent(const CSeq_interval& a, const CSeq_interval& b)
2195 {
2196     if (a.IsSetStrand() && a.GetStrand() == eNa_strand_minus) {
2197         if (!b.IsSetStrand() || b.GetStrand() != eNa_strand_minus) {
2198             return false;
2199         } else {
2200             return true;
2201         }
2202     } else if (b.IsSetStrand() && b.GetStrand() == eNa_strand_minus) {
2203         return false;
2204     } else {
2205         return true;
2206     }
2207 }
2208 
2209 
CorrectIntervalOrder(CPacked_seqint & pint)2210 bool CorrectIntervalOrder(CPacked_seqint& pint)
2211 {
2212     if (pint.Get().size() < 2) {
2213         return false;
2214     }
2215     bool any_change = false;
2216 
2217     bool this_change = true;
2218     while (this_change) {
2219         this_change = false;
2220 
2221         // can only swap elements if they have the same strand and Seq-id
2222         CPacked_seqint::Tdata::iterator a = pint.Set().begin();
2223         CPacked_seqint::Tdata::iterator b = a;
2224         b++;
2225         while (b != pint.Set().end()) {
2226             if ((*a)->IsSetId() && (*b)->IsSetId() &&
2227                 (*a)->GetId().Equals((*b)->GetId()) &&
2228                 (*a)->IsSetFrom() && (*a)->IsSetTo() && (*a)->GetFrom() < (*a)->GetTo() &&
2229                 (*b)->IsSetFrom() && (*b)->IsSetTo() && (*b)->GetFrom() < (*b)->GetTo() &&
2230                 s_StrandsConsistent(**a, **b)) {
2231                 if ((*a)->IsSetStrand() && (*a)->GetStrand() == eNa_strand_minus) {
2232                     if ((*b)->GetTo() > (*a)->GetFrom()) {
2233                         CRef<CSeq_interval> swp(a->GetPointer());
2234                         a->Reset(b->GetPointer());
2235                         b->Reset(swp.GetPointer());
2236                         this_change = true;
2237                         any_change = true;
2238                     }
2239                 } else {
2240                     if ((*b)->GetTo() < (*a)->GetFrom()) {
2241                         CRef<CSeq_interval> swp(a->GetPointer());
2242                         a->Reset(b->GetPointer());
2243                         b->Reset(swp.GetPointer());
2244                         this_change = true;
2245                         any_change = true;
2246                     }
2247                 }
2248             }
2249             ++a;
2250             ++b;
2251         }
2252     }
2253     return any_change;
2254 }
2255 
2256 
OneIdOneStrand(const CSeq_loc & loc,const CSeq_id ** id,ENa_strand & strand)2257 bool OneIdOneStrand(const CSeq_loc& loc, const CSeq_id** id, ENa_strand& strand)
2258 {
2259     try {
2260         CSeq_loc_CI li(loc);
2261         *id = &(li.GetSeq_id());
2262         strand = li.IsSetStrand() ? li.GetStrand() : eNa_strand_plus;
2263         if (strand == eNa_strand_unknown) {
2264             strand = eNa_strand_plus;
2265         }
2266         if (strand != eNa_strand_plus && strand != eNa_strand_minus) {
2267             return false;
2268         }
2269         ++li;
2270         while (li) {
2271             if (!li.GetSeq_id().Equals(**id)) {
2272                 return false;
2273             }
2274             ENa_strand this_strand = li.IsSetStrand() ? li.GetStrand() : eNa_strand_plus;
2275             if (this_strand == eNa_strand_unknown) {
2276                 this_strand = eNa_strand_plus;
2277             }
2278             if (this_strand != strand) {
2279                 return false;
2280             }
2281             ++li;
2282         }
2283         return true;
2284     } catch (CException&) {
2285         return false;
2286     }
2287 }
2288 
2289 
CorrectIntervalOrder(CSeq_loc::TMix::Tdata & mix)2290 bool CorrectIntervalOrder(CSeq_loc::TMix::Tdata& mix)
2291 {
2292     bool any_change = false;
2293     NON_CONST_ITERATE(CSeq_loc::TMix::Tdata, it, mix) {
2294         any_change |= CorrectIntervalOrder(**it);
2295     }
2296     if (mix.size() < 2) {
2297         return any_change;
2298     }
2299     bool this_change = true;
2300     while (this_change) {
2301         this_change = false;
2302 
2303         // can only swap elements if they have the same strand and Seq-id
2304         CSeq_loc::TMix::Tdata::iterator a = mix.begin();
2305         CSeq_loc::TMix::Tdata::iterator b = a;
2306         b++;
2307         while (b != mix.end()) {
2308             try {
2309                 const CSeq_id* a_id;
2310                 const CSeq_id* b_id;
2311                 ENa_strand a_strand;
2312                 ENa_strand b_strand;
2313                 if (OneIdOneStrand(**a, &a_id, a_strand) &&
2314                     OneIdOneStrand(**b, &b_id, b_strand) &&
2315                     a_id->Equals(*b_id) &&
2316                     a_strand == b_strand) {
2317                     if (a_strand == eNa_strand_plus) {
2318                         if ((*a)->GetStart(eExtreme_Biological) > (*b)->GetStop(eExtreme_Biological)) {
2319                             CRef<CSeq_loc> swp(a->GetPointer());
2320                             a->Reset(b->GetPointer());
2321                             b->Reset(swp.GetPointer());
2322                             this_change = true;
2323                             any_change = true;
2324                         }
2325                     } else if (a_strand == eNa_strand_minus) {
2326                         if ((*a)->GetStart(eExtreme_Biological) < (*b)->GetStop(eExtreme_Biological)) {
2327                             CRef<CSeq_loc> swp(a->GetPointer());
2328                             a->Reset(b->GetPointer());
2329                             b->Reset(swp.GetPointer());
2330                             this_change = true;
2331                             any_change = true;
2332                         }
2333                     }
2334                 }
2335             } catch (CException&) {
2336                 // not just one id
2337             }
2338             ++a;
2339             ++b;
2340         }
2341     }
2342     return any_change;
2343 }
2344 
2345 
CorrectIntervalOrder(CSeq_loc & loc)2346 bool CorrectIntervalOrder(CSeq_loc& loc)
2347 {
2348     bool any_change = false;
2349     switch (loc.Which()) {
2350         case CSeq_loc::e_Bond:
2351         case CSeq_loc::e_Empty:
2352         case CSeq_loc::e_Equiv:
2353         case CSeq_loc::e_Feat:
2354         case CSeq_loc::e_Int:
2355         case CSeq_loc::e_not_set:
2356         case CSeq_loc::e_Null:
2357         case CSeq_loc::e_Pnt:
2358         case CSeq_loc::e_Whole:
2359             // nothing to do
2360             break;
2361         case CSeq_loc::e_Mix:
2362             any_change = CorrectIntervalOrder(loc.SetMix().Set());
2363             break;
2364         case CSeq_loc::e_Packed_int:
2365             any_change = CorrectIntervalOrder(loc.SetPacked_int());
2366             break;
2367         case CSeq_loc::e_Packed_pnt:
2368             any_change = CorrectIntervalOrder(loc.SetPacked_pnt());
2369             break;
2370     }
2371     return any_change;
2372 }
2373 
2374 
IsExtendableLeft(TSeqPos left,const CBioseq & seq,CScope * scope,TSeqPos & extend_len)2375 bool IsExtendableLeft(TSeqPos left, const CBioseq& seq, CScope* scope, TSeqPos& extend_len)
2376 {
2377     bool rval = false;
2378     if (left < 3) {
2379         rval = true;
2380         extend_len = left;
2381     } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
2382                seq.GetInst().GetRepr() == CSeq_inst::eRepr_delta &&
2383                seq.GetInst().IsSetExt() &&
2384                seq.GetInst().GetExt().IsDelta()) {
2385         TSeqPos offset = 0;
2386         TSeqPos last_gap_stop = 0;
2387         ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
2388             if ((*it)->IsLiteral()) {
2389                 offset += (*it)->GetLiteral().GetLength();
2390                 if (!(*it)->GetLiteral().IsSetSeq_data()) {
2391                     last_gap_stop = offset;
2392                 } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
2393                     last_gap_stop = offset;
2394                 }
2395             } else if ((*it)->IsLoc()) {
2396                 offset += sequence::GetLength((*it)->GetLoc(), scope);
2397             }
2398             if (offset > left) {
2399                 break;
2400             }
2401         }
2402         if (left >= last_gap_stop && left - last_gap_stop <= 3) {
2403             rval = true;
2404             extend_len = left - last_gap_stop;
2405         }
2406     }
2407     return rval;
2408 }
2409 
2410 
IsExtendableRight(TSeqPos right,const CBioseq & seq,CScope * scope,TSeqPos & extend_len)2411 bool IsExtendableRight(TSeqPos right, const CBioseq& seq, CScope* scope, TSeqPos& extend_len)
2412 {
2413     bool rval = false;
2414     if (right > seq.GetLength() - 5) {
2415         rval = true;
2416         extend_len = seq.GetLength() - right - 1;
2417     } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
2418         seq.GetInst().GetRepr() == CSeq_inst::eRepr_delta &&
2419         seq.GetInst().IsSetExt() &&
2420         seq.GetInst().GetExt().IsDelta()) {
2421         TSeqPos offset = 0;
2422         TSeqPos next_gap_start = 0;
2423         ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
2424             if ((*it)->IsLiteral()) {
2425                 if (!(*it)->GetLiteral().IsSetSeq_data()) {
2426                     next_gap_start = offset;
2427                 } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
2428                     next_gap_start = offset;
2429                 }
2430                 offset += (*it)->GetLiteral().GetLength();
2431             } else if ((*it)->IsLoc()) {
2432                 offset += sequence::GetLength((*it)->GetLoc(), scope);
2433             }
2434             if (offset > right + 4) {
2435                 break;
2436             }
2437         }
2438         if (next_gap_start > right && next_gap_start - right - 1 <= 3) {
2439             rval = true;
2440             extend_len = next_gap_start - right - 1;
2441         }
2442     }
2443     return rval;
2444 }
2445 
2446 
AdjustFeatureEnd5(CSeq_feat & cds,vector<CRef<CSeq_feat>> related_features,CScope & scope)2447 bool AdjustFeatureEnd5(CSeq_feat& cds, vector<CRef<CSeq_feat> > related_features, CScope& scope)
2448 {
2449     if (!cds.GetLocation().IsPartialStart(eExtreme_Biological)) {
2450         return false;
2451     }
2452 
2453     bool rval = false;
2454 
2455     CSeq_loc_CI first_l(cds.GetLocation());
2456     CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
2457     CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2458 
2459     TSeqPos start = cds.GetLocation().GetStart(eExtreme_Biological);
2460     TSeqPos new_start = start;
2461     TSeqPos extend_len = 0;
2462     bool extendable = false;
2463 
2464     if (!first_l.IsSetStrand() || first_l.GetStrand() != eNa_strand_minus) {
2465         // positive strand
2466         if (start > 0) {
2467             extendable = IsExtendableLeft(start, *seq, &scope, extend_len);
2468             new_start = start - extend_len;
2469         }
2470     } else {
2471         if (start < seq->GetInst().GetLength() - 1) {
2472             extendable = IsExtendableRight(start, *seq, &scope, extend_len);
2473             new_start = start + extend_len;
2474         }
2475     }
2476     if (extendable) {
2477         CRef<CSeq_loc> cds_loc = SeqLocExtend5(cds.GetLocation(), new_start, &scope);
2478         if (cds_loc) {
2479             for (auto it : related_features) {
2480                 if (it->GetLocation().GetStart(eExtreme_Biological) == start) {
2481                     CRef<CSeq_loc> related_loc = SeqLocExtend5(it->GetLocation(), new_start, &scope);
2482                     if (related_loc) {
2483                         it->SetLocation().Assign(*related_loc);
2484                         if (it->IsSetData() && it->GetData().IsCdregion()) {
2485                             AdjustFrameFor5Extension(cds, extend_len);
2486                         }
2487                     }
2488                 }
2489             }
2490             cds.SetLocation().Assign(*cds_loc);
2491             if (cds.IsSetData() && cds.GetData().IsCdregion()) {
2492                 AdjustFrameFor5Extension(cds, extend_len);
2493             }
2494 
2495             rval = true;
2496         }
2497     }
2498 
2499     return rval;
2500 }
2501 
2502 
AdjustFeatureEnd3(CSeq_feat & cds,vector<CRef<CSeq_feat>> related_features,CScope & scope)2503 bool AdjustFeatureEnd3(CSeq_feat& cds, vector<CRef<CSeq_feat> > related_features, CScope& scope)
2504 {
2505     if (!cds.GetLocation().IsPartialStop(eExtreme_Biological)) {
2506         return false;
2507     }
2508 
2509     bool rval = false;
2510 
2511     CSeq_loc_CI last_l(cds.GetLocation());
2512     size_t num_intervals = last_l.GetSize();
2513     last_l.SetPos(num_intervals - 1);
2514 
2515     CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
2516     CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2517 
2518     TSeqPos stop = cds.GetLocation().GetStop(eExtreme_Biological);
2519     TSeqPos new_stop = stop;
2520     TSeqPos extend_len = 0;
2521     bool extendable = false;
2522 
2523     if (!last_l.IsSetStrand() || last_l.GetStrand() != eNa_strand_minus) {
2524         // positive strand
2525         if (stop < seq->GetInst().GetLength() - 1) {
2526             extendable = IsExtendableRight(stop, *seq, &scope, extend_len);
2527             new_stop = stop + extend_len;
2528         }
2529     } else {
2530         if (stop > 0) {
2531             extendable = IsExtendableLeft(stop, *seq, &scope, extend_len);
2532             new_stop = stop - extend_len;
2533         }
2534     }
2535     if (extendable) {
2536         CRef<CSeq_loc> cds_loc = SeqLocExtend3(cds.GetLocation(), new_stop, &scope);
2537         if (cds_loc) {
2538             for (auto it : related_features) {
2539                 if (it->GetLocation().GetStop(eExtreme_Biological) == stop) {
2540                     CRef<CSeq_loc> related_loc = SeqLocExtend3(it->GetLocation(), new_stop, &scope);
2541                     if (related_loc) {
2542                         it->SetLocation().Assign(*related_loc);
2543                     }
2544                 }
2545             }
2546             cds.SetLocation().Assign(*cds_loc);
2547             rval = true;
2548         }
2549     }
2550 
2551     return rval;
2552 }
2553 
2554 
IsExtendable(const CSeq_feat & cds,CScope & scope)2555 bool IsExtendable(const CSeq_feat& cds, CScope& scope)
2556 {
2557     if (cds.GetLocation().IsPartialStart(eExtreme_Positional)) {
2558         CSeq_loc_CI first_l(cds.GetLocation());
2559         CBioseq_Handle bsh = scope.GetBioseqHandle(first_l.GetSeq_id());
2560         CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2561         TSeqPos extend_len = 0;
2562         TSeqPos start = first_l.GetRange().GetFrom();
2563         if (IsExtendableLeft(start, *seq, &scope, extend_len) && extend_len > 0) {
2564             return true;
2565         }
2566     }
2567     if (cds.GetLocation().IsPartialStop(eExtreme_Positional)) {
2568         CSeq_loc_CI last_l(cds.GetLocation());
2569         size_t num_intervals = last_l.GetSize();
2570         last_l.SetPos(num_intervals - 1);
2571         CBioseq_Handle bsh = scope.GetBioseqHandle(last_l.GetSeq_id());
2572         CConstRef<CBioseq> seq = bsh.GetCompleteBioseq();
2573 
2574         TSeqPos stop = cds.GetLocation().GetStop(eExtreme_Positional);
2575         TSeqPos extend_len = 0;
2576 
2577         if (IsExtendableRight(stop, *seq, &scope, extend_len) && extend_len > 0) {
2578             return true;
2579         }
2580     }
2581     return false;
2582 }
2583 
2584 
ExtendPartialFeatureEnds(CBioseq_Handle bsh)2585 bool ExtendPartialFeatureEnds(CBioseq_Handle bsh)
2586 {
2587     bool any_change = false;
2588     CFeat_CI f(bsh);
2589     CRef<feature::CFeatTree> tr(new feature::CFeatTree(f));
2590     while (f) {
2591         if (f->GetData().IsCdregion()) {
2592             CMappedFeat gene = tr->GetBestGene(*f);
2593             CMappedFeat mRNA = tr->GetParent(*f, CSeqFeatData::eSubtype_mRNA);
2594             vector<CRef<CSeq_feat> > related_features;
2595             if (gene) {
2596                 CRef<CSeq_feat> new_gene(new CSeq_feat());
2597                 new_gene->Assign(*(gene.GetOriginalSeq_feat()));
2598                 related_features.push_back(new_gene);
2599             }
2600             if (mRNA) {
2601                 CRef<CSeq_feat> new_mRNA(new CSeq_feat());
2602                 new_mRNA->Assign(*(mRNA.GetOriginalSeq_feat()));
2603                 related_features.push_back(new_mRNA);
2604             }
2605             CRef<CSeq_feat> new_cds(new CSeq_feat());
2606             new_cds->Assign(*(f->GetOriginalSeq_feat()));
2607 
2608             const bool adjusted_5prime = AdjustFeatureEnd5(*new_cds, related_features, bsh.GetScope());
2609             const bool adjusted_3prime = AdjustFeatureEnd3(*new_cds, related_features, bsh.GetScope());
2610 
2611             if (adjusted_5prime || adjusted_3prime) {
2612                 feature::RetranslateCDS(*new_cds, bsh.GetScope());
2613                 CSeq_feat_EditHandle feh(*f);
2614                 feh.Replace(*new_cds);
2615                 if (gene) {
2616                     CSeq_feat_EditHandle geh(gene);
2617                     geh.Replace(*(related_features.front()));
2618                 }
2619                 if (mRNA) {
2620                     CSeq_feat_EditHandle meh(mRNA);
2621                     meh.Replace(*(related_features.back()));
2622                 }
2623                 any_change = true;
2624             }
2625         }
2626         ++f;
2627     }
2628     return any_change;
2629 }
2630 
2631 
2632 END_SCOPE(edit)
2633 END_SCOPE(objects)
2634 END_NCBI_SCOPE
2635 
2636