1 /*  $Id: objcoords.cpp 618367 2020-10-19 14:54:50Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Vlad Lebedev, Victor Joukov
27  *
28  * File Description: Library to retrieve mapped sequence coordinates of
29  *                   the feature products given a sequence location
30  *
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <misc/hgvs/objcoords.hpp>
37 
38 #include <misc/hgvs/sequtils.hpp>
39 
40 #include <util/checksum.hpp>
41 
42 #include <util/sequtil/sequtil.hpp>
43 #include <util/sequtil/sequtil_manip.hpp>
44 
45 #include <objmgr/feat_ci.hpp>
46 #include <objmgr/align_ci.hpp>
47 #include <objmgr/seqdesc_ci.hpp>
48 #include <objmgr/seq_loc_mapper.hpp>
49 #include <objmgr/seq_vector.hpp>
50 #include <objmgr/annot_selector.hpp>
51 #include <objmgr/util/sequence.hpp>
52 #include <objmgr/util/feature.hpp>
53 #include <objects/general/Object_id.hpp>
54 #include <objects/general/User_object.hpp>
55 #include <objects/seq/Seqdesc.hpp>
56 #include <objects/seq/seq_id_handle.hpp>
57 #include <objects/seqloc/Seq_id.hpp>
58 #include <objects/seqloc/Seq_point.hpp>
59 #include <objects/seqfeat/Cdregion.hpp>
60 #include <objects/seqblock/GB_block.hpp>
61 #include <objects/seqalign/Spliced_seg.hpp>
62 #include <objects/seqalign/Spliced_exon_chunk.hpp>
63 
64 #include <math.h>
65 
66 
67 USING_SCOPE(ncbi);
68 USING_SCOPE(objects);
69 
70 
71 // size of sequence frame
72 static const TSeqPos kSeqFrame = 10;
73 
74 // Service functions
75 //static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, bool complement=false);
76 static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, TSignedSeqPos adjustment=0);
77 
78 static SAnnotSelector x_GetAnnotSelector();
79 
80 
81 /////////////////////////////////////////////////////////////////////////////
82 //  CReportEntry
83 //  Container for all information related to one logical entry - mRNA/CDS
84 //  combination or component.
85 
86 class CReportEntry : public CObject
87 {
88 public:
89     CReportEntry(CScope& scope, const CSeq_id& seq_id,
90         feature::CFeatTree& feat_tree, const CMappedFeat& root_feat);
91     CReportEntry(CScope& scope, const CSeq_id& seq_id, const CMappedFeat& gene,
92         feature::CFeatTree& feat_tree, const CMappedFeat& root_feat);
93     CReportEntry(CScope& scope, const CSeq_id& seq_id, const CSeq_align& align);
CReportEntry()94     CReportEntry() : m_cdr_length(0), m_cds_offset_set(false) {}
95     void GetCoordinates(CScope& scope,
96                         Uint4 type_flags,
97                         TSeqPos pos,
98                         CHGVS_Coordinate_Set& coords) const;
IsValid()99     bool IsValid() { return m_mrna || m_cds; }
100 
101     // Specific to feature/alignment based mRNA/CDS group
102     void SetGene(CScope& scope, const CSeq_feat& feat);
103     void SetMrna(CScope& scope, const CSeq_feat& feat);
104     void SetCds(CScope& scope, const CSeq_feat& feat);
105     void SetAlignment(CScope& scope, const CSeq_align& align);
106     // ??? is it specific, or can be made generic
GetTranscriptId()107     CConstRef<CSeq_id> GetTranscriptId() { return m_transcript_id; }
108 
109 private:
110     CConstRef<CSeq_feat>  m_mrna; /// mRNA feature on main sequence (genomic or transcript)
111     CConstRef<CSeq_feat>  m_cds;  /// CDS feature on main sequence
112     CConstRef<CSeq_feat>  m_rna_cds; /// CDS feature on transcript, where main is genomic
113     TSeqPos               m_cdr_length;
114     bool                  m_cds_offset_set;
115     TSeqPos               m_cds_offset;
116     bool                  m_mrna_plus_strand;
117     CConstRef<CSeq_align> m_mrna_align;
118     CRef<CSeq_loc_Mapper> m_mrna_mapper;
119     // Some features occur on many kinds of sequences, so
120     // we have an alias m_main_id for such cases
121     CConstRef<CSeq_id>    m_main_id;
122     CConstRef<CSeq_id>    m_genomic_id;
123     CConstRef<CSeq_id>    m_transcript_id;
124     CConstRef<CSeq_id>    m_prot_id;
125     string                m_locus;
126 
127     void x_SetId(CScope& scope, const CSeq_id& seq_id);
128     void x_SetFeature(CScope& scope, feature::CFeatTree& feat_tree, const CMappedFeat& feat);
129     void x_SetRnaCds(CScope& scope, const CSeq_feat& cds);
130     void x_SetMrnaMapper(CScope& scope, const CSeq_feat& feat);
131 
132     CRef<CSeq_loc_Mapper> x_GetCdsMapper(CScope& scope, const CSeq_feat& cds) const;
133     TSignedSeqPos x_GetAdjustment(TSeqPos pos) const;
134     bool x_CheckExonGap(TSeqPos pos) const;
135     TSeqPos x_GetCDSOffset() const;
136     CRef<CHGVS_Coordinate> x_NewCoordinate(CScope& scope, const CSeq_id* id, TSeqPos pos) const;
137     void x_SetSequence(CHGVS_Coordinate& ref, CScope& scope, const CSeq_id* id,
138         TSignedSeqPos pos, TSignedSeqPos adjustment=0, bool plus_strand=true) const;
139     void x_SetHgvs(CHGVS_Coordinate& ref, const char* prefix, TSignedSeqPos pos,
140                    TSignedSeqPos adjustment = 0, bool utr3_tail = false,
141                    bool in_gap = false) const;
142     bool x_MapPos(const CSeq_loc_Mapper& mapper,
143                   const CSeq_id& id, TSeqPos pos,
144                   TSeqPos& mapped_pos) const;
145     void x_CalculateUTRAdjustments(TSignedSeqPos& feature_pos,
146                                    TSignedSeqPos& seq_pos,
147                                    TSignedSeqPos& adjustment,
148                                    bool& utr3_tail) const;
149     void x_GetRCoordinate(CScope& scope, TSeqPos pos,
150                           CHGVS_Coordinate_Set& coords) const;
151     void x_GetCCoordinate(CScope& scope, TSeqPos pos, TSignedSeqPos adj,
152                           CHGVS_Coordinate_Set& coords) const;
153     void x_GetPCoordinate(CScope& scope, TSeqPos pos,
154                           CHGVS_Coordinate_Set& coords) const;
155 };
156 
157 
158 
CReportEntry(CScope & scope,const CSeq_id & seq_id,feature::CFeatTree & feat_tree,const CMappedFeat & root_feat)159 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id,
160                            feature::CFeatTree& feat_tree, const CMappedFeat& root_feat)
161     : m_cdr_length(0), m_cds_offset_set(false)
162 {
163     x_SetId(scope, seq_id);
164     x_SetFeature(scope, feat_tree, root_feat);
165 }
166 
167 
CReportEntry(CScope & scope,const CSeq_id & seq_id,const CMappedFeat & gene,feature::CFeatTree & feat_tree,const CMappedFeat & root_feat)168 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id, const CMappedFeat& gene,
169                            feature::CFeatTree& feat_tree, const CMappedFeat& root_feat)
170     : m_cdr_length(0), m_cds_offset_set(false)
171 {
172     x_SetId(scope, seq_id);
173     SetGene(scope, gene.GetMappedFeature());
174     x_SetFeature(scope, feat_tree, root_feat);
175 }
176 
177 
CReportEntry(CScope & scope,const CSeq_id & seq_id,const CSeq_align & align)178 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id, const CSeq_align& align)
179 {
180     x_SetId(scope, seq_id);
181     SetAlignment(scope, align);
182 }
183 
184 
x_SetId(CScope & scope,const CSeq_id & seq_id)185 void CReportEntry::x_SetId(CScope& scope, const CSeq_id& seq_id)
186 {
187     m_main_id.Reset(&seq_id);
188     Uint4 type_flags = GetSequenceType(scope.GetBioseqHandle(seq_id));
189     /// check for protein
190     if (type_flags & CSeq_id::fAcc_prot) {
191         m_prot_id.Reset(&seq_id);
192     }
193     /// check for mRNA
194     else if (type_flags & CSeq_id::eAcc_mrna) {
195         m_transcript_id.Reset(&seq_id);
196     }
197     /// check for genomic
198     else if (type_flags & CSeq_id::fAcc_genomic) {
199         m_genomic_id.Reset(&seq_id);
200     }
201 }
202 
203 
x_SetFeature(CScope & scope,feature::CFeatTree & feat_tree,const CMappedFeat & feat)204 void CReportEntry::x_SetFeature(CScope& scope, feature::CFeatTree& feat_tree, const CMappedFeat& feat)
205 {
206     const CSeq_feat& seq_feat = feat.GetMappedFeature();
207     CSeqFeatData::ESubtype subtype =
208         seq_feat.GetData().GetSubtype();
209 
210     switch (subtype) {
211     // The following case is useless, handled in different manner in calling code
212     case CSeqFeatData::eSubtype_gene:
213         SetGene(scope, seq_feat);
214         {{
215             vector<CMappedFeat> cc = feat_tree.GetChildren(feat);
216             ITERATE (vector<CMappedFeat>, iter, cc) {
217                 x_SetFeature(scope, feat_tree, *iter);
218             }
219         }}
220         break;
221     case CSeqFeatData::eSubtype_mRNA:
222         SetMrna(scope, seq_feat);
223         if (feat.IsSetProduct()) {
224             CBioseq_Handle product_bsh =scope.GetBioseqHandle(feat.GetProduct());
225             if (product_bsh) {
226                 SAnnotSelector sel  = x_GetAnnotSelector();
227                 sel.IncludeFeatSubtype(CSeqFeatData::eSubtype_cdregion);
228                 for (CFeat_CI feat_iter(product_bsh, sel); feat_iter;  ++feat_iter)
229                 {
230                     x_SetRnaCds(scope, feat_iter->GetMappedFeature());
231                 }
232             }
233         } else {
234             vector<CMappedFeat> cc = feat_tree.GetChildren(feat);
235             // TODO: We don't handle multiple CDS per mRNA. Do they actually happen?
236             ITERATE (vector<CMappedFeat>, iter, cc) {
237                 const CSeq_feat& seq_feat = iter->GetMappedFeature();
238                 if (seq_feat.GetData().GetSubtype() ==
239                     CSeqFeatData::eSubtype_cdregion)
240                 {
241                     SetCds(scope, seq_feat);
242                     break;
243                 }
244             }
245         }
246         break;
247     case CSeqFeatData::eSubtype_cdregion:
248         SetCds(scope, seq_feat);
249         break;
250     default:
251         break;
252     }
253 }
254 
255 
GetCoordinates(CScope & scope,Uint4 type_flags,TSeqPos pos,CHGVS_Coordinate_Set & coords) const256 void CReportEntry::GetCoordinates(CScope& scope,
257                                   Uint4 type_flags,
258                                   TSeqPos pos,
259                                   CHGVS_Coordinate_Set& coords) const
260 {
261     // Fill out g. r. c. and p. coordinates based on the entry and type of
262     // original sequence
263     if (type_flags & CSeq_id::fAcc_prot) {
264         // do nothing
265         return;
266     }
267     // TODO: handle component here in the future
268 
269     // Calculate adjustment to the nearest exon, 0 if we are in exon
270     TSignedSeqPos adjustment = x_GetAdjustment(pos);
271     // check for genomic
272     if (type_flags & CSeq_id::fAcc_genomic) {
273         // r. c. and p. - r. here, c. and p. same as for mRNA
274         if (!adjustment)
275             x_GetRCoordinate(scope, pos, coords);
276     }
277     // check for mRNA, effectively union of cases with genomic
278     if (type_flags & (CSeq_id::fAcc_genomic | CSeq_id::eAcc_mrna)) {
279         // c. and p.
280         x_GetCCoordinate(scope, pos, adjustment, coords);
281         if (!adjustment)
282             x_GetPCoordinate(scope, pos, coords);
283     }
284 }
285 
286 
x_GetAdjustment(TSeqPos pos) const287 TSignedSeqPos CReportEntry::x_GetAdjustment(TSeqPos pos) const
288 {
289     // Calculate minimal distance to the exon boundary
290     if (!m_mrna_mapper) return 0;
291     //bool plus_strand = true;
292     if (m_mrna_align) {
293         // TODO: Use alignment, take into account exons
294         // const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
295         // ??? what should we do if we're in exon gap? How to handle exon overlap
296         // (ambiguous positions in an exon)
297     }
298     TSignedSeqPos signed_pos = static_cast<TSignedSeqPos>(pos);
299     TSignedSeqPos adj = 0;
300     const CMappingRanges& ranges = m_mrna_mapper->GetMappingRanges();
301     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*m_main_id);
302     CMappingRanges::TRangeIterator rg_it = ranges.BeginMappingRanges(
303                  idh,
304                  CMappingRanges::TRange::GetWhole().GetFrom(),
305                  CMappingRanges::TRange::GetWhole().GetTo());
306     for ( ; rg_it; ++rg_it) {
307         const CMappingRange& cvt = *rg_it->second;
308         TSignedSeqPos f = cvt.GetSrc_from();
309         TSignedSeqPos t = f + cvt.GetLength();
310         // TODO: Is it reliable enough to test for mRNA strand?
311         // plus_strand = !cvt.GetReverse();
312 //        LOG_POST("Pos " << pos << " from " << f << " to " << t);
313         int adj_for_intron;
314         if (signed_pos < f) {
315             adj_for_intron = signed_pos - f;
316         } else if (signed_pos > t) {
317             adj_for_intron = signed_pos - t;
318         } else {
319             return 0; // no adjustment needed
320         }
321         if (adj == 0  ||  std::abs(adj_for_intron) < std::abs(adj)) {
322             adj = adj_for_intron;
323         }
324     }
325 //    LOG_POST("Adjustment calculated " << adj);
326     return m_mrna_plus_strand ? adj : -adj;
327 }
328 
329 
x_CheckExonGap(TSeqPos pos) const330 bool CReportEntry::x_CheckExonGap(TSeqPos pos) const
331 {
332     if (!m_mrna_align) return false;
333     // Use alignment, take into account exons
334     const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
335     ITERATE (CSpliced_seg::TExons, exon_it, seg.GetExons()) {
336         const CSpliced_exon& exon = **exon_it;
337         if (!exon.IsSetGenomic_start() || pos < exon.GetGenomic_start() ||
338             !exon.IsSetGenomic_end() || pos > exon.GetGenomic_end() ||
339             !exon.IsSetParts()
340         ) {
341             continue;
342         }
343         TSeqPos exon_pos = m_mrna_plus_strand ?
344                                pos - exon.GetGenomic_start() :
345                                exon.GetGenomic_end() - pos;
346         ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
347             TSeqPos l = 0;
348             bool in_gap = false;
349             const CSpliced_exon_chunk& part = **part_it;
350             if (part.IsMatch()) {
351                 l = part.GetMatch();
352             } else if (part.IsMismatch()) {
353                 l = part.GetMismatch();
354             } else if (part.IsDiag()) {
355                 l = part.GetDiag();
356             } else if (part.IsProduct_ins()) {
357                 l = 0; // redundant
358             } else if (part.IsGenomic_ins()) {
359                 l = part.GetGenomic_ins();
360                 in_gap = true;
361             }
362             if (l > exon_pos) return in_gap;
363             exon_pos -= l;
364         }
365     }
366     return false;
367 }
368 
369 
x_GetCDSOffset() const370 TSeqPos CReportEntry::x_GetCDSOffset() const
371 {
372     return m_cds_offset_set ? m_cds_offset : 0;
373 }
374 
375 
x_NewCoordinate(CScope & scope,const CSeq_id * id,TSeqPos pos) const376 CRef<CHGVS_Coordinate> CReportEntry::x_NewCoordinate(CScope& scope, const CSeq_id* id, TSeqPos pos) const
377 {
378     // get the title (best id)
379     string title;
380     if (id) {
381         CSeq_id_Handle idh = sequence::GetId(*id, scope,
382                                              sequence::eGetId_Best);
383         idh.GetSeqId()->GetLabel(&title, CSeq_id::eContent);
384     } else if (!m_locus.empty()) {
385         title = m_locus;
386     }
387     CRef<CHGVS_Coordinate> ref(new CHGVS_Coordinate());
388     ref->SetTitle(title);
389     ref->SetObject_id("");
390     ref->SetMarker_pos(pos);
391     ref->SetSequence("");
392     ref->SetHgvs_position("");
393     return ref;
394 }
395 
396 
x_SetSequence(CHGVS_Coordinate & ref,CScope & scope,const CSeq_id * id,TSignedSeqPos pos,TSignedSeqPos adjustment,bool plus_strand) const397 void CReportEntry::x_SetSequence(CHGVS_Coordinate& ref, CScope& scope, const CSeq_id* id,
398                                  TSignedSeqPos pos, TSignedSeqPos adjustment, bool plus_strand) const
399 {
400     if (!id) return;
401     CBioseq_Handle prod_bsh =
402         scope.GetBioseqHandle(*id);
403     CSeqVector prod_vec(prod_bsh, CBioseq_Handle::eCoding_Iupac,
404         plus_strand ? eNa_strand_plus : eNa_strand_minus);
405     ref.SetSequence( x_GetSequence(prod_vec, plus_strand ? pos : prod_vec.size() - pos - 1, adjustment) );
406 }
407 
408 
x_SetHgvs(CHGVS_Coordinate & ref,const char * prefix,TSignedSeqPos pos,TSignedSeqPos adjustment,bool utr3_tail,bool in_gap) const409 void CReportEntry::x_SetHgvs(CHGVS_Coordinate& ref, const char* prefix, TSignedSeqPos pos,
410                              TSignedSeqPos adjustment, bool utr3_tail,
411                              bool in_gap) const
412 {
413     string hgvs(prefix);
414     if (in_gap) {
415         TSignedSeqPos second_pos;
416         if (adjustment > 0) {
417             second_pos = pos + 1;
418         } else {
419             second_pos = pos;
420             pos -= 1;
421         }
422         hgvs += "(" + NStr::IntToString(pos+1) + "_" + NStr::IntToString(second_pos+1) + ")";
423         ref.SetHgvs_position(hgvs);
424         return;
425     }
426     TSignedSeqPos pos_1_based =
427         pos >= 0 ? pos + 1 : pos;
428     if (adjustment == 0 || !utr3_tail) {
429         hgvs += NStr::IntToString(pos_1_based);
430     }
431     if (adjustment != 0) {
432         if (utr3_tail) {
433             hgvs += '*';
434         } else if (adjustment > 0) {
435             hgvs += '+';
436         }
437         hgvs += NStr::IntToString(adjustment);
438     }
439     ref.SetHgvs_position(hgvs);
440 }
441 
442 
x_MapPos(const CSeq_loc_Mapper & mapper,const CSeq_id & id,TSeqPos pos,TSeqPos & mapped_pos) const443 bool CReportEntry::x_MapPos(const CSeq_loc_Mapper& mapper,
444                             const CSeq_id& id, TSeqPos pos,
445                             TSeqPos& mapped_pos) const
446 {
447     // fake location for seq_mapper
448     CRef<CSeq_loc> fake_loc(new CSeq_loc());
449     fake_loc->SetPnt().SetPoint(pos);
450     fake_loc->SetId(id);
451 
452     // Get the feature position
453     CRef<CSeq_loc> mapped_loc;
454     // Due to defect in definition of Map (non-const) we need to use coercion
455     mapped_loc = const_cast<CSeq_loc_Mapper*>(&mapper)->Map(fake_loc.GetObject());
456     if (mapped_loc->Which() == CSeq_loc::e_Null) {
457         ERR_POST(Warning << "loc mapping did not work");
458         return false;
459     }
460     mapped_pos = mapped_loc->GetPnt().GetPoint();
461     return true;
462 }
463 
464 
x_CalculateUTRAdjustments(TSignedSeqPos & feature_pos,TSignedSeqPos & seq_pos,TSignedSeqPos & adjustment,bool & utr3_tail) const465 void CReportEntry::x_CalculateUTRAdjustments(TSignedSeqPos& feature_pos,
466                                              TSignedSeqPos& seq_pos,
467                                              TSignedSeqPos& adjustment,
468                                              bool& utr3_tail) const
469 {
470     utr3_tail = false;
471     // To calculate whether we are in UTR 3' region we need to know if our
472     // unadjusted position would map beyond the coding feature length.
473     // There are two cases:
474     //   If the right flank is not zero, even adjusted position would map
475     // after cdr_length.
476     //   The second one is worse - its adjusted pos always maps to the last
477     // valid position of last exon. For this case we check it and add
478     // adjustment to be used in UTR 3' test.
479     TSignedSeqPos utr3_adjusted_pos = feature_pos;
480     if (m_cdr_length  &&  feature_pos >= TSignedSeqPos(m_cdr_length) - 1) {
481         utr3_adjusted_pos += adjustment;
482     }
483     // If position is out of bounds of the feature, we just add
484     // adjustment so it does not matter is it in intron or not.
485     // According to Alex Astashin, if the position is inside mRNA,
486     // it is reported in detail, so we check seq_pos, not feature_pos
487     if (seq_pos <= 0) {
488         feature_pos += adjustment;
489         seq_pos += adjustment;
490         adjustment = 0;
491     } else
492     if (m_cdr_length  &&  utr3_adjusted_pos >= TSignedSeqPos(m_cdr_length)) {
493         feature_pos += adjustment;
494         seq_pos += adjustment;
495         // Adjustment signifies the offset from the stop codon into 3' UTR
496         adjustment = feature_pos - m_cdr_length + 1;
497         utr3_tail = true;
498         feature_pos = m_cdr_length - 1;
499     }
500 }
501 
502 
x_GetRCoordinate(CScope & scope,TSeqPos pos,CHGVS_Coordinate_Set & coords) const503 void CReportEntry::x_GetRCoordinate(CScope& scope, TSeqPos pos,
504                                     CHGVS_Coordinate_Set& coords) const
505 {
506     CRef<CHGVS_Coordinate> ref(x_NewCoordinate(scope, m_transcript_id, pos));
507 
508     if (!m_mrna_mapper) {
509         ERR_POST(Warning << "mRNA mapper is empty");
510         return;
511     }
512     TSeqPos mapped_pos;
513     if (x_MapPos(*m_mrna_mapper, *m_genomic_id, pos, mapped_pos)) {
514         ref->SetPos_mapped(mapped_pos);
515         x_SetHgvs(*ref, "r.", mapped_pos);
516         if (m_transcript_id) {
517             x_SetSequence(*ref, scope, m_transcript_id, mapped_pos);
518         } else if (m_genomic_id) {
519             // Product-less mRNA feature - we're faking it a bit using
520             // original sequence and original, not mapped position and
521             // appropriate strand. Same trick works for c. coordinate below
522             x_SetSequence(*ref, scope, m_genomic_id, pos, 0, m_mrna_plus_strand);
523         }
524         coords.Set().push_back(ref);
525     }
526 }
527 
528 
x_GetCCoordinate(CScope & scope,TSeqPos pos,TSignedSeqPos adjustment,CHGVS_Coordinate_Set & coords) const529 void CReportEntry::x_GetCCoordinate(CScope& scope, TSeqPos pos, TSignedSeqPos adjustment,
530                                     CHGVS_Coordinate_Set& coords) const
531 {
532     if (!m_cds && !m_rna_cds) return;
533     CRef<CHGVS_Coordinate> ref(x_NewCoordinate(scope, m_transcript_id, pos));
534     bool mapped = false;
535 
536     // adjusted_pos is for safely mapping the coordinate, it is guaranteed to be in
537     // exon, and even if the exon has a gap, not in the gap.
538     // adjustment is opposite the product, that is why we need to know
539     // the strand of the feature to find adjustment to mappable position
540     TSeqPos adjusted_pos = pos + (m_mrna_plus_strand ? -adjustment : adjustment);
541 
542     TSignedSeqPos mapped_pos;
543     TSeqPos seq_pos;
544     TSeqPos offset = 0;
545     bool utr3_tail = false;
546     bool in_gap = false;
547     if (m_mrna_mapper) {
548         // To use mRNA mapper to map to CDS coordinate we need to calculate
549         // CDS to mRNA offset here and use it to fix mapped coordinate
550         offset = x_GetCDSOffset();
551         if (x_MapPos(*m_mrna_mapper, *m_main_id, adjusted_pos, seq_pos)) {
552             in_gap = x_CheckExonGap(pos);
553             mapped_pos = seq_pos - offset;
554             TSignedSeqPos utr_adjusted_pos = seq_pos;
555             x_CalculateUTRAdjustments(mapped_pos, utr_adjusted_pos, adjustment, utr3_tail);
556             if (m_transcript_id) {
557                 x_SetSequence(*ref, scope, m_transcript_id, utr_adjusted_pos, adjustment);
558             } else if (m_genomic_id) {
559                 // Product-less mRNA feature - see comment for r. coordinate
560                 x_SetSequence(*ref, scope, m_genomic_id, pos, adjustment, m_mrna_plus_strand);
561             }
562             mapped = true;
563         }
564     } else if (m_cds) { // This condition is trivially true, but stays here to denote the case
565         // CDS feature only - map through fake product
566         CRef<CSeq_loc> fake_product(new CSeq_loc);
567         fake_product->SetWhole().Assign(*m_main_id);
568         CRef<CSeq_loc_Mapper> mapper(new CSeq_loc_Mapper
569                      (m_cds->GetLocation(),
570                       *fake_product,
571                       &scope));
572         if (x_MapPos(*mapper, *m_main_id, adjusted_pos, seq_pos)) {
573             mapped_pos = seq_pos;
574             // Sequence coming from original id, use original pos
575             x_SetSequence(*ref, scope, m_main_id, pos, adjustment);
576             mapped = true;
577         }
578     } else {
579         // Sorry, not enough info to calculate the coordinate
580         return;
581     }
582     x_SetHgvs(*ref, "c.", mapped_pos, adjustment, utr3_tail, in_gap);
583 
584     if (!adjustment) {
585         ref->SetPos_mapped(mapped_pos);
586     }
587     if (mapped) coords.Set().push_back(ref);
588 }
589 
590 
x_GetPCoordinate(CScope & scope,TSeqPos pos,CHGVS_Coordinate_Set & coords) const591 void CReportEntry::x_GetPCoordinate(CScope& scope, TSeqPos pos,
592                                     CHGVS_Coordinate_Set& coords) const
593 {
594     TSeqPos mapped_pos = pos;
595     if (m_rna_cds  &&  m_mrna_mapper) {
596         // We have both DNA to RNA and RNA to protein mappings, so map 2 times
597         if (!x_MapPos(*m_mrna_mapper, *m_genomic_id, pos, mapped_pos))
598             return;
599         if (!x_MapPos(*x_GetCdsMapper(scope, *m_rna_cds), *m_transcript_id, mapped_pos, mapped_pos))
600             return;
601     } else if (m_cds) {
602         if (!x_MapPos(*x_GetCdsMapper(scope, *m_cds), *m_main_id, pos, mapped_pos))
603             return;
604     } else {
605         return;
606     }
607     CRef<CHGVS_Coordinate> ref(x_NewCoordinate(scope, m_prot_id, pos));
608     ref->SetPos_mapped(mapped_pos);
609     x_SetHgvs(*ref, "p.", mapped_pos);
610     x_SetSequence(*ref, scope, m_prot_id, mapped_pos);
611     coords.Set().push_back(ref);
612 }
613 
614 
SetGene(CScope & scope,const CSeq_feat & feat)615 void CReportEntry::SetGene(CScope& scope, const CSeq_feat& feat)
616 {
617     if (feat.GetData().GetGene().IsSetLocus_tag()) {
618         m_locus = feat.GetData().GetGene().GetLocus_tag();
619     } else if (feat.GetData().GetGene().IsSetLocus()) {
620         m_locus = feat.GetData().GetGene().GetLocus();
621     }
622     // CLabel::GetLabel(feat, &m_locus, CLabel::eContent, &scope);
623 }
624 
625 
SetMrna(CScope & scope,const CSeq_feat & feat)626 void CReportEntry::SetMrna(CScope& scope, const CSeq_feat& feat)
627 {
628     m_mrna.Reset(&feat);
629     if (feat.IsSetProduct())
630         // mRNA feature's product always points to transcript
631         m_transcript_id.Reset(feat.GetProduct().GetId());
632     m_mrna_plus_strand =
633         sequence::GetStrand(feat.GetLocation()) != eNa_strand_minus;
634     x_SetMrnaMapper(scope, feat);
635 }
636 
637 
SetCds(CScope & scope,const CSeq_feat & cds)638 void CReportEntry::SetCds(CScope& scope, const CSeq_feat& cds)
639 {
640     m_cds.Reset(&cds);
641 //    if (!m_mrna) m_main_id.Reset(cds.GetLocation().GetId());
642     m_prot_id.Reset(cds.GetProduct().GetId());
643     if (!cds.GetLocation().IsPnt() && (!cds.IsSetPartial() || !cds.GetPartial())) {
644         m_cdr_length = sequence::GetLength(cds.GetLocation(), &scope);
645     }
646 
647     // Prefer direct calculation (see x_SetRnaCds) and check
648     // that mRNA is already set
649     if (m_cds_offset_set || !m_mrna) return;
650     // Offset is inferred given the
651     // locations of the mRNA and CDS feature
652     CSeq_loc_CI loc_iter(m_mrna->GetLocation());
653     TSeqPos start = cds.GetLocation().GetStart(eExtreme_Biological);
654     ENa_strand strand = cds.GetLocation().GetStrand();
655     TSeqRange r(start, start);
656     TSeqPos frame = 0;
657     if (cds.GetData().GetCdregion().IsSetFrame()) {
658         frame = cds.GetData().GetCdregion().GetFrame();
659         if (frame) {
660             frame -= 1;
661         }
662     }
663 
664     TSeqPos offset_acc = 0;
665     for (;  loc_iter;  ++loc_iter) {
666         if (loc_iter.GetRange()
667             .IntersectingWith(r)) {
668             if (strand == eNa_strand_minus) {
669                 m_cds_offset = offset_acc +
670                     loc_iter.GetRange().GetTo() - start - frame;
671             } else {
672                 m_cds_offset = offset_acc +
673                     start - loc_iter.GetRange().GetFrom() + frame;
674             }
675             m_cds_offset_set = true;
676             break;
677         }
678         offset_acc += loc_iter.GetRange().GetLength();
679     }
680 }
681 
682 
x_SetRnaCds(CScope & scope,const CSeq_feat & cds)683 void CReportEntry::x_SetRnaCds(CScope& scope, const CSeq_feat& cds)
684 {
685     m_rna_cds.Reset(&cds);
686     m_prot_id.Reset(cds.GetProduct().GetId());
687     if (!cds.GetLocation().IsPnt() && (!cds.IsSetPartial() || !cds.GetPartial())) {
688         m_cdr_length = sequence::GetLength(cds.GetLocation(), &scope);
689     }
690     // Offset is determined by the
691     // placement of a CDS feature
692     // directly on the RNA sequence
693     m_cds_offset = cds.GetLocation().GetTotalRange().GetFrom();
694     if (cds.GetData().GetCdregion().IsSetFrame()) {
695         TSeqPos frame = cds.GetData().GetCdregion().GetFrame();
696         if (frame) {
697             m_cds_offset += frame - 1;
698         }
699     }
700     m_cds_offset_set = true;
701 }
702 
703 
x_SetMrnaMapper(CScope & scope,const CSeq_feat & feat)704 void CReportEntry::x_SetMrnaMapper(CScope& scope, const CSeq_feat& feat)
705 {
706     if (feat.IsSetProduct()) {
707         m_mrna_mapper.Reset(new CSeq_loc_Mapper
708                              (feat,
709                               CSeq_loc_Mapper::eLocationToProduct,
710                               &scope));
711     } else {
712         CRef<CSeq_loc> fake_product(new CSeq_loc);
713         fake_product->SetWhole().Assign(*m_main_id);
714         m_mrna_mapper.Reset(new CSeq_loc_Mapper
715                      (feat.GetLocation(),
716                       *fake_product,
717                       &scope));
718     }
719 }
720 
721 
SetAlignment(CScope & scope,const CSeq_align & align)722 void CReportEntry::SetAlignment(CScope& scope, const CSeq_align& align)
723 {
724     // cerr << MSerial_AsnText << align;
725 
726     if (align.GetSegs().Which() != CSeq_align::TSegs::e_Spliced)
727         return;
728 
729     m_main_id.Reset(&align.GetSeq_id(1));
730     m_genomic_id.Reset(&align.GetSeq_id(1));
731     m_transcript_id.Reset(&align.GetSeq_id(0));
732 
733     m_mrna_mapper.Reset(new CSeq_loc_Mapper
734 // Work-around for CXX-1555 - pass id of the sequence as a reliable way
735 // to indicate desirable direction of the mapping.
736 //      (*align, 0, scope));
737         (align, align.GetSeq_id(0), &scope));
738     m_mrna_align.Reset(&align);
739     const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
740     m_mrna_plus_strand = !(seg.IsSetGenomic_strand() && seg.GetGenomic_strand() == eNa_strand_minus);
741 
742     if (!m_cds) {
743         // There is no CDS feature because this entry is derived from
744         // alignment. Try to use CDS feature if any on target mRNA
745         CBioseq_Handle product_bsh =
746             scope.GetBioseqHandle
747             (align.GetSeq_id(0));
748         if (product_bsh) {
749             //
750             // our offset is determined by the
751             // placement of a CDS feature
752             // directly on the product RNA sequence
753             //
754 //            LOG_POST("Deriving CDS mapping from alignment");
755             SAnnotSelector sel  = x_GetAnnotSelector();
756             sel.IncludeFeatSubtype(CSeqFeatData::eSubtype_cdregion);
757             // TODO: ??? Now we prefer LAST of CDSs for a given RNA. Can there be
758             // more than one, and what is the meaning for it?
759             for (CFeat_CI feat_iter(product_bsh, sel);
760                  feat_iter;  ++feat_iter)
761             {
762                 x_SetRnaCds(scope, feat_iter->GetMappedFeature());
763             }
764         }
765     } else {
766         // Recalculate CDS offset using the alignment
767         // Check that first exon is not partial
768         const CSpliced_exon &first_exon = *(seg.GetExons().front());
769         if (!(first_exon.IsSetPartial() && first_exon.GetPartial())) {
770             TSeqPos cds_start_on_dna = m_cds->GetLocation().GetStart(eExtreme_Biological);
771             ENa_strand strand = m_cds->GetLocation().GetStrand();
772             TSeqPos rna_start_on_dna;
773             if (strand == eNa_strand_minus) {
774                 rna_start_on_dna = seg.GetSeqStop(1);
775             } else {
776                 rna_start_on_dna = seg.GetSeqStart(1);
777             }
778             TSeqPos rna_start, cds_start;
779             x_MapPos(*m_mrna_mapper, *m_main_id, rna_start_on_dna, rna_start);
780             x_MapPos(*m_mrna_mapper, *m_main_id, cds_start_on_dna, cds_start);
781             m_cds_offset = cds_start - rna_start;
782             m_cds_offset_set = true;
783         }
784     }
785 }
786 
787 
x_GetCdsMapper(CScope & scope,const CSeq_feat & cds) const788 CRef<CSeq_loc_Mapper> CReportEntry::x_GetCdsMapper(CScope& scope, const CSeq_feat& cds) const
789 {
790     CRef<CSeq_loc_Mapper> cds_mapper(new CSeq_loc_Mapper
791                  (cds,
792                   CSeq_loc_Mapper::eLocationToProduct,
793                   &scope));
794     return cds_mapper;
795 }
796 
797 
798 typedef vector<CRef<CReportEntry> > TReportEntryList;
799 
800 static
801 TReportEntryList s_GetFeaturesForRange(CScope& scope, const CBioseq_Handle& bsh, TSeqRange search_range);
802 static
803 void s_ExtendEntriesFromAlignments(CScope& scope, const CBioseq_Handle& bsh, TSeqRange search_range, TReportEntryList& entries);
804 static
805 bool s_IsRefSeqGene(const CBioseq_Handle& bsh);
806 
807 
808 /////////////////////////////////////////////////////////////////////////////
809 //  CObjectCoords::
810 //
811 
CObjectCoords(CScope & scope)812 CObjectCoords::CObjectCoords(CScope& scope) :
813     m_Scope(&scope)
814 {
815 }
816 
817 
GetCoordinates(const CSeq_id & id,TSeqPos pos,TSeqPos window)818 CRef<CHGVS_Coordinate_Set> CObjectCoords::GetCoordinates(const CSeq_id& id, TSeqPos pos, TSeqPos window)
819 {
820     CRef<CHGVS_Coordinate_Set> coords(new CHGVS_Coordinate_Set);
821     GetCoordinates(*coords, id, pos, window);
822     return coords;
823 }
824 
825 void
GetCoordinates(CHGVS_Coordinate_Set & coords,const CSeq_id & id,TSeqPos pos,TSeqPos window)826 CObjectCoords::GetCoordinates(CHGVS_Coordinate_Set& coords, const CSeq_id& id, TSeqPos pos, TSeqPos window)
827 {
828     CBioseq_Handle bsh = m_Scope->GetBioseqHandle(id);
829     if ( !bsh ) {
830         NCBI_THROW(CException, eUnknown,
831                    "failed to retrieve sequence: " + id.AsFastaString());
832     }
833     CSeqVector seq_vec =
834         bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
835 
836     string title;
837     CSeq_id_Handle idh = sequence::GetId(id, *m_Scope, sequence::eGetId_Best);
838     idh.GetSeqId()->GetLabel(&title, CSeq_id::eContent);
839 
840     // First entry is for the gi itself
841     CRef<CHGVS_Coordinate> ref(new CHGVS_Coordinate());
842 
843     // identify sequence
844     Uint4 type_flags = GetSequenceType(bsh);
845     string hgvs;
846 
847     /// check for protein
848     if (type_flags & CSeq_id::fAcc_prot) {
849         hgvs = "p.";
850     }
851     /// check for mRNA
852     else if (type_flags & CSeq_id::eAcc_mrna) {
853         hgvs = "r.";
854     }
855     /// check for genomic
856     else if (type_flags & CSeq_id::fAcc_genomic) {
857         hgvs = "g.";
858     }
859 
860     // Get Main sequence
861     ref->SetTitle(title);
862     ref->SetHgvs_position( hgvs + NStr::IntToString(pos + 1) );
863     ref->SetSequence( x_GetSequence(seq_vec, pos) );
864     ref->SetMarker_pos(pos);
865     ref->SetPos_mapped(pos);
866 
867     coords.Set().push_back(ref);
868 
869     // Get all RNAs and CDSs in the +-2k (default) vicinity of the
870     //   position in question and build a CFeatTree.
871     // Main cases are:
872     //   1. Genomic sequence. Tree will contain many mRNA->CDS chains
873     //      E.g. NT_011515 pos 1585115 (alignment present)
874     //           NC_000007.13 pos 65338429 (no alignment)
875     //           NC_000019.9 pos 45016991 - forward ribosomal slippage
876     //           NC_000007.13 v 94283650:94301032 - back ribosomal slippage,
877     //                        encoded through weird mRNA/CDS relation
878     //           NG_005895.1 - RefSeq Gene, features through alignment only
879     //   1a. mRNA can be w/o product (GenBank DNA)
880     //      E.g. BX571818.3
881     //   1b. No mRNA
882     //      E.g. NC_000913.2 pos 2282398
883     //   2. Transcript. There will be CDSs alone
884     //      E.g. NM_002020
885     //   2a. There can be mRNA w/o product (GenBank mRNA)
886     //      E.g. M11313
887     //   2b. No CDS
888     //      E.g. NR_002767
889     //   3. Protein sequence
890     // Overall procedure is following:
891     // Get all pairwise alignments for position into map by product id.
892     // Sort all top-level into ReportEntries, adding alignment for mRNA
893     //   products from the map (removing from map).
894     // Convert all remaning alignment to ReportEntries
895     // Get all components for position into GeneInfos
896     // Process all ReportEntries getting r., c., and p. (g. for components)
897     TSeqPos min_pos = pos >= window ? pos - window : pos;
898     TSeqPos max_pos = pos + window;
899     TSeqRange search_range(min_pos, max_pos);
900     // Per SV-487, if we deal with RefSeq Gene record only use
901     // alignments to get features
902     bool is_ref_seq_gene = s_IsRefSeqGene(bsh);
903     // is_ref_seq_gene = false; // DEBUG
904     TReportEntryList entries;
905     if (!is_ref_seq_gene) entries = s_GetFeaturesForRange(*m_Scope, bsh, search_range);
906     s_ExtendEntriesFromAlignments(*m_Scope, bsh, search_range, entries);
907     ///
908     /// now, evaluate each feature
909     /// we may need to inject an artificial mRNA feature to capture
910     /// the 'c.' coordinate.
911     ///
912     ITERATE(TReportEntryList, iter, entries) {
913         (*iter)->GetCoordinates(
914             *m_Scope,
915             type_flags,
916             pos,
917             coords);
918 
919     }
920 }
921 
922 
923 static
s_IsRefSeqGene(const CBioseq_Handle & bsh)924 bool s_IsRefSeqGene(const CBioseq_Handle& bsh)
925 {
926     // TODO: Use CSeq_descr_CI on bhs itself here
927     bool is_ref_seq_gene = false;
928     CSeq_entry_Handle seh = bsh.GetParentEntry();
929     if (seh.IsSetDescr()) {
930         const CSeq_descr& descr = seh.GetDescr();
931         CSeq_descr::Tdata descr_list = descr.Get();
932         ITERATE(CSeq_descr::Tdata, it, descr_list) {
933             if (it->GetObject().IsGenbank()) {
934                 const CSeqdesc::TGenbank& gb = it->GetObject().GetGenbank();
935                 if (gb.IsSetKeywords()) {
936                     ITERATE(list<string>, it1, gb.GetKeywords()) {
937                         if (*it1 == "RefSeqGene") {
938                             is_ref_seq_gene = true;
939                             break;
940                         }
941                     }
942                 }
943             }
944             if (is_ref_seq_gene) break;
945         }
946     }
947     return is_ref_seq_gene;
948 }
949 
950 
951 static
s_GetFeaturesForRange(CScope & scope,const CBioseq_Handle & bsh,TSeqRange search_range)952 TReportEntryList s_GetFeaturesForRange(
953     CScope& scope,
954     const CBioseq_Handle& bsh,
955     TSeqRange search_range)
956 {
957     TReportEntryList entries;
958     // find features on the sequence
959     SAnnotSelector sel  = x_GetAnnotSelector();
960     // We don't need gene record per se, only mRNA and CDS
961     sel.IncludeFeatType(CSeqFeatData::e_Gene);
962     sel.IncludeFeatType(CSeqFeatData::e_Rna);
963     sel.IncludeFeatType(CSeqFeatData::e_Cdregion);
964 
965     CFeat_CI feat_iter(bsh, search_range, sel);
966     feature::CFeatTree feat_tree;
967     feat_tree.AddFeatures(feat_iter);
968 
969     /*
970      Here we have following structures:
971      Gene
972      |\
973      | mRNA
974      |   \
975      |    CDS
976      |\
977      | mRNA
978      |\
979      | CDS
980      We convert them into list of ReportEntries which hold Gene, mRNA, and CDS
981      records (possibly empty). We need to pass only hierarchy root to ReportEntry
982      constructor except in case of Gene because Gene can have multiple distinct
983      mRNA descendants.
984     */
985     vector<CMappedFeat> feat_roots =
986         feat_tree.GetChildren(CMappedFeat());
987     CConstRef<CSeq_id> seq_id(bsh.GetSeqId());
988     ITERATE (vector<CMappedFeat>, iter, feat_roots) {
989         const CSeq_feat& seq_feat = iter->GetMappedFeature();
990         CSeqFeatData::ESubtype subtype =
991             seq_feat.GetData().GetSubtype();
992         if (subtype == CSeqFeatData::eSubtype_gene) {
993             vector<CMappedFeat> cc = feat_tree.GetChildren(*iter);
994             ITERATE (vector<CMappedFeat>, iter1, cc) {
995                 CRef<CReportEntry> entry(new CReportEntry(
996                     scope,
997                     *seq_id,
998                     *iter, // pass Gene explicitly
999                     feat_tree,
1000                     *iter1));
1001                 if (entry->IsValid()) entries.push_back(entry);
1002             }
1003         } else {
1004             CRef<CReportEntry> entry(new CReportEntry(
1005                 scope,
1006                 *seq_id,
1007                 feat_tree,
1008                 *iter));
1009             if (entry->IsValid()) entries.push_back(entry);
1010         }
1011     }
1012     return entries;
1013 }
1014 
1015 
1016 static
s_ExtendEntriesFromAlignments(CScope & scope,const CBioseq_Handle & bsh,TSeqRange search_range,TReportEntryList & entries)1017 void s_ExtendEntriesFromAlignments(
1018     CScope& scope,
1019     const CBioseq_Handle& bsh,
1020     TSeqRange search_range,
1021     TReportEntryList& entries)
1022 {
1023     CConstRef<CSeq_id> seq_id(bsh.GetSeqId());
1024     TReportEntryList new_entries;
1025     SAnnotSelector sel;
1026     //sel.SetAdaptiveDepth(true);
1027     // Slow, but otherwise it can't find all relevant alignments
1028     // for composite sequences
1029     sel.SetResolveAll();
1030     for (CAlign_CI align_it(bsh, search_range, sel); align_it; ++align_it) {
1031         const CSeq_align& al = *align_it;
1032         if (al.CheckNumRows() == 2) {
1033             string label; al.GetSeq_id(0).GetLabel(&label);
1034             bool found = false;
1035             NON_CONST_ITERATE(TReportEntryList, it, entries) {
1036                 CConstRef<CSeq_id> product_id((*it)->GetTranscriptId());
1037                 string entry_label; product_id->GetLabel(&entry_label);
1038 //                LOG_POST("Check match for alignment " + label + " with existing entry " + entry_label);
1039                 if (sequence::IsSameBioseq(
1040                         *product_id, al.GetSeq_id(0),
1041                         &scope))
1042                 {
1043                     // Found exising entry - extend it with alignment
1044                     (*it)->SetAlignment(scope, al);
1045 //                    LOG_POST("Entry for " + label + " extended by alignment");
1046                     found = true;
1047                     break;
1048                 }
1049             }
1050             // If we did not match this alignment to an existing entry, make a new one.
1051             if (!found) {
1052                 CBioseq_Handle target_bsh = scope.GetBioseqHandle(al.GetSeq_id(0));
1053                 if (target_bsh && (GetSequenceType(target_bsh) & CSeq_id::eAcc_mrna)) {
1054                     // Make new entry
1055                     CRef<CReportEntry> entry(new CReportEntry(scope, *seq_id, al));
1056                     new_entries.push_back(entry);
1057 //                    LOG_POST("New entry " + label + " created from alignment");
1058                 }
1059             }
1060         }
1061     }
1062     entries.insert(entries.end(), new_entries.begin(), new_entries.end());
1063 }
1064 
1065 
1066 //static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, bool complement)
x_GetSequence(CSeqVector & seq_vec,TSignedSeqPos pos,TSignedSeqPos adjustment)1067 static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, TSignedSeqPos adjustment)
1068 {
1069     string seq_before, seq_pos, seq_after;
1070     TSignedSeqPos seq_len = seq_vec.size();
1071     TSignedSeqPos max_seq_frame = TSignedSeqPos(kSeqFrame);
1072 
1073     pos += adjustment;
1074 
1075     if (adjustment >=0) {
1076         TSeqPos before_from = pos <= max_seq_frame ?
1077             0 : max(0, pos - max_seq_frame);
1078         seq_vec.GetSeqData(before_from, max(0, pos), seq_before);
1079         // This filler takes into account both before and after, so we don't
1080         // need to repeat it twice. We need to compensate for very negative
1081         // position to not overfill it, so we limit overfill by 2*kSeqFrame+1
1082         // which is total length of reported sequence context.
1083         if (pos < max_seq_frame) {
1084             string filler;
1085             int max_filler_len = min(2*max_seq_frame+1, max_seq_frame - pos);
1086             for (int i = 0; i < max_filler_len; i++) filler += "&nbsp;";
1087             seq_before = filler + seq_before;
1088         }
1089         if (adjustment != 0) {
1090             int fill_len = min(int(adjustment-1), int(seq_before.size()));
1091             seq_before.replace(seq_before.size()-fill_len, fill_len, fill_len, '-');
1092         }
1093     } else {
1094         for (int i = 0; i < max_seq_frame; i++) seq_before += "-";
1095     }
1096     if (adjustment == 0) {
1097         seq_vec.GetSeqData(pos, pos+1, seq_pos);
1098     } else {
1099         seq_pos = "-";
1100     }
1101     if (adjustment <= 0) {
1102         // If pos is less than -1, it starts eating into seq_after. The filler is taken
1103         // care of, now we ensure that we're not generating negative positions.
1104 
1105         seq_vec.GetSeqData(max(0, pos+1),
1106             max(0, min(seq_len, pos+max_seq_frame+1)), seq_after);
1107         if (adjustment != 0) {
1108             int fill_len = min(int(-adjustment-1), int(seq_after.size()));
1109             seq_after.replace(0, fill_len, fill_len, '-');
1110         }
1111     } else {
1112         for (int i = 0; i < max_seq_frame; i++) seq_after += "-";
1113     }
1114 /*
1115     if (complement) {
1116         string comp;
1117         CSeqManip::Complement(seq_before, CSeqUtil::e_Iupacna, 0, seq_before.size(), comp);
1118         CSeqManip::Complement(seq_after, CSeqUtil::e_Iupacna, 0, seq_after.size(), seq_before);
1119         seq_before = comp;
1120         CSeqManip::Complement(seq_pos, CSeqUtil::e_Iupacna, 0, seq_pos.size(), seq_pos);
1121     }
1122 */
1123 
1124     return
1125         seq_before + "<span class='xsv-seq_pos_highlight'>" +
1126         seq_pos + "</span>" + seq_after;
1127 }
1128 
x_GetAnnotSelector()1129 static SAnnotSelector x_GetAnnotSelector()
1130 {
1131     #ifdef __GUI_SEQ_UTILS_DEFINED__
1132     return CSeqUtils::GetAnnotSelector();
1133     #endif
1134 
1135     SAnnotSelector sel;
1136     sel
1137         // consider overlaps by total range...
1138         .SetOverlapTotalRange()
1139         // resolve all segments...
1140         .SetResolveAll()
1141     ;
1142 
1143     sel.SetExcludeExternal(false);
1144 
1145     ///
1146     /// known external annotations
1147     ///
1148     sel.ExcludeNamedAnnots("SNP");  /// SNPs = variation features
1149     sel.ExcludeNamedAnnots("CDD");  /// CDD  = conserved domains
1150     sel.ExcludeNamedAnnots("STS");  /// STS  = sequence tagged sites
1151 
1152     sel.SetAdaptiveDepth(true);
1153     sel.SetResolveAll();
1154 
1155     return sel;
1156 }
1157 
1158 /*
1159 /////////////////////////////////////////////////////////////////////////////
1160 // Service functions
1161 
1162 static
1163 TSignedSeqPos x_GetAdjustment(const CSeq_feat& feat, TSeqPos pos)
1164 {
1165     // Calculate minimal distance to the exon boundary
1166     int adj = 0;
1167     bool plus_strand =
1168         sequence::GetStrand(feat.GetLocation()) != eNa_strand_minus;
1169     TSignedSeqPos signed_pos = static_cast<TSignedSeqPos>(pos);
1170     CSeq_loc_CI loc_iter(feat.GetLocation());
1171     for ( ;  loc_iter;  ++loc_iter) {
1172         const TSeqRange& curr = loc_iter.GetRange();
1173         TSignedSeqPos f = curr.GetFrom();
1174         TSignedSeqPos t = curr.GetTo();
1175 
1176         int adj_for_intron;
1177         if (signed_pos < f) {
1178             adj_for_intron = signed_pos - f;
1179         } else if (signed_pos > t) {
1180             adj_for_intron = signed_pos - t;
1181         } else {
1182             return 0; // no adjustment needed
1183         }
1184         if (adj == 0  ||  std::abs(adj_for_intron) < std::abs(adj)) {
1185             adj = adj_for_intron;
1186         }
1187     }
1188     return plus_strand ? adj : -adj;
1189 }
1190 */
1191 
1192 
1193 // END_NCBI_SCOPE
1194