1 /* $Id: objcoords.cpp 618367 2020-10-19 14:54:50Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Vlad Lebedev, Victor Joukov
27 *
28 * File Description: Library to retrieve mapped sequence coordinates of
29 * the feature products given a sequence location
30 *
31 *
32 */
33
34 #include <ncbi_pch.hpp>
35
36 #include <misc/hgvs/objcoords.hpp>
37
38 #include <misc/hgvs/sequtils.hpp>
39
40 #include <util/checksum.hpp>
41
42 #include <util/sequtil/sequtil.hpp>
43 #include <util/sequtil/sequtil_manip.hpp>
44
45 #include <objmgr/feat_ci.hpp>
46 #include <objmgr/align_ci.hpp>
47 #include <objmgr/seqdesc_ci.hpp>
48 #include <objmgr/seq_loc_mapper.hpp>
49 #include <objmgr/seq_vector.hpp>
50 #include <objmgr/annot_selector.hpp>
51 #include <objmgr/util/sequence.hpp>
52 #include <objmgr/util/feature.hpp>
53 #include <objects/general/Object_id.hpp>
54 #include <objects/general/User_object.hpp>
55 #include <objects/seq/Seqdesc.hpp>
56 #include <objects/seq/seq_id_handle.hpp>
57 #include <objects/seqloc/Seq_id.hpp>
58 #include <objects/seqloc/Seq_point.hpp>
59 #include <objects/seqfeat/Cdregion.hpp>
60 #include <objects/seqblock/GB_block.hpp>
61 #include <objects/seqalign/Spliced_seg.hpp>
62 #include <objects/seqalign/Spliced_exon_chunk.hpp>
63
64 #include <math.h>
65
66
67 USING_SCOPE(ncbi);
68 USING_SCOPE(objects);
69
70
71 // size of sequence frame
72 static const TSeqPos kSeqFrame = 10;
73
74 // Service functions
75 //static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, bool complement=false);
76 static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, TSignedSeqPos adjustment=0);
77
78 static SAnnotSelector x_GetAnnotSelector();
79
80
81 /////////////////////////////////////////////////////////////////////////////
82 // CReportEntry
83 // Container for all information related to one logical entry - mRNA/CDS
84 // combination or component.
85
86 class CReportEntry : public CObject
87 {
88 public:
89 CReportEntry(CScope& scope, const CSeq_id& seq_id,
90 feature::CFeatTree& feat_tree, const CMappedFeat& root_feat);
91 CReportEntry(CScope& scope, const CSeq_id& seq_id, const CMappedFeat& gene,
92 feature::CFeatTree& feat_tree, const CMappedFeat& root_feat);
93 CReportEntry(CScope& scope, const CSeq_id& seq_id, const CSeq_align& align);
CReportEntry()94 CReportEntry() : m_cdr_length(0), m_cds_offset_set(false) {}
95 void GetCoordinates(CScope& scope,
96 Uint4 type_flags,
97 TSeqPos pos,
98 CHGVS_Coordinate_Set& coords) const;
IsValid()99 bool IsValid() { return m_mrna || m_cds; }
100
101 // Specific to feature/alignment based mRNA/CDS group
102 void SetGene(CScope& scope, const CSeq_feat& feat);
103 void SetMrna(CScope& scope, const CSeq_feat& feat);
104 void SetCds(CScope& scope, const CSeq_feat& feat);
105 void SetAlignment(CScope& scope, const CSeq_align& align);
106 // ??? is it specific, or can be made generic
GetTranscriptId()107 CConstRef<CSeq_id> GetTranscriptId() { return m_transcript_id; }
108
109 private:
110 CConstRef<CSeq_feat> m_mrna; /// mRNA feature on main sequence (genomic or transcript)
111 CConstRef<CSeq_feat> m_cds; /// CDS feature on main sequence
112 CConstRef<CSeq_feat> m_rna_cds; /// CDS feature on transcript, where main is genomic
113 TSeqPos m_cdr_length;
114 bool m_cds_offset_set;
115 TSeqPos m_cds_offset;
116 bool m_mrna_plus_strand;
117 CConstRef<CSeq_align> m_mrna_align;
118 CRef<CSeq_loc_Mapper> m_mrna_mapper;
119 // Some features occur on many kinds of sequences, so
120 // we have an alias m_main_id for such cases
121 CConstRef<CSeq_id> m_main_id;
122 CConstRef<CSeq_id> m_genomic_id;
123 CConstRef<CSeq_id> m_transcript_id;
124 CConstRef<CSeq_id> m_prot_id;
125 string m_locus;
126
127 void x_SetId(CScope& scope, const CSeq_id& seq_id);
128 void x_SetFeature(CScope& scope, feature::CFeatTree& feat_tree, const CMappedFeat& feat);
129 void x_SetRnaCds(CScope& scope, const CSeq_feat& cds);
130 void x_SetMrnaMapper(CScope& scope, const CSeq_feat& feat);
131
132 CRef<CSeq_loc_Mapper> x_GetCdsMapper(CScope& scope, const CSeq_feat& cds) const;
133 TSignedSeqPos x_GetAdjustment(TSeqPos pos) const;
134 bool x_CheckExonGap(TSeqPos pos) const;
135 TSeqPos x_GetCDSOffset() const;
136 CRef<CHGVS_Coordinate> x_NewCoordinate(CScope& scope, const CSeq_id* id, TSeqPos pos) const;
137 void x_SetSequence(CHGVS_Coordinate& ref, CScope& scope, const CSeq_id* id,
138 TSignedSeqPos pos, TSignedSeqPos adjustment=0, bool plus_strand=true) const;
139 void x_SetHgvs(CHGVS_Coordinate& ref, const char* prefix, TSignedSeqPos pos,
140 TSignedSeqPos adjustment = 0, bool utr3_tail = false,
141 bool in_gap = false) const;
142 bool x_MapPos(const CSeq_loc_Mapper& mapper,
143 const CSeq_id& id, TSeqPos pos,
144 TSeqPos& mapped_pos) const;
145 void x_CalculateUTRAdjustments(TSignedSeqPos& feature_pos,
146 TSignedSeqPos& seq_pos,
147 TSignedSeqPos& adjustment,
148 bool& utr3_tail) const;
149 void x_GetRCoordinate(CScope& scope, TSeqPos pos,
150 CHGVS_Coordinate_Set& coords) const;
151 void x_GetCCoordinate(CScope& scope, TSeqPos pos, TSignedSeqPos adj,
152 CHGVS_Coordinate_Set& coords) const;
153 void x_GetPCoordinate(CScope& scope, TSeqPos pos,
154 CHGVS_Coordinate_Set& coords) const;
155 };
156
157
158
CReportEntry(CScope & scope,const CSeq_id & seq_id,feature::CFeatTree & feat_tree,const CMappedFeat & root_feat)159 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id,
160 feature::CFeatTree& feat_tree, const CMappedFeat& root_feat)
161 : m_cdr_length(0), m_cds_offset_set(false)
162 {
163 x_SetId(scope, seq_id);
164 x_SetFeature(scope, feat_tree, root_feat);
165 }
166
167
CReportEntry(CScope & scope,const CSeq_id & seq_id,const CMappedFeat & gene,feature::CFeatTree & feat_tree,const CMappedFeat & root_feat)168 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id, const CMappedFeat& gene,
169 feature::CFeatTree& feat_tree, const CMappedFeat& root_feat)
170 : m_cdr_length(0), m_cds_offset_set(false)
171 {
172 x_SetId(scope, seq_id);
173 SetGene(scope, gene.GetMappedFeature());
174 x_SetFeature(scope, feat_tree, root_feat);
175 }
176
177
CReportEntry(CScope & scope,const CSeq_id & seq_id,const CSeq_align & align)178 CReportEntry::CReportEntry(CScope& scope, const CSeq_id& seq_id, const CSeq_align& align)
179 {
180 x_SetId(scope, seq_id);
181 SetAlignment(scope, align);
182 }
183
184
x_SetId(CScope & scope,const CSeq_id & seq_id)185 void CReportEntry::x_SetId(CScope& scope, const CSeq_id& seq_id)
186 {
187 m_main_id.Reset(&seq_id);
188 Uint4 type_flags = GetSequenceType(scope.GetBioseqHandle(seq_id));
189 /// check for protein
190 if (type_flags & CSeq_id::fAcc_prot) {
191 m_prot_id.Reset(&seq_id);
192 }
193 /// check for mRNA
194 else if (type_flags & CSeq_id::eAcc_mrna) {
195 m_transcript_id.Reset(&seq_id);
196 }
197 /// check for genomic
198 else if (type_flags & CSeq_id::fAcc_genomic) {
199 m_genomic_id.Reset(&seq_id);
200 }
201 }
202
203
x_SetFeature(CScope & scope,feature::CFeatTree & feat_tree,const CMappedFeat & feat)204 void CReportEntry::x_SetFeature(CScope& scope, feature::CFeatTree& feat_tree, const CMappedFeat& feat)
205 {
206 const CSeq_feat& seq_feat = feat.GetMappedFeature();
207 CSeqFeatData::ESubtype subtype =
208 seq_feat.GetData().GetSubtype();
209
210 switch (subtype) {
211 // The following case is useless, handled in different manner in calling code
212 case CSeqFeatData::eSubtype_gene:
213 SetGene(scope, seq_feat);
214 {{
215 vector<CMappedFeat> cc = feat_tree.GetChildren(feat);
216 ITERATE (vector<CMappedFeat>, iter, cc) {
217 x_SetFeature(scope, feat_tree, *iter);
218 }
219 }}
220 break;
221 case CSeqFeatData::eSubtype_mRNA:
222 SetMrna(scope, seq_feat);
223 if (feat.IsSetProduct()) {
224 CBioseq_Handle product_bsh =scope.GetBioseqHandle(feat.GetProduct());
225 if (product_bsh) {
226 SAnnotSelector sel = x_GetAnnotSelector();
227 sel.IncludeFeatSubtype(CSeqFeatData::eSubtype_cdregion);
228 for (CFeat_CI feat_iter(product_bsh, sel); feat_iter; ++feat_iter)
229 {
230 x_SetRnaCds(scope, feat_iter->GetMappedFeature());
231 }
232 }
233 } else {
234 vector<CMappedFeat> cc = feat_tree.GetChildren(feat);
235 // TODO: We don't handle multiple CDS per mRNA. Do they actually happen?
236 ITERATE (vector<CMappedFeat>, iter, cc) {
237 const CSeq_feat& seq_feat = iter->GetMappedFeature();
238 if (seq_feat.GetData().GetSubtype() ==
239 CSeqFeatData::eSubtype_cdregion)
240 {
241 SetCds(scope, seq_feat);
242 break;
243 }
244 }
245 }
246 break;
247 case CSeqFeatData::eSubtype_cdregion:
248 SetCds(scope, seq_feat);
249 break;
250 default:
251 break;
252 }
253 }
254
255
GetCoordinates(CScope & scope,Uint4 type_flags,TSeqPos pos,CHGVS_Coordinate_Set & coords) const256 void CReportEntry::GetCoordinates(CScope& scope,
257 Uint4 type_flags,
258 TSeqPos pos,
259 CHGVS_Coordinate_Set& coords) const
260 {
261 // Fill out g. r. c. and p. coordinates based on the entry and type of
262 // original sequence
263 if (type_flags & CSeq_id::fAcc_prot) {
264 // do nothing
265 return;
266 }
267 // TODO: handle component here in the future
268
269 // Calculate adjustment to the nearest exon, 0 if we are in exon
270 TSignedSeqPos adjustment = x_GetAdjustment(pos);
271 // check for genomic
272 if (type_flags & CSeq_id::fAcc_genomic) {
273 // r. c. and p. - r. here, c. and p. same as for mRNA
274 if (!adjustment)
275 x_GetRCoordinate(scope, pos, coords);
276 }
277 // check for mRNA, effectively union of cases with genomic
278 if (type_flags & (CSeq_id::fAcc_genomic | CSeq_id::eAcc_mrna)) {
279 // c. and p.
280 x_GetCCoordinate(scope, pos, adjustment, coords);
281 if (!adjustment)
282 x_GetPCoordinate(scope, pos, coords);
283 }
284 }
285
286
x_GetAdjustment(TSeqPos pos) const287 TSignedSeqPos CReportEntry::x_GetAdjustment(TSeqPos pos) const
288 {
289 // Calculate minimal distance to the exon boundary
290 if (!m_mrna_mapper) return 0;
291 //bool plus_strand = true;
292 if (m_mrna_align) {
293 // TODO: Use alignment, take into account exons
294 // const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
295 // ??? what should we do if we're in exon gap? How to handle exon overlap
296 // (ambiguous positions in an exon)
297 }
298 TSignedSeqPos signed_pos = static_cast<TSignedSeqPos>(pos);
299 TSignedSeqPos adj = 0;
300 const CMappingRanges& ranges = m_mrna_mapper->GetMappingRanges();
301 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*m_main_id);
302 CMappingRanges::TRangeIterator rg_it = ranges.BeginMappingRanges(
303 idh,
304 CMappingRanges::TRange::GetWhole().GetFrom(),
305 CMappingRanges::TRange::GetWhole().GetTo());
306 for ( ; rg_it; ++rg_it) {
307 const CMappingRange& cvt = *rg_it->second;
308 TSignedSeqPos f = cvt.GetSrc_from();
309 TSignedSeqPos t = f + cvt.GetLength();
310 // TODO: Is it reliable enough to test for mRNA strand?
311 // plus_strand = !cvt.GetReverse();
312 // LOG_POST("Pos " << pos << " from " << f << " to " << t);
313 int adj_for_intron;
314 if (signed_pos < f) {
315 adj_for_intron = signed_pos - f;
316 } else if (signed_pos > t) {
317 adj_for_intron = signed_pos - t;
318 } else {
319 return 0; // no adjustment needed
320 }
321 if (adj == 0 || std::abs(adj_for_intron) < std::abs(adj)) {
322 adj = adj_for_intron;
323 }
324 }
325 // LOG_POST("Adjustment calculated " << adj);
326 return m_mrna_plus_strand ? adj : -adj;
327 }
328
329
x_CheckExonGap(TSeqPos pos) const330 bool CReportEntry::x_CheckExonGap(TSeqPos pos) const
331 {
332 if (!m_mrna_align) return false;
333 // Use alignment, take into account exons
334 const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
335 ITERATE (CSpliced_seg::TExons, exon_it, seg.GetExons()) {
336 const CSpliced_exon& exon = **exon_it;
337 if (!exon.IsSetGenomic_start() || pos < exon.GetGenomic_start() ||
338 !exon.IsSetGenomic_end() || pos > exon.GetGenomic_end() ||
339 !exon.IsSetParts()
340 ) {
341 continue;
342 }
343 TSeqPos exon_pos = m_mrna_plus_strand ?
344 pos - exon.GetGenomic_start() :
345 exon.GetGenomic_end() - pos;
346 ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
347 TSeqPos l = 0;
348 bool in_gap = false;
349 const CSpliced_exon_chunk& part = **part_it;
350 if (part.IsMatch()) {
351 l = part.GetMatch();
352 } else if (part.IsMismatch()) {
353 l = part.GetMismatch();
354 } else if (part.IsDiag()) {
355 l = part.GetDiag();
356 } else if (part.IsProduct_ins()) {
357 l = 0; // redundant
358 } else if (part.IsGenomic_ins()) {
359 l = part.GetGenomic_ins();
360 in_gap = true;
361 }
362 if (l > exon_pos) return in_gap;
363 exon_pos -= l;
364 }
365 }
366 return false;
367 }
368
369
x_GetCDSOffset() const370 TSeqPos CReportEntry::x_GetCDSOffset() const
371 {
372 return m_cds_offset_set ? m_cds_offset : 0;
373 }
374
375
x_NewCoordinate(CScope & scope,const CSeq_id * id,TSeqPos pos) const376 CRef<CHGVS_Coordinate> CReportEntry::x_NewCoordinate(CScope& scope, const CSeq_id* id, TSeqPos pos) const
377 {
378 // get the title (best id)
379 string title;
380 if (id) {
381 CSeq_id_Handle idh = sequence::GetId(*id, scope,
382 sequence::eGetId_Best);
383 idh.GetSeqId()->GetLabel(&title, CSeq_id::eContent);
384 } else if (!m_locus.empty()) {
385 title = m_locus;
386 }
387 CRef<CHGVS_Coordinate> ref(new CHGVS_Coordinate());
388 ref->SetTitle(title);
389 ref->SetObject_id("");
390 ref->SetMarker_pos(pos);
391 ref->SetSequence("");
392 ref->SetHgvs_position("");
393 return ref;
394 }
395
396
x_SetSequence(CHGVS_Coordinate & ref,CScope & scope,const CSeq_id * id,TSignedSeqPos pos,TSignedSeqPos adjustment,bool plus_strand) const397 void CReportEntry::x_SetSequence(CHGVS_Coordinate& ref, CScope& scope, const CSeq_id* id,
398 TSignedSeqPos pos, TSignedSeqPos adjustment, bool plus_strand) const
399 {
400 if (!id) return;
401 CBioseq_Handle prod_bsh =
402 scope.GetBioseqHandle(*id);
403 CSeqVector prod_vec(prod_bsh, CBioseq_Handle::eCoding_Iupac,
404 plus_strand ? eNa_strand_plus : eNa_strand_minus);
405 ref.SetSequence( x_GetSequence(prod_vec, plus_strand ? pos : prod_vec.size() - pos - 1, adjustment) );
406 }
407
408
x_SetHgvs(CHGVS_Coordinate & ref,const char * prefix,TSignedSeqPos pos,TSignedSeqPos adjustment,bool utr3_tail,bool in_gap) const409 void CReportEntry::x_SetHgvs(CHGVS_Coordinate& ref, const char* prefix, TSignedSeqPos pos,
410 TSignedSeqPos adjustment, bool utr3_tail,
411 bool in_gap) const
412 {
413 string hgvs(prefix);
414 if (in_gap) {
415 TSignedSeqPos second_pos;
416 if (adjustment > 0) {
417 second_pos = pos + 1;
418 } else {
419 second_pos = pos;
420 pos -= 1;
421 }
422 hgvs += "(" + NStr::IntToString(pos+1) + "_" + NStr::IntToString(second_pos+1) + ")";
423 ref.SetHgvs_position(hgvs);
424 return;
425 }
426 TSignedSeqPos pos_1_based =
427 pos >= 0 ? pos + 1 : pos;
428 if (adjustment == 0 || !utr3_tail) {
429 hgvs += NStr::IntToString(pos_1_based);
430 }
431 if (adjustment != 0) {
432 if (utr3_tail) {
433 hgvs += '*';
434 } else if (adjustment > 0) {
435 hgvs += '+';
436 }
437 hgvs += NStr::IntToString(adjustment);
438 }
439 ref.SetHgvs_position(hgvs);
440 }
441
442
x_MapPos(const CSeq_loc_Mapper & mapper,const CSeq_id & id,TSeqPos pos,TSeqPos & mapped_pos) const443 bool CReportEntry::x_MapPos(const CSeq_loc_Mapper& mapper,
444 const CSeq_id& id, TSeqPos pos,
445 TSeqPos& mapped_pos) const
446 {
447 // fake location for seq_mapper
448 CRef<CSeq_loc> fake_loc(new CSeq_loc());
449 fake_loc->SetPnt().SetPoint(pos);
450 fake_loc->SetId(id);
451
452 // Get the feature position
453 CRef<CSeq_loc> mapped_loc;
454 // Due to defect in definition of Map (non-const) we need to use coercion
455 mapped_loc = const_cast<CSeq_loc_Mapper*>(&mapper)->Map(fake_loc.GetObject());
456 if (mapped_loc->Which() == CSeq_loc::e_Null) {
457 ERR_POST(Warning << "loc mapping did not work");
458 return false;
459 }
460 mapped_pos = mapped_loc->GetPnt().GetPoint();
461 return true;
462 }
463
464
x_CalculateUTRAdjustments(TSignedSeqPos & feature_pos,TSignedSeqPos & seq_pos,TSignedSeqPos & adjustment,bool & utr3_tail) const465 void CReportEntry::x_CalculateUTRAdjustments(TSignedSeqPos& feature_pos,
466 TSignedSeqPos& seq_pos,
467 TSignedSeqPos& adjustment,
468 bool& utr3_tail) const
469 {
470 utr3_tail = false;
471 // To calculate whether we are in UTR 3' region we need to know if our
472 // unadjusted position would map beyond the coding feature length.
473 // There are two cases:
474 // If the right flank is not zero, even adjusted position would map
475 // after cdr_length.
476 // The second one is worse - its adjusted pos always maps to the last
477 // valid position of last exon. For this case we check it and add
478 // adjustment to be used in UTR 3' test.
479 TSignedSeqPos utr3_adjusted_pos = feature_pos;
480 if (m_cdr_length && feature_pos >= TSignedSeqPos(m_cdr_length) - 1) {
481 utr3_adjusted_pos += adjustment;
482 }
483 // If position is out of bounds of the feature, we just add
484 // adjustment so it does not matter is it in intron or not.
485 // According to Alex Astashin, if the position is inside mRNA,
486 // it is reported in detail, so we check seq_pos, not feature_pos
487 if (seq_pos <= 0) {
488 feature_pos += adjustment;
489 seq_pos += adjustment;
490 adjustment = 0;
491 } else
492 if (m_cdr_length && utr3_adjusted_pos >= TSignedSeqPos(m_cdr_length)) {
493 feature_pos += adjustment;
494 seq_pos += adjustment;
495 // Adjustment signifies the offset from the stop codon into 3' UTR
496 adjustment = feature_pos - m_cdr_length + 1;
497 utr3_tail = true;
498 feature_pos = m_cdr_length - 1;
499 }
500 }
501
502
x_GetRCoordinate(CScope & scope,TSeqPos pos,CHGVS_Coordinate_Set & coords) const503 void CReportEntry::x_GetRCoordinate(CScope& scope, TSeqPos pos,
504 CHGVS_Coordinate_Set& coords) const
505 {
506 CRef<CHGVS_Coordinate> ref(x_NewCoordinate(scope, m_transcript_id, pos));
507
508 if (!m_mrna_mapper) {
509 ERR_POST(Warning << "mRNA mapper is empty");
510 return;
511 }
512 TSeqPos mapped_pos;
513 if (x_MapPos(*m_mrna_mapper, *m_genomic_id, pos, mapped_pos)) {
514 ref->SetPos_mapped(mapped_pos);
515 x_SetHgvs(*ref, "r.", mapped_pos);
516 if (m_transcript_id) {
517 x_SetSequence(*ref, scope, m_transcript_id, mapped_pos);
518 } else if (m_genomic_id) {
519 // Product-less mRNA feature - we're faking it a bit using
520 // original sequence and original, not mapped position and
521 // appropriate strand. Same trick works for c. coordinate below
522 x_SetSequence(*ref, scope, m_genomic_id, pos, 0, m_mrna_plus_strand);
523 }
524 coords.Set().push_back(ref);
525 }
526 }
527
528
x_GetCCoordinate(CScope & scope,TSeqPos pos,TSignedSeqPos adjustment,CHGVS_Coordinate_Set & coords) const529 void CReportEntry::x_GetCCoordinate(CScope& scope, TSeqPos pos, TSignedSeqPos adjustment,
530 CHGVS_Coordinate_Set& coords) const
531 {
532 if (!m_cds && !m_rna_cds) return;
533 CRef<CHGVS_Coordinate> ref(x_NewCoordinate(scope, m_transcript_id, pos));
534 bool mapped = false;
535
536 // adjusted_pos is for safely mapping the coordinate, it is guaranteed to be in
537 // exon, and even if the exon has a gap, not in the gap.
538 // adjustment is opposite the product, that is why we need to know
539 // the strand of the feature to find adjustment to mappable position
540 TSeqPos adjusted_pos = pos + (m_mrna_plus_strand ? -adjustment : adjustment);
541
542 TSignedSeqPos mapped_pos;
543 TSeqPos seq_pos;
544 TSeqPos offset = 0;
545 bool utr3_tail = false;
546 bool in_gap = false;
547 if (m_mrna_mapper) {
548 // To use mRNA mapper to map to CDS coordinate we need to calculate
549 // CDS to mRNA offset here and use it to fix mapped coordinate
550 offset = x_GetCDSOffset();
551 if (x_MapPos(*m_mrna_mapper, *m_main_id, adjusted_pos, seq_pos)) {
552 in_gap = x_CheckExonGap(pos);
553 mapped_pos = seq_pos - offset;
554 TSignedSeqPos utr_adjusted_pos = seq_pos;
555 x_CalculateUTRAdjustments(mapped_pos, utr_adjusted_pos, adjustment, utr3_tail);
556 if (m_transcript_id) {
557 x_SetSequence(*ref, scope, m_transcript_id, utr_adjusted_pos, adjustment);
558 } else if (m_genomic_id) {
559 // Product-less mRNA feature - see comment for r. coordinate
560 x_SetSequence(*ref, scope, m_genomic_id, pos, adjustment, m_mrna_plus_strand);
561 }
562 mapped = true;
563 }
564 } else if (m_cds) { // This condition is trivially true, but stays here to denote the case
565 // CDS feature only - map through fake product
566 CRef<CSeq_loc> fake_product(new CSeq_loc);
567 fake_product->SetWhole().Assign(*m_main_id);
568 CRef<CSeq_loc_Mapper> mapper(new CSeq_loc_Mapper
569 (m_cds->GetLocation(),
570 *fake_product,
571 &scope));
572 if (x_MapPos(*mapper, *m_main_id, adjusted_pos, seq_pos)) {
573 mapped_pos = seq_pos;
574 // Sequence coming from original id, use original pos
575 x_SetSequence(*ref, scope, m_main_id, pos, adjustment);
576 mapped = true;
577 }
578 } else {
579 // Sorry, not enough info to calculate the coordinate
580 return;
581 }
582 x_SetHgvs(*ref, "c.", mapped_pos, adjustment, utr3_tail, in_gap);
583
584 if (!adjustment) {
585 ref->SetPos_mapped(mapped_pos);
586 }
587 if (mapped) coords.Set().push_back(ref);
588 }
589
590
x_GetPCoordinate(CScope & scope,TSeqPos pos,CHGVS_Coordinate_Set & coords) const591 void CReportEntry::x_GetPCoordinate(CScope& scope, TSeqPos pos,
592 CHGVS_Coordinate_Set& coords) const
593 {
594 TSeqPos mapped_pos = pos;
595 if (m_rna_cds && m_mrna_mapper) {
596 // We have both DNA to RNA and RNA to protein mappings, so map 2 times
597 if (!x_MapPos(*m_mrna_mapper, *m_genomic_id, pos, mapped_pos))
598 return;
599 if (!x_MapPos(*x_GetCdsMapper(scope, *m_rna_cds), *m_transcript_id, mapped_pos, mapped_pos))
600 return;
601 } else if (m_cds) {
602 if (!x_MapPos(*x_GetCdsMapper(scope, *m_cds), *m_main_id, pos, mapped_pos))
603 return;
604 } else {
605 return;
606 }
607 CRef<CHGVS_Coordinate> ref(x_NewCoordinate(scope, m_prot_id, pos));
608 ref->SetPos_mapped(mapped_pos);
609 x_SetHgvs(*ref, "p.", mapped_pos);
610 x_SetSequence(*ref, scope, m_prot_id, mapped_pos);
611 coords.Set().push_back(ref);
612 }
613
614
SetGene(CScope & scope,const CSeq_feat & feat)615 void CReportEntry::SetGene(CScope& scope, const CSeq_feat& feat)
616 {
617 if (feat.GetData().GetGene().IsSetLocus_tag()) {
618 m_locus = feat.GetData().GetGene().GetLocus_tag();
619 } else if (feat.GetData().GetGene().IsSetLocus()) {
620 m_locus = feat.GetData().GetGene().GetLocus();
621 }
622 // CLabel::GetLabel(feat, &m_locus, CLabel::eContent, &scope);
623 }
624
625
SetMrna(CScope & scope,const CSeq_feat & feat)626 void CReportEntry::SetMrna(CScope& scope, const CSeq_feat& feat)
627 {
628 m_mrna.Reset(&feat);
629 if (feat.IsSetProduct())
630 // mRNA feature's product always points to transcript
631 m_transcript_id.Reset(feat.GetProduct().GetId());
632 m_mrna_plus_strand =
633 sequence::GetStrand(feat.GetLocation()) != eNa_strand_minus;
634 x_SetMrnaMapper(scope, feat);
635 }
636
637
SetCds(CScope & scope,const CSeq_feat & cds)638 void CReportEntry::SetCds(CScope& scope, const CSeq_feat& cds)
639 {
640 m_cds.Reset(&cds);
641 // if (!m_mrna) m_main_id.Reset(cds.GetLocation().GetId());
642 m_prot_id.Reset(cds.GetProduct().GetId());
643 if (!cds.GetLocation().IsPnt() && (!cds.IsSetPartial() || !cds.GetPartial())) {
644 m_cdr_length = sequence::GetLength(cds.GetLocation(), &scope);
645 }
646
647 // Prefer direct calculation (see x_SetRnaCds) and check
648 // that mRNA is already set
649 if (m_cds_offset_set || !m_mrna) return;
650 // Offset is inferred given the
651 // locations of the mRNA and CDS feature
652 CSeq_loc_CI loc_iter(m_mrna->GetLocation());
653 TSeqPos start = cds.GetLocation().GetStart(eExtreme_Biological);
654 ENa_strand strand = cds.GetLocation().GetStrand();
655 TSeqRange r(start, start);
656 TSeqPos frame = 0;
657 if (cds.GetData().GetCdregion().IsSetFrame()) {
658 frame = cds.GetData().GetCdregion().GetFrame();
659 if (frame) {
660 frame -= 1;
661 }
662 }
663
664 TSeqPos offset_acc = 0;
665 for (; loc_iter; ++loc_iter) {
666 if (loc_iter.GetRange()
667 .IntersectingWith(r)) {
668 if (strand == eNa_strand_minus) {
669 m_cds_offset = offset_acc +
670 loc_iter.GetRange().GetTo() - start - frame;
671 } else {
672 m_cds_offset = offset_acc +
673 start - loc_iter.GetRange().GetFrom() + frame;
674 }
675 m_cds_offset_set = true;
676 break;
677 }
678 offset_acc += loc_iter.GetRange().GetLength();
679 }
680 }
681
682
x_SetRnaCds(CScope & scope,const CSeq_feat & cds)683 void CReportEntry::x_SetRnaCds(CScope& scope, const CSeq_feat& cds)
684 {
685 m_rna_cds.Reset(&cds);
686 m_prot_id.Reset(cds.GetProduct().GetId());
687 if (!cds.GetLocation().IsPnt() && (!cds.IsSetPartial() || !cds.GetPartial())) {
688 m_cdr_length = sequence::GetLength(cds.GetLocation(), &scope);
689 }
690 // Offset is determined by the
691 // placement of a CDS feature
692 // directly on the RNA sequence
693 m_cds_offset = cds.GetLocation().GetTotalRange().GetFrom();
694 if (cds.GetData().GetCdregion().IsSetFrame()) {
695 TSeqPos frame = cds.GetData().GetCdregion().GetFrame();
696 if (frame) {
697 m_cds_offset += frame - 1;
698 }
699 }
700 m_cds_offset_set = true;
701 }
702
703
x_SetMrnaMapper(CScope & scope,const CSeq_feat & feat)704 void CReportEntry::x_SetMrnaMapper(CScope& scope, const CSeq_feat& feat)
705 {
706 if (feat.IsSetProduct()) {
707 m_mrna_mapper.Reset(new CSeq_loc_Mapper
708 (feat,
709 CSeq_loc_Mapper::eLocationToProduct,
710 &scope));
711 } else {
712 CRef<CSeq_loc> fake_product(new CSeq_loc);
713 fake_product->SetWhole().Assign(*m_main_id);
714 m_mrna_mapper.Reset(new CSeq_loc_Mapper
715 (feat.GetLocation(),
716 *fake_product,
717 &scope));
718 }
719 }
720
721
SetAlignment(CScope & scope,const CSeq_align & align)722 void CReportEntry::SetAlignment(CScope& scope, const CSeq_align& align)
723 {
724 // cerr << MSerial_AsnText << align;
725
726 if (align.GetSegs().Which() != CSeq_align::TSegs::e_Spliced)
727 return;
728
729 m_main_id.Reset(&align.GetSeq_id(1));
730 m_genomic_id.Reset(&align.GetSeq_id(1));
731 m_transcript_id.Reset(&align.GetSeq_id(0));
732
733 m_mrna_mapper.Reset(new CSeq_loc_Mapper
734 // Work-around for CXX-1555 - pass id of the sequence as a reliable way
735 // to indicate desirable direction of the mapping.
736 // (*align, 0, scope));
737 (align, align.GetSeq_id(0), &scope));
738 m_mrna_align.Reset(&align);
739 const CSpliced_seg& seg = m_mrna_align->GetSegs().GetSpliced();
740 m_mrna_plus_strand = !(seg.IsSetGenomic_strand() && seg.GetGenomic_strand() == eNa_strand_minus);
741
742 if (!m_cds) {
743 // There is no CDS feature because this entry is derived from
744 // alignment. Try to use CDS feature if any on target mRNA
745 CBioseq_Handle product_bsh =
746 scope.GetBioseqHandle
747 (align.GetSeq_id(0));
748 if (product_bsh) {
749 //
750 // our offset is determined by the
751 // placement of a CDS feature
752 // directly on the product RNA sequence
753 //
754 // LOG_POST("Deriving CDS mapping from alignment");
755 SAnnotSelector sel = x_GetAnnotSelector();
756 sel.IncludeFeatSubtype(CSeqFeatData::eSubtype_cdregion);
757 // TODO: ??? Now we prefer LAST of CDSs for a given RNA. Can there be
758 // more than one, and what is the meaning for it?
759 for (CFeat_CI feat_iter(product_bsh, sel);
760 feat_iter; ++feat_iter)
761 {
762 x_SetRnaCds(scope, feat_iter->GetMappedFeature());
763 }
764 }
765 } else {
766 // Recalculate CDS offset using the alignment
767 // Check that first exon is not partial
768 const CSpliced_exon &first_exon = *(seg.GetExons().front());
769 if (!(first_exon.IsSetPartial() && first_exon.GetPartial())) {
770 TSeqPos cds_start_on_dna = m_cds->GetLocation().GetStart(eExtreme_Biological);
771 ENa_strand strand = m_cds->GetLocation().GetStrand();
772 TSeqPos rna_start_on_dna;
773 if (strand == eNa_strand_minus) {
774 rna_start_on_dna = seg.GetSeqStop(1);
775 } else {
776 rna_start_on_dna = seg.GetSeqStart(1);
777 }
778 TSeqPos rna_start, cds_start;
779 x_MapPos(*m_mrna_mapper, *m_main_id, rna_start_on_dna, rna_start);
780 x_MapPos(*m_mrna_mapper, *m_main_id, cds_start_on_dna, cds_start);
781 m_cds_offset = cds_start - rna_start;
782 m_cds_offset_set = true;
783 }
784 }
785 }
786
787
x_GetCdsMapper(CScope & scope,const CSeq_feat & cds) const788 CRef<CSeq_loc_Mapper> CReportEntry::x_GetCdsMapper(CScope& scope, const CSeq_feat& cds) const
789 {
790 CRef<CSeq_loc_Mapper> cds_mapper(new CSeq_loc_Mapper
791 (cds,
792 CSeq_loc_Mapper::eLocationToProduct,
793 &scope));
794 return cds_mapper;
795 }
796
797
798 typedef vector<CRef<CReportEntry> > TReportEntryList;
799
800 static
801 TReportEntryList s_GetFeaturesForRange(CScope& scope, const CBioseq_Handle& bsh, TSeqRange search_range);
802 static
803 void s_ExtendEntriesFromAlignments(CScope& scope, const CBioseq_Handle& bsh, TSeqRange search_range, TReportEntryList& entries);
804 static
805 bool s_IsRefSeqGene(const CBioseq_Handle& bsh);
806
807
808 /////////////////////////////////////////////////////////////////////////////
809 // CObjectCoords::
810 //
811
CObjectCoords(CScope & scope)812 CObjectCoords::CObjectCoords(CScope& scope) :
813 m_Scope(&scope)
814 {
815 }
816
817
GetCoordinates(const CSeq_id & id,TSeqPos pos,TSeqPos window)818 CRef<CHGVS_Coordinate_Set> CObjectCoords::GetCoordinates(const CSeq_id& id, TSeqPos pos, TSeqPos window)
819 {
820 CRef<CHGVS_Coordinate_Set> coords(new CHGVS_Coordinate_Set);
821 GetCoordinates(*coords, id, pos, window);
822 return coords;
823 }
824
825 void
GetCoordinates(CHGVS_Coordinate_Set & coords,const CSeq_id & id,TSeqPos pos,TSeqPos window)826 CObjectCoords::GetCoordinates(CHGVS_Coordinate_Set& coords, const CSeq_id& id, TSeqPos pos, TSeqPos window)
827 {
828 CBioseq_Handle bsh = m_Scope->GetBioseqHandle(id);
829 if ( !bsh ) {
830 NCBI_THROW(CException, eUnknown,
831 "failed to retrieve sequence: " + id.AsFastaString());
832 }
833 CSeqVector seq_vec =
834 bsh.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
835
836 string title;
837 CSeq_id_Handle idh = sequence::GetId(id, *m_Scope, sequence::eGetId_Best);
838 idh.GetSeqId()->GetLabel(&title, CSeq_id::eContent);
839
840 // First entry is for the gi itself
841 CRef<CHGVS_Coordinate> ref(new CHGVS_Coordinate());
842
843 // identify sequence
844 Uint4 type_flags = GetSequenceType(bsh);
845 string hgvs;
846
847 /// check for protein
848 if (type_flags & CSeq_id::fAcc_prot) {
849 hgvs = "p.";
850 }
851 /// check for mRNA
852 else if (type_flags & CSeq_id::eAcc_mrna) {
853 hgvs = "r.";
854 }
855 /// check for genomic
856 else if (type_flags & CSeq_id::fAcc_genomic) {
857 hgvs = "g.";
858 }
859
860 // Get Main sequence
861 ref->SetTitle(title);
862 ref->SetHgvs_position( hgvs + NStr::IntToString(pos + 1) );
863 ref->SetSequence( x_GetSequence(seq_vec, pos) );
864 ref->SetMarker_pos(pos);
865 ref->SetPos_mapped(pos);
866
867 coords.Set().push_back(ref);
868
869 // Get all RNAs and CDSs in the +-2k (default) vicinity of the
870 // position in question and build a CFeatTree.
871 // Main cases are:
872 // 1. Genomic sequence. Tree will contain many mRNA->CDS chains
873 // E.g. NT_011515 pos 1585115 (alignment present)
874 // NC_000007.13 pos 65338429 (no alignment)
875 // NC_000019.9 pos 45016991 - forward ribosomal slippage
876 // NC_000007.13 v 94283650:94301032 - back ribosomal slippage,
877 // encoded through weird mRNA/CDS relation
878 // NG_005895.1 - RefSeq Gene, features through alignment only
879 // 1a. mRNA can be w/o product (GenBank DNA)
880 // E.g. BX571818.3
881 // 1b. No mRNA
882 // E.g. NC_000913.2 pos 2282398
883 // 2. Transcript. There will be CDSs alone
884 // E.g. NM_002020
885 // 2a. There can be mRNA w/o product (GenBank mRNA)
886 // E.g. M11313
887 // 2b. No CDS
888 // E.g. NR_002767
889 // 3. Protein sequence
890 // Overall procedure is following:
891 // Get all pairwise alignments for position into map by product id.
892 // Sort all top-level into ReportEntries, adding alignment for mRNA
893 // products from the map (removing from map).
894 // Convert all remaning alignment to ReportEntries
895 // Get all components for position into GeneInfos
896 // Process all ReportEntries getting r., c., and p. (g. for components)
897 TSeqPos min_pos = pos >= window ? pos - window : pos;
898 TSeqPos max_pos = pos + window;
899 TSeqRange search_range(min_pos, max_pos);
900 // Per SV-487, if we deal with RefSeq Gene record only use
901 // alignments to get features
902 bool is_ref_seq_gene = s_IsRefSeqGene(bsh);
903 // is_ref_seq_gene = false; // DEBUG
904 TReportEntryList entries;
905 if (!is_ref_seq_gene) entries = s_GetFeaturesForRange(*m_Scope, bsh, search_range);
906 s_ExtendEntriesFromAlignments(*m_Scope, bsh, search_range, entries);
907 ///
908 /// now, evaluate each feature
909 /// we may need to inject an artificial mRNA feature to capture
910 /// the 'c.' coordinate.
911 ///
912 ITERATE(TReportEntryList, iter, entries) {
913 (*iter)->GetCoordinates(
914 *m_Scope,
915 type_flags,
916 pos,
917 coords);
918
919 }
920 }
921
922
923 static
s_IsRefSeqGene(const CBioseq_Handle & bsh)924 bool s_IsRefSeqGene(const CBioseq_Handle& bsh)
925 {
926 // TODO: Use CSeq_descr_CI on bhs itself here
927 bool is_ref_seq_gene = false;
928 CSeq_entry_Handle seh = bsh.GetParentEntry();
929 if (seh.IsSetDescr()) {
930 const CSeq_descr& descr = seh.GetDescr();
931 CSeq_descr::Tdata descr_list = descr.Get();
932 ITERATE(CSeq_descr::Tdata, it, descr_list) {
933 if (it->GetObject().IsGenbank()) {
934 const CSeqdesc::TGenbank& gb = it->GetObject().GetGenbank();
935 if (gb.IsSetKeywords()) {
936 ITERATE(list<string>, it1, gb.GetKeywords()) {
937 if (*it1 == "RefSeqGene") {
938 is_ref_seq_gene = true;
939 break;
940 }
941 }
942 }
943 }
944 if (is_ref_seq_gene) break;
945 }
946 }
947 return is_ref_seq_gene;
948 }
949
950
951 static
s_GetFeaturesForRange(CScope & scope,const CBioseq_Handle & bsh,TSeqRange search_range)952 TReportEntryList s_GetFeaturesForRange(
953 CScope& scope,
954 const CBioseq_Handle& bsh,
955 TSeqRange search_range)
956 {
957 TReportEntryList entries;
958 // find features on the sequence
959 SAnnotSelector sel = x_GetAnnotSelector();
960 // We don't need gene record per se, only mRNA and CDS
961 sel.IncludeFeatType(CSeqFeatData::e_Gene);
962 sel.IncludeFeatType(CSeqFeatData::e_Rna);
963 sel.IncludeFeatType(CSeqFeatData::e_Cdregion);
964
965 CFeat_CI feat_iter(bsh, search_range, sel);
966 feature::CFeatTree feat_tree;
967 feat_tree.AddFeatures(feat_iter);
968
969 /*
970 Here we have following structures:
971 Gene
972 |\
973 | mRNA
974 | \
975 | CDS
976 |\
977 | mRNA
978 |\
979 | CDS
980 We convert them into list of ReportEntries which hold Gene, mRNA, and CDS
981 records (possibly empty). We need to pass only hierarchy root to ReportEntry
982 constructor except in case of Gene because Gene can have multiple distinct
983 mRNA descendants.
984 */
985 vector<CMappedFeat> feat_roots =
986 feat_tree.GetChildren(CMappedFeat());
987 CConstRef<CSeq_id> seq_id(bsh.GetSeqId());
988 ITERATE (vector<CMappedFeat>, iter, feat_roots) {
989 const CSeq_feat& seq_feat = iter->GetMappedFeature();
990 CSeqFeatData::ESubtype subtype =
991 seq_feat.GetData().GetSubtype();
992 if (subtype == CSeqFeatData::eSubtype_gene) {
993 vector<CMappedFeat> cc = feat_tree.GetChildren(*iter);
994 ITERATE (vector<CMappedFeat>, iter1, cc) {
995 CRef<CReportEntry> entry(new CReportEntry(
996 scope,
997 *seq_id,
998 *iter, // pass Gene explicitly
999 feat_tree,
1000 *iter1));
1001 if (entry->IsValid()) entries.push_back(entry);
1002 }
1003 } else {
1004 CRef<CReportEntry> entry(new CReportEntry(
1005 scope,
1006 *seq_id,
1007 feat_tree,
1008 *iter));
1009 if (entry->IsValid()) entries.push_back(entry);
1010 }
1011 }
1012 return entries;
1013 }
1014
1015
1016 static
s_ExtendEntriesFromAlignments(CScope & scope,const CBioseq_Handle & bsh,TSeqRange search_range,TReportEntryList & entries)1017 void s_ExtendEntriesFromAlignments(
1018 CScope& scope,
1019 const CBioseq_Handle& bsh,
1020 TSeqRange search_range,
1021 TReportEntryList& entries)
1022 {
1023 CConstRef<CSeq_id> seq_id(bsh.GetSeqId());
1024 TReportEntryList new_entries;
1025 SAnnotSelector sel;
1026 //sel.SetAdaptiveDepth(true);
1027 // Slow, but otherwise it can't find all relevant alignments
1028 // for composite sequences
1029 sel.SetResolveAll();
1030 for (CAlign_CI align_it(bsh, search_range, sel); align_it; ++align_it) {
1031 const CSeq_align& al = *align_it;
1032 if (al.CheckNumRows() == 2) {
1033 string label; al.GetSeq_id(0).GetLabel(&label);
1034 bool found = false;
1035 NON_CONST_ITERATE(TReportEntryList, it, entries) {
1036 CConstRef<CSeq_id> product_id((*it)->GetTranscriptId());
1037 string entry_label; product_id->GetLabel(&entry_label);
1038 // LOG_POST("Check match for alignment " + label + " with existing entry " + entry_label);
1039 if (sequence::IsSameBioseq(
1040 *product_id, al.GetSeq_id(0),
1041 &scope))
1042 {
1043 // Found exising entry - extend it with alignment
1044 (*it)->SetAlignment(scope, al);
1045 // LOG_POST("Entry for " + label + " extended by alignment");
1046 found = true;
1047 break;
1048 }
1049 }
1050 // If we did not match this alignment to an existing entry, make a new one.
1051 if (!found) {
1052 CBioseq_Handle target_bsh = scope.GetBioseqHandle(al.GetSeq_id(0));
1053 if (target_bsh && (GetSequenceType(target_bsh) & CSeq_id::eAcc_mrna)) {
1054 // Make new entry
1055 CRef<CReportEntry> entry(new CReportEntry(scope, *seq_id, al));
1056 new_entries.push_back(entry);
1057 // LOG_POST("New entry " + label + " created from alignment");
1058 }
1059 }
1060 }
1061 }
1062 entries.insert(entries.end(), new_entries.begin(), new_entries.end());
1063 }
1064
1065
1066 //static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, bool complement)
x_GetSequence(CSeqVector & seq_vec,TSignedSeqPos pos,TSignedSeqPos adjustment)1067 static string x_GetSequence(CSeqVector& seq_vec, TSignedSeqPos pos, TSignedSeqPos adjustment)
1068 {
1069 string seq_before, seq_pos, seq_after;
1070 TSignedSeqPos seq_len = seq_vec.size();
1071 TSignedSeqPos max_seq_frame = TSignedSeqPos(kSeqFrame);
1072
1073 pos += adjustment;
1074
1075 if (adjustment >=0) {
1076 TSeqPos before_from = pos <= max_seq_frame ?
1077 0 : max(0, pos - max_seq_frame);
1078 seq_vec.GetSeqData(before_from, max(0, pos), seq_before);
1079 // This filler takes into account both before and after, so we don't
1080 // need to repeat it twice. We need to compensate for very negative
1081 // position to not overfill it, so we limit overfill by 2*kSeqFrame+1
1082 // which is total length of reported sequence context.
1083 if (pos < max_seq_frame) {
1084 string filler;
1085 int max_filler_len = min(2*max_seq_frame+1, max_seq_frame - pos);
1086 for (int i = 0; i < max_filler_len; i++) filler += " ";
1087 seq_before = filler + seq_before;
1088 }
1089 if (adjustment != 0) {
1090 int fill_len = min(int(adjustment-1), int(seq_before.size()));
1091 seq_before.replace(seq_before.size()-fill_len, fill_len, fill_len, '-');
1092 }
1093 } else {
1094 for (int i = 0; i < max_seq_frame; i++) seq_before += "-";
1095 }
1096 if (adjustment == 0) {
1097 seq_vec.GetSeqData(pos, pos+1, seq_pos);
1098 } else {
1099 seq_pos = "-";
1100 }
1101 if (adjustment <= 0) {
1102 // If pos is less than -1, it starts eating into seq_after. The filler is taken
1103 // care of, now we ensure that we're not generating negative positions.
1104
1105 seq_vec.GetSeqData(max(0, pos+1),
1106 max(0, min(seq_len, pos+max_seq_frame+1)), seq_after);
1107 if (adjustment != 0) {
1108 int fill_len = min(int(-adjustment-1), int(seq_after.size()));
1109 seq_after.replace(0, fill_len, fill_len, '-');
1110 }
1111 } else {
1112 for (int i = 0; i < max_seq_frame; i++) seq_after += "-";
1113 }
1114 /*
1115 if (complement) {
1116 string comp;
1117 CSeqManip::Complement(seq_before, CSeqUtil::e_Iupacna, 0, seq_before.size(), comp);
1118 CSeqManip::Complement(seq_after, CSeqUtil::e_Iupacna, 0, seq_after.size(), seq_before);
1119 seq_before = comp;
1120 CSeqManip::Complement(seq_pos, CSeqUtil::e_Iupacna, 0, seq_pos.size(), seq_pos);
1121 }
1122 */
1123
1124 return
1125 seq_before + "<span class='xsv-seq_pos_highlight'>" +
1126 seq_pos + "</span>" + seq_after;
1127 }
1128
x_GetAnnotSelector()1129 static SAnnotSelector x_GetAnnotSelector()
1130 {
1131 #ifdef __GUI_SEQ_UTILS_DEFINED__
1132 return CSeqUtils::GetAnnotSelector();
1133 #endif
1134
1135 SAnnotSelector sel;
1136 sel
1137 // consider overlaps by total range...
1138 .SetOverlapTotalRange()
1139 // resolve all segments...
1140 .SetResolveAll()
1141 ;
1142
1143 sel.SetExcludeExternal(false);
1144
1145 ///
1146 /// known external annotations
1147 ///
1148 sel.ExcludeNamedAnnots("SNP"); /// SNPs = variation features
1149 sel.ExcludeNamedAnnots("CDD"); /// CDD = conserved domains
1150 sel.ExcludeNamedAnnots("STS"); /// STS = sequence tagged sites
1151
1152 sel.SetAdaptiveDepth(true);
1153 sel.SetResolveAll();
1154
1155 return sel;
1156 }
1157
1158 /*
1159 /////////////////////////////////////////////////////////////////////////////
1160 // Service functions
1161
1162 static
1163 TSignedSeqPos x_GetAdjustment(const CSeq_feat& feat, TSeqPos pos)
1164 {
1165 // Calculate minimal distance to the exon boundary
1166 int adj = 0;
1167 bool plus_strand =
1168 sequence::GetStrand(feat.GetLocation()) != eNa_strand_minus;
1169 TSignedSeqPos signed_pos = static_cast<TSignedSeqPos>(pos);
1170 CSeq_loc_CI loc_iter(feat.GetLocation());
1171 for ( ; loc_iter; ++loc_iter) {
1172 const TSeqRange& curr = loc_iter.GetRange();
1173 TSignedSeqPos f = curr.GetFrom();
1174 TSignedSeqPos t = curr.GetTo();
1175
1176 int adj_for_intron;
1177 if (signed_pos < f) {
1178 adj_for_intron = signed_pos - f;
1179 } else if (signed_pos > t) {
1180 adj_for_intron = signed_pos - t;
1181 } else {
1182 return 0; // no adjustment needed
1183 }
1184 if (adj == 0 || std::abs(adj_for_intron) < std::abs(adj)) {
1185 adj = adj_for_intron;
1186 }
1187 }
1188 return plus_strand ? adj : -adj;
1189 }
1190 */
1191
1192
1193 // END_NCBI_SCOPE
1194