1 /*
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Azat Badretdin
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 #include <ncbi_pch.hpp>
33 #include "read_blast_result.hpp"
getFromTo(const CSeq_loc & loc,TSeqPos & from,TSeqPos & to,ENa_strand & strand)34 void CReadBlastApp::getFromTo(const CSeq_loc& loc, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
35 {
36   if(loc.IsInt())
37     {
38     getFromTo(loc.GetInt(), from, to, strand);
39     }
40   else if (loc.IsMix())
41     {
42     getFromTo(loc.GetMix(), from, to, strand);
43     }
44   else if (loc.IsPacked_int())
45     {
46     getFromTo(loc.GetPacked_int(), from, to, strand);
47     }
48   else if (loc.IsNull())
49     {
50     NcbiCerr << "INFO: getFromTo got NULL interval" << NcbiEndl;
51     from = 0; to = 0;
52     }
53   else
54     {
55     NcbiCerr << "ERROR: getFromTo got unusual interval" << NcbiEndl;
56     from = 0; to = 0;
57     }
58 }
59 
60 
getFromTo(const CSeq_loc_mix & mix,TSeqPos & from,TSeqPos & to,ENa_strand & strand)61 void CReadBlastApp::getFromTo(const CSeq_loc_mix& mix, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
62 {
63   from = 1<<30;
64   to   = 0;
65   ITERATE(CSeq_loc_mix::Tdata, loc, mix.Get())
66     {
67     TSeqPos from2, to2;
68     getFromTo(**loc, from2, to2, strand);
69     if(from==0 && to==0) continue;
70     if(from2<from ) from = from2;
71     if(to2>to) to = to2;
72     }
73 }
74 
getFromTo(const CPacked_seqint & mix,TSeqPos & from,TSeqPos & to,ENa_strand & strand)75 void CReadBlastApp::getFromTo(const CPacked_seqint& mix, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
76 {
77   from = 1<<30;
78   to   = 0;
79   ITERATE(CPacked_seqint::Tdata, loc, mix.Get())
80     {
81     TSeqPos from2, to2;
82     getFromTo(**loc, from2, to2, strand);
83     if(from==0 && to==0) continue;
84     if(from2<from ) from = from2;
85     if(to2>to) to = to2;
86     }
87 }
88 
getFromTo(const CSeq_interval & inter,TSeqPos & from,TSeqPos & to,ENa_strand & strand)89 void CReadBlastApp::getFromTo(const CSeq_interval& inter, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
90 {
91   from = inter.GetFrom();
92   to   = inter.GetTo();
93   strand=eNa_strand_unknown;
94   if(inter.CanGetStrand())
95     {
96     strand = inter.GetStrand();
97     }
98 }
99 
hasGenomicLocation(const CBioseq & seq)100 bool CReadBlastApp::hasGenomicLocation(const CBioseq& seq)
101 {
102    if(PrintDetails()) NcbiCerr << "hasGenomicLocation starts" << NcbiEndl;
103    ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
104      {
105      if( !(*annot)->GetData().IsFtable()  ) continue;
106      ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable()  )
107        {
108        if( !(*gen_feature)->CanGetComment()) continue;
109        if( (*gen_feature)->GetComment().find("Genomic Location") == string::npos ) continue;
110        return true;
111        }
112      }
113    if(seq.IsAa())
114      NcbiCerr << "hasGenomicLocation: ERROR : sequence "
115                                << GetStringDescr (seq)
116                                << " does not have genomic location "
117                                << NcbiEndl;
118    return false;
119 }
getGenomicLocation(const CBioseq & seq)120 const CSeq_loc&  CReadBlastApp::getGenomicLocation(const CBioseq& seq)
121 {
122    if(PrintDetails()) NcbiCerr << "getGenomicLocation starts" << NcbiEndl;
123    ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
124      {
125      if( !(*annot)->GetData().IsFtable()  ) continue;
126      ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable()  )
127        {
128        if( !(*gen_feature)->CanGetComment()) continue;
129        if( (*gen_feature)->GetComment().find("Genomic Location") == string::npos ) continue;
130        return (*gen_feature)->GetLocation();
131        }
132      }
133    NcbiCerr << "getGenomicLocation: FATAL: sequence "
134                                // << CSeq_id::GetStringDescr (seq, CSeq_id::eFormat_FastA)
135                                << GetStringDescr (seq)
136                                << " does not have genomic location"
137                                << NcbiEndl;
138    throw;
139 }
140 
getLocusTag(const CBioseq & seq)141 string CReadBlastApp::getLocusTag(const CBioseq& seq)
142 {
143    string tag = "";
144    string mark = "Genomic Location: ";
145    ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
146      {
147      if( !(*annot)->GetData().IsFtable()  ) continue;
148      ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable()  )
149        {
150        if( !(*gen_feature)->CanGetComment()) continue;
151        if( (*gen_feature)->GetComment().find(mark) == string::npos ) continue;
152        tag = (*gen_feature)->GetComment();
153        tag.erase(0, mark.length());
154        return tag;
155        }
156      }
157    return tag;
158 }
159 
hasGenomicInterval(const CBioseq & seq)160 bool CReadBlastApp::hasGenomicInterval(const CBioseq& seq)
161 {
162    if(PrintDetails()) NcbiCerr << "hasGenomicInterval starts" << NcbiEndl;
163    ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
164      {
165      if( !(*annot)->GetData().IsFtable()  ) continue;
166      ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable()  )
167        {
168        if(! (*gen_feature)->GetLocation().GetInt().CanGetStrand() ) continue;
169        if(! (*gen_feature)->GetLocation().GetInt().GetId().IsLocal() ) continue;
170        return true;
171        }
172      }
173    NcbiCerr << "hasGenomicInterval: ERROR: sequence "
174                                // << CSeq_id::GetStringDescr (seq, CSeq_id::eFormat_FastA)
175                                << GetStringDescr (seq)
176                                << " does not have intervals with strands"
177                                << NcbiEndl;
178    return false;
179 }
180 
getGenomicInterval(const CBioseq & seq)181 const CSeq_interval& CReadBlastApp::getGenomicInterval(const CBioseq& seq)
182 {
183    if(PrintDetails()) NcbiCerr << "getGenomicInterval starts" << NcbiEndl;
184    ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
185      {
186      if( !(*annot)->GetData().IsFtable()  ) continue;
187      ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable()  )
188        {
189        if(! (*gen_feature)->GetLocation().GetInt().CanGetStrand() ) continue;
190        if(! (*gen_feature)->GetLocation().GetInt().GetId().IsLocal() ) continue;
191        return (*gen_feature)->GetLocation().GetInt();
192        }
193      }
194    NcbiCerr << "getGenomicInterval: FATAL: sequence "
195                                // << CSeq_id::GetStringDescr (seq, CSeq_id::eFormat_FastA)
196                                << GetStringDescr (seq)
197                                << " does not have intervals with strands"
198                                << NcbiEndl;
199    throw;
200 }
201 
GetLocMap(LocMap & loc_map,const CSeq_annot::C_Data::TFtable & feats)202 void CReadBlastApp::GetLocMap
203   (
204   LocMap& loc_map,  // location -> gene_feature for that location
205   const CSeq_annot::C_Data::TFtable& feats
206   )
207 {
208   if(PrintDetails()) NcbiCerr << "GetLocMap starts" << NcbiEndl;
209   loc_map.clear();
210   IncreaseVerbosity();
211   ITERATE(CSeq_annot::C_Data::TFtable, f, feats)
212     {
213     if( !(*f)->GetData().IsGene() ) continue;
214 // each location gets a gene name
215     string n; (*f)->GetData().GetGene().GetLabel(&n);
216     string loc_string = GetLocationString(**f);
217     loc_map[loc_string]=*f;
218     // if(PrintDetails()) NcbiCerr << "GetLocMap: stored location (" << loc_string << ")" << NcbiEndl; // this is too much
219     }
220   DecreaseVerbosity();
221   if(PrintDetails()) NcbiCerr << "GetLocMap: locMap size: "
222                               << loc_map.size()
223                               << NcbiEndl;
224   if(PrintDetails()) NcbiCerr << "GetLocMap ends" << NcbiEndl;
225   return;
226 }
227 
GetLocationString(const CSeq_feat & f)228 string GetLocationString(const CSeq_feat& f)
229 {
230   return GetLocationString(f.GetLocation());
231 }
GetLocationString(const interval_type & loc)232 template <typename interval_type> string GetLocationString ( const interval_type& loc)
233 {
234   TSeqPos from, to;
235   ENa_strand strand;
236   CReadBlastApp::getFromTo( loc, from, to, strand);
237   CNcbiStrstream label;
238   string strand_sign = strand == eNa_strand_plus ? "+" : "-";
239   label << from << "-" << to << ":" << strand_sign << '\0';
240 
241   return label.str();
242 
243 
244 }
245 
246 template string GetLocationString<CSeq_loc>(const CSeq_loc& loc);
247