1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Azat Badretdin
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 #include <ncbi_pch.hpp>
33 #include "read_blast_result.hpp"
getFromTo(const CSeq_loc & loc,TSeqPos & from,TSeqPos & to,ENa_strand & strand)34 void CReadBlastApp::getFromTo(const CSeq_loc& loc, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
35 {
36 if(loc.IsInt())
37 {
38 getFromTo(loc.GetInt(), from, to, strand);
39 }
40 else if (loc.IsMix())
41 {
42 getFromTo(loc.GetMix(), from, to, strand);
43 }
44 else if (loc.IsPacked_int())
45 {
46 getFromTo(loc.GetPacked_int(), from, to, strand);
47 }
48 else if (loc.IsNull())
49 {
50 NcbiCerr << "INFO: getFromTo got NULL interval" << NcbiEndl;
51 from = 0; to = 0;
52 }
53 else
54 {
55 NcbiCerr << "ERROR: getFromTo got unusual interval" << NcbiEndl;
56 from = 0; to = 0;
57 }
58 }
59
60
getFromTo(const CSeq_loc_mix & mix,TSeqPos & from,TSeqPos & to,ENa_strand & strand)61 void CReadBlastApp::getFromTo(const CSeq_loc_mix& mix, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
62 {
63 from = 1<<30;
64 to = 0;
65 ITERATE(CSeq_loc_mix::Tdata, loc, mix.Get())
66 {
67 TSeqPos from2, to2;
68 getFromTo(**loc, from2, to2, strand);
69 if(from==0 && to==0) continue;
70 if(from2<from ) from = from2;
71 if(to2>to) to = to2;
72 }
73 }
74
getFromTo(const CPacked_seqint & mix,TSeqPos & from,TSeqPos & to,ENa_strand & strand)75 void CReadBlastApp::getFromTo(const CPacked_seqint& mix, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
76 {
77 from = 1<<30;
78 to = 0;
79 ITERATE(CPacked_seqint::Tdata, loc, mix.Get())
80 {
81 TSeqPos from2, to2;
82 getFromTo(**loc, from2, to2, strand);
83 if(from==0 && to==0) continue;
84 if(from2<from ) from = from2;
85 if(to2>to) to = to2;
86 }
87 }
88
getFromTo(const CSeq_interval & inter,TSeqPos & from,TSeqPos & to,ENa_strand & strand)89 void CReadBlastApp::getFromTo(const CSeq_interval& inter, TSeqPos& from, TSeqPos& to, ENa_strand& strand)
90 {
91 from = inter.GetFrom();
92 to = inter.GetTo();
93 strand=eNa_strand_unknown;
94 if(inter.CanGetStrand())
95 {
96 strand = inter.GetStrand();
97 }
98 }
99
hasGenomicLocation(const CBioseq & seq)100 bool CReadBlastApp::hasGenomicLocation(const CBioseq& seq)
101 {
102 if(PrintDetails()) NcbiCerr << "hasGenomicLocation starts" << NcbiEndl;
103 ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
104 {
105 if( !(*annot)->GetData().IsFtable() ) continue;
106 ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable() )
107 {
108 if( !(*gen_feature)->CanGetComment()) continue;
109 if( (*gen_feature)->GetComment().find("Genomic Location") == string::npos ) continue;
110 return true;
111 }
112 }
113 if(seq.IsAa())
114 NcbiCerr << "hasGenomicLocation: ERROR : sequence "
115 << GetStringDescr (seq)
116 << " does not have genomic location "
117 << NcbiEndl;
118 return false;
119 }
getGenomicLocation(const CBioseq & seq)120 const CSeq_loc& CReadBlastApp::getGenomicLocation(const CBioseq& seq)
121 {
122 if(PrintDetails()) NcbiCerr << "getGenomicLocation starts" << NcbiEndl;
123 ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
124 {
125 if( !(*annot)->GetData().IsFtable() ) continue;
126 ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable() )
127 {
128 if( !(*gen_feature)->CanGetComment()) continue;
129 if( (*gen_feature)->GetComment().find("Genomic Location") == string::npos ) continue;
130 return (*gen_feature)->GetLocation();
131 }
132 }
133 NcbiCerr << "getGenomicLocation: FATAL: sequence "
134 // << CSeq_id::GetStringDescr (seq, CSeq_id::eFormat_FastA)
135 << GetStringDescr (seq)
136 << " does not have genomic location"
137 << NcbiEndl;
138 throw;
139 }
140
getLocusTag(const CBioseq & seq)141 string CReadBlastApp::getLocusTag(const CBioseq& seq)
142 {
143 string tag = "";
144 string mark = "Genomic Location: ";
145 ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
146 {
147 if( !(*annot)->GetData().IsFtable() ) continue;
148 ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable() )
149 {
150 if( !(*gen_feature)->CanGetComment()) continue;
151 if( (*gen_feature)->GetComment().find(mark) == string::npos ) continue;
152 tag = (*gen_feature)->GetComment();
153 tag.erase(0, mark.length());
154 return tag;
155 }
156 }
157 return tag;
158 }
159
hasGenomicInterval(const CBioseq & seq)160 bool CReadBlastApp::hasGenomicInterval(const CBioseq& seq)
161 {
162 if(PrintDetails()) NcbiCerr << "hasGenomicInterval starts" << NcbiEndl;
163 ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
164 {
165 if( !(*annot)->GetData().IsFtable() ) continue;
166 ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable() )
167 {
168 if(! (*gen_feature)->GetLocation().GetInt().CanGetStrand() ) continue;
169 if(! (*gen_feature)->GetLocation().GetInt().GetId().IsLocal() ) continue;
170 return true;
171 }
172 }
173 NcbiCerr << "hasGenomicInterval: ERROR: sequence "
174 // << CSeq_id::GetStringDescr (seq, CSeq_id::eFormat_FastA)
175 << GetStringDescr (seq)
176 << " does not have intervals with strands"
177 << NcbiEndl;
178 return false;
179 }
180
getGenomicInterval(const CBioseq & seq)181 const CSeq_interval& CReadBlastApp::getGenomicInterval(const CBioseq& seq)
182 {
183 if(PrintDetails()) NcbiCerr << "getGenomicInterval starts" << NcbiEndl;
184 ITERATE(CBioseq::TAnnot, annot, seq.GetAnnot())
185 {
186 if( !(*annot)->GetData().IsFtable() ) continue;
187 ITERATE(CSeq_annot::C_Data::TFtable, gen_feature, (*annot)->GetData().GetFtable() )
188 {
189 if(! (*gen_feature)->GetLocation().GetInt().CanGetStrand() ) continue;
190 if(! (*gen_feature)->GetLocation().GetInt().GetId().IsLocal() ) continue;
191 return (*gen_feature)->GetLocation().GetInt();
192 }
193 }
194 NcbiCerr << "getGenomicInterval: FATAL: sequence "
195 // << CSeq_id::GetStringDescr (seq, CSeq_id::eFormat_FastA)
196 << GetStringDescr (seq)
197 << " does not have intervals with strands"
198 << NcbiEndl;
199 throw;
200 }
201
GetLocMap(LocMap & loc_map,const CSeq_annot::C_Data::TFtable & feats)202 void CReadBlastApp::GetLocMap
203 (
204 LocMap& loc_map, // location -> gene_feature for that location
205 const CSeq_annot::C_Data::TFtable& feats
206 )
207 {
208 if(PrintDetails()) NcbiCerr << "GetLocMap starts" << NcbiEndl;
209 loc_map.clear();
210 IncreaseVerbosity();
211 ITERATE(CSeq_annot::C_Data::TFtable, f, feats)
212 {
213 if( !(*f)->GetData().IsGene() ) continue;
214 // each location gets a gene name
215 string n; (*f)->GetData().GetGene().GetLabel(&n);
216 string loc_string = GetLocationString(**f);
217 loc_map[loc_string]=*f;
218 // if(PrintDetails()) NcbiCerr << "GetLocMap: stored location (" << loc_string << ")" << NcbiEndl; // this is too much
219 }
220 DecreaseVerbosity();
221 if(PrintDetails()) NcbiCerr << "GetLocMap: locMap size: "
222 << loc_map.size()
223 << NcbiEndl;
224 if(PrintDetails()) NcbiCerr << "GetLocMap ends" << NcbiEndl;
225 return;
226 }
227
GetLocationString(const CSeq_feat & f)228 string GetLocationString(const CSeq_feat& f)
229 {
230 return GetLocationString(f.GetLocation());
231 }
GetLocationString(const interval_type & loc)232 template <typename interval_type> string GetLocationString ( const interval_type& loc)
233 {
234 TSeqPos from, to;
235 ENa_strand strand;
236 CReadBlastApp::getFromTo( loc, from, to, strand);
237 CNcbiStrstream label;
238 string strand_sign = strand == eNa_strand_plus ? "+" : "-";
239 label << from << "-" << to << ":" << strand_sign << '\0';
240
241 return label.str();
242
243
244 }
245
246 template string GetLocationString<CSeq_loc>(const CSeq_loc& loc);
247