1 /* $Id: idmapper.cpp 632526 2021-06-02 17:25:01Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Frank Ludwig
27 *
28 * File Description:
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <serial/iterator.hpp>
36
37 // Objects includes
38 #include <objects/general/Object_id.hpp>
39 #include <objects/seqloc/Seq_id.hpp>
40 #include <objects/seqloc/Seq_loc.hpp>
41 #include <objects/seq/Seq_annot.hpp>
42 #include <objects/seqres/Seq_graph.hpp>
43 #include <objects/seqfeat/Seq_feat.hpp>
44 #include <objects/seqset/Seq_entry.hpp>
45
46 #include <objtools/readers/message_listener.hpp>
47 #include <objtools/readers/idmapper.hpp>
48
49 #include <objmgr/util/sequence.hpp>
50
51 BEGIN_NCBI_SCOPE
52 USING_SCOPE(objects);
53
54
CIdMapper(const std::string & strContext,bool bInvert,ILineErrorListener * pErrors)55 CIdMapper::CIdMapper(const std::string& strContext,
56 bool bInvert,
57 ILineErrorListener* pErrors)
58 : m_strContext(strContext),
59 m_bInvert(bInvert),
60 m_pErrors( pErrors )
61 {
62 }
63
64
AddMapping(const CSeq_id_Handle & from,const CSeq_id_Handle & to)65 void CIdMapper::AddMapping(const CSeq_id_Handle& from,
66 const CSeq_id_Handle& to )
67 {
68 CSeq_id_Handle key = from;
69 CSeq_id_Handle val = to;
70 if (m_bInvert) {
71 std::swap(key, val);
72 }
73
74 TMapperCache::iterator it =
75 m_Cache.insert(TMapperCache::value_type(key, SMapper())).first;
76 it->second.dest_idh = to;
77 it->second.dest_mapper.Reset();
78 }
79
80
AddMapping(const CSeq_loc & loc_from,const CSeq_loc & loc_to)81 void CIdMapper::AddMapping(const CSeq_loc& loc_from,
82 const CSeq_loc& loc_to)
83 {
84 CConstRef<CSeq_id> id1(loc_from.GetId());
85 CConstRef<CSeq_id> id2(loc_to.GetId());
86 CSeq_id_Handle idh1;
87 CSeq_id_Handle idh2;
88 if (id1) {
89 idh1 = CSeq_id_Handle::GetHandle(*id1);
90 }
91 if (id2) {
92 idh2 = CSeq_id_Handle::GetHandle(*id2);
93 }
94
95 CSeq_id_Handle key = idh1;
96 CRef<CSeq_loc_Mapper> mapper;
97 if (m_bInvert) {
98 key = idh2;
99 mapper.Reset(new CSeq_loc_Mapper(loc_to, loc_from));
100 } else {
101 mapper.Reset(new CSeq_loc_Mapper(loc_from, loc_to));
102 }
103
104 TMapperCache::iterator it =
105 m_Cache.insert(TMapperCache::value_type(key, SMapper())).first;
106 it->second.dest_idh = CSeq_id_Handle();
107 it->second.dest_mapper = mapper;
108 }
109
110
// Map a single ID.
//
//   from - ID to look up in the cache
//
// Returns the cached destination ID when one is set. Otherwise, if a
// location mapper is cached for `from`, a whole-sequence location on `from`
// is run through that mapper and the ID of the result is returned. When
// neither yields an ID, the failure is reported to m_pErrors (if set) and
// `from` is returned unchanged; the error is thrown if the listener's
// PutError() returns false.
CSeq_id_Handle CIdMapper::Map(const CSeq_id_Handle& from)
{
    const TMapperCache::iterator hit = m_Cache.find(from);
    if (hit != m_Cache.end()) {
        SMapper& entry = hit->second;

        // A direct ID-to-ID mapping takes precedence.
        if (entry.dest_idh) {
            return entry.dest_idh;
        }

        // Otherwise let the location mapper translate "all of `from`".
        if (entry.dest_mapper) {
            CRef<CSeq_loc> whole(new CSeq_loc);
            whole->SetWhole().Assign(*from.GetSeqId());
            CRef<CSeq_loc> mapped = entry.dest_mapper->Map(*whole);
            CConstRef<CSeq_id> pMappedId(mapped->GetId());
            if (pMappedId) {
                return CSeq_id_Handle::GetHandle(*pMappedId);
            }
        }
    }

    // Nothing resolved this ID: treat it as an error, but only when somebody
    // is listening.
    if (m_pErrors) {
        AutoPtr<CObjReaderLineException> pMapError(
            CObjReaderLineException::Create(
                eDiag_Error, 0, MapErrorString(from)));
        const bool accepted = m_pErrors->PutError(*pMapError);
        if (!accepted) {
            pMapError->Throw();
        }
    }
    return from;
}
142
143
Map(const CSeq_loc & from)144 CRef<CSeq_loc> CIdMapper::Map(const CSeq_loc& from )
145 {
146 CConstRef<CSeq_id> id(from.GetId());
147 CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*id);
148 TMapperCache::iterator it = m_Cache.find(idh);
149 if (it != m_Cache.end()) {
150 CRef<CSeq_loc> loc_to;
151 if (it->second.dest_idh) {
152 loc_to.Reset(new CSeq_loc);
153 loc_to->Assign(from);
154 loc_to->SetId(*it->second.dest_idh.GetSeqId());
155 } else if (it->second.dest_mapper) {
156 loc_to = it->second.dest_mapper->Map(from);
157 if (loc_to->IsNull()) {
158 loc_to.Reset();
159 }
160 }
161 if (loc_to) {
162 return loc_to;
163 }
164 }
165
166 //
167 // Cannot map this ID. We will treat this as an error.
168 //
169 //
170 // Cannot map this ID. We will treat this as an error.
171 //
172 if ( m_pErrors ) {
173 AutoPtr<CObjReaderLineException> pMapError(
174 CObjReaderLineException::Create(
175 eDiag_Error, 0, MapErrorString( from ) ) );
176 if ( !m_pErrors->PutError( *pMapError ) ) {
177 pMapError->Throw();
178 }
179 }
180
181 CRef<CSeq_loc> loc(new CSeq_loc);
182 loc->Assign(from);
183 return loc;
184 };
185
186
187
188 // ============================================================================
MapErrorString(const CSeq_id_Handle & idh)189 string CIdMapper::MapErrorString(const CSeq_id_Handle& idh )
190 {
191 string strId = idh.AsString();
192 string strMsg(
193 string("IdMapper: Unable to resolve ID \"") + strId + string("\"") );
194 return strMsg;
195 };
196
197
MapErrorString(const CSeq_loc & loc)198 string CIdMapper::MapErrorString(const CSeq_loc& loc )
199 {
200 string strId;
201 loc.GetLabel(&strId);
202 string strMsg(
203 string("IdMapper: Unable to resolve ID \"") + strId + string("\"") );
204 return strMsg;
205 };
206
207
MapObject(CSerialObject & object)208 void CIdMapper::MapObject(CSerialObject& object)
209 {
210 set< CRef<CSeq_id> > ids;
211 CTypeIterator< CSeq_id > idit( object );
212 for ( ; idit; ++idit ) {
213 CSeq_id& id = *idit;
214 if (ids.insert(CRef<CSeq_id>(&id)).second == false) {
215 continue;
216 }
217 CSeq_id_Handle idh = Map( CSeq_id_Handle::GetHandle(id) );
218 if ( !idh ) {
219 continue;
220 }
221 id.Assign(*idh.GetSeqId());
222 }
223 };
224
225
226 // Composite mapper
227
228 CAtomicCounter CIdMapperComposite::SNode::sm_Counter;
229
AddMapper(IIdMapper * mapper,TPriority priority,EOwnership ownership)230 void CIdMapperComposite::AddMapper(IIdMapper* mapper,
231 TPriority priority,
232 EOwnership ownership)
233 {
234 m_Mappers.insert(SNode(mapper, priority, ownership));
235 }
236
237
// Map an ID through the registered mappers, in m_Mappers iteration order.
//   idh - ID to map
// Returns the first result that is non-empty and differs from `idh`;
// returns `idh` itself when no mapper produces such a result.
CSeq_id_Handle CIdMapperComposite::Map(const CSeq_id_Handle& idh)
{
    for (TMappers::const_iterator it = m_Mappers.begin();
         it != m_Mappers.end();  ++it) {
        const CSeq_id_Handle mapped = it->m_Mapper->Map(idh);
        // Only an actual change counts as a successful mapping.
        if (mapped  &&  mapped != idh) {
            return mapped;
        }
    }
    return idh;
}
247
248
Map(const CSeq_loc & loc)249 CRef<CSeq_loc> CIdMapperComposite::Map(const CSeq_loc& loc)
250 {
251 CRef<CSeq_loc> ret;
252 ITERATE(TMappers, it, m_Mappers) {
253 ret = it->m_Mapper->Map(loc);
254 if ( ret ) break;
255 }
256 return ret;
257 }
258
259
GetErrCodeString(void) const260 const char* CIdMapperException::GetErrCodeString(void) const
261 {
262 switch ( GetErrCode() ) {
263 case eBadSeqId: return "eBadSeqId";
264 case eOther: return "eOther";
265 default: return CException::GetErrCodeString();
266 }
267 }
268
269
270 END_NCBI_SCOPE
271
272