1 /*  $Id: idmapper.cpp 632526 2021-06-02 17:25:01Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Frank Ludwig
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <serial/iterator.hpp>
36 
37 // Objects includes
38 #include <objects/general/Object_id.hpp>
39 #include <objects/seqloc/Seq_id.hpp>
40 #include <objects/seqloc/Seq_loc.hpp>
41 #include <objects/seq/Seq_annot.hpp>
42 #include <objects/seqres/Seq_graph.hpp>
43 #include <objects/seqfeat/Seq_feat.hpp>
44 #include <objects/seqset/Seq_entry.hpp>
45 
46 #include <objtools/readers/message_listener.hpp>
47 #include <objtools/readers/idmapper.hpp>
48 
49 #include <objmgr/util/sequence.hpp>
50 
51 BEGIN_NCBI_SCOPE
52 USING_SCOPE(objects);
53 
54 
CIdMapper(const std::string & strContext,bool bInvert,ILineErrorListener * pErrors)55 CIdMapper::CIdMapper(const std::string& strContext,
56                      bool bInvert,
57                      ILineErrorListener* pErrors)
58     : m_strContext(strContext),
59       m_bInvert(bInvert),
60       m_pErrors( pErrors )
61 {
62 }
63 
64 
AddMapping(const CSeq_id_Handle & from,const CSeq_id_Handle & to)65 void CIdMapper::AddMapping(const CSeq_id_Handle& from,
66                            const CSeq_id_Handle& to )
67 {
68     CSeq_id_Handle key = from;
69     CSeq_id_Handle val = to;
70     if (m_bInvert) {
71         std::swap(key, val);
72     }
73 
74     TMapperCache::iterator it =
75         m_Cache.insert(TMapperCache::value_type(key, SMapper())).first;
76     it->second.dest_idh = to;
77     it->second.dest_mapper.Reset();
78 }
79 
80 
AddMapping(const CSeq_loc & loc_from,const CSeq_loc & loc_to)81 void CIdMapper::AddMapping(const CSeq_loc& loc_from,
82                            const CSeq_loc& loc_to)
83 {
84     CConstRef<CSeq_id> id1(loc_from.GetId());
85     CConstRef<CSeq_id> id2(loc_to.GetId());
86     CSeq_id_Handle idh1;
87     CSeq_id_Handle idh2;
88     if (id1) {
89         idh1 = CSeq_id_Handle::GetHandle(*id1);
90     }
91     if (id2) {
92         idh2 = CSeq_id_Handle::GetHandle(*id2);
93     }
94 
95     CSeq_id_Handle key = idh1;
96     CRef<CSeq_loc_Mapper> mapper;
97     if (m_bInvert) {
98         key = idh2;
99         mapper.Reset(new CSeq_loc_Mapper(loc_to, loc_from));
100     } else {
101         mapper.Reset(new CSeq_loc_Mapper(loc_from, loc_to));
102     }
103 
104     TMapperCache::iterator it =
105         m_Cache.insert(TMapperCache::value_type(key, SMapper())).first;
106     it->second.dest_idh = CSeq_id_Handle();
107     it->second.dest_mapper = mapper;
108 }
109 
110 
//
//  Map a single ID handle.
//
//  Returns the registered destination handle if there is one; otherwise, if
//  a location mapper is registered for `from`, the ID of the mapped "whole"
//  location. If neither yields an ID, the failure is reported to m_pErrors
//  (when set) and `from` is returned unchanged. The error is thrown if the
//  listener's PutError() returns false.
//
CSeq_id_Handle CIdMapper::Map(const CSeq_id_Handle& from )
{
    TMapperCache::iterator found = m_Cache.find(from);
    if (found != m_Cache.end()) {
        SMapper& entry = found->second;
        if (entry.dest_idh) {
            // Direct ID-to-ID mapping.
            return entry.dest_idh;
        }
        if (entry.dest_mapper) {
            // Location-based mapping: map the whole sequence and take the
            // ID of the result.
            CRef<CSeq_loc> whole(new CSeq_loc);
            whole->SetWhole().Assign(*from.GetSeqId());
            CRef<CSeq_loc> mapped = entry.dest_mapper->Map(*whole);
            CConstRef<CSeq_id> mapped_id(mapped->GetId());
            if (mapped_id) {
                return CSeq_id_Handle::GetHandle(*mapped_id);
            }
        }
    }

    // Unresolvable ID: without a listener there is nothing to report.
    if ( !m_pErrors ) {
        return from;
    }

    AutoPtr<CObjReaderLineException> pMapError(
        CObjReaderLineException::Create(
            eDiag_Error, 0, MapErrorString( from ) ) );
    const bool accepted = m_pErrors->PutError( *pMapError );
    if ( !accepted ) {
        pMapError->Throw();
    }
    return from;
}
142 
143 
Map(const CSeq_loc & from)144 CRef<CSeq_loc> CIdMapper::Map(const CSeq_loc& from )
145 {
146     CConstRef<CSeq_id> id(from.GetId());
147     CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*id);
148     TMapperCache::iterator it = m_Cache.find(idh);
149     if (it != m_Cache.end()) {
150         CRef<CSeq_loc> loc_to;
151         if (it->second.dest_idh) {
152             loc_to.Reset(new CSeq_loc);
153             loc_to->Assign(from);
154             loc_to->SetId(*it->second.dest_idh.GetSeqId());
155         } else if (it->second.dest_mapper) {
156             loc_to = it->second.dest_mapper->Map(from);
157             if (loc_to->IsNull()) {
158                 loc_to.Reset();
159             }
160         }
161         if (loc_to) {
162             return loc_to;
163         }
164     }
165 
166     //
167     //  Cannot map this ID. We will treat this as an error.
168     //
169     //
170     //  Cannot map this ID. We will treat this as an error.
171     //
172     if ( m_pErrors ) {
173         AutoPtr<CObjReaderLineException> pMapError(
174             CObjReaderLineException::Create(
175             eDiag_Error, 0, MapErrorString( from ) ) );
176         if ( !m_pErrors->PutError( *pMapError ) ) {
177             pMapError->Throw();
178         }
179     }
180 
181     CRef<CSeq_loc> loc(new CSeq_loc);
182     loc->Assign(from);
183     return loc;
184 };
185 
186 
187 
188 //  ============================================================================
MapErrorString(const CSeq_id_Handle & idh)189 string CIdMapper::MapErrorString(const CSeq_id_Handle& idh )
190 {
191     string strId = idh.AsString();
192     string strMsg(
193         string("IdMapper: Unable to resolve ID \"") + strId + string("\"") );
194     return strMsg;
195 };
196 
197 
MapErrorString(const CSeq_loc & loc)198 string CIdMapper::MapErrorString(const CSeq_loc& loc )
199 {
200     string strId;
201     loc.GetLabel(&strId);
202     string strMsg(
203         string("IdMapper: Unable to resolve ID \"") + strId + string("\"") );
204     return strMsg;
205 };
206 
207 
MapObject(CSerialObject & object)208 void CIdMapper::MapObject(CSerialObject& object)
209 {
210     set< CRef<CSeq_id> > ids;
211     CTypeIterator< CSeq_id > idit( object );
212     for ( ;  idit;  ++idit ) {
213         CSeq_id& id = *idit;
214         if (ids.insert(CRef<CSeq_id>(&id)).second == false) {
215             continue;
216         }
217         CSeq_id_Handle idh = Map( CSeq_id_Handle::GetHandle(id) );
218         if ( !idh ) {
219             continue;
220         }
221         id.Assign(*idh.GetSeqId());
222     }
223 };
224 
225 
226 // Composite mapper
227 
// Out-of-class definition of the static counter member of SNode.
// NOTE(review): how the counter is used is not visible in this file;
// see the SNode declaration in the header.
CAtomicCounter CIdMapperComposite::SNode::sm_Counter;
229 
//
//  Add a mapper to the composite.
//
//  The three arguments are wrapped in an SNode, which is inserted into
//  m_Mappers.
//  NOTE(review): how priority orders the nodes and what `ownership` implies
//  for the lifetime of `mapper` are defined by SNode in the header -- confirm
//  there.
//
void CIdMapperComposite::AddMapper(IIdMapper* mapper,
                                    TPriority  priority,
                                    EOwnership ownership)
{
    m_Mappers.insert(SNode(mapper, priority, ownership));
}
236 
237 
//
//  Map an ID handle through the registered mappers.
//
//  Mappers are tried in m_Mappers iteration order; the walk stops at the
//  first one that returns a non-empty handle different from `idh`. Returns
//  the last result obtained, or `idh` itself if that result is empty (or no
//  mapper is registered).
//
CSeq_id_Handle CIdMapperComposite::Map(const CSeq_id_Handle& idh)
{
    CSeq_id_Handle mapped;
    for (TMappers::const_iterator node = m_Mappers.begin(),
                                  node_end = m_Mappers.end();
         node != node_end;  ++node) {
        mapped = node->m_Mapper->Map(idh);
        if (mapped  &&  mapped != idh) {
            break;
        }
    }
    if (mapped) {
        return mapped;
    }
    return idh;
}
247 
248 
Map(const CSeq_loc & loc)249 CRef<CSeq_loc> CIdMapperComposite::Map(const CSeq_loc& loc)
250 {
251     CRef<CSeq_loc> ret;
252     ITERATE(TMappers, it, m_Mappers) {
253         ret = it->m_Mapper->Map(loc);
254         if ( ret ) break;
255     }
256     return ret;
257 }
258 
259 
GetErrCodeString(void) const260 const char* CIdMapperException::GetErrCodeString(void) const
261 {
262     switch ( GetErrCode() ) {
263     case eBadSeqId:      return "eBadSeqId";
264     case eOther:         return "eOther";
265     default:             return CException::GetErrCodeString();
266     }
267 }
268 
269 
270 END_NCBI_SCOPE
271 
272