1 /*  $Id: unit_test_idmapper.cpp 468563 2015-05-26 16:00:34Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Mike DiCuccio
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbi_system.hpp>
37 
38 
39 // This header must be included before all Boost.Test headers if there are any
40 #include <corelib/test_boost.hpp>
41 
42 #include <objtools/readers/idmapper.hpp>
43 #include <objects/seqloc/Seq_loc.hpp>
44 #include <objects/seqloc/Seq_interval.hpp>
45 #include <objects/seqloc/Seq_point.hpp>
46 #include <objmgr/util/sequence.hpp>
47 
48 
49 USING_NCBI_SCOPE;
50 USING_SCOPE(objects);
51 
52 
53 ///
54 /// Test simple seq-id -> seq-id mapping, given a mapping table of seq-id ->
55 /// seq-id
56 ///
BOOST_AUTO_TEST_CASE(Test_SimpleIdMapper)57 BOOST_AUTO_TEST_CASE(Test_SimpleIdMapper)
58 {
59     CSeq_id id1("lcl|1");
60     CSeq_id_Handle idh1 = CSeq_id_Handle::GetHandle(id1);
61 
62     CSeq_id id2("lcl|2");
63     CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);
64 
65     CIdMapper mapper;
66     mapper.AddMapping(idh1, idh2);
67 
68     CSeq_id_Handle mapped_idh = mapper.Map(idh1);
69     BOOST_CHECK_EQUAL(idh2, mapped_idh);
70 
71     ///
72     /// by default, we echo the given handle if a mapping cannot be found.
73     ///
74     CSeq_id id3("lcl|3");
75     CSeq_id_Handle idh3 = CSeq_id_Handle::GetHandle(id3);
76     mapped_idh = mapper.Map(idh3);
77     BOOST_CHECK_EQUAL(idh3, mapped_idh);
78 }
79 
80 
81 ///
82 /// Test location mapping given a context in which we only map seq-ids
83 /// The goal here is to output the same location that we start with, except we
84 /// change the seq-ids
85 ///
BOOST_AUTO_TEST_CASE(Test_SimpleLocMapper)86 BOOST_AUTO_TEST_CASE(Test_SimpleLocMapper)
87 {
88     CSeq_id id1("lcl|1");
89     CSeq_id id2("lcl|2");
90 
91     CSeq_id_Handle idh1 = CSeq_id_Handle::GetHandle(id1);
92     CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);
93 
94     CIdMapper mapper;
95     mapper.AddMapping(idh1, idh2);
96 
97     CSeq_loc loc1;
98     loc1.SetInt().SetFrom(  0);
99     loc1.SetInt().SetTo  (100);
100     loc1.SetId(id1);
101 
102     CRef<CSeq_loc> loc2 = mapper.Map(loc1);
103 
104     CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc2->GetId());
105     BOOST_CHECK_EQUAL(idh2, mapped_idh);
106     BOOST_CHECK_EQUAL(loc1.GetInt().GetFrom(), loc2->GetInt().GetFrom());
107     BOOST_CHECK_EQUAL(loc1.GetInt().GetTo(),   loc2->GetInt().GetTo());
108 }
109 
110 
111 ///
112 /// More Complex case involving mapping of location ranges that will shift
113 /// positions
114 ///
BOOST_AUTO_TEST_CASE(Test_ComplexLocMapper)115 BOOST_AUTO_TEST_CASE(Test_ComplexLocMapper)
116 {
117     CSeq_id id1("lcl|1");
118     CSeq_id id2("lcl|2");
119 
120     CSeq_id_Handle idh1 = CSeq_id_Handle::GetHandle(id1);
121     CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);
122 
123     CSeq_loc loc1;
124     loc1.SetInt().SetFrom( 10);
125     loc1.SetInt().SetTo  (100);
126     loc1.SetId(id1);
127 
128     CSeq_loc loc2;
129     loc2.SetInt().SetFrom( 0);
130     loc2.SetInt().SetTo  (90);
131     loc2.SetId(id2);
132 
133     CIdMapper mapper;
134     mapper.AddMapping(loc1, loc2);
135 
136     CSeq_loc loc3;
137     loc3.SetPnt().SetPoint(55);
138     loc3.SetId(id1);
139 
140     CRef<CSeq_loc> loc4 = mapper.Map(loc3);
141 
142     CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc4->GetId());
143     BOOST_CHECK_EQUAL(idh2, mapped_idh);
144     BOOST_CHECK_EQUAL(loc4->GetPnt().GetPoint(), (TSeqPos)45);
145 }
146 
147 
148 ///
149 /// More Complex case involving mapping of location ranges that will shift
150 /// positions
151 ///
BOOST_AUTO_TEST_CASE(Test_EvenMoreComplexLocMapper)152 BOOST_AUTO_TEST_CASE(Test_EvenMoreComplexLocMapper)
153 {
154     CSeq_id id1("lcl|1");
155     CSeq_id id2("lcl|2");
156     CSeq_id id3("lcl|3");
157 
158     CSeq_id_Handle idh1 = CSeq_id_Handle::GetHandle(id1);
159     CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);
160     CSeq_id_Handle idh3 = CSeq_id_Handle::GetHandle(id3);
161 
162     CSeq_loc loc1;
163     loc1.SetInt().SetFrom( 10);
164     loc1.SetInt().SetTo  (100);
165     loc1.SetId(id1);
166 
167     CSeq_loc loc2;
168     {{
169          CRef<CSeq_loc> sub;
170 
171          sub.Reset(new CSeq_loc);
172          sub->SetInt().SetFrom(0);
173          sub->SetInt().SetTo(45);
174          sub->SetId(id2);
175          loc2.SetMix().Set().push_back(sub);
176 
177          sub.Reset(new CSeq_loc);
178          sub->SetInt().SetFrom(0);
179          sub->SetInt().SetTo(44);
180          sub->SetId(id3);
181          loc2.SetMix().Set().push_back(sub);
182      }}
183 
184     CIdMapper mapper;
185     mapper.AddMapping(loc1, loc2);
186 
187     {{
188          CSeq_loc loc3;
189          loc3.SetPnt().SetPoint(40);
190          loc3.SetId(id1);
191 
192          CRef<CSeq_loc> loc4 = mapper.Map(loc3);
193 
194          CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc4->GetId());
195          BOOST_CHECK_EQUAL(idh2, mapped_idh);
196          BOOST_CHECK_EQUAL(loc4->GetPnt().GetPoint(), (TSeqPos)30);
197      }}
198 
199     {{
200          CSeq_loc loc3;
201          loc3.SetPnt().SetPoint(60);
202          loc3.SetId(id1);
203 
204          CRef<CSeq_loc> loc4 = mapper.Map(loc3);
205 
206          CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc4->GetId());
207          BOOST_CHECK_EQUAL(idh3, mapped_idh);
208          BOOST_CHECK_EQUAL(loc4->GetPnt().GetPoint(), (TSeqPos)4);
209      }}
210 
211     {{
212          CSeq_loc loc3;
213          loc3.SetInt().SetFrom(45);
214          loc3.SetInt().SetTo(60);
215          loc3.SetId(id1);
216 
217          CRef<CSeq_loc> loc4 = mapper.Map(loc3);
218 
219          BOOST_CHECK_EQUAL(loc4->GetId(), (const CSeq_id*)NULL);
220          BOOST_CHECK_EQUAL(loc4->Which(), CSeq_loc::e_Packed_int);
221 
222          CSeq_loc_CI loc_iter(*loc4);
223 
224          /// interval 1
225          BOOST_CHECK_EQUAL(idh2, loc_iter.GetSeq_id_Handle());
226          BOOST_CHECK_EQUAL(loc_iter.GetRange(), TSeqRange(35, 45));
227          ++loc_iter;
228 
229          BOOST_CHECK_EQUAL(idh3, loc_iter.GetSeq_id_Handle());
230          BOOST_CHECK_EQUAL(loc_iter.GetRange(), TSeqRange(0, 4));
231          ++loc_iter;
232 
233          BOOST_CHECK( !loc_iter );
234 
235      }}
236 }
237 
238 
239 ///
240 /// Test of built-in ID mapper
241 /// This also tests the configuration-based methods as well
242 ///
BOOST_AUTO_TEST_CASE(Test_BuiltinIdMapper)243 BOOST_AUTO_TEST_CASE(Test_BuiltinIdMapper)
244 {
245     CSeq_id id1("lcl|chr1");
246     CSeq_id_Handle idh1 = CSeq_id_Handle::GetHandle(id1);
247 
248     CSeq_id id2("lcl|2");
249     CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);
250 
251     CIdMapperBuiltin mapper("hg19");
252 
253     {{
254          CSeq_loc loc1;
255          loc1.SetInt().SetFrom( 10);
256          loc1.SetInt().SetTo  (100);
257          loc1.SetId(id1);
258 
259          CSeq_id_Handle mapped_idh = mapper.Map(idh1);
260          BOOST_CHECK_EQUAL(GI_CONST(224589800), mapped_idh.GetGi());
261 
262          CRef<CSeq_loc> loc2 = mapper.Map(loc1);
263          mapped_idh = CSeq_id_Handle::GetHandle(*loc2->GetId());
264          BOOST_CHECK_EQUAL(GI_CONST(224589800), mapped_idh.GetGi());
265          BOOST_CHECK_EQUAL(loc2->GetInt().GetFrom(), (TSeqPos)10);
266          BOOST_CHECK_EQUAL(loc2->GetInt().GetTo(), (TSeqPos)100);
267      }}
268 
269     {{
270          CSeq_loc loc2;
271          loc2.SetInt().SetFrom( 10);
272          loc2.SetInt().SetTo  (100);
273          loc2.SetId(id2);
274 
275          CSeq_id_Handle mapped_idh = mapper.Map(idh2);
276          BOOST_CHECK_EQUAL(GI_CONST(224589811), mapped_idh.GetGi());
277 
278          CRef<CSeq_loc> loc3 = mapper.Map(loc2);
279          mapped_idh = CSeq_id_Handle::GetHandle(*loc3->GetId());
280          BOOST_CHECK_EQUAL(GI_CONST(224589811), mapped_idh.GetGi());
281          BOOST_CHECK_EQUAL(loc3->GetInt().GetFrom(), (TSeqPos)10);
282          BOOST_CHECK_EQUAL(loc3->GetInt().GetTo(), (TSeqPos)100);
283      }}
284 }
285 
286