1 /* $Id: unit_test_idmapper.cpp 468563 2015-05-26 16:00:34Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mike DiCuccio
27 *
 * File Description:
 *   Unit tests for CIdMapper and CIdMapperBuiltin (seq-id and seq-loc mapping)
30 * ===========================================================================
31 */
32
33 #include <ncbi_pch.hpp>
34
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbi_system.hpp>
37
38
39 // This header must be included before all Boost.Test headers if there are any
40 #include <corelib/test_boost.hpp>
41
42 #include <objtools/readers/idmapper.hpp>
43 #include <objects/seqloc/Seq_loc.hpp>
44 #include <objects/seqloc/Seq_interval.hpp>
45 #include <objects/seqloc/Seq_point.hpp>
46 #include <objmgr/util/sequence.hpp>
47
48
49 USING_NCBI_SCOPE;
50 USING_SCOPE(objects);
51
52
53 ///
54 /// Test simple seq-id -> seq-id mapping, given a mapping table of seq-id ->
55 /// seq-id
56 ///
BOOST_AUTO_TEST_CASE(Test_SimpleIdMapper)
{
    // The one and only mapping registered with the mapper: lcl|1 -> lcl|2.
    CSeq_id source_id("lcl|1");
    CSeq_id target_id("lcl|2");
    CSeq_id_Handle source_idh = CSeq_id_Handle::GetHandle(source_id);
    CSeq_id_Handle target_idh = CSeq_id_Handle::GetHandle(target_id);

    CIdMapper mapper;
    mapper.AddMapping(source_idh, target_idh);

    // A handle that has a registered mapping must come back as its target.
    const CSeq_id_Handle mapped_known = mapper.Map(source_idh);
    BOOST_CHECK_EQUAL(target_idh, mapped_known);

    // A handle without a registered mapping is expected to be echoed back
    // unchanged (the mapper's default behavior).
    CSeq_id unmapped_id("lcl|3");
    CSeq_id_Handle unmapped_idh = CSeq_id_Handle::GetHandle(unmapped_id);
    const CSeq_id_Handle mapped_unknown = mapper.Map(unmapped_idh);
    BOOST_CHECK_EQUAL(unmapped_idh, mapped_unknown);
}
79
80
81 ///
82 /// Test location mapping given a context in which we only map seq-ids
83 /// The goal here is to output the same location that we start with, except we
84 /// change the seq-ids
85 ///
BOOST_AUTO_TEST_CASE(Test_SimpleLocMapper)
{
    CSeq_id id1("lcl|1");
    CSeq_id id2("lcl|2");

    CSeq_id_Handle idh1 = CSeq_id_Handle::GetHandle(id1);
    CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);

    // Only an id -> id mapping is registered here; the test expects the
    // interval coordinates to come through mapping unchanged.
    CIdMapper mapper;
    mapper.AddMapping(idh1, idh2);

    CSeq_loc loc1;
    loc1.SetInt().SetFrom(  0);
    loc1.SetInt().SetTo  (100);
    loc1.SetId(id1);

    CRef<CSeq_loc> loc2 = mapper.Map(loc1);

    // CSeq_loc::GetId() may return NULL; abort this test case with a proper
    // failure report instead of dereferencing a null pointer below.
    BOOST_REQUIRE(loc2.NotNull());
    BOOST_REQUIRE(loc2->GetId() != NULL);

    CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc2->GetId());
    BOOST_CHECK_EQUAL(idh2, mapped_idh);
    BOOST_CHECK_EQUAL(loc1.GetInt().GetFrom(), loc2->GetInt().GetFrom());
    BOOST_CHECK_EQUAL(loc1.GetInt().GetTo(),   loc2->GetInt().GetTo());
}
109
110
111 ///
112 /// More Complex case involving mapping of location ranges that will shift
113 /// positions
114 ///
BOOST_AUTO_TEST_CASE(Test_ComplexLocMapper)
{
    CSeq_id id1("lcl|1");
    CSeq_id id2("lcl|2");

    CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);

    // Source range: lcl|1 [10, 100]
    CSeq_loc loc1;
    loc1.SetInt().SetFrom( 10);
    loc1.SetInt().SetTo  (100);
    loc1.SetId(id1);

    // Target range: lcl|2 [0, 90], i.e. the same length shifted down by 10
    CSeq_loc loc2;
    loc2.SetInt().SetFrom( 0);
    loc2.SetInt().SetTo  (90);
    loc2.SetId(id2);

    CIdMapper mapper;
    mapper.AddMapping(loc1, loc2);

    // A point inside the source range
    CSeq_loc loc3;
    loc3.SetPnt().SetPoint(55);
    loc3.SetId(id1);

    CRef<CSeq_loc> loc4 = mapper.Map(loc3);

    // CSeq_loc::GetId() may return NULL; abort this test case with a proper
    // failure report instead of dereferencing a null pointer below.
    BOOST_REQUIRE(loc4.NotNull());
    BOOST_REQUIRE(loc4->GetId() != NULL);

    CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc4->GetId());
    BOOST_CHECK_EQUAL(idh2, mapped_idh);

    // 55 on lcl|1 is expected at 55 - 10 = 45 on lcl|2
    BOOST_CHECK_EQUAL(loc4->GetPnt().GetPoint(), static_cast<TSeqPos>(45));
}
146
147
///
/// Even more complex case: one source interval is mapped onto a mix of two
/// intervals on different sequences, so positions shift and a single source
/// range may be split across both targets
///
BOOST_AUTO_TEST_CASE(Test_EvenMoreComplexLocMapper)
{
    CSeq_id id1("lcl|1");
    CSeq_id id2("lcl|2");
    CSeq_id id3("lcl|3");

    CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);
    CSeq_id_Handle idh3 = CSeq_id_Handle::GetHandle(id3);

    // Source range: lcl|1 [10, 100]
    CSeq_loc loc1;
    loc1.SetInt().SetFrom( 10);
    loc1.SetInt().SetTo  (100);
    loc1.SetId(id1);

    // Target: a mix of lcl|2 [0, 45] followed by lcl|3 [0, 44]
    CSeq_loc loc2;
    {{
        CRef<CSeq_loc> sub;

        sub.Reset(new CSeq_loc);
        sub->SetInt().SetFrom(0);
        sub->SetInt().SetTo(45);
        sub->SetId(id2);
        loc2.SetMix().Set().push_back(sub);

        sub.Reset(new CSeq_loc);
        sub->SetInt().SetFrom(0);
        sub->SetInt().SetTo(44);
        sub->SetId(id3);
        loc2.SetMix().Set().push_back(sub);
    }}

    CIdMapper mapper;
    mapper.AddMapping(loc1, loc2);

    /// a point expected to land in the first target piece (lcl|2)
    {{
        CSeq_loc loc3;
        loc3.SetPnt().SetPoint(40);
        loc3.SetId(id1);

        CRef<CSeq_loc> loc4 = mapper.Map(loc3);

        // CSeq_loc::GetId() may return NULL; fail cleanly rather than
        // dereference a null pointer.
        BOOST_REQUIRE(loc4.NotNull());
        BOOST_REQUIRE(loc4->GetId() != NULL);

        CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc4->GetId());
        BOOST_CHECK_EQUAL(idh2, mapped_idh);
        BOOST_CHECK_EQUAL(loc4->GetPnt().GetPoint(), static_cast<TSeqPos>(30));
    }}

    /// a point expected to land in the second target piece (lcl|3)
    {{
        CSeq_loc loc3;
        loc3.SetPnt().SetPoint(60);
        loc3.SetId(id1);

        CRef<CSeq_loc> loc4 = mapper.Map(loc3);

        // CSeq_loc::GetId() may return NULL; fail cleanly rather than
        // dereference a null pointer.
        BOOST_REQUIRE(loc4.NotNull());
        BOOST_REQUIRE(loc4->GetId() != NULL);

        CSeq_id_Handle mapped_idh = CSeq_id_Handle::GetHandle(*loc4->GetId());
        BOOST_CHECK_EQUAL(idh3, mapped_idh);
        BOOST_CHECK_EQUAL(loc4->GetPnt().GetPoint(), static_cast<TSeqPos>(4));
    }}

    /// an interval expected to straddle both target pieces
    {{
        CSeq_loc loc3;
        loc3.SetInt().SetFrom(45);
        loc3.SetInt().SetTo(60);
        loc3.SetId(id1);

        CRef<CSeq_loc> loc4 = mapper.Map(loc3);
        BOOST_REQUIRE(loc4.NotNull());

        // The result spans two sequences, so no single id is expected.
        BOOST_CHECK_EQUAL(loc4->GetId(), static_cast<const CSeq_id*>(NULL));
        BOOST_CHECK_EQUAL(loc4->Which(), CSeq_loc::e_Packed_int);

        CSeq_loc_CI loc_iter(*loc4);

        /// interval 1
        // Make sure the iterator is still valid before reading from it.
        BOOST_REQUIRE( !!loc_iter );
        BOOST_CHECK_EQUAL(idh2, loc_iter.GetSeq_id_Handle());
        BOOST_CHECK_EQUAL(loc_iter.GetRange(), TSeqRange(35, 45));
        ++loc_iter;

        /// interval 2
        BOOST_REQUIRE( !!loc_iter );
        BOOST_CHECK_EQUAL(idh3, loc_iter.GetSeq_id_Handle());
        BOOST_CHECK_EQUAL(loc_iter.GetRange(), TSeqRange(0, 4));
        ++loc_iter;

        /// no further intervals are expected
        BOOST_CHECK( !loc_iter );
    }}
}
237
238
239 ///
240 /// Test of built-in ID mapper
/// This also tests the configuration-based methods
242 ///
BOOST_AUTO_TEST_CASE(Test_BuiltinIdMapper)
{
    CSeq_id id1("lcl|chr1");
    CSeq_id_Handle idh1 = CSeq_id_Handle::GetHandle(id1);

    CSeq_id id2("lcl|2");
    CSeq_id_Handle idh2 = CSeq_id_Handle::GetHandle(id2);

    // Mapper constructed from the built-in "hg19" context
    CIdMapperBuiltin mapper("hg19");

    /// "lcl|chr1": check both the handle and the location mapping
    {{
        CSeq_loc loc1;
        loc1.SetInt().SetFrom( 10);
        loc1.SetInt().SetTo  (100);
        loc1.SetId(id1);

        CSeq_id_Handle mapped_idh = mapper.Map(idh1);
        BOOST_CHECK_EQUAL(GI_CONST(224589800), mapped_idh.GetGi());

        CRef<CSeq_loc> loc2 = mapper.Map(loc1);

        // CSeq_loc::GetId() may return NULL; fail cleanly rather than
        // dereference a null pointer.
        BOOST_REQUIRE(loc2.NotNull());
        BOOST_REQUIRE(loc2->GetId() != NULL);

        mapped_idh = CSeq_id_Handle::GetHandle(*loc2->GetId());
        BOOST_CHECK_EQUAL(GI_CONST(224589800), mapped_idh.GetGi());

        // Coordinates are expected to be unchanged by the mapping.
        BOOST_CHECK_EQUAL(loc2->GetInt().GetFrom(), static_cast<TSeqPos>(10));
        BOOST_CHECK_EQUAL(loc2->GetInt().GetTo(),   static_cast<TSeqPos>(100));
    }}

    /// "lcl|2": same checks for a second input id
    {{
        CSeq_loc loc2;
        loc2.SetInt().SetFrom( 10);
        loc2.SetInt().SetTo  (100);
        loc2.SetId(id2);

        CSeq_id_Handle mapped_idh = mapper.Map(idh2);
        BOOST_CHECK_EQUAL(GI_CONST(224589811), mapped_idh.GetGi());

        CRef<CSeq_loc> loc3 = mapper.Map(loc2);

        // CSeq_loc::GetId() may return NULL; fail cleanly rather than
        // dereference a null pointer.
        BOOST_REQUIRE(loc3.NotNull());
        BOOST_REQUIRE(loc3->GetId() != NULL);

        mapped_idh = CSeq_id_Handle::GetHandle(*loc3->GetId());
        BOOST_CHECK_EQUAL(GI_CONST(224589811), mapped_idh.GetGi());

        // Coordinates are expected to be unchanged by the mapping.
        BOOST_CHECK_EQUAL(loc3->GetInt().GetFrom(), static_cast<TSeqPos>(10));
        BOOST_CHECK_EQUAL(loc3->GetInt().GetTo(),   static_cast<TSeqPos>(100));
    }}
}
285
286