1 /* $Id: unit_test_feature_propagate.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 * Unit tests for feature propagation.
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35
36 #include <corelib/ncbi_system.hpp>
37
38 // This macro should be defined before inclusion of test_boost.hpp in all
39 // "*.cpp" files inside executable except one. It is like function main() for
40 // non-Boost.Test executables is defined only in one *.cpp file - other files
41 // should not include it. If NCBI_BOOST_NO_AUTO_TEST_MAIN will not be defined
42 // then test_boost.hpp will define such "main()" function for tests.
43 //
44 // Usually if your unit tests contain only one *.cpp file you should not
45 // care about this macro at all.
46 //
47 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
48
49
50 // This header must be included before all Boost.Test headers if there are any
51 #include <corelib/test_boost.hpp>
52
53 #include <objects/misc/sequence_macros.hpp>
54 #include <objects/seqset/Seq_entry.hpp>
55 #include <objects/seq/Seq_ext.hpp>
56 #include <objects/seq/Seq_literal.hpp>
57 #include <objects/seq/Seq_data.hpp>
58 #include <objects/seqalign/Seq_align.hpp>
59 #include <objects/seqalign/Dense_seg.hpp>
60 #include <objects/seqfeat/Imp_feat.hpp>
61 #include <objects/seqfeat/Cdregion.hpp>
62 #include <objects/seqfeat/Code_break.hpp>
63 #include <objects/seqloc/Seq_id.hpp>
64 #include <objects/seqloc/Seq_loc.hpp>
65 #include <objects/seqloc/Seq_interval.hpp>
66 #include <objects/seq/seqport_util.hpp>
67 #include <objmgr/object_manager.hpp>
68 #include <objmgr/scope.hpp>
69 #include <objmgr/bioseq_ci.hpp>
70 #include <objmgr/feat_ci.hpp>
71 #include <objmgr/seq_vector.hpp>
72 #include <objmgr/util/sequence.hpp>
73 #include <objmgr/util/seq_loc_util.hpp>
74 #include <objmgr/align_ci.hpp>
75
76 #include <objects/seqalign/Dense_seg.hpp>
77
78 #include <objtools/unit_test_util/unit_test_util.hpp>
79
80 #include <objtools/edit/feature_propagate.hpp>
81
82 #include <corelib/ncbiapp.hpp>
83
84 #include <common/test_assert.h> /* This header must go last */
85
86
87 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)88 BEGIN_SCOPE(objects)
89
90
91
92
93
94 NCBITEST_INIT_TREE()
95 {
96 if ( !CNcbiApplication::Instance()->GetConfig().HasEntry("NCBI", "Data") ) {
97 }
98 }
99
100 static bool s_debugMode = false;
101
NCBITEST_INIT_CMDLINE(arg_desc)102 NCBITEST_INIT_CMDLINE(arg_desc)
103 {
104 // Here we make descriptions of command line parameters that we are
105 // going to use.
106
107 arg_desc->AddFlag( "debug_mode",
108 "Debugging mode writes errors seen for each test" );
109 }
110
NCBITEST_AUTO_INIT()111 NCBITEST_AUTO_INIT()
112 {
113 // initialization function body
114
115 const CArgs& args = CNcbiApplication::Instance()->GetArgs();
116 if (args["debug_mode"]) {
117 s_debugMode = true;
118 }
119 }
120
121
CheckPropagatedLocation(const CSeq_loc & expected,const CSeq_loc & propagated)122 void CheckPropagatedLocation(const CSeq_loc& expected, const CSeq_loc& propagated)
123 {
124 BOOST_CHECK(expected.Equals(propagated));
125 }
126
127 /*
128 good1: 60
129 good2: 65
130 good3: 70
131 annot {
132 {
133 data align {
134 {
135 type global,
136 dim 3,
137 segs denseg {
138 dim 3,
139 numseg 1,
140 ids {
141 local str "good1",
142 local str "good2",
143 local str "good3"
144 },
145 starts {
146 0,
147 5,
148 10
149 },
150 lens {
151 60
152 }
153 }
154 }
155 }
156 }
157 }
158 }
159 */
160
CreateBioseqsAndAlign(size_t front_insert)161 tuple<CRef<CSeq_entry>, CRef<CSeq_align>, CRef<CSeq_entry>, CRef<CSeq_entry>, CRef<CSeq_entry> > CreateBioseqsAndAlign(size_t front_insert)
162 {
163 CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
164 CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
165 auto it = entry->SetSet().SetSeq_set().begin();
166 CRef<CSeq_entry> seq1 = *it;
167 ++it;
168 CRef<CSeq_entry> seq2 = *it;
169 ++it;
170 CRef<CSeq_entry> seq3 = *it;
171 return make_tuple(entry, align, seq1, seq2, seq3);
172 }
173
AddBioseqsToScope(CRef<CSeq_entry> entry)174 tuple<CBioseq_Handle, CBioseq_Handle, CBioseq_Handle, CRef<CScope> > AddBioseqsToScope(CRef<CSeq_entry> entry)
175 {
176 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
177
178 CRef<CScope> scope(new CScope(*object_manager));
179 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
180 CBioseq_CI bi(seh);
181 CBioseq_Handle bsh1 = *bi;
182 ++bi;
183 CBioseq_Handle bsh2 = *bi;
184 ++bi;
185 CBioseq_Handle bsh3 = *bi;
186
187 return make_tuple(bsh1,bsh2,bsh3, scope);
188 }
189
CreateLoc(TSeqPos from,TSeqPos to,const CSeq_id & id,bool loc_partial5,bool loc_partial3,bool is_minus_strand=false)190 CRef<CSeq_loc> CreateLoc(TSeqPos from, TSeqPos to, const CSeq_id &id, bool loc_partial5, bool loc_partial3, bool is_minus_strand = false)
191 {
192 CRef<CSeq_loc> loc(new CSeq_loc());
193 loc->SetInt().SetFrom(from);
194 loc->SetInt().SetTo(to);
195 loc->SetInt().SetId().Assign(id);
196 if (is_minus_strand) {
197 loc->SetInt().SetStrand(eNa_strand_minus);
198 }
199 loc->SetPartialStart(loc_partial5, eExtreme_Biological);
200 loc->SetPartialStop(loc_partial3, eExtreme_Biological);
201 return loc;
202 }
203
CreateTwoIntLoc(TSeqPos from1,TSeqPos to1,TSeqPos from2,TSeqPos to2,ENa_strand strand,const CSeq_id & id,bool loc_partial5,bool loc_partial3)204 CRef<CSeq_loc> CreateTwoIntLoc(TSeqPos from1, TSeqPos to1, TSeqPos from2, TSeqPos to2, ENa_strand strand, const CSeq_id &id, bool loc_partial5, bool loc_partial3)
205 {
206 CRef<CSeq_loc> loc1(new CSeq_loc());
207 loc1->SetInt().SetFrom(from1);
208 loc1->SetInt().SetTo(to1);
209 loc1->SetInt().SetId().Assign(id);
210 loc1->SetInt().SetStrand(strand);
211
212 CRef<CSeq_loc> loc2(new CSeq_loc());
213 loc2->SetInt().SetFrom(from2);
214 loc2->SetInt().SetTo(to2);
215 loc2->SetInt().SetId().Assign(id);
216 loc2->SetInt().SetStrand(strand);
217
218 CRef<CSeq_loc> loc(new CSeq_loc());
219 loc->SetMix().AddSeqLoc(*loc1);
220 loc->SetMix().AddSeqLoc(*loc2);
221 loc->SetPartialStart(loc_partial5, eExtreme_Biological);
222 loc->SetPartialStop(loc_partial3, eExtreme_Biological);
223 return loc;
224 }
225
CreateOrderedLoc(TSeqPos from1,TSeqPos to1,TSeqPos from2,TSeqPos to2,ENa_strand strand,const CSeq_id & id,bool loc_partial5,bool loc_partial3)226 CRef<CSeq_loc> CreateOrderedLoc(TSeqPos from1, TSeqPos to1, TSeqPos from2, TSeqPos to2, ENa_strand strand, const CSeq_id &id, bool loc_partial5, bool loc_partial3)
227 {
228 CRef<CSeq_loc> loc1(new CSeq_loc());
229 loc1->SetInt().SetFrom(from1);
230 loc1->SetInt().SetTo(to1);
231 loc1->SetInt().SetId().Assign(id);
232 loc1->SetInt().SetStrand(strand);
233
234 CRef<CSeq_loc> loc2(new CSeq_loc());
235 loc2->SetNull();
236
237 CRef<CSeq_loc> loc3(new CSeq_loc());
238 loc3->SetInt().SetFrom(from2);
239 loc3->SetInt().SetTo(to2);
240 loc3->SetInt().SetId().Assign(id);
241 loc3->SetInt().SetStrand(strand);
242
243 CRef<CSeq_loc> loc(new CSeq_loc());
244 loc->SetMix().AddSeqLoc(*loc1);
245 loc->SetMix().AddSeqLoc(*loc2);
246 loc->SetMix().AddSeqLoc(*loc3);
247 loc->SetPartialStart(loc_partial5, eExtreme_Biological);
248 loc->SetPartialStop(loc_partial3, eExtreme_Biological);
249 return loc;
250 }
251
CreatePointLoc(TSeqPos pos,const CSeq_id & id)252 CRef<CSeq_loc> CreatePointLoc(TSeqPos pos, const CSeq_id &id)
253 {
254 CRef<CSeq_loc> loc(new CSeq_loc());
255 loc->SetPnt().SetPoint(pos);
256 loc->SetPnt().SetId().Assign(id);
257 return loc;
258 }
259
CreateCds(CRef<CSeq_loc> main_loc,CRef<CSeq_entry> seq)260 CRef<CSeq_feat> CreateCds(CRef<CSeq_loc> main_loc, CRef<CSeq_entry> seq)
261 {
262 CRef<CSeq_feat> cds = unit_test_util::AddMiscFeature(seq, 15);
263 cds->SetData().SetCdregion();
264 cds->SetLocation().Assign(*main_loc);
265 return cds;
266 }
267
AddCodeBreak(CRef<CSeq_feat> cds,CRef<CSeq_loc> subloc)268 void AddCodeBreak(CRef<CSeq_feat> cds, CRef<CSeq_loc> subloc)
269 {
270 CRef<CCode_break> cbr(new CCode_break());
271 cbr->SetLoc().Assign(*subloc);
272 cds->SetData().SetCdregion().SetCode_break().push_back(cbr);
273 }
274
CreateTrna(CRef<CSeq_loc> main_loc,CRef<CSeq_entry> seq)275 CRef<CSeq_feat> CreateTrna(CRef<CSeq_loc> main_loc, CRef<CSeq_entry> seq)
276 {
277 CRef<CSeq_feat> trna = unit_test_util::AddMiscFeature(seq, 15);
278 trna->SetData().SetRna().SetType(CRNA_ref::eType_tRNA);
279 trna->SetLocation().Assign(*main_loc);
280 return trna;
281 }
282
AddAnticodon(CRef<CSeq_feat> trna,CRef<CSeq_loc> subloc)283 void AddAnticodon(CRef<CSeq_feat> trna, CRef<CSeq_loc> subloc)
284 {
285 trna->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*subloc);
286 }
287
288
289 // propagate cds without code-break from seq 1 to seq 2 and 3
TestCds(bool loc_partial5,bool loc_partial3)290 void TestCds(bool loc_partial5, bool loc_partial3)
291 {
292 size_t front_insert = 5;
293 CRef<CSeq_align> align;
294 CRef<CSeq_entry> entry, seq1, seq2, seq3;
295 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
296
297 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
298 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
299 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
300
301 CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id1, loc_partial5, loc_partial3);
302 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
303
304 CBioseq_Handle bsh1, bsh2, bsh3;
305 CRef<CScope> scope;
306 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
307
308 CMessageListener_Basic listener;
309
310 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
311 CRef<CSeq_loc> expected_loc1 = CreateLoc(front_insert, 15+front_insert, id2, loc_partial5, loc_partial3);
312 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
313 BOOST_CHECK_EQUAL(new_feat1->GetData().GetSubtype(), cds->GetData().GetSubtype());
314 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
315 BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
316 BOOST_CHECK_EQUAL(listener.Count(), 0);
317
318 listener.Clear();
319
320 edit::CFeaturePropagator propagator2(bsh1, bsh3, *align, false, false, true, true, &listener);
321 CRef<CSeq_loc> expected_loc2 = CreateLoc(front_insert*2, 15+front_insert*2, id3, loc_partial5, loc_partial3);
322 CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
323 BOOST_CHECK_EQUAL(new_feat2->GetData().GetSubtype(), cds->GetData().GetSubtype());
324 BOOST_CHECK(expected_loc2->Equals(new_feat2->GetLocation()));
325 BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), false);
326 BOOST_CHECK_EQUAL(listener.Count(), 0);
327
328 listener.Clear();
329 }
330
331
332
333 // propagate cds with code-break from seq 1 to seq 2 and 3
TestCdsWithCodeBreak(bool subloc_partial5,bool subloc_partial3)334 void TestCdsWithCodeBreak(bool subloc_partial5, bool subloc_partial3)
335 {
336 size_t front_insert = 5;
337 CRef<CSeq_align> align;
338 CRef<CSeq_entry> entry, seq1, seq2, seq3;
339 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
340
341 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
342 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
343 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
344
345 CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id1, false, false);
346 CRef<CSeq_loc> subloc = CreateLoc(3, 5, id1, subloc_partial5, subloc_partial3);
347 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
348 AddCodeBreak(cds, subloc);
349
350 CBioseq_Handle bsh1, bsh2, bsh3;
351 CRef<CScope> scope;
352 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
353
354 CMessageListener_Basic listener;
355
356 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
357 CRef<CSeq_loc> expected_subloc1 = CreateLoc(3+front_insert, 5+front_insert, id2, subloc_partial5, subloc_partial3);
358 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
359 BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), true);
360 BOOST_CHECK(expected_subloc1->Equals(new_feat1->GetData().GetCdregion().GetCode_break().front()->GetLoc()));
361 BOOST_CHECK_EQUAL(listener.Count(), 0);
362
363 listener.Clear();
364
365 edit::CFeaturePropagator propagator2(bsh1, bsh3, *align, false, false, true, true, &listener);
366 CRef<CSeq_loc> expected_subloc2 = CreateLoc(3+front_insert*2, 5+front_insert*2, id3, subloc_partial5, subloc_partial3);
367 CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
368 BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), true);
369 BOOST_CHECK(expected_subloc2->Equals(new_feat2->GetData().GetCdregion().GetCode_break().front()->GetLoc()));
370 BOOST_CHECK_EQUAL(listener.Count(), 0);
371
372 listener.Clear();
373 }
374
375 // propagate cds without code-break from seq 3 to seq 1 and 2
TestCdsFromLastBioseq(bool loc_partial5,bool loc_partial3)376 void TestCdsFromLastBioseq(bool loc_partial5, bool loc_partial3)
377 {
378 size_t front_insert = 5;
379 CRef<CSeq_align> align;
380 CRef<CSeq_entry> entry, seq1, seq2, seq3;
381 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
382
383 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
384 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
385 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
386
387 CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id3, loc_partial5, loc_partial3);
388 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
389
390 CBioseq_Handle bsh1, bsh2, bsh3;
391 CRef<CScope> scope;
392 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
393
394 CMessageListener_Basic listener;
395
396 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
397 CRef<CSeq_loc> expected_loc1 = CreateLoc(0, 5, id1, true, loc_partial3);
398 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
399 BOOST_CHECK_EQUAL(new_feat1->GetData().GetSubtype(), cds->GetData().GetSubtype());
400 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
401 BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
402 BOOST_CHECK_EQUAL(listener.Count(), 0);
403
404 listener.Clear();
405
406 edit::CFeaturePropagator propagator2(bsh3, bsh2, *align, false, false, true, true, &listener);
407 CRef<CSeq_loc> expected_loc2 = CreateLoc(5, 10, id2, true, loc_partial3);
408 CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
409 BOOST_CHECK_EQUAL(new_feat2->GetData().GetSubtype(), cds->GetData().GetSubtype());
410 BOOST_CHECK(expected_loc2->Equals(new_feat2->GetLocation()));
411 BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), false);
412 BOOST_CHECK_EQUAL(listener.Count(), 0);
413
414 listener.Clear();
415 }
416
417 // propagate cds with code-break from seq 3 to seq 1 and 2
TestCdsFromLastBioseqWithCodeBreak()418 void TestCdsFromLastBioseqWithCodeBreak()
419 {
420 size_t front_insert = 5;
421 CRef<CSeq_align> align;
422 CRef<CSeq_entry> entry, seq1, seq2, seq3;
423 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
424
425 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
426 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
427 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
428
429 CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id3, false, false);
430 CRef<CSeq_loc> subloc = CreateLoc(3, 5, id3, false, false);
431 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
432 AddCodeBreak(cds, subloc);
433
434 CBioseq_Handle bsh1, bsh2, bsh3;
435 CRef<CScope> scope;
436 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
437
438 CMessageListener_Basic listener;
439
440 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
441 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
442 BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
443 BOOST_CHECK_EQUAL(listener.Count(), 1);
444 BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of translation exception"), true);
445 BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_CodeBreakLocation);
446
447 listener.Clear();
448
449 edit::CFeaturePropagator propagator2(bsh3, bsh2, *align, false, false, true, true, &listener);
450 CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
451 BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), false);
452 BOOST_CHECK_EQUAL(listener.Count(), 1);
453 BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of translation exception"), true);
454 BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_CodeBreakLocation);
455
456 listener.Clear();
457 }
458
459 // propagate trna with anticodon from seq 1 to seq 2 and 3
TestTrnaAnticodon(bool subloc_partial5,bool subloc_partial3)460 void TestTrnaAnticodon(bool subloc_partial5, bool subloc_partial3)
461 {
462 size_t front_insert = 5;
463 CRef<CSeq_align> align;
464 CRef<CSeq_entry> entry, seq1, seq2, seq3;
465 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
466
467 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
468 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
469 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
470
471 CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id1, false, false);
472 CRef<CSeq_loc> subloc = CreateLoc(3, 5, id1, subloc_partial5, subloc_partial3);
473 CRef<CSeq_feat> trna = CreateTrna(main_loc, seq1);
474 AddAnticodon(trna, subloc);
475
476 CBioseq_Handle bsh1, bsh2, bsh3;
477 CRef<CScope> scope;
478 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
479
480 CMessageListener_Basic listener;
481
482 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
483 CRef<CSeq_loc> expected_subloc1 = CreateLoc(3+front_insert, 5+front_insert, id2, subloc_partial5, subloc_partial3);
484 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*trna);
485 BOOST_CHECK_EQUAL(new_feat1->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), true);
486 BOOST_CHECK(expected_subloc1->Equals(new_feat1->GetData().GetRna().GetExt().GetTRNA().GetAnticodon()));
487 BOOST_CHECK_EQUAL(listener.Count(), 0);
488
489 listener.Clear();
490
491 edit::CFeaturePropagator propagator2(bsh1, bsh3, *align, false, false, true, true, &listener);
492 CRef<CSeq_loc> expected_subloc2 = CreateLoc(3+front_insert*2, 5+front_insert*2, id3, subloc_partial5, subloc_partial3);
493 CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*trna);
494 BOOST_CHECK_EQUAL(new_feat2->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), true);
495 BOOST_CHECK(expected_subloc2->Equals(new_feat2->GetData().GetRna().GetExt().GetTRNA().GetAnticodon()));
496 BOOST_CHECK_EQUAL(listener.Count(), 0);
497
498 listener.Clear();
499 }
500
501 // propagate trna with anticodon from seq 3 to seq 1 and 2
TestTrnaAnticodonFromLastBioseq()502 void TestTrnaAnticodonFromLastBioseq()
503 {
504 size_t front_insert = 5;
505 CRef<CSeq_align> align;
506 CRef<CSeq_entry> entry, seq1, seq2, seq3;
507 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
508
509 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
510 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
511 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
512
513 CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id3, false, false);
514 CRef<CSeq_loc> subloc = CreateLoc(3, 5, id3, false, false);
515 CRef<CSeq_feat> trna = CreateTrna(main_loc, seq1);
516 AddAnticodon(trna, subloc);
517
518 CBioseq_Handle bsh1, bsh2, bsh3;
519 CRef<CScope> scope;
520 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
521
522 CMessageListener_Basic listener;
523
524 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
525 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*trna);
526 BOOST_CHECK_EQUAL(new_feat1->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), false);
527 BOOST_CHECK_EQUAL(listener.Count(), 1);
528 BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of anticodon"), true);
529 BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_AnticodonLocation);
530
531 listener.Clear();
532
533 edit::CFeaturePropagator propagator2(bsh3, bsh2, *align, false, false, true, true, &listener);
534 CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*trna);
535 BOOST_CHECK_EQUAL(new_feat2->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), false);
536 BOOST_CHECK_EQUAL(listener.Count(), 1);
537 BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of anticodon"), true);
538 BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_AnticodonLocation);
539
540 listener.Clear();
541 }
542
543 // propagate cds outside of the alignment from seq 3 to seq 1
TestCdsFromLastBioseqOutsideAlign()544 void TestCdsFromLastBioseqOutsideAlign()
545 {
546 size_t front_insert = 5;
547 CRef<CSeq_align> align;
548 CRef<CSeq_entry> entry, seq1, seq2, seq3;
549 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
550
551 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
552 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
553 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
554
555 CRef<CSeq_loc> main_loc = CreateLoc(0, 5, id3, false, false);
556 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
557
558 CBioseq_Handle bsh1, bsh2, bsh3;
559 CRef<CScope> scope;
560 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
561
562 CMessageListener_Basic listener;
563
564 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
565 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
566 BOOST_CHECK(new_feat1.IsNull());
567
568 listener.Clear();
569 }
570
571 // propagate 2 exon cds with 1 exon outside of the alignment from seq 3 to seq 1
TestTwoIntCdsFromLastBioseqOutsideAlign()572 void TestTwoIntCdsFromLastBioseqOutsideAlign()
573 {
574 size_t front_insert = 5;
575 CRef<CSeq_align> align;
576 CRef<CSeq_entry> entry, seq1, seq2, seq3;
577 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
578
579 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
580 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
581 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
582
583 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(0, 5, 20, 30, eNa_strand_plus, id3, false, false);
584 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
585
586 CBioseq_Handle bsh1, bsh2, bsh3;
587 CRef<CScope> scope;
588 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
589
590 CMessageListener_Basic listener;
591
592 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
593 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
594 CRef<CSeq_loc> expected_loc1 = CreateLoc(20-front_insert*2, 30-front_insert*2, id1, true, false);
595 expected_loc1->SetInt().SetStrand(eNa_strand_plus);
596 BOOST_CHECK_EQUAL(new_feat1->GetData().GetSubtype(), cds->GetData().GetSubtype());
597 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
598 BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
599 BOOST_CHECK_EQUAL(listener.Count(), 0);
600
601 listener.Clear();
602 }
603
604 // propagate 2 exon cds on minus strand from seq 3 to seq 1
TestTwoIntCdsOnMinusStrand()605 void TestTwoIntCdsOnMinusStrand()
606 {
607 size_t front_insert = 5;
608 CRef<CSeq_align> align;
609 CRef<CSeq_entry> entry, seq1, seq2, seq3;
610 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
611
612 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
613 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
614 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
615
616 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(20, 30, 5, 15, eNa_strand_minus, id3, true, true);
617 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
618 CBioseq_Handle bsh1, bsh2, bsh3;
619 CRef<CScope> scope;
620 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
621 // cout << "Bad order: " << sequence::BadSeqLocSortOrder(bsh3, *main_loc) << endl;
622 CMessageListener_Basic listener;
623
624 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
625 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
626 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(10, 20, 0, 5, eNa_strand_minus, id1, true, true);
627 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
628 BOOST_CHECK_EQUAL(listener.Count(), 0);
629
630 listener.Clear();
631 }
632
633 // test partial when the stop is cut off
TestPartialWhenCutStop(bool partial3)634 void TestPartialWhenCutStop(bool partial3)
635 {
636 size_t front_insert = 5;
637 CRef<CSeq_align> align;
638 CRef<CSeq_entry> entry, seq1, seq2, seq3;
639 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
640
641 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
642 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
643 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
644
645 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 40, eNa_strand_plus, id1, false, partial3);
646 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
647 CBioseq_Handle bsh1, bsh2, bsh3;
648 CRef<CScope> scope;
649 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
650
651 CDense_seg& denseg = align->SetSegs().SetDenseg();
652 denseg.SetNumseg(1);
653 denseg.ResetLens();
654 denseg.SetLens().push_back(30);
655 denseg.ResetStarts();
656 denseg.SetStarts().push_back(0);
657 denseg.SetStarts().push_back(0);
658 denseg.SetStarts().push_back(0);
659
660 CMessageListener_Basic listener;
661
662 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
663 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
664 /*CSeq_loc_Mapper_Options mapper_options(CSeq_loc_Mapper::fTrimMappedLocation);
665 CRef<CSeq_loc_Mapper> mapper(new CSeq_loc_Mapper(*bsh1.GetSeqId(), *bsh2.GetSeqId(), *align, &bsh2.GetScope(), mapper_options));
666 mapper->SetMergeAll();
667 mapper->SetGapRemove();
668 mapper->SetFuzzOption(CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr);
669 CRef<CSeq_loc> new_loc = mapper->Map(cds->GetLocation());
670 new_loc->ChangeToMix();
671 cout << MSerial_AsnText << cds->GetLocation();
672 cout << MSerial_AsnText << *new_loc;
673 */
674 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 15, 20, 29, eNa_strand_plus, id2, false, true);
675 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
676 // BOOST_CHECK(expected_loc1->Equals(*new_loc));
677 BOOST_CHECK_EQUAL(listener.Count(), 0);
678 listener.Clear();
679 }
680
681 // test partial when the last interval is cut off
TestPartialWhenCutLastInterval(bool partial3)682 void TestPartialWhenCutLastInterval(bool partial3)
683 {
684 size_t front_insert = 5;
685 CRef<CSeq_align> align;
686 CRef<CSeq_entry> entry, seq1, seq2, seq3;
687 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
688
689 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
690 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
691 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
692
693 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 40, 50, eNa_strand_plus, id1, false, partial3);
694 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
695 CBioseq_Handle bsh1, bsh2, bsh3;
696 CRef<CScope> scope;
697 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
698
699 CDense_seg& denseg = align->SetSegs().SetDenseg();
700 denseg.SetNumseg(1);
701 denseg.ResetLens();
702 denseg.SetLens().push_back(30);
703 denseg.ResetStarts();
704 denseg.SetStarts().push_back(0);
705 denseg.SetStarts().push_back(0);
706 denseg.SetStarts().push_back(0);
707
708 CMessageListener_Basic listener;
709
710 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
711 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
712 /*CSeq_loc_Mapper_Options mapper_options(CSeq_loc_Mapper::fTrimMappedLocation);
713 CRef<CSeq_loc_Mapper> mapper(new CSeq_loc_Mapper(*bsh1.GetSeqId(), *bsh2.GetSeqId(), *align, &bsh2.GetScope(), mapper_options));
714 mapper->SetMergeAll();
715 mapper->SetGapRemove();
716 mapper->SetFuzzOption(CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr);
717 CRef<CSeq_loc> new_loc = mapper->Map(cds->GetLocation());
718 cout << MSerial_AsnText << cds->GetLocation();
719 cout << MSerial_AsnText << *new_loc;
720 */
721 CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 15, id2, false, true);
722 expected_loc1->SetInt().SetStrand(eNa_strand_plus);
723 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
724 // BOOST_CHECK(expected_loc1->Equals(*new_loc));
725 BOOST_CHECK_EQUAL(listener.Count(), 0);
726 listener.Clear();
727 }
728
729 // test partial when the start is cut off
TestPartialWhenCutStart(bool partial5)730 void TestPartialWhenCutStart(bool partial5)
731 {
732 size_t front_insert = 5;
733 CRef<CSeq_align> align;
734 CRef<CSeq_entry> entry, seq1, seq2, seq3;
735 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
736
737 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
738 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
739 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
740
741 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 25, eNa_strand_plus, id1, partial5, false);
742 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
743 CBioseq_Handle bsh1, bsh2, bsh3;
744 CRef<CScope> scope;
745 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
746
747 CDense_seg& denseg = align->SetSegs().SetDenseg();
748 denseg.SetNumseg(1);
749 denseg.ResetLens();
750 denseg.SetLens().push_back(30);
751 denseg.ResetStarts();
752 denseg.SetStarts().push_back(10);
753 denseg.SetStarts().push_back(10);
754 denseg.SetStarts().push_back(10);
755
756 CMessageListener_Basic listener;
757
758 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
759 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
760 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(10, 15, 20, 25, eNa_strand_plus, id2, true, false);
761 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
762 BOOST_CHECK_EQUAL(listener.Count(), 0);
763 listener.Clear();
764 }
765
766 // test fuse abutting intervals
TestFuseAbuttingIntervals()767 void TestFuseAbuttingIntervals()
768 {
769 size_t front_insert = 5;
770 CRef<CSeq_align> align;
771 CRef<CSeq_entry> entry, seq1, seq2, seq3;
772 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
773
774 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
775 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
776 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
777
778 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 12, 17, 25, eNa_strand_plus, id1, false, false);
779 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
780 CBioseq_Handle bsh1, bsh2, bsh3;
781 CRef<CScope> scope;
782 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
783
784 CDense_seg& denseg = align->SetSegs().SetDenseg();
785 denseg.SetNumseg(3);
786 denseg.ResetLens();
787 denseg.SetLens().push_back(10);
788 denseg.SetLens().push_back(10);
789 denseg.SetLens().push_back(10);
790 denseg.ResetStarts();
791 denseg.SetStarts().push_back(0);
792 denseg.SetStarts().push_back(0);
793 denseg.SetStarts().push_back(0);
794 denseg.SetStarts().push_back(10);
795 denseg.SetStarts().push_back(-1);
796 denseg.SetStarts().push_back(10);
797 denseg.SetStarts().push_back(20);
798 denseg.SetStarts().push_back(10);
799 denseg.SetStarts().push_back(20);;
800
801 CMessageListener_Basic listener;
802
803 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
804 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
805 CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 15, id2, false, false);
806 expected_loc1->SetInt().SetStrand(eNa_strand_plus);
807 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
808 BOOST_CHECK_EQUAL(listener.Count(), 0);
809 listener.Clear();
810 }
811
812 // test do not fuse abutting intervals
TestDoNotFuseAbuttingIntervals()813 void TestDoNotFuseAbuttingIntervals()
814 {
815 size_t front_insert = 5;
816 CRef<CSeq_align> align;
817 CRef<CSeq_entry> entry, seq1, seq2, seq3;
818 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
819
820 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
821 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
822 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
823
824 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 12, 17, 25, eNa_strand_plus, id1, false, false);
825 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
826 CBioseq_Handle bsh1, bsh2, bsh3;
827 CRef<CScope> scope;
828 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
829
830 CDense_seg& denseg = align->SetSegs().SetDenseg();
831 denseg.SetNumseg(3);
832 denseg.ResetLens();
833 denseg.SetLens().push_back(10);
834 denseg.SetLens().push_back(10);
835 denseg.SetLens().push_back(10);
836 denseg.ResetStarts();
837 denseg.SetStarts().push_back(0);
838 denseg.SetStarts().push_back(0);
839 denseg.SetStarts().push_back(0);
840 denseg.SetStarts().push_back(10);
841 denseg.SetStarts().push_back(-1);
842 denseg.SetStarts().push_back(10);
843 denseg.SetStarts().push_back(20);
844 denseg.SetStarts().push_back(10);
845 denseg.SetStarts().push_back(20);;
846
847 CMessageListener_Basic listener;
848
849 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, false, true, &listener);
850 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
851 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 9, 10, 15, eNa_strand_plus, id2, false, false);
852 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
853 BOOST_CHECK_EQUAL(listener.Count(), 0);
854 listener.Clear();
855 }
856
857 // test extend over gaps
TestExtendOverGap()858 void TestExtendOverGap()
859 {
860 size_t front_insert = 5;
861 CRef<CSeq_align> align;
862 CRef<CSeq_entry> entry, seq1, seq2, seq3;
863 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
864
865 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
866 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
867 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
868
869 CRef<CSeq_loc> main_loc = CreateLoc(5, 25, id1, false, false);
870 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
871 CBioseq_Handle bsh1, bsh2, bsh3;
872 CRef<CScope> scope;
873 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
874
875 CDense_seg& denseg = align->SetSegs().SetDenseg();
876 denseg.SetNumseg(3);
877 denseg.ResetLens();
878 denseg.SetLens().push_back(10);
879 denseg.SetLens().push_back(10);
880 denseg.SetLens().push_back(10);
881 denseg.ResetStarts();
882 denseg.SetStarts().push_back(0);
883 denseg.SetStarts().push_back(0);
884 denseg.SetStarts().push_back(0);
885 denseg.SetStarts().push_back(10);
886 denseg.SetStarts().push_back(-1);
887 denseg.SetStarts().push_back(10);
888 denseg.SetStarts().push_back(20);
889 denseg.SetStarts().push_back(20);
890 denseg.SetStarts().push_back(20);;
891
892 CMessageListener_Basic listener;
893
894 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, false, true, &listener);
895 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
896 CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 25, id2, false, false);
897 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
898 BOOST_CHECK_EQUAL(listener.Count(), 0);
899 listener.Clear();
900 }
901
902 // test do not extend over gaps
TestDoNotExtendOverGap()903 void TestDoNotExtendOverGap()
904 {
905 size_t front_insert = 5;
906 CRef<CSeq_align> align;
907 CRef<CSeq_entry> entry, seq1, seq2, seq3;
908 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
909
910 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
911 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
912 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
913
914 CRef<CSeq_loc> main_loc = CreateLoc(5, 25, id2, false, false);
915 CRef<CSeq_feat> cds = CreateCds(main_loc, seq2);
916 CBioseq_Handle bsh1, bsh2, bsh3;
917 CRef<CScope> scope;
918 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
919
920 CDense_seg& denseg = align->SetSegs().SetDenseg();
921 denseg.SetNumseg(3);
922 denseg.ResetLens();
923 denseg.SetLens().push_back(10);
924 denseg.SetLens().push_back(10);
925 denseg.SetLens().push_back(10);
926 denseg.ResetStarts();
927 denseg.SetStarts().push_back(0);
928 denseg.SetStarts().push_back(0);
929 denseg.SetStarts().push_back(0);
930 denseg.SetStarts().push_back(10);
931 denseg.SetStarts().push_back(-1);
932 denseg.SetStarts().push_back(10);
933 denseg.SetStarts().push_back(20);
934 denseg.SetStarts().push_back(20);
935 denseg.SetStarts().push_back(20);;
936
937 CMessageListener_Basic listener;
938
939 edit::CFeaturePropagator propagator1(bsh2, bsh1, *align, false, false, false, false, &listener);
940 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
941 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 9, 20, 25, eNa_strand_plus, id1, false, false);
942 expected_loc1->ResetStrand();
943 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
944 BOOST_CHECK_EQUAL(listener.Count(), 0);
945 listener.Clear();
946 }
947
948 // test ordered vs. joined locations
TestOrderedLoc()949 void TestOrderedLoc()
950 {
951 size_t front_insert = 5;
952 CRef<CSeq_align> align;
953 CRef<CSeq_entry> entry, seq1, seq2, seq3;
954 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
955
956 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
957 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
958 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
959
960 CRef<CSeq_loc> main_loc = CreateOrderedLoc(5, 15, 20, 30, eNa_strand_plus, id3, true, true);
961 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
962 CBioseq_Handle bsh1, bsh2, bsh3;
963 CRef<CScope> scope;
964 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
965 CMessageListener_Basic listener;
966
967 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
968 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
969 CRef<CSeq_loc> expected_loc1 = CreateOrderedLoc(0, 5, 10, 20, eNa_strand_plus, id1, true, true);
970 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
971 BOOST_CHECK_EQUAL(listener.Count(), 0);
972
973 listener.Clear();
974 }
975
976 // test circular topology
TestCircularTopology()977 void TestCircularTopology()
978 {
979 size_t front_insert = 5;
980 CRef<CSeq_align> align;
981 CRef<CSeq_entry> entry, seq1, seq2, seq3;
982 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
983
984 seq1->SetSeq().SetInst().SetTopology(CSeq_inst::eTopology_circular);
985 seq2->SetSeq().SetInst().SetTopology(CSeq_inst::eTopology_circular);
986 seq3->SetSeq().SetInst().SetTopology(CSeq_inst::eTopology_circular);
987
988 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
989 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
990 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
991
992 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(50, 59, 0, 5, eNa_strand_plus, id1, false, false);
993 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
994 CBioseq_Handle bsh1, bsh2, bsh3;
995 CRef<CScope> scope;
996 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
997
998 CDense_seg& denseg = align->SetSegs().SetDenseg();
999 denseg.SetNumseg(3);
1000 denseg.ResetLens();
1001 denseg.SetLens().push_back(20);
1002 denseg.SetLens().push_back(20);
1003 denseg.SetLens().push_back(20);
1004 denseg.ResetStarts();
1005 denseg.SetStarts().push_back(0);
1006 denseg.SetStarts().push_back(0);
1007 denseg.SetStarts().push_back(0);
1008 denseg.SetStarts().push_back(20);
1009 denseg.SetStarts().push_back(-1);
1010 denseg.SetStarts().push_back(-1);
1011 denseg.SetStarts().push_back(40);
1012 denseg.SetStarts().push_back(45);
1013 denseg.SetStarts().push_back(50);;
1014
1015 CMessageListener_Basic listener;
1016
1017 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, false, true, &listener);
1018 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1019 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(55, 64, 0, 5, eNa_strand_plus, id2, false, false);
1020 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1021 BOOST_CHECK_EQUAL(listener.Count(), 0);
1022 listener.Clear();
1023
1024 }
1025
1026
1027
1028 // test point location inside alignment
TestPointLocInside()1029 void TestPointLocInside()
1030 {
1031 size_t front_insert = 5;
1032 CRef<CSeq_align> align;
1033 CRef<CSeq_entry> entry, seq1, seq2, seq3;
1034 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1035
1036 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1037 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1038 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1039
1040 CRef<CSeq_loc> main_loc = CreatePointLoc(15, id3);
1041 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
1042
1043 CBioseq_Handle bsh1, bsh2, bsh3;
1044 CRef<CScope> scope;
1045 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1046
1047 CMessageListener_Basic listener;
1048
1049 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
1050 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1051 CRef<CSeq_loc> expected_loc1 = CreatePointLoc(5, id1);
1052 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1053 BOOST_CHECK_EQUAL(listener.Count(), 0);
1054
1055 listener.Clear();
1056 }
1057
1058 // test point location outside alignment
TestPointLocOutside()1059 void TestPointLocOutside()
1060 {
1061 size_t front_insert = 5;
1062 CRef<CSeq_align> align;
1063 CRef<CSeq_entry> entry, seq1, seq2, seq3;
1064 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1065
1066 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1067 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1068 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1069
1070 CRef<CSeq_loc> main_loc = CreatePointLoc(5, id3);
1071 CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
1072
1073 CBioseq_Handle bsh1, bsh2, bsh3;
1074 CRef<CScope> scope;
1075 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1076
1077 CMessageListener_Basic listener;
1078
1079 edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
1080 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1081 BOOST_CHECK(new_feat1.IsNull());
1082
1083 listener.Clear();
1084 }
1085
1086 // test partial when the stop is cut off and do not extend
TestPartialWhenCutStopDoNotExtend(bool partial3)1087 void TestPartialWhenCutStopDoNotExtend(bool partial3)
1088 {
1089 size_t front_insert = 5;
1090 CRef<CSeq_align> align;
1091 CRef<CSeq_entry> entry, seq1, seq2, seq3;
1092 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1093
1094 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1095 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1096 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1097
1098 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 40, eNa_strand_plus, id1, false, partial3);
1099 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1100 CBioseq_Handle bsh1, bsh2, bsh3;
1101 CRef<CScope> scope;
1102 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1103
1104 CDense_seg& denseg = align->SetSegs().SetDenseg();
1105 denseg.SetNumseg(1);
1106 denseg.ResetLens();
1107 denseg.SetLens().push_back(30);
1108 denseg.ResetStarts();
1109 denseg.SetStarts().push_back(0);
1110 denseg.SetStarts().push_back(0);
1111 denseg.SetStarts().push_back(0);
1112
1113 CMessageListener_Basic listener;
1114
1115 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, false, &listener);
1116 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1117 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 15, 20, 29, eNa_strand_plus, id2, false, true);
1118 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1119 BOOST_CHECK_EQUAL(listener.Count(), 0);
1120 listener.Clear();
1121 }
1122
1123 // test partial when the last interval is cut off and do not extend
TestPartialWhenCutLastIntervalDoNotExtend(bool partial3)1124 void TestPartialWhenCutLastIntervalDoNotExtend(bool partial3)
1125 {
1126 size_t front_insert = 5;
1127 CRef<CSeq_align> align;
1128 CRef<CSeq_entry> entry, seq1, seq2, seq3;
1129 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1130
1131 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1132 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1133 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1134
1135 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 40, 50, eNa_strand_plus, id1, false, partial3);
1136 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1137 CBioseq_Handle bsh1, bsh2, bsh3;
1138 CRef<CScope> scope;
1139 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1140
1141 CDense_seg& denseg = align->SetSegs().SetDenseg();
1142 denseg.SetNumseg(1);
1143 denseg.ResetLens();
1144 denseg.SetLens().push_back(30);
1145 denseg.ResetStarts();
1146 denseg.SetStarts().push_back(0);
1147 denseg.SetStarts().push_back(0);
1148 denseg.SetStarts().push_back(0);
1149
1150 CMessageListener_Basic listener;
1151
1152 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, false, &listener);
1153 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1154 CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 15, id2, false, true);
1155 expected_loc1->SetInt().SetStrand(eNa_strand_plus);
1156 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1157 BOOST_CHECK_EQUAL(listener.Count(), 0);
1158 listener.Clear();
1159 }
1160
1161 // test partial when the start is cut off and do not extend
TestPartialWhenCutStartDoNotExtend(bool partial5)1162 void TestPartialWhenCutStartDoNotExtend(bool partial5)
1163 {
1164 size_t front_insert = 5;
1165 CRef<CSeq_align> align;
1166 CRef<CSeq_entry> entry, seq1, seq2, seq3;
1167 tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1168
1169 const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1170 const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1171 const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1172
1173 CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 25, eNa_strand_plus, id1, partial5, false);
1174 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1175 CBioseq_Handle bsh1, bsh2, bsh3;
1176 CRef<CScope> scope;
1177 tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1178
1179 CDense_seg& denseg = align->SetSegs().SetDenseg();
1180 denseg.SetNumseg(1);
1181 denseg.ResetLens();
1182 denseg.SetLens().push_back(30);
1183 denseg.ResetStarts();
1184 denseg.SetStarts().push_back(10);
1185 denseg.SetStarts().push_back(10);
1186 denseg.SetStarts().push_back(10);
1187
1188 CMessageListener_Basic listener;
1189
1190 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, false, &listener);
1191 CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1192 CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(10, 15, 20, 25, eNa_strand_plus, id2, true, false);
1193 BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1194 BOOST_CHECK_EQUAL(listener.Count(), 0);
1195 listener.Clear();
1196 }
1197
TestFeatInsideGap(bool is_minus)1198 void TestFeatInsideGap(bool is_minus)
1199 {
1200 CRef<CSeq_entry> entry(new CSeq_entry);
1201
1202 string str1 = "TCACTCTTTGAAAAAAAAAA";
1203 CRef<CSeq_entry> seq1(new CSeq_entry);
1204 CRef< CSeq_id > id1(new CSeq_id);
1205 id1->SetLocal().SetStr("seq1");
1206 seq1->SetSeq().SetId().push_back(id1);
1207 seq1->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(str1);
1208 seq1->SetSeq().SetInst().SetLength(str1.length());
1209 seq1->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
1210 seq1->SetSeq().SetInst().SetMol(CSeq_inst::eMol_na);
1211 entry->SetSet().SetSeq_set().push_back(seq1);
1212
1213 string str2 = "TCACTGAAAAAAAAAA";
1214 CRef<CSeq_entry> seq2(new CSeq_entry);
1215 CRef< CSeq_id > id2(new CSeq_id);
1216 id2->SetLocal().SetStr("seq2");
1217 seq2->SetSeq().SetId().push_back(id2);
1218 seq2->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(str2);
1219 seq2->SetSeq().SetInst().SetLength(str2.length());
1220 seq2->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
1221 seq2->SetSeq().SetInst().SetMol(CSeq_inst::eMol_na);
1222 entry->SetSet().SetSeq_set().push_back(seq2);
1223
1224 CRef<CSeq_align> align(new CSeq_align());
1225 align->SetType(objects::CSeq_align::eType_global);
1226 align->SetDim(entry->GetSet().GetSeq_set().size());
1227 align->SetSegs().SetDenseg().SetIds().push_back(id1);
1228 align->SetSegs().SetDenseg().SetIds().push_back(id2);
1229
1230 auto& denseg = align->SetSegs().SetDenseg();
1231 denseg.SetNumseg(3);
1232 denseg.SetLens().push_back(5);
1233 denseg.SetLens().push_back(4);
1234 denseg.SetLens().push_back(11);
1235 denseg.SetDim(entry->GetSet().GetSeq_set().size());
1236 denseg.SetStarts().push_back(0);
1237 denseg.SetStarts().push_back(0);
1238 denseg.SetStarts().push_back(5);
1239 denseg.SetStarts().push_back(-1);
1240 denseg.SetStarts().push_back(9);
1241 denseg.SetStarts().push_back(5);
1242
1243 CRef<CSeq_annot> annot(new CSeq_annot());
1244 annot->SetData().SetAlign().push_back(align);
1245 entry->SetSet().SetAnnot().push_back(annot);
1246
1247 CRef<CSeq_loc> main_loc = CreateLoc(6, 7, *id1, false, false, is_minus);
1248 CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1249
1250 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1251 CRef<CScope> scope(new CScope(*object_manager));
1252 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
1253 CBioseq_CI bi(seh);
1254 CBioseq_Handle bsh1 = *bi;
1255 ++bi;
1256 CBioseq_Handle bsh2 = *bi;
1257
1258 CMessageListener_Basic listener;
1259
1260 edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
1261 CRef<CSeq_feat> new_feat = propagator1.Propagate(*cds);
1262 BOOST_CHECK(new_feat.IsNull());
1263 BOOST_CHECK_EQUAL(listener.Count(), 1);
1264
1265 listener.Clear();
1266 }
1267
// Runs the individual feature-propagation scenarios in a fixed order.
// Scenarios taking two boolean switches are run for every combination in the
// order (false,false), (false,true), (true,false), (true,true); scenarios
// taking one switch are run with false first, then true.
BOOST_AUTO_TEST_CASE(Test_FeaturePropagation)
{
    static const bool kSwitch[] = { false, true };

    for (bool flag1 : kSwitch) {
        for (bool flag2 : kSwitch) {
            TestCds(flag1, flag2);
        }
    }

    for (bool flag1 : kSwitch) {
        for (bool flag2 : kSwitch) {
            TestCdsWithCodeBreak(flag1, flag2);
        }
    }

    for (bool flag1 : kSwitch) {
        for (bool flag2 : kSwitch) {
            TestCdsFromLastBioseq(flag1, flag2);
        }
    }

    TestCdsFromLastBioseqWithCodeBreak();

    for (bool flag1 : kSwitch) {
        for (bool flag2 : kSwitch) {
            TestTrnaAnticodon(flag1, flag2);
        }
    }

    TestTrnaAnticodonFromLastBioseq();

    TestCdsFromLastBioseqOutsideAlign();

    TestTwoIntCdsFromLastBioseqOutsideAlign();

    TestTwoIntCdsOnMinusStrand();

    // Partial-end handling.
    for (bool partial : kSwitch) {
        TestPartialWhenCutStop(partial);
    }
    for (bool partial : kSwitch) {
        TestPartialWhenCutLastInterval(partial);
    }
    for (bool partial : kSwitch) {
        TestPartialWhenCutStart(partial);
    }

    TestFuseAbuttingIntervals();
    TestDoNotFuseAbuttingIntervals();
    TestExtendOverGap();
    TestDoNotExtendOverGap();
    TestOrderedLoc();
    TestCircularTopology();
    TestPointLocInside();
    TestPointLocOutside();

    // Partial-end handling, "do not extend" variants.
    for (bool partial : kSwitch) {
        TestPartialWhenCutStopDoNotExtend(partial);
    }
    for (bool partial : kSwitch) {
        TestPartialWhenCutLastIntervalDoNotExtend(partial);
    }
    for (bool partial : kSwitch) {
        TestPartialWhenCutStartDoNotExtend(partial);
    }

    for (bool is_minus : kSwitch) {
        TestFeatInsideGap(is_minus);
    }
}
1326
1327
CheckPropagatedCDSLocation(CSeq_entry & entry,const CSeq_feat & cds,bool stop_at_stop,bool fix_partials,const vector<CRef<CSeq_loc>> & expected_loc)1328 void CheckPropagatedCDSLocation(CSeq_entry& entry, const CSeq_feat& cds,
1329 bool stop_at_stop, bool fix_partials,
1330 const vector<CRef<CSeq_loc> >& expected_loc)
1331 {
1332 CRef<CSeq_align> align = entry.SetSet().SetAnnot().front()->SetData().SetAlign().front();
1333 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1334
1335 CRef<CScope> scope(new CScope(*object_manager));
1336 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (entry);
1337
1338 CBioseq_CI b(seh);
1339 CBioseq_Handle src = *b;
1340 ++b;
1341 size_t offset = 0;
1342 while (b) {
1343 CMessageListener_Basic listener;
1344 edit::CFeaturePropagator propagator(src, *b, *align, stop_at_stop, fix_partials, true, true, &listener);
1345
1346 CRef<CSeq_feat> new_feat = propagator.Propagate(cds);
1347 BOOST_CHECK_EQUAL(new_feat->GetData().GetSubtype(), CSeqFeatData::eSubtype_cdregion);
1348 CheckPropagatedLocation(*(expected_loc[offset]), new_feat->GetLocation());
1349 BOOST_CHECK_EQUAL(listener.Count(), 0);
1350 listener.Clear();
1351 offset++;
1352 ++b;
1353 }
1354
1355 }
1356
1357
InsertStop(CBioseq & seq,size_t pos)1358 void InsertStop(CBioseq& seq, size_t pos)
1359 {
1360 string na = seq.GetInst().GetSeq_data().GetIupacna();
1361 string before = na.substr(0, pos);
1362 string after = na.substr(pos + 3);
1363 na = before + "TAA" + after;
1364 seq.SetInst().SetSeq_data().SetIupacna().Set(na);
1365 }
1366
1367
// Checks the locations obtained when a coding region on the first sequence
// of the set is propagated to the other two ("good2", "good3") under
// different stop_at_stop / fix_partials settings: first with no stop codons
// in the targets, then with a stop inserted past the propagated end
// (extension), then with stops inserted before it (truncation).
BOOST_AUTO_TEST_CASE(Test_CdRegionAlterations)
{
    size_t front_insert = 5;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);

    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();

    CRef<CSeq_feat> cds = unit_test_util::AddMiscFeature(first, 15);
    cds->SetData().SetCdregion();

    // Builds a non-partial interval [from, to] on the local id `label`.
    auto make_expected = [](size_t from, size_t to, const char* label) -> CRef<CSeq_loc> {
        CRef<CSeq_loc> loc(new CSeq_loc());
        loc->SetInt().SetFrom(from);
        loc->SetInt().SetTo(to);
        loc->SetInt().SetId().SetLocal().SetStr(label);
        loc->SetPartialStart(false, eExtreme_Biological);
        loc->SetPartialStop(false, eExtreme_Biological);
        return loc;
    };

    CRef<CSeq_loc> loc1 = make_expected(front_insert, 15 + front_insert, "good2");
    CRef<CSeq_loc> loc2 = make_expected(front_insert * 2, 15 + front_insert * 2, "good3");

    vector<CRef<CSeq_loc> > expected_loc;
    expected_loc.push_back(loc1);
    expected_loc.push_back(loc2);

    // Toggle the partial flags of both expected locations together.
    auto expect_partial_start = [&loc1, &loc2](bool flag) {
        loc1->SetPartialStart(flag, eExtreme_Biological);
        loc2->SetPartialStart(flag, eExtreme_Biological);
    };
    auto expect_partial_stop = [&loc1, &loc2](bool flag) {
        loc1->SetPartialStop(flag, eExtreme_Biological);
        loc2->SetPartialStop(flag, eExtreme_Biological);
    };

    // Round 1: there are no stops in the target sequences.
    CheckPropagatedCDSLocation(*entry, *cds, true, false, expected_loc);
    expect_partial_start(true);
    expect_partial_stop(true);
    CheckPropagatedCDSLocation(*entry, *cds, true, true, expected_loc);
    CheckPropagatedCDSLocation(*entry, *cds, false, true, expected_loc);
    expect_partial_start(false);
    expect_partial_stop(false);

    // Round 2: put a stop codon 6 bases past the propagated end of each
    // target (every sequence but the first); the expected end then moves to
    // the last base of that codon.
    size_t index = 0;
    for (auto& seq_entry : entry->SetSet().SetSeq_set()) {
        if (index != 0) {
            const size_t propagated_end = 15 + (front_insert * index);
            InsertStop(seq_entry->SetSeq(), propagated_end + 6);
        }
        ++index;
    }
    loc1->SetInt().SetTo(15 + front_insert + 8);
    loc2->SetInt().SetTo(15 + 2 * front_insert + 8);

    CheckPropagatedCDSLocation(*entry, *cds, true, false, expected_loc);
    expect_partial_start(true);
    CheckPropagatedCDSLocation(*entry, *cds, true, true, expected_loc);
    expect_partial_start(false);

    // Round 3: put stops before the propagated end of each target.  Three
    // codons are inserted 4 bases apart, so one falls in each reading frame;
    // per the original note, the frame giving the longest product is used.
    index = 0;
    for (auto& seq_entry : entry->SetSet().SetSeq_set()) {
        if (index != 0) {
            const size_t propagated_end = 15 + (front_insert * index);
            InsertStop(seq_entry->SetSeq(), propagated_end - 14);
            InsertStop(seq_entry->SetSeq(), propagated_end - 10);
            InsertStop(seq_entry->SetSeq(), propagated_end - 6);
        }
        ++index;
    }
    loc1->SetInt().SetTo(15 + front_insert - 4);
    loc2->SetInt().SetTo(15 + 2 * front_insert - 4);

    CheckPropagatedCDSLocation(*entry, *cds, true, false, expected_loc);
    expect_partial_start(true);
    CheckPropagatedCDSLocation(*entry, *cds, true, true, expected_loc);
}
1449
1450
ImproveAlignment(CSeq_align & align,size_t front_insert)1451 void ImproveAlignment(CSeq_align& align, size_t front_insert)
1452 {
1453 CDense_seg& denseg = align.SetSegs().SetDenseg();
1454 denseg.SetNumseg(3);
1455 denseg.ResetLens();
1456 denseg.SetLens().push_back(front_insert);
1457 denseg.SetLens().push_back(front_insert);
1458 denseg.SetLens().push_back(60);
1459 denseg.ResetStarts();
1460 denseg.SetStarts().push_back(-1);
1461 denseg.SetStarts().push_back(-1);
1462 denseg.SetStarts().push_back(0);
1463 denseg.SetStarts().push_back(-1);
1464 denseg.SetStarts().push_back(0);
1465 denseg.SetStarts().push_back(front_insert);
1466 denseg.SetStarts().push_back(0);
1467 denseg.SetStarts().push_back(front_insert);
1468 denseg.SetStarts().push_back(front_insert * 2);
1469 }
1470
1471 // TODO? Bad alignment!
1472
// Checks PropagateAll() from the last sequence of the set to the first and
// to the second: the number of features produced and the exact failure
// messages posted to the listener.
BOOST_AUTO_TEST_CASE(Test_PropagateAll)
{
    size_t front_insert = 10;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    // Use a better alignment, in which some sequences are gapped at the front.
    ImproveAlignment(*align, front_insert);

    CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();

    // Three features on the source (last) sequence:
    //  - the first cannot be propagated to either of the other sequences;
    //  - the second can only be propagated to the middle sequence;
    //  - the third can be propagated to both.
    unit_test_util::AddMiscFeature(last, front_insert - 1);
    unit_test_util::AddMiscFeature(last, (2 * front_insert) - 1);
    unit_test_util::AddMiscFeature(last, 4 * front_insert);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    // The source is the third Bioseq in iteration order.
    CBioseq_CI src_it(seh);
    ++src_it;
    ++src_it;
    CBioseq_Handle src = *src_it;

    // Targets: first Bioseq, then second.
    CBioseq_CI target_it(seh);

    CMessageListener_Basic listener;

    {
        edit::CFeaturePropagator to_first(src, *target_it, *align, false, false, true, true, &listener);
        vector<CRef<CSeq_feat> > first_feats = to_first.PropagateAll();
        BOOST_CHECK_EQUAL(first_feats.size(), 1);
        BOOST_CHECK_EQUAL(listener.Count(), 2);
        BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-20 to lcl|good1");
        BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good1");
        listener.Clear();
    }

    ++target_it;
    {
        edit::CFeaturePropagator to_second(src, *target_it, *align, false, false, true, true, &listener);
        vector<CRef<CSeq_feat> > second_feats = to_second.PropagateAll();
        BOOST_CHECK_EQUAL(second_feats.size(), 2);
        BOOST_CHECK_EQUAL(listener.Count(), 1);
        BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good2");
    }
}
1520
// Checks PropagateAllReportFailures() from the last sequence of the set to
// the first and to the second: the number of features produced, the number
// of features reported as failures, and the exact failure messages posted to
// the listener.
BOOST_AUTO_TEST_CASE(Test_PropagateAllReportFailures)
{
    size_t front_insert = 10;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    // make a better alignment, with some sequences in the gap at the front
    ImproveAlignment(*align, front_insert);

    CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();

    // will not be able to propagate the first feature to either of the
    // other sequences.
    // second feature can only be propagated to the middle sequence.
    // third feature can be propagated to all.

    CRef<CSeq_feat> misc1 = unit_test_util::AddMiscFeature(last, front_insert - 1);
    CRef<CSeq_feat> misc2 = unit_test_util::AddMiscFeature(last, (2 * front_insert) - 1);
    CRef<CSeq_feat> misc3 = unit_test_util::AddMiscFeature(last, 4 * front_insert);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    // The source is the third Bioseq in iteration order.
    CBioseq_CI b1(seh);
    ++b1;
    ++b1;
    CBioseq_Handle src = *b1;

    CBioseq_CI b(seh);

    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator1(src, *b, *align, false, false, true, true, &listener);
    vector<CConstRef<CSeq_feat> > failures1;
    vector<CRef<CSeq_feat> > first_feats = propagator1.PropagateAllReportFailures(failures1);
    BOOST_CHECK_EQUAL(first_feats.size(), 1);
    // The two features that could not be placed on the first sequence are
    // expected in the failure list (one per listener message).
    BOOST_CHECK_EQUAL(failures1.size(), 2);
    BOOST_CHECK_EQUAL(listener.Count(), 2);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-20 to lcl|good1");
    BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good1");
    listener.Clear();

    ++b;
    edit::CFeaturePropagator propagator2(src, *b, *align, false, false, true, true, &listener);
    vector<CConstRef<CSeq_feat> > failures2;
    vector<CRef<CSeq_feat> > second_feats = propagator2.PropagateAllReportFailures(failures2);
    BOOST_CHECK_EQUAL(second_feats.size(), 2);
    // Only the first feature is expected to fail for the middle sequence.
    BOOST_CHECK_EQUAL(failures2.size(), 1);
    BOOST_CHECK_EQUAL(listener.Count(), 1);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good2");
}
1570
s_FindHighestFeatId(const CSeq_entry_Handle entry)1571 CObject_id::TId s_FindHighestFeatId(const CSeq_entry_Handle entry)
1572 {
1573 CObject_id::TId id = 0;
1574 for (CFeat_CI feat_it(entry); feat_it; ++feat_it) {
1575 if (feat_it->IsSetId()) {
1576 const CFeat_id& feat_id = feat_it->GetId();
1577 if (feat_id.IsLocal() && feat_id.GetLocal().IsId() && feat_id.GetLocal().GetId() > id) {
1578 id = feat_id.GetLocal().GetId();
1579 }
1580 }
1581 }
1582 return id;
1583 }
1584
GetGoodSeqEntryWithFeatureIds(int & feat_id)1585 CSeq_entry_Handle GetGoodSeqEntryWithFeatureIds(int& feat_id)
1586 {
1587 size_t front_insert = 5;
1588 CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
1589 CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
1590
1591 CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
1592 CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();
1593
1594 CRef<CSeq_loc> main_loc(new CSeq_loc());
1595 main_loc->SetInt().SetFrom(0);
1596 main_loc->SetInt().SetTo(15);
1597 main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1598
1599 CRef<CSeq_loc> subloc(new CSeq_loc());
1600 subloc->SetInt().SetFrom(3);
1601 subloc->SetInt().SetTo(5);
1602 subloc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1603
1604 CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
1605 gene->SetData().SetGene().SetLocus("gene locus");
1606 gene->SetId().SetLocal().SetId(++feat_id);
1607 gene->SetLocation().Assign(*main_loc);
1608
1609 CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
1610 mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
1611 mrna->SetId().SetLocal().SetId(++feat_id);
1612 mrna->SetLocation().Assign(*main_loc);
1613
1614 CRef<CSeq_feat> cds_withoutprot = unit_test_util::AddMiscFeature(first, 15);
1615 cds_withoutprot->SetComment("CDS without product");
1616 cds_withoutprot->SetData().SetCdregion();
1617 cds_withoutprot->SetId().SetLocal().SetId(++feat_id);
1618 cds_withoutprot->SetLocation().SetInt().SetFrom(10);
1619 cds_withoutprot->SetLocation().SetInt().SetTo(25);
1620 cds_withoutprot->SetLocation().SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1621
1622 CRef<CSeq_feat> cds_withprot = unit_test_util::MakeMiscFeature(unit_test_util::IdFromEntry(first), 15);
1623 cds_withprot->SetComment("CDS with product");
1624 cds_withprot->SetData().SetCdregion();
1625 cds_withprot->SetId().SetLocal().SetId(++feat_id);
1626 cds_withprot->SetLocation().Assign(*main_loc);
1627
1628 // constructing the protein sequence
1629 CRef<CSeq_entry> prot_entry(new CSeq_entry());
1630 prot_entry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
1631 prot_entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_aa);
1632 prot_entry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("-WPKL");
1633 prot_entry->SetSeq().SetInst().SetLength(5);
1634
1635 const string prot_id = "good1_1";
1636 CRef<CSeq_id> id(new CSeq_id());
1637 id->SetLocal().SetStr(prot_id);
1638 prot_entry->SetSeq().SetId().push_back(id);
1639
1640 CRef<CSeqdesc> mdesc(new CSeqdesc());
1641 mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
1642 prot_entry->SetSeq().SetDescr().Set().push_back(mdesc);
1643
1644 CRef<CSeq_feat> prot_feat(new CSeq_feat());
1645 prot_feat->SetData().SetProt().SetName().push_back("hypothetical protein");
1646 prot_feat->SetLocation().SetInt().SetId().Assign(*(prot_entry->GetSeq().GetId().front()));
1647 prot_feat->SetLocation().SetInt().SetFrom(0);
1648 prot_feat->SetLocation().SetInt().SetTo(prot_entry->GetSeq().GetInst().GetLength() - 1);
1649 prot_feat->SetId().SetLocal().SetId(++feat_id);
1650 unit_test_util::AddFeat(prot_feat, prot_entry);
1651
1652 cds_withprot->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
1653
1654 CRef<CBioseq_set> set(new CBioseq_set());
1655 set->SetClass(CBioseq_set::eClass_nuc_prot);
1656 set->SetSeq_set().push_back(first);
1657 set->SetSeq_set().push_back(prot_entry);
1658 CRef<CSeq_entry> set_entry(new CSeq_entry());
1659 set_entry->SetSet(*set);
1660
1661 unit_test_util::AddFeat(cds_withprot, set_entry);
1662
1663 auto it = entry->SetSet().SetSeq_set().begin();
1664 it = entry->SetSet().SetSeq_set().erase(it);
1665
1666 entry->SetSet().SetSeq_set().insert(it, set_entry);
1667
1668 //cout << MSerial_AsnText << *entry << endl;
1669
1670 // add entry to the scope
1671 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1672 CRef<CScope> scope(new CScope(*object_manager));
1673 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1674
1675 return seh;
1676 }
1677
1678
// Propagates a gene, an mRNA and two coding regions (one without and one with
// a protein product) from the first nucleotide sequence of the fixture to the
// second and then to the third one. After every step it checks that nothing
// was reported to the listener and that the new feature - including the
// protein feature constructed for the propagated CDS - carries the next
// consecutive local feature ID.
BOOST_AUTO_TEST_CASE(Test_PropagateFeatsTo2Sequences_UsingFeatureIds)
{
    int feat_id = 0;
    CSeq_entry_Handle seh = GetGoodSeqEntryWithFeatureIds(feat_id);
    CScope& scope = seh.GetScope();

    BOOST_CHECK_EQUAL(feat_id, 5);
    BOOST_TEST_MESSAGE("A set containing " + NStr::IntToString(feat_id) + " features");

    // Iterators are verified before they are dereferenced.
    CFeat_CI gene_it(seh, SAnnotSelector(CSeqFeatData::e_Gene));
    BOOST_REQUIRE_MESSAGE(!!gene_it, "fixture contains no gene feature");
    CConstRef<CSeq_feat> gene = gene_it->GetOriginalSeq_feat();

    CFeat_CI mrna_it(seh, SAnnotSelector(CSeqFeatData::eSubtype_mRNA));
    BOOST_REQUIRE_MESSAGE(!!mrna_it, "fixture contains no mRNA feature");
    CConstRef<CSeq_feat> mrna = mrna_it->GetOriginalSeq_feat();

    CConstRef<CSeq_feat> cds_withoutprot;
    CConstRef<CSeq_feat> cds_withprot;
    CConstRef<CSeq_feat> protein;
    for (CFeat_CI cds_it(seh, SAnnotSelector(CSeqFeatData::e_Cdregion)); cds_it; ++cds_it) {
        if (cds_it->IsSetProduct()) {
            cds_withprot = cds_it->GetOriginalSeq_feat();
            CFeat_CI prot_it(scope.GetBioseqHandle(cds_it->GetProduct()));
            BOOST_REQUIRE_MESSAGE(!!prot_it, "CDS product carries no feature");
            protein = prot_it->GetOriginalSeq_feat();
        }
        else {
            cds_withoutprot = cds_it->GetOriginalSeq_feat();
        }
    }

    // These references are dereferenced below, so a missing one is fatal.
    BOOST_REQUIRE(!gene.IsNull());
    BOOST_REQUIRE(!mrna.IsNull());
    BOOST_REQUIRE(!cds_withoutprot.IsNull());
    BOOST_REQUIRE(!cds_withprot.IsNull());
    BOOST_CHECK(!protein.IsNull());

    CAlign_CI align_it(seh);
    BOOST_REQUIRE_MESSAGE(!!align_it, "fixture contains no alignment");
    CConstRef<CSeq_align> align(&*align_it);

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
    BOOST_CHECK_EQUAL(maxFeatId, feat_id);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq1 = *(++b_iter);
    CBioseq_Handle target_bseq2 = *(++b_iter);


    BOOST_TEST_MESSAGE("Propagating to the second sequence");
    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator1(src_bseq, target_bseq1, *align, true, true, true, true, &listener, &maxFeatId);

    // feat_id is advanced outside the assertion macros so that the expected
    // value does not depend on how a macro evaluates its arguments.
    CRef<CSeq_feat> propagated_gene1 = propagator1.Propagate(*gene);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(propagated_gene1->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_gene1->GetId().GetLocal().GetId(), feat_id);

    CRef<CSeq_feat> propagated_mrna1 = propagator1.Propagate(*mrna);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(propagated_mrna1->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_mrna1->GetId().GetLocal().GetId(), feat_id);

    CRef<CSeq_feat> propagated_cds_woprot1 = propagator1.Propagate(*cds_withoutprot);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(!propagated_cds_woprot1->IsSetProduct());
    BOOST_CHECK(propagated_cds_woprot1->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_cds_woprot1->GetId().GetLocal().GetId(), feat_id);

    CRef<CSeq_feat> propagated_cds_wprot1 = propagator1.Propagate(*cds_withprot);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(!propagated_cds_wprot1->IsSetProduct()); // this cds also does not have a product at this point
    BOOST_CHECK(propagated_cds_wprot1->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_cds_wprot1->GetId().GetLocal().GetId(), feat_id);

    CRef<CSeq_feat> propagated_prot1 = propagator1.ConstructProteinFeatureForPropagatedCodingRegion(*cds_withprot, *propagated_cds_wprot1);
    BOOST_CHECK(propagated_prot1->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_prot1->GetId().GetLocal().GetId(), feat_id);
    listener.Clear();

    BOOST_TEST_MESSAGE("Propagating to the third sequence");
    edit::CFeaturePropagator propagator2(src_bseq, target_bseq2, *align, true, true, true, true, &listener, &maxFeatId);

    CRef<CSeq_feat> propagated_gene2 = propagator2.Propagate(*gene);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(propagated_gene2->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_gene2->GetId().GetLocal().GetId(), feat_id);

    CRef<CSeq_feat> propagated_mrna2 = propagator2.Propagate(*mrna);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(propagated_mrna2->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_mrna2->GetId().GetLocal().GetId(), feat_id);
    listener.Clear();

    CRef<CSeq_feat> propagated_cds_woprot2 = propagator2.Propagate(*cds_withoutprot);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(!propagated_cds_woprot2->IsSetProduct());
    BOOST_CHECK(propagated_cds_woprot2->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_cds_woprot2->GetId().GetLocal().GetId(), feat_id);

    CRef<CSeq_feat> propagated_cds_wprot2 = propagator2.Propagate(*cds_withprot);
    BOOST_CHECK_EQUAL(listener.Count(), 0);
    BOOST_CHECK(!propagated_cds_wprot2->IsSetProduct());
    BOOST_CHECK(propagated_cds_wprot2->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_cds_wprot2->GetId().GetLocal().GetId(), feat_id);

    CRef<CSeq_feat> propagated_prot2 = propagator2.ConstructProteinFeatureForPropagatedCodingRegion(*cds_withprot, *propagated_cds_wprot2);
    BOOST_CHECK(propagated_prot2->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(propagated_prot2->GetId().GetLocal().GetId(), feat_id);
    listener.Clear();
}
1786
1787
// Runs PropagateAll() from the first to the second nucleotide sequence of the
// fixture and checks that nothing is reported to the listener, that one
// feature fewer than the fixture holds is produced (the protein is not
// propagated), and that the results carry consecutive local feature IDs
// continuing after the highest ID of the fixture.
BOOST_AUTO_TEST_CASE(Test_PropagateAllFeatures_UsingFeatureIds)
{
    int feat_id = 0;
    CSeq_entry_Handle seh = GetGoodSeqEntryWithFeatureIds(feat_id);
    CScope& scope = seh.GetScope();

    BOOST_CHECK_EQUAL(feat_id, 5);
    BOOST_TEST_MESSAGE("A set containing " + NStr::IntToString(feat_id) + " features");

    // Iterators are verified before they are dereferenced.
    CFeat_CI gene_it(seh, SAnnotSelector(CSeqFeatData::e_Gene));
    BOOST_REQUIRE_MESSAGE(!!gene_it, "fixture contains no gene feature");
    CConstRef<CSeq_feat> gene = gene_it->GetOriginalSeq_feat();

    CFeat_CI mrna_it(seh, SAnnotSelector(CSeqFeatData::eSubtype_mRNA));
    BOOST_REQUIRE_MESSAGE(!!mrna_it, "fixture contains no mRNA feature");
    CConstRef<CSeq_feat> mrna = mrna_it->GetOriginalSeq_feat();

    CConstRef<CSeq_feat> cds_withoutprot;
    CConstRef<CSeq_feat> cds_withprot;
    CConstRef<CSeq_feat> protein;
    for (CFeat_CI cds_it(seh, SAnnotSelector(CSeqFeatData::e_Cdregion)); cds_it; ++cds_it) {
        if (cds_it->IsSetProduct()) {
            cds_withprot = cds_it->GetOriginalSeq_feat();
            CFeat_CI prot_it(scope.GetBioseqHandle(cds_it->GetProduct()));
            BOOST_REQUIRE_MESSAGE(!!prot_it, "CDS product carries no feature");
            protein = prot_it->GetOriginalSeq_feat();
        }
        else {
            cds_withoutprot = cds_it->GetOriginalSeq_feat();
        }
    }

    // sanity checks on the fixture itself
    BOOST_CHECK(!gene.IsNull());
    BOOST_CHECK(!mrna.IsNull());
    BOOST_CHECK(!cds_withoutprot.IsNull());
    BOOST_CHECK(!cds_withprot.IsNull());
    BOOST_CHECK(!protein.IsNull());

    CAlign_CI align_it(seh);
    BOOST_REQUIRE_MESSAGE(!!align_it, "fixture contains no alignment");
    CConstRef<CSeq_align> align(&*align_it);

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
    BOOST_CHECK_EQUAL(maxFeatId, feat_id);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq = *(++b_iter);


    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
    vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateAll();
    BOOST_CHECK_EQUAL(listener.Count(), 0);

    // it's 'feat_id-1' because the protein is not propagated
    BOOST_CHECK_EQUAL(propagated_feats.size(), static_cast<size_t>(feat_id - 1));
    for (const auto& feat : propagated_feats) {
        BOOST_CHECK(feat->IsSetId());
        // advance outside the assertion macro to keep it free of side effects
        ++feat_id;
        BOOST_CHECK_EQUAL(feat->GetId().GetLocal().GetId(), feat_id);
    }
    listener.Clear();
}
1845
CreateXRefLink(CSeq_feat & from_feat,CSeq_feat & to_feat)1846 void CreateXRefLink(CSeq_feat& from_feat, CSeq_feat& to_feat)
1847 {
1848 CRef<CSeqFeatXref> xref(new CSeqFeatXref);
1849 xref->SetId(to_feat.SetId());
1850 from_feat.SetXref().push_back(xref);
1851 }
1852
// A gene and an mRNA that cross-references the gene are propagated together.
// The propagated mRNA must carry a cross-reference to the propagated gene,
// while the propagated gene must have no cross-references.
BOOST_AUTO_TEST_CASE(Test_Propagate2FeaturesWithXrefs)
{
    const size_t front_insert = 5;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();

    // location shared by the gene and the mRNA
    CRef<CSeq_loc> main_loc(new CSeq_loc());
    main_loc->SetInt().SetFrom(0);
    main_loc->SetInt().SetTo(15);
    main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));

    int feat_id = 0;
    CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
    gene->SetData().SetGene().SetLocus("gene locus");
    gene->SetId().SetLocal().SetId(++feat_id);
    gene->SetLocation().Assign(*main_loc);

    CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
    mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
    mrna->SetId().SetLocal().SetId(++feat_id);
    mrna->SetLocation().Assign(*main_loc);

    CreateXRefLink(*mrna, *gene);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq = *(++b_iter);

    BOOST_TEST_MESSAGE("When both mrna and gene are propagated");
    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
    vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList({ gene, mrna });
    BOOST_CHECK_EQUAL(listener.Count(), 0);

    // front()/back() are used below; stop here unless both features arrived.
    BOOST_REQUIRE_EQUAL(propagated_feats.size(), 2u);

    CRef<CSeq_feat> prop_gene = propagated_feats.front();
    BOOST_CHECK(prop_gene->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_gene->GetId().GetLocal().GetId(), feat_id);
    BOOST_CHECK(!prop_gene->IsSetXref());

    CRef<CSeq_feat> prop_mrna = propagated_feats.back();
    BOOST_CHECK(prop_mrna->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_mrna->GetId().GetLocal().GetId(), feat_id);
    BOOST_TEST_MESSAGE("the Xref is also propagated");
    BOOST_CHECK(prop_mrna->IsSetXref());
    CSeqFeatXref xref;
    xref.SetId(prop_gene->SetId());
    BOOST_CHECK(prop_mrna->HasSeqFeatXref(xref.GetId()));
    listener.Clear();
}
1916
// An mRNA that cross-references a gene is propagated on its own. Because the
// gene is not part of the propagated list, the propagated mRNA must not carry
// any cross-reference.
BOOST_AUTO_TEST_CASE(Test_Propagate1FeatureWithXrefs)
{
    const size_t front_insert = 5;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();

    // location shared by the gene and the mRNA
    CRef<CSeq_loc> main_loc(new CSeq_loc());
    main_loc->SetInt().SetFrom(0);
    main_loc->SetInt().SetTo(15);
    main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));

    int feat_id = 0;
    CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
    gene->SetData().SetGene().SetLocus("gene locus");
    gene->SetId().SetLocal().SetId(++feat_id);
    gene->SetLocation().Assign(*main_loc);

    CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
    mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
    mrna->SetId().SetLocal().SetId(++feat_id);
    mrna->SetLocation().Assign(*main_loc);

    CreateXRefLink(*mrna, *gene);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq = *(++b_iter);

    BOOST_TEST_MESSAGE("When the mrna is propagated alone");
    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
    vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList({ mrna });
    BOOST_CHECK_EQUAL(listener.Count(), 0);

    // front() is used below; stop here unless exactly one feature arrived.
    BOOST_REQUIRE_EQUAL(propagated_feats.size(), 1u);

    CRef<CSeq_feat> prop_mrna = propagated_feats.front();
    BOOST_CHECK(prop_mrna->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_mrna->GetId().GetLocal().GetId(), feat_id);
    BOOST_TEST_MESSAGE("the Xref is missing");
    BOOST_CHECK(!prop_mrna->IsSetXref());
    listener.Clear();
}
1972
// Same set-up as the two-feature cross-reference test, but here the gene
// cross-references the mRNA. After propagating both, the propagated gene must
// reference the propagated mRNA and the propagated mRNA must have no
// cross-references.
BOOST_AUTO_TEST_CASE(Test_Propagate2FeaturesWithXrefs_RevOrder)
{
    const size_t front_insert = 5;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();

    // location shared by the gene and the mRNA
    CRef<CSeq_loc> main_loc(new CSeq_loc());
    main_loc->SetInt().SetFrom(0);
    main_loc->SetInt().SetTo(15);
    main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));

    int feat_id = 0;
    CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
    gene->SetData().SetGene().SetLocus("gene locus");
    gene->SetId().SetLocal().SetId(++feat_id);
    gene->SetLocation().Assign(*main_loc);

    CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
    mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
    mrna->SetId().SetLocal().SetId(++feat_id);
    mrna->SetLocation().Assign(*main_loc);

    // the link points from the gene to the mRNA in this test
    CreateXRefLink(*gene, *mrna);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq = *(++b_iter);

    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
    vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList({ gene, mrna });
    BOOST_CHECK_EQUAL(listener.Count(), 0);

    // front()/back() are used below; stop here unless both features arrived.
    BOOST_REQUIRE_EQUAL(propagated_feats.size(), 2u);

    CRef<CSeq_feat> prop_gene = propagated_feats.front();
    BOOST_CHECK(prop_gene->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_gene->GetId().GetLocal().GetId(), feat_id);
    BOOST_CHECK(prop_gene->IsSetXref());

    CRef<CSeq_feat> prop_mrna = propagated_feats.back();
    CSeqFeatXref xref;
    xref.SetId(prop_mrna->SetId());
    BOOST_CHECK(prop_gene->HasSeqFeatXref(xref.GetId()));

    BOOST_CHECK(prop_mrna->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_mrna->GetId().GetLocal().GetId(), feat_id);
    BOOST_CHECK(!prop_mrna->IsSetXref());
    listener.Clear();
}
2036
// Propagates a gene, an mRNA, a CDS without a product and a CDS with a
// protein product. The mRNA and the product-bearing CDS reference each other
// and both reference the gene. The test expects five resulting features (the
// last one being the protein feature), consecutive new feature IDs, and
// cross-references that point at the propagated counterparts.
BOOST_AUTO_TEST_CASE(Test_PropagateFeaturesWithXrefsWithCDS)
{
    const size_t front_insert = 5;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();

    // location shared by the gene, the mRNA and the CDS with a product
    CRef<CSeq_loc> main_loc(new CSeq_loc());
    main_loc->SetInt().SetFrom(0);
    main_loc->SetInt().SetTo(15);
    main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));

    int feat_id = 0;
    CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
    gene->SetData().SetGene().SetLocus("gene locus");
    gene->SetId().SetLocal().SetId(++feat_id);
    gene->SetLocation().Assign(*main_loc);

    CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
    mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
    mrna->SetId().SetLocal().SetId(++feat_id);
    mrna->SetLocation().Assign(*main_loc);

    CreateXRefLink(*mrna, *gene);

    CRef<CSeq_feat> cds_withoutprot = unit_test_util::AddMiscFeature(first, 15);
    cds_withoutprot->SetData().SetCdregion();
    cds_withoutprot->SetId().SetLocal().SetId(++feat_id);
    cds_withoutprot->SetLocation().SetInt().SetFrom(10);
    cds_withoutprot->SetLocation().SetInt().SetTo(25);
    cds_withoutprot->SetLocation().SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));

    // This CDS is only created here; it is attached to the nuc-prot set below.
    CRef<CSeq_feat> cds_withprot = unit_test_util::MakeMiscFeature(unit_test_util::IdFromEntry(first), 15);
    cds_withprot->SetComment("CDS with product");
    cds_withprot->SetData().SetCdregion();
    cds_withprot->SetId().SetLocal().SetId(++feat_id);
    cds_withprot->SetLocation().Assign(*main_loc);

    CreateXRefLink(*cds_withprot, *gene);
    CreateXRefLink(*mrna, *cds_withprot);
    CreateXRefLink(*cds_withprot, *mrna);

    // constructing the protein sequence
    CRef<CSeq_entry> prot_entry(new CSeq_entry());
    prot_entry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
    prot_entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_aa);
    prot_entry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("-WPKL");
    prot_entry->SetSeq().SetInst().SetLength(5);

    const string prot_id = "good1_1";
    CRef<CSeq_id> id(new CSeq_id());
    id->SetLocal().SetStr(prot_id);
    prot_entry->SetSeq().SetId().push_back(id);

    CRef<CSeqdesc> mdesc(new CSeqdesc());
    mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
    prot_entry->SetSeq().SetDescr().Set().push_back(mdesc);

    // protein feature covering the whole protein sequence
    CRef<CSeq_feat> prot_feat(new CSeq_feat());
    prot_feat->SetData().SetProt().SetName().push_back("hypothetical protein");
    prot_feat->SetLocation().SetInt().SetId().Assign(*(prot_entry->GetSeq().GetId().front()));
    prot_feat->SetLocation().SetInt().SetFrom(0);
    prot_feat->SetLocation().SetInt().SetTo(prot_entry->GetSeq().GetInst().GetLength() - 1);
    prot_feat->SetId().SetLocal().SetId(++feat_id);
    unit_test_util::AddFeat(prot_feat, prot_entry);

    cds_withprot->SetProduct().SetWhole().SetLocal().SetStr(prot_id);

    // wrap the first nucleotide sequence and the protein in a nuc-prot set
    CRef<CBioseq_set> set(new CBioseq_set());
    set->SetClass(CBioseq_set::eClass_nuc_prot);
    set->SetSeq_set().push_back(first);
    set->SetSeq_set().push_back(prot_entry);
    CRef<CSeq_entry> set_entry(new CSeq_entry());
    set_entry->SetSet(*set);

    unit_test_util::AddFeat(cds_withprot, set_entry);

    // replace the first member of the top-level set with the nuc-prot set
    auto it = entry->SetSet().SetSeq_set().begin();
    it = entry->SetSet().SetSeq_set().erase(it);
    entry->SetSet().SetSeq_set().insert(it, set_entry);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq = *(++b_iter);

    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
    vector<CConstRef<CSeq_feat>> feat_list{ gene, mrna, cds_withoutprot, cds_withprot };
    vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList(feat_list);
    BOOST_CHECK_EQUAL(listener.Count(), 0);

    // Five elements are accessed below (feat_id is 5 at this point), so a
    // size mismatch must end the test instead of reading past the vector.
    BOOST_REQUIRE_EQUAL(propagated_feats.size(), static_cast<size_t>(feat_id));

    CRef<CSeq_feat> prop_gene = propagated_feats[0];
    BOOST_CHECK(prop_gene->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_gene->GetId().GetLocal().GetId(), feat_id);
    BOOST_CHECK(!prop_gene->IsSetXref());

    CRef<CSeq_feat> prop_mrna = propagated_feats[1];
    BOOST_CHECK(prop_mrna->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_mrna->GetId().GetLocal().GetId(), feat_id);
    BOOST_CHECK(prop_mrna->IsSetXref());
    BOOST_CHECK_EQUAL(prop_mrna->GetXref().size(), 2u);

    CRef<CSeq_feat> prop_cds_withoutprot = propagated_feats[2];
    BOOST_CHECK(prop_cds_withoutprot->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_cds_withoutprot->GetId().GetLocal().GetId(), feat_id);
    BOOST_CHECK(!prop_cds_withoutprot->IsSetXref());

    CRef<CSeq_feat> prop_cds = propagated_feats[3];
    BOOST_CHECK(prop_cds->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_cds->GetId().GetLocal().GetId(), feat_id);
    BOOST_CHECK(prop_cds->IsSetXref());

    // the mRNA must reference the propagated gene and the propagated CDS
    CSeqFeatXref mrna_xref1;
    mrna_xref1.SetId(prop_gene->SetId());
    BOOST_CHECK(prop_mrna->HasSeqFeatXref(mrna_xref1.GetId()));
    mrna_xref1.SetId(prop_cds->SetId());
    BOOST_CHECK(prop_mrna->HasSeqFeatXref(mrna_xref1.GetId()));

    // the CDS must reference the propagated gene and the propagated mRNA
    CSeqFeatXref cds_xref;
    cds_xref.SetId(prop_gene->SetId());
    BOOST_CHECK(prop_cds->HasSeqFeatXref(cds_xref.GetId()));
    cds_xref.SetId(prop_mrna->SetId());
    BOOST_CHECK(prop_cds->HasSeqFeatXref(cds_xref.GetId()));

    CRef<CSeq_feat> prop_protein = propagated_feats[4];
    BOOST_CHECK(prop_protein->IsSetId());
    ++feat_id;
    BOOST_CHECK_EQUAL(prop_protein->GetId().GetLocal().GetId(), feat_id);
    listener.Clear();
}
2186
2187
BuildAlignmentWithInternalGap()2188 CRef<CSeq_entry> BuildAlignmentWithInternalGap()
2189 {
2190 CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSet();
2191
2192 CRef<objects::CSeq_entry> seq4 = unit_test_util::BuildGoodSeq();
2193 unit_test_util::ChangeId(seq4, "4");
2194 entry->SetSet().SetSeq_set().push_back(seq4);
2195
2196 CRef<objects::CSeq_align> align(new CSeq_align());
2197 align->SetType(objects::CSeq_align::eType_global);
2198 align->SetDim(entry->GetSet().GetSeq_set().size());
2199
2200 // assign IDs
2201 for (auto& s : entry->SetSet().SetSeq_set()) {
2202 CRef<CSeq_id> id(new CSeq_id());
2203 id->Assign(*(s->GetSeq().GetId().front()));
2204 align->SetSegs().SetDenseg().SetIds().push_back(id);
2205 }
2206
2207 auto s = entry->SetSet().SetSeq_set().begin();
2208 auto first_seq = (*s)->GetSeq().GetInst().GetSeq_data().GetIupacna().Get(); // original
2209 s++;
2210 // second sequence: remove beginning
2211 (*s)->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(first_seq.substr(20, 40));
2212 (*s)->SetSeq().SetInst().SetLength(40);
2213 s++;
2214 // third sequence: remove part of the middle
2215 (*s)->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(first_seq.substr(0, 20) + first_seq.substr(40, 20));
2216 (*s)->SetSeq().SetInst().SetLength(40);
2217 s++;
2218 // fourth sequence: remove end
2219 (*s)->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(first_seq.substr(0, 40));
2220 (*s)->SetSeq().SetInst().SetLength(40);
2221
2222 // now make first sequence longer than alignment
2223 CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
2224 first->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AAAAATTTTTGGGGGCCCCC" + first_seq + "AAAAATTTTTGGGGGCCCCC");
2225 first->SetSeq().SetInst().SetLength(100);
2226
2227
2228 auto& denseg = align->SetSegs().SetDenseg();
2229 denseg.SetNumseg(3);
2230 denseg.SetLens().push_back(20);
2231 denseg.SetLens().push_back(20);
2232 denseg.SetLens().push_back(20);
2233 denseg.SetDim(entry->GetSet().GetSeq_set().size());
2234 // first segment - second sequence missing
2235 denseg.SetStarts().push_back(20);
2236 denseg.SetStarts().push_back(-1);
2237 denseg.SetStarts().push_back(0);
2238 denseg.SetStarts().push_back(0);
2239 // second segment - third sequence is gap
2240 denseg.SetStarts().push_back(40);
2241 denseg.SetStarts().push_back(0);
2242 denseg.SetStarts().push_back(-1);
2243 denseg.SetStarts().push_back(20);
2244 // third segment - fourth sequence is gap
2245 denseg.SetStarts().push_back(60);
2246 denseg.SetStarts().push_back(20);
2247 denseg.SetStarts().push_back(20);
2248 denseg.SetStarts().push_back(-1);
2249
2250 CRef<CSeq_annot> annot(new CSeq_annot());
2251 annot->SetData().SetAlign().push_back(align);
2252 entry->SetSet().SetAnnot().push_back(annot);
2253 return entry;
2254 }
2255
2256
// Places five 20-base genes on the first sequence of the internal-gap
// alignment: one before the aligned region, one on each of the three aligned
// segments, and one after the aligned region. The genes are propagated to each
// of the other three sequences in turn. For every target exactly three
// failures are expected: the two genes outside the alignment and the gene on
// the segment from which that target is missing.
BOOST_AUTO_TEST_CASE(Test_DoNotPropagateToGap_RW_887)
{
    CRef<CSeq_entry> entry = BuildAlignmentWithInternalGap();
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();

    // Adds a gene spanning [from, to] to the first sequence.
    auto add_gene = [&first](TSeqPos from, TSeqPos to) -> CRef<CSeq_feat> {
        CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first);
        gene->SetData().SetGene().SetLocus("gene locus");
        gene->SetLocation().SetInt().SetFrom(from);
        gene->SetLocation().SetInt().SetTo(to);
        return gene;
    };

    CRef<CSeq_feat> gene1 = add_gene(0, 19);    // before alignment
    CRef<CSeq_feat> gene2 = add_gene(20, 39);   // first gap
    CRef<CSeq_feat> gene3 = add_gene(40, 59);   // second gap
    CRef<CSeq_feat> gene4 = add_gene(60, 79);   // third gap
    CRef<CSeq_feat> gene5 = add_gene(80, 99);   // after alignment

    vector<CConstRef<CSeq_feat>> feat_list{ gene1, gene2, gene3, gene4, gene5 };

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;

    CMessageListener_Basic listener;

    // The message count is a hard requirement in each round because the
    // messages are accessed by index right afterwards.
    ++b_iter;
    edit::CFeaturePropagator propagator_to_2(src_bseq, *b_iter, *align, true, true, true, true, &listener);
    propagator_to_2.PropagateFeatureList(feat_list);
    BOOST_REQUIRE_EQUAL(listener.Count(), 3);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good1:1-20 to lcl|good2");
    BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good1:21-40 to lcl|good2");
    BOOST_CHECK_EQUAL(listener.GetMessage(2).GetText(), "Unable to propagate location of feature lcl|good1:81-100 to lcl|good2");
    listener.Clear();

    ++b_iter;
    edit::CFeaturePropagator propagator_to_3(src_bseq, *b_iter, *align, true, true, true, true, &listener);
    propagator_to_3.PropagateFeatureList(feat_list);
    BOOST_REQUIRE_EQUAL(listener.Count(), 3);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good1:1-20 to lcl|good3");
    BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good1:41-60 to lcl|good3");
    BOOST_CHECK_EQUAL(listener.GetMessage(2).GetText(), "Unable to propagate location of feature lcl|good1:81-100 to lcl|good3");
    listener.Clear();

    ++b_iter;
    edit::CFeaturePropagator propagator_to_4(src_bseq, *b_iter, *align, true, true, true, true, &listener);
    propagator_to_4.PropagateFeatureList(feat_list);
    BOOST_REQUIRE_EQUAL(listener.Count(), 3);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good1:1-20 to lcl|good4");
    BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good1:61-80 to lcl|good4");
    BOOST_CHECK_EQUAL(listener.GetMessage(2).GetText(), "Unable to propagate location of feature lcl|good1:81-100 to lcl|good4");
    listener.Clear();
}
2335
2336
2337 #if 0
// Disabled on purpose: the test case below was checked in by mistake.
2339 BOOST_AUTO_TEST_CASE(Test_MergeIntervals)
2340 {
2341 size_t front_insert = 5;
2342 CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
2343 CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
2344
2345 CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
2346
2347 CRef<CSeq_feat> misc = unit_test_util::AddMiscFeature(first);
2348 CRef<CSeq_loc> l1(new CSeq_loc());
2349 l1->Assign(misc->GetLocation());
2350 CRef<CSeq_loc> l2(new CSeq_loc());
2351 l2->Assign(misc->GetLocation());
2352 l2->SetInt().SetFrom(l1->GetStop(eExtreme_Biological) + 1);
2353 l2->SetInt().SetTo(l2->GetInt().GetFrom() + 15);
2354 misc->SetLocation().SetMix().Set().push_back(l1);
2355 misc->SetLocation().SetMix().Set().push_back(l2);
2356
2357 vector<CRef<CSeq_loc> > expected_loc;
2358
2359 CRef<CSeq_loc> loc1(new CSeq_loc());
2360 loc1->SetInt().SetFrom(front_insert);
2361 loc1->SetInt().SetTo(30 + front_insert);
2362 loc1->SetInt().SetId().SetLocal().SetStr("good2");
2363 loc1->SetPartialStart(false, eExtreme_Biological);
2364 loc1->SetPartialStop(false, eExtreme_Biological);
2365 expected_loc.push_back(loc1);
2366
2367 CRef<CSeq_loc> loc2(new CSeq_loc());
2368 loc2->SetInt().SetFrom(front_insert * 2);
2369 loc2->SetInt().SetTo(30 + front_insert * 2);
2370 loc2->SetInt().SetId().SetLocal().SetStr("good3");
2371 loc2->SetPartialStart(false, eExtreme_Biological);
2372 loc2->SetPartialStop(false, eExtreme_Biological);
2373 expected_loc.push_back(loc2);
2374
2375 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2376
2377 CRef<CScope> scope(new CScope(*object_manager));
2378 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
2379 CMessageListener_Basic listener;
2380
2381 CBioseq_CI b(seh);
2382 CBioseq_Handle src = *b;
2383 ++b;
2384 size_t offset = 0;
2385 while (b) {
2386 edit::CFeaturePropagator propagator(src, *b, *align, false, false, true, true, &listener);
2387
2388 CRef<CSeq_feat> new_feat = propagator.Propagate(*misc);
2389 CheckPropagatedLocation(*(expected_loc[offset]), new_feat->GetLocation());
2390 BOOST_CHECK_EQUAL(listener.Count(), 0);
2391 listener.Clear();
2392 offset++;
2393 ++b;
2394 }
2395
2396 }
2397 #endif
2398
2399
2400 END_SCOPE(objects)
2401 END_NCBI_SCOPE
2402
2403