1 /*  $Id: unit_test_feature_propagate.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Colleen Bollin, NCBI
27 *
28 * File Description:
29 *   Unit tests for feature propagation.
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <corelib/ncbi_system.hpp>
37 
38 // This macro should be defined before inclusion of test_boost.hpp in all
39 // "*.cpp" files inside executable except one. It is like function main() for
40 // non-Boost.Test executables is defined only in one *.cpp file - other files
41 // should not include it. If NCBI_BOOST_NO_AUTO_TEST_MAIN will not be defined
42 // then test_boost.hpp will define such "main()" function for tests.
43 //
44 // Usually if your unit tests contain only one *.cpp file you should not
45 // care about this macro at all.
46 //
47 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
48 
49 
50 // This header must be included before all Boost.Test headers if there are any
51 #include <corelib/test_boost.hpp>
52 
53 #include <objects/misc/sequence_macros.hpp>
54 #include <objects/seqset/Seq_entry.hpp>
55 #include <objects/seq/Seq_ext.hpp>
56 #include <objects/seq/Seq_literal.hpp>
57 #include <objects/seq/Seq_data.hpp>
58 #include <objects/seqalign/Seq_align.hpp>
59 #include <objects/seqalign/Dense_seg.hpp>
60 #include <objects/seqfeat/Imp_feat.hpp>
61 #include <objects/seqfeat/Cdregion.hpp>
62 #include <objects/seqfeat/Code_break.hpp>
63 #include <objects/seqloc/Seq_id.hpp>
64 #include <objects/seqloc/Seq_loc.hpp>
65 #include <objects/seqloc/Seq_interval.hpp>
66 #include <objects/seq/seqport_util.hpp>
67 #include <objmgr/object_manager.hpp>
68 #include <objmgr/scope.hpp>
69 #include <objmgr/bioseq_ci.hpp>
70 #include <objmgr/feat_ci.hpp>
71 #include <objmgr/seq_vector.hpp>
72 #include <objmgr/util/sequence.hpp>
73 #include <objmgr/util/seq_loc_util.hpp>
74 #include <objmgr/align_ci.hpp>
75 
76 #include <objects/seqalign/Dense_seg.hpp>
77 
78 #include <objtools/unit_test_util/unit_test_util.hpp>
79 
80 #include <objtools/edit/feature_propagate.hpp>
81 
82 #include <corelib/ncbiapp.hpp>
83 
84 #include <common/test_assert.h>  /* This header must go last */
85 
86 
87 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)88 BEGIN_SCOPE(objects)
89 
90 
91 
92 
93 
94 NCBITEST_INIT_TREE()
95 {
96     if ( !CNcbiApplication::Instance()->GetConfig().HasEntry("NCBI", "Data") ) {
97     }
98 }
99 
// Becomes true when the "debug_mode" command-line flag is present.
static bool s_debugMode{false};
101 
// Command-line setup hook: registers the optional "debug_mode" flag on the
// argument descriptions supplied by the test framework.
NCBITEST_INIT_CMDLINE(arg_desc)
{
    const string flag_name("debug_mode");
    const string flag_synopsis("Debugging mode writes errors seen for each test");

    arg_desc->AddFlag(flag_name, flag_synopsis);
}
110 
NCBITEST_AUTO_INIT()111 NCBITEST_AUTO_INIT()
112 {
113     // initialization function body
114 
115     const CArgs& args = CNcbiApplication::Instance()->GetArgs();
116     if (args["debug_mode"]) {
117         s_debugMode = true;
118     }
119 }
120 
121 
// Records a Boost.Test check failure when `propagated` does not compare equal
// to `expected` according to CSeq_loc::Equals().
//
// expected    location the test anticipates
// propagated  location produced by the code under test
void CheckPropagatedLocation(const CSeq_loc& expected, const CSeq_loc& propagated)
{
    // BOOST_CHECK is non-fatal: the calling test case keeps running on mismatch.
    BOOST_CHECK(expected.Equals(propagated));
}
126 
127 /*
128 good1: 60
129 good2: 65
130 good3: 70
131   annot {
132     {
133       data align {
134         {
135           type global,
136           dim 3,
137           segs denseg {
138             dim 3,
139             numseg 1,
140             ids {
141               local str "good1",
142               local str "good2",
143               local str "good3"
144             },
145             starts {
146               0,
147               5,
148               10
149             },
150             lens {
151               60
152             }
153           }
154         }
155       }
156     }
157   }
158 }
159 */
160 
CreateBioseqsAndAlign(size_t front_insert)161 tuple<CRef<CSeq_entry>, CRef<CSeq_align>, CRef<CSeq_entry>, CRef<CSeq_entry>, CRef<CSeq_entry> >  CreateBioseqsAndAlign(size_t front_insert)
162 {
163     CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
164     CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
165     auto it = entry->SetSet().SetSeq_set().begin();
166     CRef<CSeq_entry> seq1 = *it;
167     ++it;
168     CRef<CSeq_entry> seq2 = *it;
169     ++it;
170     CRef<CSeq_entry> seq3 = *it;
171     return make_tuple(entry, align, seq1, seq2, seq3);
172 }
173 
AddBioseqsToScope(CRef<CSeq_entry> entry)174 tuple<CBioseq_Handle, CBioseq_Handle, CBioseq_Handle, CRef<CScope> > AddBioseqsToScope(CRef<CSeq_entry> entry)
175 {
176     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
177 
178     CRef<CScope> scope(new CScope(*object_manager));
179     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
180     CBioseq_CI bi(seh);
181     CBioseq_Handle bsh1 = *bi;
182     ++bi;
183     CBioseq_Handle bsh2 = *bi;
184     ++bi;
185     CBioseq_Handle bsh3 = *bi;
186 
187     return make_tuple(bsh1,bsh2,bsh3, scope);
188 }
189 
CreateLoc(TSeqPos from,TSeqPos to,const CSeq_id & id,bool loc_partial5,bool loc_partial3,bool is_minus_strand=false)190 CRef<CSeq_loc> CreateLoc(TSeqPos from, TSeqPos to, const CSeq_id &id, bool loc_partial5, bool loc_partial3, bool is_minus_strand = false)
191 {
192     CRef<CSeq_loc> loc(new CSeq_loc());
193     loc->SetInt().SetFrom(from);
194     loc->SetInt().SetTo(to);
195     loc->SetInt().SetId().Assign(id);
196     if (is_minus_strand) {
197         loc->SetInt().SetStrand(eNa_strand_minus);
198     }
199     loc->SetPartialStart(loc_partial5, eExtreme_Biological);
200     loc->SetPartialStop(loc_partial3, eExtreme_Biological);
201     return loc;
202 }
203 
CreateTwoIntLoc(TSeqPos from1,TSeqPos to1,TSeqPos from2,TSeqPos to2,ENa_strand strand,const CSeq_id & id,bool loc_partial5,bool loc_partial3)204 CRef<CSeq_loc> CreateTwoIntLoc(TSeqPos from1, TSeqPos to1, TSeqPos from2, TSeqPos to2, ENa_strand strand, const CSeq_id &id, bool loc_partial5, bool loc_partial3)
205 {
206     CRef<CSeq_loc> loc1(new CSeq_loc());
207     loc1->SetInt().SetFrom(from1);
208     loc1->SetInt().SetTo(to1);
209     loc1->SetInt().SetId().Assign(id);
210     loc1->SetInt().SetStrand(strand);
211 
212     CRef<CSeq_loc> loc2(new CSeq_loc());
213     loc2->SetInt().SetFrom(from2);
214     loc2->SetInt().SetTo(to2);
215     loc2->SetInt().SetId().Assign(id);
216     loc2->SetInt().SetStrand(strand);
217 
218     CRef<CSeq_loc> loc(new CSeq_loc());
219     loc->SetMix().AddSeqLoc(*loc1);
220     loc->SetMix().AddSeqLoc(*loc2);
221     loc->SetPartialStart(loc_partial5, eExtreme_Biological);
222     loc->SetPartialStop(loc_partial3, eExtreme_Biological);
223     return loc;
224 }
225 
CreateOrderedLoc(TSeqPos from1,TSeqPos to1,TSeqPos from2,TSeqPos to2,ENa_strand strand,const CSeq_id & id,bool loc_partial5,bool loc_partial3)226 CRef<CSeq_loc> CreateOrderedLoc(TSeqPos from1, TSeqPos to1, TSeqPos from2, TSeqPos to2, ENa_strand strand, const CSeq_id &id, bool loc_partial5, bool loc_partial3)
227 {
228     CRef<CSeq_loc> loc1(new CSeq_loc());
229     loc1->SetInt().SetFrom(from1);
230     loc1->SetInt().SetTo(to1);
231     loc1->SetInt().SetId().Assign(id);
232     loc1->SetInt().SetStrand(strand);
233 
234     CRef<CSeq_loc> loc2(new CSeq_loc());
235     loc2->SetNull();
236 
237     CRef<CSeq_loc> loc3(new CSeq_loc());
238     loc3->SetInt().SetFrom(from2);
239     loc3->SetInt().SetTo(to2);
240     loc3->SetInt().SetId().Assign(id);
241     loc3->SetInt().SetStrand(strand);
242 
243     CRef<CSeq_loc> loc(new CSeq_loc());
244     loc->SetMix().AddSeqLoc(*loc1);
245     loc->SetMix().AddSeqLoc(*loc2);
246     loc->SetMix().AddSeqLoc(*loc3);
247     loc->SetPartialStart(loc_partial5, eExtreme_Biological);
248     loc->SetPartialStop(loc_partial3, eExtreme_Biological);
249     return loc;
250 }
251 
CreatePointLoc(TSeqPos pos,const CSeq_id & id)252 CRef<CSeq_loc> CreatePointLoc(TSeqPos pos, const CSeq_id &id)
253 {
254     CRef<CSeq_loc> loc(new CSeq_loc());
255     loc->SetPnt().SetPoint(pos);
256     loc->SetPnt().SetId().Assign(id);
257     return loc;
258 }
259 
CreateCds(CRef<CSeq_loc> main_loc,CRef<CSeq_entry> seq)260 CRef<CSeq_feat> CreateCds(CRef<CSeq_loc> main_loc, CRef<CSeq_entry> seq)
261 {
262     CRef<CSeq_feat> cds = unit_test_util::AddMiscFeature(seq, 15);
263     cds->SetData().SetCdregion();
264     cds->SetLocation().Assign(*main_loc);
265     return cds;
266 }
267 
AddCodeBreak(CRef<CSeq_feat> cds,CRef<CSeq_loc> subloc)268 void AddCodeBreak(CRef<CSeq_feat> cds, CRef<CSeq_loc> subloc)
269 {
270     CRef<CCode_break> cbr(new CCode_break());
271     cbr->SetLoc().Assign(*subloc);
272     cds->SetData().SetCdregion().SetCode_break().push_back(cbr);
273 }
274 
CreateTrna(CRef<CSeq_loc> main_loc,CRef<CSeq_entry> seq)275 CRef<CSeq_feat> CreateTrna(CRef<CSeq_loc> main_loc, CRef<CSeq_entry> seq)
276 {
277     CRef<CSeq_feat> trna = unit_test_util::AddMiscFeature(seq, 15);
278     trna->SetData().SetRna().SetType(CRNA_ref::eType_tRNA);
279     trna->SetLocation().Assign(*main_loc);
280     return trna;
281 }
282 
AddAnticodon(CRef<CSeq_feat> trna,CRef<CSeq_loc> subloc)283 void AddAnticodon(CRef<CSeq_feat> trna, CRef<CSeq_loc> subloc)
284 {
285     trna->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*subloc);
286 }
287 
288 
289 // propagate cds without code-break from seq 1 to seq 2 and 3
TestCds(bool loc_partial5,bool loc_partial3)290 void TestCds(bool loc_partial5, bool loc_partial3)
291 {
292     size_t front_insert = 5;
293     CRef<CSeq_align> align;
294     CRef<CSeq_entry> entry, seq1, seq2, seq3;
295     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
296 
297     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
298     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
299     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
300 
301     CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id1, loc_partial5, loc_partial3);
302     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
303 
304     CBioseq_Handle bsh1, bsh2, bsh3;
305     CRef<CScope> scope;
306     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
307 
308     CMessageListener_Basic listener;
309 
310     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
311     CRef<CSeq_loc> expected_loc1 = CreateLoc(front_insert, 15+front_insert, id2, loc_partial5, loc_partial3);
312     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
313     BOOST_CHECK_EQUAL(new_feat1->GetData().GetSubtype(), cds->GetData().GetSubtype());
314     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
315     BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
316     BOOST_CHECK_EQUAL(listener.Count(), 0);
317 
318     listener.Clear();
319 
320     edit::CFeaturePropagator propagator2(bsh1, bsh3, *align, false, false, true, true, &listener);
321     CRef<CSeq_loc> expected_loc2 = CreateLoc(front_insert*2, 15+front_insert*2, id3, loc_partial5, loc_partial3);
322     CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
323     BOOST_CHECK_EQUAL(new_feat2->GetData().GetSubtype(), cds->GetData().GetSubtype());
324     BOOST_CHECK(expected_loc2->Equals(new_feat2->GetLocation()));
325     BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), false);
326     BOOST_CHECK_EQUAL(listener.Count(), 0);
327 
328     listener.Clear();
329 }
330 
331 
332 
333 // propagate cds with code-break from seq 1 to seq 2 and 3
TestCdsWithCodeBreak(bool subloc_partial5,bool subloc_partial3)334 void TestCdsWithCodeBreak(bool subloc_partial5, bool subloc_partial3)
335 {
336     size_t front_insert = 5;
337     CRef<CSeq_align> align;
338     CRef<CSeq_entry> entry, seq1, seq2, seq3;
339     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
340 
341     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
342     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
343     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
344 
345     CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id1, false, false);
346     CRef<CSeq_loc> subloc = CreateLoc(3, 5, id1, subloc_partial5, subloc_partial3);
347     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
348     AddCodeBreak(cds, subloc);
349 
350     CBioseq_Handle bsh1, bsh2, bsh3;
351     CRef<CScope> scope;
352     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
353 
354     CMessageListener_Basic listener;
355 
356     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
357     CRef<CSeq_loc> expected_subloc1 = CreateLoc(3+front_insert, 5+front_insert, id2, subloc_partial5, subloc_partial3);
358     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
359     BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), true);
360     BOOST_CHECK(expected_subloc1->Equals(new_feat1->GetData().GetCdregion().GetCode_break().front()->GetLoc()));
361     BOOST_CHECK_EQUAL(listener.Count(), 0);
362 
363     listener.Clear();
364 
365     edit::CFeaturePropagator propagator2(bsh1, bsh3, *align, false, false, true, true, &listener);
366     CRef<CSeq_loc> expected_subloc2 = CreateLoc(3+front_insert*2, 5+front_insert*2, id3, subloc_partial5, subloc_partial3);
367     CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
368     BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), true);
369     BOOST_CHECK(expected_subloc2->Equals(new_feat2->GetData().GetCdregion().GetCode_break().front()->GetLoc()));
370     BOOST_CHECK_EQUAL(listener.Count(), 0);
371 
372     listener.Clear();
373 }
374 
375 // propagate cds without code-break from seq 3 to seq 1 and 2
TestCdsFromLastBioseq(bool loc_partial5,bool loc_partial3)376 void TestCdsFromLastBioseq(bool loc_partial5, bool loc_partial3)
377 {
378     size_t front_insert = 5;
379     CRef<CSeq_align> align;
380     CRef<CSeq_entry> entry, seq1, seq2, seq3;
381     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
382 
383     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
384     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
385     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
386 
387     CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id3, loc_partial5, loc_partial3);
388     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
389 
390     CBioseq_Handle bsh1, bsh2, bsh3;
391     CRef<CScope> scope;
392     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
393 
394     CMessageListener_Basic listener;
395 
396     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
397     CRef<CSeq_loc> expected_loc1 = CreateLoc(0, 5, id1, true, loc_partial3);
398     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
399     BOOST_CHECK_EQUAL(new_feat1->GetData().GetSubtype(), cds->GetData().GetSubtype());
400     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
401     BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
402     BOOST_CHECK_EQUAL(listener.Count(), 0);
403 
404     listener.Clear();
405 
406     edit::CFeaturePropagator propagator2(bsh3, bsh2, *align, false, false, true, true, &listener);
407     CRef<CSeq_loc> expected_loc2 = CreateLoc(5, 10, id2, true, loc_partial3);
408     CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
409     BOOST_CHECK_EQUAL(new_feat2->GetData().GetSubtype(), cds->GetData().GetSubtype());
410     BOOST_CHECK(expected_loc2->Equals(new_feat2->GetLocation()));
411     BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), false);
412     BOOST_CHECK_EQUAL(listener.Count(), 0);
413 
414     listener.Clear();
415 }
416 
417 // propagate cds with code-break from seq 3 to seq 1 and 2
TestCdsFromLastBioseqWithCodeBreak()418 void TestCdsFromLastBioseqWithCodeBreak()
419 {
420     size_t front_insert = 5;
421     CRef<CSeq_align> align;
422     CRef<CSeq_entry> entry, seq1, seq2, seq3;
423     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
424 
425     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
426     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
427     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
428 
429     CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id3, false, false);
430     CRef<CSeq_loc> subloc = CreateLoc(3, 5, id3, false, false);
431     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
432     AddCodeBreak(cds, subloc);
433 
434     CBioseq_Handle bsh1, bsh2, bsh3;
435     CRef<CScope> scope;
436     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
437 
438     CMessageListener_Basic listener;
439 
440     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
441     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
442     BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
443     BOOST_CHECK_EQUAL(listener.Count(), 1);
444     BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of translation exception"), true);
445     BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_CodeBreakLocation);
446 
447     listener.Clear();
448 
449     edit::CFeaturePropagator propagator2(bsh3, bsh2, *align, false, false, true, true, &listener);
450     CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*cds);
451     BOOST_CHECK_EQUAL(new_feat2->GetData().GetCdregion().IsSetCode_break(), false);
452     BOOST_CHECK_EQUAL(listener.Count(), 1);
453     BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of translation exception"), true);
454     BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_CodeBreakLocation);
455 
456     listener.Clear();
457 }
458 
459 // propagate trna with anticodon from seq 1 to seq 2 and 3
TestTrnaAnticodon(bool subloc_partial5,bool subloc_partial3)460 void TestTrnaAnticodon(bool subloc_partial5, bool subloc_partial3)
461 {
462     size_t front_insert = 5;
463     CRef<CSeq_align> align;
464     CRef<CSeq_entry> entry, seq1, seq2, seq3;
465     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
466 
467     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
468     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
469     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
470 
471     CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id1, false, false);
472     CRef<CSeq_loc> subloc = CreateLoc(3, 5, id1, subloc_partial5, subloc_partial3);
473     CRef<CSeq_feat> trna = CreateTrna(main_loc, seq1);
474     AddAnticodon(trna, subloc);
475 
476     CBioseq_Handle bsh1, bsh2, bsh3;
477     CRef<CScope> scope;
478     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
479 
480     CMessageListener_Basic listener;
481 
482     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
483     CRef<CSeq_loc> expected_subloc1 = CreateLoc(3+front_insert, 5+front_insert, id2, subloc_partial5, subloc_partial3);
484     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*trna);
485     BOOST_CHECK_EQUAL(new_feat1->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), true);
486     BOOST_CHECK(expected_subloc1->Equals(new_feat1->GetData().GetRna().GetExt().GetTRNA().GetAnticodon()));
487     BOOST_CHECK_EQUAL(listener.Count(), 0);
488 
489     listener.Clear();
490 
491     edit::CFeaturePropagator propagator2(bsh1, bsh3, *align, false, false, true, true, &listener);
492     CRef<CSeq_loc> expected_subloc2 = CreateLoc(3+front_insert*2, 5+front_insert*2, id3, subloc_partial5, subloc_partial3);
493     CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*trna);
494     BOOST_CHECK_EQUAL(new_feat2->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), true);
495     BOOST_CHECK(expected_subloc2->Equals(new_feat2->GetData().GetRna().GetExt().GetTRNA().GetAnticodon()));
496     BOOST_CHECK_EQUAL(listener.Count(), 0);
497 
498     listener.Clear();
499 }
500 
501 // propagate trna with anticodon from seq 3 to seq 1 and 2
TestTrnaAnticodonFromLastBioseq()502 void TestTrnaAnticodonFromLastBioseq()
503 {
504     size_t front_insert = 5;
505     CRef<CSeq_align> align;
506     CRef<CSeq_entry> entry, seq1, seq2, seq3;
507     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
508 
509     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
510     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
511     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
512 
513     CRef<CSeq_loc> main_loc = CreateLoc(0, 15, id3, false, false);
514     CRef<CSeq_loc> subloc = CreateLoc(3, 5, id3, false, false);
515     CRef<CSeq_feat> trna = CreateTrna(main_loc, seq1);
516     AddAnticodon(trna, subloc);
517 
518     CBioseq_Handle bsh1, bsh2, bsh3;
519     CRef<CScope> scope;
520     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
521 
522     CMessageListener_Basic listener;
523 
524     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
525     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*trna);
526     BOOST_CHECK_EQUAL(new_feat1->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), false);
527     BOOST_CHECK_EQUAL(listener.Count(), 1);
528     BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of anticodon"), true);
529     BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_AnticodonLocation);
530 
531     listener.Clear();
532 
533     edit::CFeaturePropagator propagator2(bsh3, bsh2, *align, false, false, true, true, &listener);
534     CRef<CSeq_feat> new_feat2 = propagator2.Propagate(*trna);
535     BOOST_CHECK_EQUAL(new_feat2->GetData().GetRna().GetExt().GetTRNA().IsSetAnticodon(), false);
536     BOOST_CHECK_EQUAL(listener.Count(), 1);
537     BOOST_CHECK_EQUAL(NStr::StartsWith(listener.GetMessage(0).GetText(), "Unable to propagate location of anticodon"), true);
538     BOOST_CHECK_EQUAL(listener.GetMessage(0).GetCode(), edit::CFeaturePropagator::eFeaturePropagationProblem_AnticodonLocation);
539 
540     listener.Clear();
541 }
542 
543 // propagate cds outside of the alignment from seq 3 to seq 1
TestCdsFromLastBioseqOutsideAlign()544 void TestCdsFromLastBioseqOutsideAlign()
545 {
546     size_t front_insert = 5;
547     CRef<CSeq_align> align;
548     CRef<CSeq_entry> entry, seq1, seq2, seq3;
549     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
550 
551     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
552     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
553     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
554 
555     CRef<CSeq_loc> main_loc = CreateLoc(0, 5, id3, false, false);
556     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
557 
558     CBioseq_Handle bsh1, bsh2, bsh3;
559     CRef<CScope> scope;
560     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
561 
562     CMessageListener_Basic listener;
563 
564     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
565     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
566     BOOST_CHECK(new_feat1.IsNull());
567 
568     listener.Clear();
569 }
570 
571 // propagate 2 exon cds with 1 exon outside of the alignment from seq 3 to seq 1
TestTwoIntCdsFromLastBioseqOutsideAlign()572 void TestTwoIntCdsFromLastBioseqOutsideAlign()
573 {
574     size_t front_insert = 5;
575     CRef<CSeq_align> align;
576     CRef<CSeq_entry> entry, seq1, seq2, seq3;
577     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
578 
579     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
580     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
581     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
582 
583     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(0, 5, 20, 30, eNa_strand_plus, id3, false, false);
584     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
585 
586     CBioseq_Handle bsh1, bsh2, bsh3;
587     CRef<CScope> scope;
588     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
589 
590     CMessageListener_Basic listener;
591 
592     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
593     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
594     CRef<CSeq_loc> expected_loc1 = CreateLoc(20-front_insert*2, 30-front_insert*2, id1, true, false);
595     expected_loc1->SetInt().SetStrand(eNa_strand_plus);
596     BOOST_CHECK_EQUAL(new_feat1->GetData().GetSubtype(), cds->GetData().GetSubtype());
597     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
598     BOOST_CHECK_EQUAL(new_feat1->GetData().GetCdregion().IsSetCode_break(), false);
599     BOOST_CHECK_EQUAL(listener.Count(), 0);
600 
601     listener.Clear();
602 }
603 
604 // propagate 2 exon cds on minus strand from seq 3 to seq 1
TestTwoIntCdsOnMinusStrand()605 void TestTwoIntCdsOnMinusStrand()
606 {
607     size_t front_insert = 5;
608     CRef<CSeq_align> align;
609     CRef<CSeq_entry> entry, seq1, seq2, seq3;
610     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
611 
612     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
613     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
614     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
615 
616     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(20, 30, 5, 15, eNa_strand_minus, id3, true, true);
617     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
618     CBioseq_Handle bsh1, bsh2, bsh3;
619     CRef<CScope> scope;
620     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
621 //    cout << "Bad order: " << sequence::BadSeqLocSortOrder(bsh3, *main_loc) << endl;
622     CMessageListener_Basic listener;
623 
624     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
625     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
626     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(10, 20, 0, 5, eNa_strand_minus, id1, true, true);
627     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
628     BOOST_CHECK_EQUAL(listener.Count(), 0);
629 
630     listener.Clear();
631 }
632 
633 // test partial when the stop is cut off
TestPartialWhenCutStop(bool partial3)634 void TestPartialWhenCutStop(bool partial3)
635 {
636     size_t front_insert = 5;
637     CRef<CSeq_align> align;
638     CRef<CSeq_entry> entry, seq1, seq2, seq3;
639     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
640 
641     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
642     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
643     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
644 
645     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 40, eNa_strand_plus, id1, false, partial3);
646     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
647     CBioseq_Handle bsh1, bsh2, bsh3;
648     CRef<CScope> scope;
649     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
650 
651     CDense_seg& denseg = align->SetSegs().SetDenseg();
652     denseg.SetNumseg(1);
653     denseg.ResetLens();
654     denseg.SetLens().push_back(30);
655     denseg.ResetStarts();
656     denseg.SetStarts().push_back(0);
657     denseg.SetStarts().push_back(0);
658     denseg.SetStarts().push_back(0);
659 
660     CMessageListener_Basic listener;
661 
662     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
663     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
664     /*CSeq_loc_Mapper_Options mapper_options(CSeq_loc_Mapper::fTrimMappedLocation);
665     CRef<CSeq_loc_Mapper> mapper(new CSeq_loc_Mapper(*bsh1.GetSeqId(), *bsh2.GetSeqId(), *align, &bsh2.GetScope(), mapper_options));
666     mapper->SetMergeAll();
667     mapper->SetGapRemove();
668     mapper->SetFuzzOption(CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr);
669     CRef<CSeq_loc> new_loc = mapper->Map(cds->GetLocation());
670     new_loc->ChangeToMix();
671     cout << MSerial_AsnText << cds->GetLocation();
672     cout << MSerial_AsnText << *new_loc;
673     */
674     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 15, 20, 29, eNa_strand_plus, id2, false, true);
675     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
676 //    BOOST_CHECK(expected_loc1->Equals(*new_loc));
677     BOOST_CHECK_EQUAL(listener.Count(), 0);
678     listener.Clear();
679 }
680 
681 // test partial when the last interval is cut off
TestPartialWhenCutLastInterval(bool partial3)682 void TestPartialWhenCutLastInterval(bool partial3)
683 {
684     size_t front_insert = 5;
685     CRef<CSeq_align> align;
686     CRef<CSeq_entry> entry, seq1, seq2, seq3;
687     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
688 
689     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
690     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
691     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
692 
693     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 40, 50, eNa_strand_plus, id1, false, partial3);
694     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
695     CBioseq_Handle bsh1, bsh2, bsh3;
696     CRef<CScope> scope;
697     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
698 
699     CDense_seg& denseg = align->SetSegs().SetDenseg();
700     denseg.SetNumseg(1);
701     denseg.ResetLens();
702     denseg.SetLens().push_back(30);
703     denseg.ResetStarts();
704     denseg.SetStarts().push_back(0);
705     denseg.SetStarts().push_back(0);
706     denseg.SetStarts().push_back(0);
707 
708     CMessageListener_Basic listener;
709 
710     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
711     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
712     /*CSeq_loc_Mapper_Options mapper_options(CSeq_loc_Mapper::fTrimMappedLocation);
713     CRef<CSeq_loc_Mapper> mapper(new CSeq_loc_Mapper(*bsh1.GetSeqId(), *bsh2.GetSeqId(), *align, &bsh2.GetScope(), mapper_options));
714     mapper->SetMergeAll();
715     mapper->SetGapRemove();
716     mapper->SetFuzzOption(CSeq_loc_Mapper::fFuzzOption_RemoveLimTlOrTr);
717     CRef<CSeq_loc> new_loc = mapper->Map(cds->GetLocation());
718     cout << MSerial_AsnText << cds->GetLocation();
719     cout << MSerial_AsnText << *new_loc;
720     */
721     CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 15, id2, false, true);
722     expected_loc1->SetInt().SetStrand(eNa_strand_plus);
723     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
724 // BOOST_CHECK(expected_loc1->Equals(*new_loc));
725     BOOST_CHECK_EQUAL(listener.Count(), 0);
726     listener.Clear();
727 }
728 
729 // test partial when the start is cut off
TestPartialWhenCutStart(bool partial5)730 void TestPartialWhenCutStart(bool partial5)
731 {
732     size_t front_insert = 5;
733     CRef<CSeq_align> align;
734     CRef<CSeq_entry> entry, seq1, seq2, seq3;
735     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
736 
737     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
738     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
739     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
740 
741     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 25, eNa_strand_plus, id1, partial5, false);
742     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
743     CBioseq_Handle bsh1, bsh2, bsh3;
744     CRef<CScope> scope;
745     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
746 
747     CDense_seg& denseg = align->SetSegs().SetDenseg();
748     denseg.SetNumseg(1);
749     denseg.ResetLens();
750     denseg.SetLens().push_back(30);
751     denseg.ResetStarts();
752     denseg.SetStarts().push_back(10);
753     denseg.SetStarts().push_back(10);
754     denseg.SetStarts().push_back(10);
755 
756     CMessageListener_Basic listener;
757 
758     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
759     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
760     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(10, 15, 20, 25, eNa_strand_plus, id2, true, false);
761     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
762     BOOST_CHECK_EQUAL(listener.Count(), 0);
763     listener.Clear();
764 }
765 
766 // test fuse abutting intervals
TestFuseAbuttingIntervals()767 void TestFuseAbuttingIntervals()
768 {
769     size_t front_insert = 5;
770     CRef<CSeq_align> align;
771     CRef<CSeq_entry> entry, seq1, seq2, seq3;
772     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
773 
774     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
775     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
776     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
777 
778     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 12, 17, 25, eNa_strand_plus, id1, false, false);
779     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
780     CBioseq_Handle bsh1, bsh2, bsh3;
781     CRef<CScope> scope;
782     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
783 
784     CDense_seg& denseg = align->SetSegs().SetDenseg();
785     denseg.SetNumseg(3);
786     denseg.ResetLens();
787     denseg.SetLens().push_back(10);
788     denseg.SetLens().push_back(10);
789     denseg.SetLens().push_back(10);
790     denseg.ResetStarts();
791     denseg.SetStarts().push_back(0);
792     denseg.SetStarts().push_back(0);
793     denseg.SetStarts().push_back(0);
794     denseg.SetStarts().push_back(10);
795     denseg.SetStarts().push_back(-1);
796     denseg.SetStarts().push_back(10);
797     denseg.SetStarts().push_back(20);
798     denseg.SetStarts().push_back(10);
799     denseg.SetStarts().push_back(20);;
800 
801     CMessageListener_Basic listener;
802 
803     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
804     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
805     CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 15, id2, false, false);
806     expected_loc1->SetInt().SetStrand(eNa_strand_plus);
807     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
808     BOOST_CHECK_EQUAL(listener.Count(), 0);
809     listener.Clear();
810 }
811 
812 // test do not fuse abutting intervals
TestDoNotFuseAbuttingIntervals()813 void TestDoNotFuseAbuttingIntervals()
814 {
815     size_t front_insert = 5;
816     CRef<CSeq_align> align;
817     CRef<CSeq_entry> entry, seq1, seq2, seq3;
818     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
819 
820     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
821     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
822     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
823 
824     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 12, 17, 25, eNa_strand_plus, id1, false, false);
825     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
826     CBioseq_Handle bsh1, bsh2, bsh3;
827     CRef<CScope> scope;
828     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
829 
830     CDense_seg& denseg = align->SetSegs().SetDenseg();
831     denseg.SetNumseg(3);
832     denseg.ResetLens();
833     denseg.SetLens().push_back(10);
834     denseg.SetLens().push_back(10);
835     denseg.SetLens().push_back(10);
836     denseg.ResetStarts();
837     denseg.SetStarts().push_back(0);
838     denseg.SetStarts().push_back(0);
839     denseg.SetStarts().push_back(0);
840     denseg.SetStarts().push_back(10);
841     denseg.SetStarts().push_back(-1);
842     denseg.SetStarts().push_back(10);
843     denseg.SetStarts().push_back(20);
844     denseg.SetStarts().push_back(10);
845     denseg.SetStarts().push_back(20);;
846 
847     CMessageListener_Basic listener;
848 
849     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, false, true, &listener);
850     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
851     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 9, 10, 15, eNa_strand_plus, id2, false, false);
852     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
853     BOOST_CHECK_EQUAL(listener.Count(), 0);
854     listener.Clear();
855 }
856 
857 // test extend over gaps
TestExtendOverGap()858 void TestExtendOverGap()
859 {
860     size_t front_insert = 5;
861     CRef<CSeq_align> align;
862     CRef<CSeq_entry> entry, seq1, seq2, seq3;
863     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
864 
865     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
866     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
867     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
868 
869     CRef<CSeq_loc> main_loc = CreateLoc(5, 25, id1, false, false);
870     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
871     CBioseq_Handle bsh1, bsh2, bsh3;
872     CRef<CScope> scope;
873     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
874 
875     CDense_seg& denseg = align->SetSegs().SetDenseg();
876     denseg.SetNumseg(3);
877     denseg.ResetLens();
878     denseg.SetLens().push_back(10);
879     denseg.SetLens().push_back(10);
880     denseg.SetLens().push_back(10);
881     denseg.ResetStarts();
882     denseg.SetStarts().push_back(0);
883     denseg.SetStarts().push_back(0);
884     denseg.SetStarts().push_back(0);
885     denseg.SetStarts().push_back(10);
886     denseg.SetStarts().push_back(-1);
887     denseg.SetStarts().push_back(10);
888     denseg.SetStarts().push_back(20);
889     denseg.SetStarts().push_back(20);
890     denseg.SetStarts().push_back(20);;
891 
892     CMessageListener_Basic listener;
893 
894     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, false, true, &listener);
895     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
896     CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 25, id2, false, false);
897     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
898     BOOST_CHECK_EQUAL(listener.Count(), 0);
899     listener.Clear();
900 }
901 
902 // test do not extend over gaps
TestDoNotExtendOverGap()903 void TestDoNotExtendOverGap()
904 {
905     size_t front_insert = 5;
906     CRef<CSeq_align> align;
907     CRef<CSeq_entry> entry, seq1, seq2, seq3;
908     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
909 
910     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
911     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
912     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
913 
914     CRef<CSeq_loc> main_loc = CreateLoc(5, 25, id2, false, false);
915     CRef<CSeq_feat> cds = CreateCds(main_loc, seq2);
916     CBioseq_Handle bsh1, bsh2, bsh3;
917     CRef<CScope> scope;
918     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
919 
920     CDense_seg& denseg = align->SetSegs().SetDenseg();
921     denseg.SetNumseg(3);
922     denseg.ResetLens();
923     denseg.SetLens().push_back(10);
924     denseg.SetLens().push_back(10);
925     denseg.SetLens().push_back(10);
926     denseg.ResetStarts();
927     denseg.SetStarts().push_back(0);
928     denseg.SetStarts().push_back(0);
929     denseg.SetStarts().push_back(0);
930     denseg.SetStarts().push_back(10);
931     denseg.SetStarts().push_back(-1);
932     denseg.SetStarts().push_back(10);
933     denseg.SetStarts().push_back(20);
934     denseg.SetStarts().push_back(20);
935     denseg.SetStarts().push_back(20);;
936 
937     CMessageListener_Basic listener;
938 
939     edit::CFeaturePropagator propagator1(bsh2, bsh1, *align, false, false, false, false, &listener);
940     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
941     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 9, 20, 25, eNa_strand_plus, id1, false, false);
942     expected_loc1->ResetStrand();
943     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
944     BOOST_CHECK_EQUAL(listener.Count(), 0);
945     listener.Clear();
946 }
947 
948 // test ordered vs. joined locations
TestOrderedLoc()949 void TestOrderedLoc()
950 {
951     size_t front_insert = 5;
952     CRef<CSeq_align> align;
953     CRef<CSeq_entry> entry, seq1, seq2, seq3;
954     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
955 
956     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
957     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
958     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
959 
960     CRef<CSeq_loc> main_loc = CreateOrderedLoc(5, 15, 20, 30, eNa_strand_plus, id3, true, true);
961     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
962     CBioseq_Handle bsh1, bsh2, bsh3;
963     CRef<CScope> scope;
964     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
965     CMessageListener_Basic listener;
966 
967     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
968     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
969     CRef<CSeq_loc> expected_loc1 = CreateOrderedLoc(0, 5, 10, 20, eNa_strand_plus, id1, true, true);
970     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
971     BOOST_CHECK_EQUAL(listener.Count(), 0);
972 
973     listener.Clear();
974 }
975 
976 // test circular topology
TestCircularTopology()977 void TestCircularTopology()
978 {
979     size_t front_insert = 5;
980     CRef<CSeq_align> align;
981     CRef<CSeq_entry> entry, seq1, seq2, seq3;
982     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
983 
984     seq1->SetSeq().SetInst().SetTopology(CSeq_inst::eTopology_circular);
985     seq2->SetSeq().SetInst().SetTopology(CSeq_inst::eTopology_circular);
986     seq3->SetSeq().SetInst().SetTopology(CSeq_inst::eTopology_circular);
987 
988     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
989     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
990     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
991 
992     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(50, 59, 0, 5, eNa_strand_plus, id1, false, false);
993     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
994     CBioseq_Handle bsh1, bsh2, bsh3;
995     CRef<CScope> scope;
996     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
997 
998     CDense_seg& denseg = align->SetSegs().SetDenseg();
999     denseg.SetNumseg(3);
1000     denseg.ResetLens();
1001     denseg.SetLens().push_back(20);
1002     denseg.SetLens().push_back(20);
1003     denseg.SetLens().push_back(20);
1004     denseg.ResetStarts();
1005     denseg.SetStarts().push_back(0);
1006     denseg.SetStarts().push_back(0);
1007     denseg.SetStarts().push_back(0);
1008     denseg.SetStarts().push_back(20);
1009     denseg.SetStarts().push_back(-1);
1010     denseg.SetStarts().push_back(-1);
1011     denseg.SetStarts().push_back(40);
1012     denseg.SetStarts().push_back(45);
1013     denseg.SetStarts().push_back(50);;
1014 
1015     CMessageListener_Basic listener;
1016 
1017     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, false, true, &listener);
1018     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1019     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(55, 64, 0, 5, eNa_strand_plus, id2, false, false);
1020     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1021     BOOST_CHECK_EQUAL(listener.Count(), 0);
1022     listener.Clear();
1023 
1024 }
1025 
1026 
1027 
1028 // test point location inside alignment
TestPointLocInside()1029 void TestPointLocInside()
1030 {
1031     size_t front_insert = 5;
1032     CRef<CSeq_align> align;
1033     CRef<CSeq_entry> entry, seq1, seq2, seq3;
1034     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1035 
1036     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1037     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1038     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1039 
1040     CRef<CSeq_loc> main_loc = CreatePointLoc(15, id3);
1041     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
1042 
1043     CBioseq_Handle bsh1, bsh2, bsh3;
1044     CRef<CScope> scope;
1045     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1046 
1047     CMessageListener_Basic listener;
1048 
1049     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
1050     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1051     CRef<CSeq_loc> expected_loc1 = CreatePointLoc(5, id1);
1052     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1053     BOOST_CHECK_EQUAL(listener.Count(), 0);
1054 
1055     listener.Clear();
1056 }
1057 
1058 // test point location outside alignment
TestPointLocOutside()1059 void TestPointLocOutside()
1060 {
1061     size_t front_insert = 5;
1062     CRef<CSeq_align> align;
1063     CRef<CSeq_entry> entry, seq1, seq2, seq3;
1064     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1065 
1066     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1067     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1068     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1069 
1070     CRef<CSeq_loc> main_loc = CreatePointLoc(5, id3);
1071     CRef<CSeq_feat> cds = CreateCds(main_loc, seq3);
1072 
1073     CBioseq_Handle bsh1, bsh2, bsh3;
1074     CRef<CScope> scope;
1075     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1076 
1077     CMessageListener_Basic listener;
1078 
1079     edit::CFeaturePropagator propagator1(bsh3, bsh1, *align, false, false, true, true, &listener);
1080     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1081     BOOST_CHECK(new_feat1.IsNull());
1082 
1083     listener.Clear();
1084 }
1085 
1086 // test partial when the stop is cut off and do not extend
TestPartialWhenCutStopDoNotExtend(bool partial3)1087 void TestPartialWhenCutStopDoNotExtend(bool partial3)
1088 {
1089     size_t front_insert = 5;
1090     CRef<CSeq_align> align;
1091     CRef<CSeq_entry> entry, seq1, seq2, seq3;
1092     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1093 
1094     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1095     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1096     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1097 
1098     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 40, eNa_strand_plus, id1, false, partial3);
1099     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1100     CBioseq_Handle bsh1, bsh2, bsh3;
1101     CRef<CScope> scope;
1102     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1103 
1104     CDense_seg& denseg = align->SetSegs().SetDenseg();
1105     denseg.SetNumseg(1);
1106     denseg.ResetLens();
1107     denseg.SetLens().push_back(30);
1108     denseg.ResetStarts();
1109     denseg.SetStarts().push_back(0);
1110     denseg.SetStarts().push_back(0);
1111     denseg.SetStarts().push_back(0);
1112 
1113     CMessageListener_Basic listener;
1114 
1115     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, false, &listener);
1116     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1117     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(5, 15, 20, 29, eNa_strand_plus, id2, false, true);
1118     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1119     BOOST_CHECK_EQUAL(listener.Count(), 0);
1120     listener.Clear();
1121 }
1122 
1123 // test partial when the last interval is cut off and do not extend
TestPartialWhenCutLastIntervalDoNotExtend(bool partial3)1124 void TestPartialWhenCutLastIntervalDoNotExtend(bool partial3)
1125 {
1126     size_t front_insert = 5;
1127     CRef<CSeq_align> align;
1128     CRef<CSeq_entry> entry, seq1, seq2, seq3;
1129     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1130 
1131     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1132     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1133     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1134 
1135     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 40, 50, eNa_strand_plus, id1, false, partial3);
1136     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1137     CBioseq_Handle bsh1, bsh2, bsh3;
1138     CRef<CScope> scope;
1139     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1140 
1141     CDense_seg& denseg = align->SetSegs().SetDenseg();
1142     denseg.SetNumseg(1);
1143     denseg.ResetLens();
1144     denseg.SetLens().push_back(30);
1145     denseg.ResetStarts();
1146     denseg.SetStarts().push_back(0);
1147     denseg.SetStarts().push_back(0);
1148     denseg.SetStarts().push_back(0);
1149 
1150     CMessageListener_Basic listener;
1151 
1152     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, false, &listener);
1153     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1154     CRef<CSeq_loc> expected_loc1 = CreateLoc(5, 15, id2, false, true);
1155     expected_loc1->SetInt().SetStrand(eNa_strand_plus);
1156     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1157     BOOST_CHECK_EQUAL(listener.Count(), 0);
1158     listener.Clear();
1159 }
1160 
1161 // test partial when the start is cut off and do not extend
TestPartialWhenCutStartDoNotExtend(bool partial5)1162 void TestPartialWhenCutStartDoNotExtend(bool partial5)
1163 {
1164     size_t front_insert = 5;
1165     CRef<CSeq_align> align;
1166     CRef<CSeq_entry> entry, seq1, seq2, seq3;
1167     tie(entry, align, seq1, seq2, seq3) = CreateBioseqsAndAlign(front_insert);
1168 
1169     const CSeq_id &id1 = *seq1->GetSeq().GetId().front();
1170     const CSeq_id &id2 = *seq2->GetSeq().GetId().front();
1171     const CSeq_id &id3 = *seq3->GetSeq().GetId().front();
1172 
1173     CRef<CSeq_loc> main_loc = CreateTwoIntLoc(5, 15, 20, 25, eNa_strand_plus, id1, partial5, false);
1174     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1175     CBioseq_Handle bsh1, bsh2, bsh3;
1176     CRef<CScope> scope;
1177     tie(bsh1,bsh2,bsh3,scope) = AddBioseqsToScope(entry);
1178 
1179     CDense_seg& denseg = align->SetSegs().SetDenseg();
1180     denseg.SetNumseg(1);
1181     denseg.ResetLens();
1182     denseg.SetLens().push_back(30);
1183     denseg.ResetStarts();
1184     denseg.SetStarts().push_back(10);
1185     denseg.SetStarts().push_back(10);
1186     denseg.SetStarts().push_back(10);
1187 
1188     CMessageListener_Basic listener;
1189 
1190     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, false, &listener);
1191     CRef<CSeq_feat> new_feat1 = propagator1.Propagate(*cds);
1192     CRef<CSeq_loc> expected_loc1 = CreateTwoIntLoc(10, 15, 20, 25, eNa_strand_plus, id2, true, false);
1193     BOOST_CHECK(expected_loc1->Equals(new_feat1->GetLocation()));
1194     BOOST_CHECK_EQUAL(listener.Count(), 0);
1195     listener.Clear();
1196 }
1197 
TestFeatInsideGap(bool is_minus)1198 void TestFeatInsideGap(bool is_minus)
1199 {
1200     CRef<CSeq_entry> entry(new CSeq_entry);
1201 
1202     string str1 = "TCACTCTTTGAAAAAAAAAA";
1203     CRef<CSeq_entry> seq1(new CSeq_entry);
1204     CRef< CSeq_id > id1(new CSeq_id);
1205     id1->SetLocal().SetStr("seq1");
1206     seq1->SetSeq().SetId().push_back(id1);
1207     seq1->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(str1);
1208     seq1->SetSeq().SetInst().SetLength(str1.length());
1209     seq1->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
1210     seq1->SetSeq().SetInst().SetMol(CSeq_inst::eMol_na);
1211     entry->SetSet().SetSeq_set().push_back(seq1);
1212 
1213     string str2 = "TCACTGAAAAAAAAAA";
1214     CRef<CSeq_entry> seq2(new CSeq_entry);
1215     CRef< CSeq_id > id2(new CSeq_id);
1216     id2->SetLocal().SetStr("seq2");
1217     seq2->SetSeq().SetId().push_back(id2);
1218     seq2->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(str2);
1219     seq2->SetSeq().SetInst().SetLength(str2.length());
1220     seq2->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
1221     seq2->SetSeq().SetInst().SetMol(CSeq_inst::eMol_na);
1222     entry->SetSet().SetSeq_set().push_back(seq2);
1223 
1224     CRef<CSeq_align> align(new CSeq_align());
1225     align->SetType(objects::CSeq_align::eType_global);
1226     align->SetDim(entry->GetSet().GetSeq_set().size());
1227     align->SetSegs().SetDenseg().SetIds().push_back(id1);
1228     align->SetSegs().SetDenseg().SetIds().push_back(id2);
1229 
1230     auto& denseg = align->SetSegs().SetDenseg();
1231     denseg.SetNumseg(3);
1232     denseg.SetLens().push_back(5);
1233     denseg.SetLens().push_back(4);
1234     denseg.SetLens().push_back(11);
1235     denseg.SetDim(entry->GetSet().GetSeq_set().size());
1236     denseg.SetStarts().push_back(0);
1237     denseg.SetStarts().push_back(0);
1238     denseg.SetStarts().push_back(5);
1239     denseg.SetStarts().push_back(-1);
1240     denseg.SetStarts().push_back(9);
1241     denseg.SetStarts().push_back(5);
1242 
1243     CRef<CSeq_annot> annot(new CSeq_annot());
1244     annot->SetData().SetAlign().push_back(align);
1245     entry->SetSet().SetAnnot().push_back(annot);
1246 
1247     CRef<CSeq_loc> main_loc = CreateLoc(6, 7, *id1, false, false, is_minus);
1248     CRef<CSeq_feat> cds = CreateCds(main_loc, seq1);
1249 
1250     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1251     CRef<CScope> scope(new CScope(*object_manager));
1252     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
1253     CBioseq_CI bi(seh);
1254     CBioseq_Handle bsh1 = *bi;
1255     ++bi;
1256     CBioseq_Handle bsh2 = *bi;
1257 
1258     CMessageListener_Basic listener;
1259 
1260     edit::CFeaturePropagator propagator1(bsh1, bsh2, *align, false, false, true, true, &listener);
1261     CRef<CSeq_feat> new_feat = propagator1.Propagate(*cds);
1262     BOOST_CHECK(new_feat.IsNull());
1263     BOOST_CHECK_EQUAL(listener.Count(), 1);
1264 
1265     listener.Clear();
1266 }
1267 
// Umbrella test case for feature propagation: runs each scenario helper of
// this file in a fixed order, covering every combination of the helpers'
// boolean arguments.
BOOST_AUTO_TEST_CASE(Test_FeaturePropagation)
{
    // Boolean arguments are exercised with false first, then true; for
    // two-argument helpers the first argument varies slowest.
    const bool kBothValues[] = { false, true };

    for (bool first : kBothValues) {
        for (bool second : kBothValues) {
            TestCds(first, second);
        }
    }

    for (bool first : kBothValues) {
        for (bool second : kBothValues) {
            TestCdsWithCodeBreak(first, second);
        }
    }

    for (bool first : kBothValues) {
        for (bool second : kBothValues) {
            TestCdsFromLastBioseq(first, second);
        }
    }

    TestCdsFromLastBioseqWithCodeBreak();

    for (bool first : kBothValues) {
        for (bool second : kBothValues) {
            TestTrnaAnticodon(first, second);
        }
    }

    TestTrnaAnticodonFromLastBioseq();

    TestCdsFromLastBioseqOutsideAlign();

    TestTwoIntCdsFromLastBioseqOutsideAlign();

    TestTwoIntCdsOnMinusStrand();

    for (bool partial : kBothValues) {
        TestPartialWhenCutStop(partial);
    }
    for (bool partial : kBothValues) {
        TestPartialWhenCutLastInterval(partial);
    }
    for (bool partial : kBothValues) {
        TestPartialWhenCutStart(partial);
    }

    TestFuseAbuttingIntervals();
    TestDoNotFuseAbuttingIntervals();
    TestExtendOverGap();
    TestDoNotExtendOverGap();
    TestOrderedLoc();
    TestCircularTopology();
    TestPointLocInside();
    TestPointLocOutside();

    for (bool partial : kBothValues) {
        TestPartialWhenCutStopDoNotExtend(partial);
    }
    for (bool partial : kBothValues) {
        TestPartialWhenCutLastIntervalDoNotExtend(partial);
    }
    for (bool partial : kBothValues) {
        TestPartialWhenCutStartDoNotExtend(partial);
    }

    for (bool is_minus : kBothValues) {
        TestFeatInsideGap(is_minus);
    }
}
1326 
1327 
CheckPropagatedCDSLocation(CSeq_entry & entry,const CSeq_feat & cds,bool stop_at_stop,bool fix_partials,const vector<CRef<CSeq_loc>> & expected_loc)1328 void CheckPropagatedCDSLocation(CSeq_entry& entry, const CSeq_feat& cds,
1329                                 bool stop_at_stop, bool fix_partials,
1330                                 const vector<CRef<CSeq_loc> >& expected_loc)
1331 {
1332     CRef<CSeq_align> align = entry.SetSet().SetAnnot().front()->SetData().SetAlign().front();
1333     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1334 
1335     CRef<CScope> scope(new CScope(*object_manager));
1336     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (entry);
1337 
1338     CBioseq_CI b(seh);
1339     CBioseq_Handle src = *b;
1340     ++b;
1341     size_t offset = 0;
1342     while (b) {
1343         CMessageListener_Basic listener;
1344         edit::CFeaturePropagator propagator(src, *b, *align, stop_at_stop, fix_partials, true, true, &listener);
1345 
1346         CRef<CSeq_feat> new_feat = propagator.Propagate(cds);
1347         BOOST_CHECK_EQUAL(new_feat->GetData().GetSubtype(), CSeqFeatData::eSubtype_cdregion);
1348         CheckPropagatedLocation(*(expected_loc[offset]), new_feat->GetLocation());
1349         BOOST_CHECK_EQUAL(listener.Count(), 0);
1350         listener.Clear();
1351         offset++;
1352         ++b;
1353     }
1354 
1355 }
1356 
1357 
InsertStop(CBioseq & seq,size_t pos)1358 void InsertStop(CBioseq& seq, size_t pos)
1359 {
1360     string na = seq.GetInst().GetSeq_data().GetIupacna();
1361     string before = na.substr(0, pos);
1362     string after = na.substr(pos + 3);
1363     na = before + "TAA" + after;
1364     seq.SetInst().SetSeq_data().SetIupacna().Set(na);
1365 }
1366 
1367 
BOOST_AUTO_TEST_CASE(Test_CdRegionAlterations)1368 BOOST_AUTO_TEST_CASE(Test_CdRegionAlterations)
1369 {
1370     size_t front_insert = 5;
1371     CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
1372 
1373     CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
1374 
1375     CRef<CSeq_feat> cds = unit_test_util::AddMiscFeature(first, 15);
1376     cds->SetData().SetCdregion();
1377 
1378     // for this test, there are no stops
1379     vector<CRef<CSeq_loc> > expected_loc;
1380 
1381     CRef<CSeq_loc> loc1(new CSeq_loc());
1382     loc1->SetInt().SetFrom(front_insert);
1383     loc1->SetInt().SetTo(15 + front_insert);
1384     loc1->SetInt().SetId().SetLocal().SetStr("good2");
1385     loc1->SetPartialStart(false, eExtreme_Biological);
1386     loc1->SetPartialStop(false, eExtreme_Biological);
1387     expected_loc.push_back(loc1);
1388 
1389     CRef<CSeq_loc> loc2(new CSeq_loc());
1390     loc2->SetInt().SetFrom(front_insert * 2);
1391     loc2->SetInt().SetTo(15 + front_insert * 2);
1392     loc2->SetInt().SetId().SetLocal().SetStr("good3");
1393     loc2->SetPartialStart(false, eExtreme_Biological);
1394     loc2->SetPartialStop(false, eExtreme_Biological);
1395     expected_loc.push_back(loc2);
1396 
1397     CheckPropagatedCDSLocation(*entry, *cds, true, false, expected_loc);
1398     loc1->SetPartialStart(true, eExtreme_Biological);
1399     loc2->SetPartialStart(true, eExtreme_Biological);
1400     loc1->SetPartialStop(true, eExtreme_Biological);
1401     loc2->SetPartialStop(true, eExtreme_Biological);
1402     CheckPropagatedCDSLocation(*entry, *cds, true, true, expected_loc);
1403     CheckPropagatedCDSLocation(*entry, *cds, false, true, expected_loc);
1404 
1405     loc1->SetPartialStart(false, eExtreme_Biological);
1406     loc2->SetPartialStart(false, eExtreme_Biological);
1407     loc1->SetPartialStop(false, eExtreme_Biological);
1408     loc2->SetPartialStop(false, eExtreme_Biological);
1409 
1410 
1411     // repeat test with stops inserted for extension
1412     size_t offset = 0;
1413     for (auto s : entry->SetSet().SetSeq_set()) {
1414         if (offset > 0) {
1415             InsertStop(s->SetSeq(), 15 + (front_insert * offset) + 6);
1416         }
1417         offset++;
1418     }
1419     loc1->SetInt().SetTo(15 + front_insert + 8);
1420     loc2->SetInt().SetTo(15 + 2 * front_insert + 8);
1421 
1422     CheckPropagatedCDSLocation(*entry, *cds, true, false, expected_loc);
1423     loc1->SetPartialStart(true, eExtreme_Biological);
1424     loc2->SetPartialStart(true, eExtreme_Biological);
1425     CheckPropagatedCDSLocation(*entry, *cds, true, true, expected_loc);
1426     loc1->SetPartialStart(false, eExtreme_Biological);
1427     loc2->SetPartialStart(false, eExtreme_Biological);
1428 
1429 
1430     // repeat test with stops inserted for truncation
1431     offset = 0;
1432     for (auto s : entry->SetSet().SetSeq_set()) {
1433         if (offset > 0) {
1434             // need to make three stop codons, frame will go to be the longest one
1435             InsertStop(s->SetSeq(), 15 + (front_insert * offset) - 14);
1436             InsertStop(s->SetSeq(), 15 + (front_insert * offset) - 10);
1437             InsertStop(s->SetSeq(), 15 + (front_insert * offset) - 6);
1438         }
1439         offset++;
1440     }
1441     loc1->SetInt().SetTo(15 + front_insert - 4);
1442     loc2->SetInt().SetTo(15 + 2 * front_insert - 4);
1443 
1444     CheckPropagatedCDSLocation(*entry, *cds, true, false, expected_loc);
1445     loc1->SetPartialStart(true, eExtreme_Biological);
1446     loc2->SetPartialStart(true, eExtreme_Biological);
1447     CheckPropagatedCDSLocation(*entry, *cds, true, true, expected_loc);
1448 }
1449 
1450 
ImproveAlignment(CSeq_align & align,size_t front_insert)1451 void ImproveAlignment(CSeq_align& align, size_t front_insert)
1452 {
1453     CDense_seg& denseg = align.SetSegs().SetDenseg();
1454     denseg.SetNumseg(3);
1455     denseg.ResetLens();
1456     denseg.SetLens().push_back(front_insert);
1457     denseg.SetLens().push_back(front_insert);
1458     denseg.SetLens().push_back(60);
1459     denseg.ResetStarts();
1460     denseg.SetStarts().push_back(-1);
1461     denseg.SetStarts().push_back(-1);
1462     denseg.SetStarts().push_back(0);
1463     denseg.SetStarts().push_back(-1);
1464     denseg.SetStarts().push_back(0);
1465     denseg.SetStarts().push_back(front_insert);
1466     denseg.SetStarts().push_back(0);
1467     denseg.SetStarts().push_back(front_insert);
1468     denseg.SetStarts().push_back(front_insert * 2);
1469 }
1470 
1471 // TODO? Bad alignment!
1472 
// Propagates every feature of the third sequence to each of the other two
// sequences through the alignment produced by ImproveAlignment, and checks
// how many features arrive and which failures the listener reports.
BOOST_AUTO_TEST_CASE(Test_PropagateAll)
{
    size_t front_insert = 10;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    // make a better alignment, with some sequences in the gap at the front
    ImproveAlignment(*align, front_insert);

    CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();

    // will not be able to propagate the first feature to either of the
    // other sequences.
    // second feature can only be propagated to the middle sequence.
    // third feature can be propagated to all.
    unit_test_util::AddMiscFeature(last, front_insert - 1);
    unit_test_util::AddMiscFeature(last, (2 * front_insert) - 1);
    unit_test_util::AddMiscFeature(last, 4 * front_insert);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);

    // the source is the third bioseq of the entry
    CBioseq_CI b1(seh);
    ++b1;
    ++b1;
    CBioseq_Handle src = *b1;

    // the targets are the first and then the second bioseq
    CBioseq_CI b(seh);

    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator1(src, *b, *align, false, false, true, true, &listener);
    vector<CRef<CSeq_feat> > first_feats = propagator1.PropagateAll();
    BOOST_CHECK_EQUAL(first_feats.size(), 1u);
    // fatal: the messages are accessed by index right below
    BOOST_REQUIRE_EQUAL(listener.Count(), 2u);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-20 to lcl|good1");
    BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good1");
    listener.Clear();

    ++b;
    edit::CFeaturePropagator propagator2(src, *b, *align, false, false, true, true, &listener);
    vector<CRef<CSeq_feat> > second_feats = propagator2.PropagateAll();
    BOOST_CHECK_EQUAL(second_feats.size(), 2u);
    // fatal: the message is accessed by index right below
    BOOST_REQUIRE_EQUAL(listener.Count(), 1u);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good2");
}
1520 
// Same scenario as Test_PropagateAll, but through PropagateAllReportFailures:
// besides the propagated features and the listener messages, the list of
// features handed back through the out-parameter is checked as well.
BOOST_AUTO_TEST_CASE(Test_PropagateAllReportFailures)
{
    size_t front_insert = 10;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
    // make a better alignment, with some sequences in the gap at the front
    ImproveAlignment(*align, front_insert);

    CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();

    // will not be able to propagate the first feature to either of the
    // other sequences.
    // second feature can only be propagated to the middle sequence.
    // third feature can be propagated to all.
    unit_test_util::AddMiscFeature(last, front_insert - 1);
    unit_test_util::AddMiscFeature(last, (2 * front_insert) - 1);
    unit_test_util::AddMiscFeature(last, 4 * front_insert);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);

    // the source is the third bioseq of the entry
    CBioseq_CI b1(seh);
    ++b1;
    ++b1;
    CBioseq_Handle src = *b1;

    // the targets are the first and then the second bioseq
    CBioseq_CI b(seh);

    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator1(src, *b, *align, false, false, true, true, &listener);
    vector<CConstRef<CSeq_feat> > failures1;
    vector<CRef<CSeq_feat> > first_feats = propagator1.PropagateAllReportFailures(failures1);
    BOOST_CHECK_EQUAL(first_feats.size(), 1u);
    // the two features that do not make it must be handed back to the caller
    BOOST_CHECK_EQUAL(failures1.size(), 2u);
    // fatal: the messages are accessed by index right below
    BOOST_REQUIRE_EQUAL(listener.Count(), 2u);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-20 to lcl|good1");
    BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good1");
    listener.Clear();

    ++b;
    edit::CFeaturePropagator propagator2(src, *b, *align, false, false, true, true, &listener);
    vector<CConstRef<CSeq_feat> > failures2;
    vector<CRef<CSeq_feat> > second_feats = propagator2.PropagateAllReportFailures(failures2);
    BOOST_CHECK_EQUAL(second_feats.size(), 2u);
    // only the first feature fails for the middle sequence
    BOOST_CHECK_EQUAL(failures2.size(), 1u);
    // fatal: the message is accessed by index right below
    BOOST_REQUIRE_EQUAL(listener.Count(), 1u);
    BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good3:1-10 to lcl|good2");
}
1570 
s_FindHighestFeatId(const CSeq_entry_Handle entry)1571 CObject_id::TId s_FindHighestFeatId(const CSeq_entry_Handle entry)
1572 {
1573     CObject_id::TId id = 0;
1574     for (CFeat_CI feat_it(entry); feat_it; ++feat_it) {
1575         if (feat_it->IsSetId()) {
1576             const CFeat_id& feat_id = feat_it->GetId();
1577             if (feat_id.IsLocal() && feat_id.GetLocal().IsId() && feat_id.GetLocal().GetId() > id) {
1578                 id = feat_id.GetLocal().GetId();
1579             }
1580         }
1581     }
1582     return id;
1583 }
1584 
GetGoodSeqEntryWithFeatureIds(int & feat_id)1585 CSeq_entry_Handle GetGoodSeqEntryWithFeatureIds(int& feat_id)
1586 {
1587     size_t front_insert = 5;
1588     CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
1589     CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
1590 
1591     CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
1592     CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();
1593 
1594     CRef<CSeq_loc> main_loc(new CSeq_loc());
1595     main_loc->SetInt().SetFrom(0);
1596     main_loc->SetInt().SetTo(15);
1597     main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1598 
1599     CRef<CSeq_loc> subloc(new CSeq_loc());
1600     subloc->SetInt().SetFrom(3);
1601     subloc->SetInt().SetTo(5);
1602     subloc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1603 
1604     CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
1605     gene->SetData().SetGene().SetLocus("gene locus");
1606     gene->SetId().SetLocal().SetId(++feat_id);
1607     gene->SetLocation().Assign(*main_loc);
1608 
1609     CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
1610     mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
1611     mrna->SetId().SetLocal().SetId(++feat_id);
1612     mrna->SetLocation().Assign(*main_loc);
1613 
1614     CRef<CSeq_feat> cds_withoutprot = unit_test_util::AddMiscFeature(first, 15);
1615     cds_withoutprot->SetComment("CDS without product");
1616     cds_withoutprot->SetData().SetCdregion();
1617     cds_withoutprot->SetId().SetLocal().SetId(++feat_id);
1618     cds_withoutprot->SetLocation().SetInt().SetFrom(10);
1619     cds_withoutprot->SetLocation().SetInt().SetTo(25);
1620     cds_withoutprot->SetLocation().SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1621 
1622     CRef<CSeq_feat> cds_withprot = unit_test_util::MakeMiscFeature(unit_test_util::IdFromEntry(first), 15);
1623     cds_withprot->SetComment("CDS with product");
1624     cds_withprot->SetData().SetCdregion();
1625     cds_withprot->SetId().SetLocal().SetId(++feat_id);
1626     cds_withprot->SetLocation().Assign(*main_loc);
1627 
1628     // constructing the protein sequence
1629     CRef<CSeq_entry> prot_entry(new CSeq_entry());
1630     prot_entry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
1631     prot_entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_aa);
1632     prot_entry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("-WPKL");
1633     prot_entry->SetSeq().SetInst().SetLength(5);
1634 
1635     const string prot_id = "good1_1";
1636     CRef<CSeq_id> id(new CSeq_id());
1637     id->SetLocal().SetStr(prot_id);
1638     prot_entry->SetSeq().SetId().push_back(id);
1639 
1640     CRef<CSeqdesc> mdesc(new CSeqdesc());
1641     mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
1642     prot_entry->SetSeq().SetDescr().Set().push_back(mdesc);
1643 
1644     CRef<CSeq_feat> prot_feat(new CSeq_feat());
1645     prot_feat->SetData().SetProt().SetName().push_back("hypothetical protein");
1646     prot_feat->SetLocation().SetInt().SetId().Assign(*(prot_entry->GetSeq().GetId().front()));
1647     prot_feat->SetLocation().SetInt().SetFrom(0);
1648     prot_feat->SetLocation().SetInt().SetTo(prot_entry->GetSeq().GetInst().GetLength() - 1);
1649     prot_feat->SetId().SetLocal().SetId(++feat_id);
1650     unit_test_util::AddFeat(prot_feat, prot_entry);
1651 
1652     cds_withprot->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
1653 
1654     CRef<CBioseq_set> set(new CBioseq_set());
1655     set->SetClass(CBioseq_set::eClass_nuc_prot);
1656     set->SetSeq_set().push_back(first);
1657     set->SetSeq_set().push_back(prot_entry);
1658     CRef<CSeq_entry> set_entry(new CSeq_entry());
1659     set_entry->SetSet(*set);
1660 
1661     unit_test_util::AddFeat(cds_withprot, set_entry);
1662 
1663     auto it = entry->SetSet().SetSeq_set().begin();
1664     it = entry->SetSet().SetSeq_set().erase(it);
1665 
1666     entry->SetSet().SetSeq_set().insert(it, set_entry);
1667 
1668     //cout << MSerial_AsnText << *entry << endl;
1669 
1670     // add entry to the scope
1671     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1672     CRef<CScope> scope(new CScope(*object_manager));
1673     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1674 
1675     return seh;
1676 }
1677 
1678 
BOOST_AUTO_TEST_CASE(Test_PropagateFeatsTo2Sequences_UsingFeatureIds)1679 BOOST_AUTO_TEST_CASE(Test_PropagateFeatsTo2Sequences_UsingFeatureIds)
1680 {
1681     int feat_id = 0;
1682     CSeq_entry_Handle seh = GetGoodSeqEntryWithFeatureIds(feat_id);
1683     CScope& scope = seh.GetScope();
1684 
1685     BOOST_CHECK(feat_id == 5);
1686     BOOST_TEST_MESSAGE("A set containing " + NStr::IntToString(feat_id) + " five features");
1687     CFeat_CI gene_it(seh, SAnnotSelector(CSeqFeatData::e_Gene));
1688     CConstRef<CSeq_feat> gene = gene_it->GetOriginalSeq_feat();
1689     CFeat_CI mrna_it(seh, SAnnotSelector(CSeqFeatData::eSubtype_mRNA));
1690     CConstRef<CSeq_feat> mrna = mrna_it->GetOriginalSeq_feat();
1691     CFeat_CI cds_it(seh, SAnnotSelector(CSeqFeatData::e_Cdregion));
1692     CConstRef<CSeq_feat> cds_withoutprot;
1693     CConstRef<CSeq_feat> cds_withprot;
1694     CConstRef<CSeq_feat> protein;
1695     for (; cds_it; ++cds_it) {
1696         if (cds_it->IsSetProduct()) {
1697             cds_withprot = cds_it->GetOriginalSeq_feat();
1698             CFeat_CI prot_it(scope.GetBioseqHandle(cds_it->GetProduct()));
1699             protein = prot_it->GetOriginalSeq_feat();
1700         }
1701         else {
1702             cds_withoutprot = cds_it->GetOriginalSeq_feat();
1703         }
1704     }
1705 
1706     BOOST_CHECK(!gene.IsNull());
1707     BOOST_CHECK(!mrna.IsNull());
1708     BOOST_CHECK(!cds_withoutprot.IsNull());
1709     BOOST_CHECK(!cds_withprot.IsNull());
1710     BOOST_CHECK(!protein.IsNull());
1711 
1712     CAlign_CI align_it(seh);
1713     CConstRef<CSeq_align> align(&*align_it);
1714     BOOST_CHECK(!align.IsNull());
1715 
1716     CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
1717     BOOST_CHECK(maxFeatId == feat_id);
1718 
1719     CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
1720     CBioseq_Handle src_bseq = *b_iter;
1721     CBioseq_Handle target_bseq1 = *(++b_iter);
1722     CBioseq_Handle target_bseq2 = *(++b_iter);
1723 
1724 
1725     BOOST_TEST_MESSAGE("Propagating to the second sequence");
1726     CMessageListener_Basic listener;
1727     edit::CFeaturePropagator propagator1(src_bseq, target_bseq1, *align, true, true, true, true, &listener, &maxFeatId);
1728     CRef<CSeq_feat> propagated_gene1 = propagator1.Propagate(*gene);
1729 
1730     BOOST_CHECK_EQUAL(listener.Count(), 0);
1731     BOOST_CHECK(propagated_gene1->IsSetId());
1732     BOOST_CHECK(propagated_gene1->GetId().GetLocal().GetId() == (++feat_id));
1733 
1734     CRef<CSeq_feat> propagated_mrna1 = propagator1.Propagate(*mrna);
1735     BOOST_CHECK_EQUAL(listener.Count(), 0);
1736     BOOST_CHECK(propagated_mrna1->IsSetId());
1737     BOOST_CHECK(propagated_mrna1->GetId().GetLocal().GetId() == ++feat_id);
1738 
1739     CRef<CSeq_feat> propagated_cds_woprot1 = propagator1.Propagate(*cds_withoutprot);
1740     BOOST_CHECK_EQUAL(listener.Count(), 0);
1741     BOOST_CHECK(!propagated_cds_woprot1->IsSetProduct());
1742     BOOST_CHECK(propagated_cds_woprot1->IsSetId());
1743     BOOST_CHECK(propagated_cds_woprot1->GetId().GetLocal().GetId() == ++feat_id);
1744 
1745     CRef<CSeq_feat> propagated_cds_wprot1 = propagator1.Propagate(*cds_withprot);
1746     BOOST_CHECK_EQUAL(listener.Count(), 0);
1747     BOOST_CHECK(!propagated_cds_wprot1->IsSetProduct()); // this cds also does not have a product at this point
1748     BOOST_CHECK(propagated_cds_wprot1->IsSetId());
1749     BOOST_CHECK(propagated_cds_wprot1->GetId().GetLocal().GetId() == ++feat_id);
1750 
1751     CRef<CSeq_feat> propagated_prot1 = propagator1.ConstructProteinFeatureForPropagatedCodingRegion(*cds_withprot, *propagated_cds_wprot1);
1752     BOOST_CHECK(propagated_prot1->IsSetId());
1753     BOOST_CHECK(propagated_prot1->GetId().GetLocal().GetId() == ++feat_id);
1754     listener.Clear();
1755 
1756     BOOST_TEST_MESSAGE("Propagating to the third sequence");
1757     edit::CFeaturePropagator propagator2(src_bseq, target_bseq2, *align, true, true, true, true, &listener, &maxFeatId);
1758     CRef<CSeq_feat> propagated_gene2 = propagator2.Propagate(*gene);
1759     BOOST_CHECK_EQUAL(listener.Count(), 0);
1760     BOOST_CHECK(propagated_gene2->IsSetId());
1761     BOOST_CHECK(propagated_gene2->GetId().GetLocal().GetId() == (++feat_id));
1762 
1763     CRef<CSeq_feat> propagated_mrna2 = propagator2.Propagate(*mrna);
1764     BOOST_CHECK_EQUAL(listener.Count(), 0);
1765     BOOST_CHECK(propagated_mrna2->IsSetId());
1766     BOOST_CHECK(propagated_mrna2->GetId().GetLocal().GetId() == ++feat_id);
1767     listener.Clear();
1768 
1769     CRef<CSeq_feat> propagated_cds_woprot2 = propagator2.Propagate(*cds_withoutprot);
1770     BOOST_CHECK_EQUAL(listener.Count(), 0);
1771     BOOST_CHECK(!propagated_cds_woprot2->IsSetProduct());
1772     BOOST_CHECK(propagated_cds_woprot2->IsSetId());
1773     BOOST_CHECK(propagated_cds_woprot2->GetId().GetLocal().GetId() == ++feat_id);
1774 
1775     CRef<CSeq_feat> propagated_cds_wprot2 = propagator2.Propagate(*cds_withprot);
1776     BOOST_CHECK_EQUAL(listener.Count(), 0);
1777     BOOST_CHECK(!propagated_cds_wprot2->IsSetProduct());
1778     BOOST_CHECK(propagated_cds_wprot2->IsSetId());
1779     BOOST_CHECK(propagated_cds_wprot2->GetId().GetLocal().GetId() == ++feat_id);
1780 
1781     CRef<CSeq_feat> propagated_prot2 = propagator2.ConstructProteinFeatureForPropagatedCodingRegion(*cds_withprot, *propagated_cds_wprot2);
1782     BOOST_CHECK(propagated_prot2->IsSetId());
1783     BOOST_CHECK(propagated_prot2->GetId().GetLocal().GetId() == ++feat_id);
1784     listener.Clear();
1785 }
1786 
1787 
// Propagates all features of the first nucleotide sequence to the second one
// with PropagateAll() and checks that each propagated feature receives the
// next unused local feature id, in the order the features are returned.
BOOST_AUTO_TEST_CASE(Test_PropagateAllFeatures_UsingFeatureIds)
{
    int feat_id = 0;
    CSeq_entry_Handle seh = GetGoodSeqEntryWithFeatureIds(feat_id);
    CScope& scope = seh.GetScope();

    BOOST_CHECK_EQUAL(feat_id, 5);
    BOOST_TEST_MESSAGE("A set containing " + NStr::IntToString(feat_id) + " features");

    // Iterators are only dereferenced when they point at something; a missing
    // feature is then caught by the BOOST_REQUIRE block below.
    CConstRef<CSeq_feat> gene;
    CFeat_CI gene_it(seh, SAnnotSelector(CSeqFeatData::e_Gene));
    if (gene_it) {
        gene = gene_it->GetOriginalSeq_feat();
    }

    CConstRef<CSeq_feat> mrna;
    CFeat_CI mrna_it(seh, SAnnotSelector(CSeqFeatData::eSubtype_mRNA));
    if (mrna_it) {
        mrna = mrna_it->GetOriginalSeq_feat();
    }

    CConstRef<CSeq_feat> cds_withoutprot;
    CConstRef<CSeq_feat> cds_withprot;
    CConstRef<CSeq_feat> protein;
    for (CFeat_CI cds_it(seh, SAnnotSelector(CSeqFeatData::e_Cdregion)); cds_it; ++cds_it) {
        if (cds_it->IsSetProduct()) {
            cds_withprot = cds_it->GetOriginalSeq_feat();
            CFeat_CI prot_it(scope.GetBioseqHandle(cds_it->GetProduct()));
            if (prot_it) {
                protein = prot_it->GetOriginalSeq_feat();
            }
        }
        else {
            cds_withoutprot = cds_it->GetOriginalSeq_feat();
        }
    }

    CConstRef<CSeq_align> align;
    CAlign_CI align_it(seh);
    if (align_it) {
        align.Reset(&*align_it);
    }

    BOOST_CHECK(!gene.IsNull());
    BOOST_CHECK(!mrna.IsNull());
    BOOST_CHECK(!cds_withoutprot.IsNull());
    BOOST_CHECK(!cds_withprot.IsNull());
    BOOST_CHECK(!protein.IsNull());
    // fatal: the alignment is dereferenced to build the propagator
    BOOST_REQUIRE(!align.IsNull());

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
    BOOST_CHECK_EQUAL(maxFeatId, feat_id);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq = *(++b_iter);

    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
    vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateAll();
    BOOST_CHECK_EQUAL(listener.Count(), 0u);

    // it's 'feat_id-1' because the protein is not propagated
    BOOST_CHECK_EQUAL(propagated_feats.size(), static_cast<size_t>(feat_id - 1));
    for (const auto& feat : propagated_feats) {
        BOOST_REQUIRE(!feat.IsNull());
        BOOST_CHECK(feat->IsSetId());
        const int expected_id = ++feat_id;
        BOOST_CHECK_EQUAL(feat->GetId().GetLocal().GetId(), expected_id);
    }
}
1845 
CreateXRefLink(CSeq_feat & from_feat,CSeq_feat & to_feat)1846 void CreateXRefLink(CSeq_feat& from_feat, CSeq_feat& to_feat)
1847 {
1848     CRef<CSeqFeatXref> xref(new CSeqFeatXref);
1849     xref->SetId(to_feat.SetId());
1850     from_feat.SetXref().push_back(xref);
1851 }
1852 
BOOST_AUTO_TEST_CASE(Test_Propagate2FeaturesWithXrefs)1853 BOOST_AUTO_TEST_CASE(Test_Propagate2FeaturesWithXrefs)
1854 {
1855     size_t front_insert = 5;
1856     CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
1857     CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
1858 
1859     CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
1860     CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();
1861 
1862     CRef<CSeq_loc> main_loc(new CSeq_loc());
1863     main_loc->SetInt().SetFrom(0);
1864     main_loc->SetInt().SetTo(15);
1865     main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1866 
1867     CRef<CSeq_loc> subloc(new CSeq_loc());
1868     subloc->SetInt().SetFrom(3);
1869     subloc->SetInt().SetTo(5);
1870     subloc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1871 
1872     int feat_id = 0;
1873     CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
1874     gene->SetData().SetGene().SetLocus("gene locus");
1875     gene->SetId().SetLocal().SetId(++feat_id);
1876     gene->SetLocation().Assign(*main_loc);
1877 
1878     CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
1879     mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
1880     mrna->SetId().SetLocal().SetId(++feat_id);
1881     mrna->SetLocation().Assign(*main_loc);
1882 
1883     CreateXRefLink(*mrna, *gene);
1884 
1885     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1886     CRef<CScope> scope(new CScope(*object_manager));
1887     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1888 
1889     CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
1890 
1891     CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
1892     CBioseq_Handle src_bseq = *b_iter;
1893     CBioseq_Handle target_bseq = *(++b_iter);
1894 
1895     BOOST_TEST_MESSAGE("When both mrna and gene are propagated");
1896     CMessageListener_Basic listener;
1897     edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
1898     vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList({ gene, mrna });
1899     BOOST_CHECK_EQUAL(listener.Count(), 0);
1900 
1901     auto prop_gene = propagated_feats.front();
1902     BOOST_CHECK(prop_gene->IsSetId());
1903     BOOST_CHECK(prop_gene->GetId().GetLocal().GetId() == (++feat_id));
1904     BOOST_CHECK(!prop_gene->IsSetXref());
1905 
1906     auto prop_mrna = propagated_feats.back();
1907     BOOST_CHECK(prop_mrna->IsSetId());
1908     BOOST_CHECK(prop_mrna->GetId().GetLocal().GetId() == (++feat_id));
1909     BOOST_TEST_MESSAGE("the Xref is also propagated");
1910     BOOST_CHECK(prop_mrna->IsSetXref());
1911     CSeqFeatXref xref;
1912     xref.SetId(prop_gene->SetId());
1913     BOOST_CHECK(prop_mrna->HasSeqFeatXref(xref.GetId()));
1914     listener.Clear();
1915 }
1916 
// An mRNA carries a cross-reference to its gene.  When the mRNA is propagated
// without the gene, the propagated mRNA must not carry any cross-reference.
BOOST_AUTO_TEST_CASE(Test_Propagate1FeatureWithXrefs)
{
    size_t front_insert = 5;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();

    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();

    CRef<CSeq_loc> main_loc(new CSeq_loc());
    main_loc->SetInt().SetFrom(0);
    main_loc->SetInt().SetTo(15);
    main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));

    int feat_id = 0;
    CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
    gene->SetData().SetGene().SetLocus("gene locus");
    gene->SetId().SetLocal().SetId(++feat_id);
    gene->SetLocation().Assign(*main_loc);

    CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
    mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
    mrna->SetId().SetLocal().SetId(++feat_id);
    mrna->SetLocation().Assign(*main_loc);

    CreateXRefLink(*mrna, *gene);

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);

    CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
    CBioseq_Handle src_bseq = *b_iter;
    CBioseq_Handle target_bseq = *(++b_iter);

    BOOST_TEST_MESSAGE("When the mrna is propagated alone");
    CMessageListener_Basic listener;
    edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
    vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList({ mrna });
    BOOST_CHECK_EQUAL(listener.Count(), 0u);

    // fatal: front() must not be called on an empty result
    BOOST_REQUIRE(!propagated_feats.empty());

    const CRef<CSeq_feat>& prop_mrna = propagated_feats.front();
    BOOST_CHECK(prop_mrna->IsSetId());
    BOOST_CHECK_EQUAL(prop_mrna->GetId().GetLocal().GetId(), ++feat_id);
    BOOST_TEST_MESSAGE("the Xref is missing");
    BOOST_CHECK(!prop_mrna->IsSetXref());
}
1972 
BOOST_AUTO_TEST_CASE(Test_Propagate2FeaturesWithXrefs_RevOrder)1973 BOOST_AUTO_TEST_CASE(Test_Propagate2FeaturesWithXrefs_RevOrder)
1974 {
1975     size_t front_insert = 5;
1976     CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
1977     CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
1978 
1979     CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
1980     CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();
1981 
1982     CRef<CSeq_loc> main_loc(new CSeq_loc());
1983     main_loc->SetInt().SetFrom(0);
1984     main_loc->SetInt().SetTo(15);
1985     main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1986 
1987     CRef<CSeq_loc> subloc(new CSeq_loc());
1988     subloc->SetInt().SetFrom(3);
1989     subloc->SetInt().SetTo(5);
1990     subloc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
1991 
1992     int feat_id = 0;
1993     CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
1994     gene->SetData().SetGene().SetLocus("gene locus");
1995     gene->SetId().SetLocal().SetId(++feat_id);
1996     gene->SetLocation().Assign(*main_loc);
1997 
1998     CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
1999     mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
2000     mrna->SetId().SetLocal().SetId(++feat_id);
2001     mrna->SetLocation().Assign(*main_loc);
2002 
2003     CreateXRefLink(*gene, *mrna);
2004 
2005     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2006     CRef<CScope> scope(new CScope(*object_manager));
2007     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2008 
2009     CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
2010 
2011     CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
2012     CBioseq_Handle src_bseq = *b_iter;
2013     CBioseq_Handle target_bseq = *(++b_iter);
2014 
2015     CMessageListener_Basic listener;
2016     edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
2017     vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList({ gene, mrna });
2018     BOOST_CHECK_EQUAL(listener.Count(), 0);
2019 
2020     auto prop_gene = propagated_feats.front();
2021     BOOST_CHECK_EQUAL(listener.Count(), 0);
2022     BOOST_CHECK(prop_gene->IsSetId());
2023     BOOST_CHECK(prop_gene->GetId().GetLocal().GetId() == (++feat_id));
2024     BOOST_CHECK(prop_gene->IsSetXref());
2025 
2026     auto prop_mrna = propagated_feats.back();
2027     CSeqFeatXref xref;
2028     xref.SetId(prop_mrna->SetId());
2029     BOOST_CHECK(prop_gene->HasSeqFeatXref(xref.GetId()));
2030 
2031     BOOST_CHECK(prop_mrna->IsSetId());
2032     BOOST_CHECK(prop_mrna->GetId().GetLocal().GetId() == (++feat_id));
2033     BOOST_CHECK(!prop_mrna->IsSetXref());
2034     listener.Clear();
2035 }
2036 
BOOST_AUTO_TEST_CASE(Test_PropagateFeaturesWithXrefsWithCDS)2037 BOOST_AUTO_TEST_CASE(Test_PropagateFeaturesWithXrefsWithCDS)
2038 {
2039     size_t front_insert = 5;
2040     CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
2041     CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
2042 
2043     CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
2044     CRef<CSeq_entry> last = entry->SetSet().SetSeq_set().back();
2045 
2046     CRef<CSeq_loc> main_loc(new CSeq_loc());
2047     main_loc->SetInt().SetFrom(0);
2048     main_loc->SetInt().SetTo(15);
2049     main_loc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
2050 
2051     CRef<CSeq_loc> subloc(new CSeq_loc());
2052     subloc->SetInt().SetFrom(3);
2053     subloc->SetInt().SetTo(5);
2054     subloc->SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
2055 
2056     int feat_id = 0;
2057     CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(first, 15);
2058     gene->SetData().SetGene().SetLocus("gene locus");
2059     gene->SetId().SetLocal().SetId(++feat_id);
2060     gene->SetLocation().Assign(*main_loc);
2061 
2062     CRef<CSeq_feat> mrna = unit_test_util::AddMiscFeature(first, 15);
2063     mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
2064     mrna->SetId().SetLocal().SetId(++feat_id);
2065     mrna->SetLocation().Assign(*main_loc);
2066 
2067     CreateXRefLink(*mrna, *gene);
2068 
2069     CRef<CSeq_feat> cds_withoutprot = unit_test_util::AddMiscFeature(first, 15);
2070     cds_withoutprot->SetData().SetCdregion();
2071     cds_withoutprot->SetId().SetLocal().SetId(++feat_id);
2072     cds_withoutprot->SetLocation().SetInt().SetFrom(10);
2073     cds_withoutprot->SetLocation().SetInt().SetTo(25);
2074     cds_withoutprot->SetLocation().SetInt().SetId().Assign(*(first->GetSeq().GetId().front()));
2075 
2076     CRef<CSeq_feat> cds_withprot = unit_test_util::MakeMiscFeature(unit_test_util::IdFromEntry(first), 15);
2077     cds_withprot->SetComment("CDS with product");
2078     cds_withprot->SetData().SetCdregion();
2079     cds_withprot->SetId().SetLocal().SetId(++feat_id);
2080     cds_withprot->SetLocation().Assign(*main_loc);
2081 
2082     CreateXRefLink(*cds_withprot, *gene);
2083     CreateXRefLink(*mrna, *cds_withprot);
2084     CreateXRefLink(*cds_withprot, *mrna);
2085 
2086     // constructing the protein sequence
2087     CRef<CSeq_entry> prot_entry(new CSeq_entry());
2088     prot_entry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
2089     prot_entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_aa);
2090     prot_entry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("-WPKL");
2091     prot_entry->SetSeq().SetInst().SetLength(5);
2092 
2093     const string prot_id = "good1_1";
2094     CRef<CSeq_id> id(new CSeq_id());
2095     id->SetLocal().SetStr(prot_id);
2096     prot_entry->SetSeq().SetId().push_back(id);
2097 
2098     CRef<CSeqdesc> mdesc(new CSeqdesc());
2099     mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
2100     prot_entry->SetSeq().SetDescr().Set().push_back(mdesc);
2101 
2102     CRef<CSeq_feat> prot_feat(new CSeq_feat());
2103     prot_feat->SetData().SetProt().SetName().push_back("hypothetical protein");
2104     prot_feat->SetLocation().SetInt().SetId().Assign(*(prot_entry->GetSeq().GetId().front()));
2105     prot_feat->SetLocation().SetInt().SetFrom(0);
2106     prot_feat->SetLocation().SetInt().SetTo(prot_entry->GetSeq().GetInst().GetLength() - 1);
2107     prot_feat->SetId().SetLocal().SetId(++feat_id);
2108     unit_test_util::AddFeat(prot_feat, prot_entry);
2109 
2110     cds_withprot->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
2111 
2112     CRef<CBioseq_set> set(new CBioseq_set());
2113     set->SetClass(CBioseq_set::eClass_nuc_prot);
2114     set->SetSeq_set().push_back(first);
2115     set->SetSeq_set().push_back(prot_entry);
2116     CRef<CSeq_entry> set_entry(new CSeq_entry());
2117     set_entry->SetSet(*set);
2118 
2119     unit_test_util::AddFeat(cds_withprot, set_entry);
2120 
2121     auto it = entry->SetSet().SetSeq_set().begin();
2122     it = entry->SetSet().SetSeq_set().erase(it);
2123     entry->SetSet().SetSeq_set().insert(it, set_entry);
2124 
2125     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2126     CRef<CScope> scope(new CScope(*object_manager));
2127     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2128 
2129     CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
2130 
2131     CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
2132     CBioseq_Handle src_bseq = *b_iter;
2133     CBioseq_Handle target_bseq = *(++b_iter);
2134 
2135     CMessageListener_Basic listener;
2136     edit::CFeaturePropagator propagator(src_bseq, target_bseq, *align, true, true, true, true, &listener, &maxFeatId);
2137     vector<CConstRef<CSeq_feat>> feat_list{ gene, mrna, cds_withoutprot, cds_withprot };
2138     vector<CRef<CSeq_feat>> propagated_feats = propagator.PropagateFeatureList(feat_list);
2139     BOOST_CHECK_EQUAL(listener.Count(), 0);
2140 
2141     BOOST_CHECK(propagated_feats.size() == feat_id );
2142 
2143     auto feat_it = propagated_feats.begin();
2144     auto prop_gene = *feat_it;
2145     BOOST_CHECK(prop_gene->IsSetId());
2146     BOOST_CHECK(prop_gene->GetId().GetLocal().GetId() == (++feat_id));
2147     BOOST_CHECK(!prop_gene->IsSetXref());
2148 
2149     ++feat_it;
2150     auto prop_mrna = *feat_it;
2151     BOOST_CHECK(prop_mrna->IsSetId());
2152     BOOST_CHECK(prop_mrna->GetId().GetLocal().GetId() == (++feat_id));
2153     BOOST_CHECK(prop_mrna->IsSetXref());
2154     BOOST_CHECK(prop_mrna->GetXref().size() == 2);
2155 
2156     ++feat_it;
2157     auto prop_cds_withoutprot = *feat_it;
2158     BOOST_CHECK(prop_cds_withoutprot->IsSetId());
2159     BOOST_CHECK(prop_cds_withoutprot->GetId().GetLocal().GetId() == (++feat_id));
2160     BOOST_CHECK(!prop_cds_withoutprot->IsSetXref());
2161 
2162     ++feat_it;
2163     auto prop_cds = *feat_it;
2164     BOOST_CHECK(prop_cds->IsSetId());
2165     BOOST_CHECK(prop_cds->GetId().GetLocal().GetId() == (++feat_id));
2166     BOOST_CHECK(prop_cds->IsSetXref());
2167 
2168     CSeqFeatXref mrna_xref1;
2169     mrna_xref1.SetId(prop_gene->SetId());
2170     BOOST_CHECK(prop_mrna->HasSeqFeatXref(mrna_xref1.GetId()));
2171     mrna_xref1.SetId(prop_cds->SetId());
2172     BOOST_CHECK(prop_mrna->HasSeqFeatXref(mrna_xref1.GetId()));
2173 
2174     CSeqFeatXref cds_xref;
2175     cds_xref.SetId(prop_gene->SetId());
2176     BOOST_CHECK(prop_cds->HasSeqFeatXref(cds_xref.GetId()));
2177     cds_xref.SetId(prop_mrna->SetId());
2178     BOOST_CHECK(prop_cds->HasSeqFeatXref(cds_xref.GetId()));
2179 
2180     ++feat_it;
2181     auto prop_protein = *feat_it;
2182     BOOST_CHECK(prop_protein->IsSetId());
2183     BOOST_CHECK(prop_protein->GetId().GetLocal().GetId() == (++feat_id));
2184     listener.Clear();
2185 }
2186 
2187 
BuildAlignmentWithInternalGap()2188 CRef<CSeq_entry> BuildAlignmentWithInternalGap()
2189 {
2190     CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSet();
2191 
2192     CRef<objects::CSeq_entry> seq4 = unit_test_util::BuildGoodSeq();
2193     unit_test_util::ChangeId(seq4, "4");
2194     entry->SetSet().SetSeq_set().push_back(seq4);
2195 
2196     CRef<objects::CSeq_align> align(new CSeq_align());
2197     align->SetType(objects::CSeq_align::eType_global);
2198     align->SetDim(entry->GetSet().GetSeq_set().size());
2199 
2200     // assign IDs
2201     for (auto& s : entry->SetSet().SetSeq_set()) {
2202         CRef<CSeq_id> id(new CSeq_id());
2203         id->Assign(*(s->GetSeq().GetId().front()));
2204         align->SetSegs().SetDenseg().SetIds().push_back(id);
2205     }
2206 
2207     auto s = entry->SetSet().SetSeq_set().begin();
2208     auto first_seq = (*s)->GetSeq().GetInst().GetSeq_data().GetIupacna().Get(); // original
2209     s++;
2210     // second sequence: remove beginning
2211     (*s)->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(first_seq.substr(20, 40));
2212     (*s)->SetSeq().SetInst().SetLength(40);
2213     s++;
2214     // third sequence: remove part of the middle
2215     (*s)->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(first_seq.substr(0, 20) + first_seq.substr(40, 20));
2216     (*s)->SetSeq().SetInst().SetLength(40);
2217     s++;
2218     // fourth sequence: remove end
2219     (*s)->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(first_seq.substr(0, 40));
2220     (*s)->SetSeq().SetInst().SetLength(40);
2221 
2222     // now make first sequence longer than alignment
2223     CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
2224     first->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AAAAATTTTTGGGGGCCCCC" + first_seq + "AAAAATTTTTGGGGGCCCCC");
2225     first->SetSeq().SetInst().SetLength(100);
2226 
2227 
2228     auto& denseg = align->SetSegs().SetDenseg();
2229     denseg.SetNumseg(3);
2230     denseg.SetLens().push_back(20);
2231     denseg.SetLens().push_back(20);
2232     denseg.SetLens().push_back(20);
2233     denseg.SetDim(entry->GetSet().GetSeq_set().size());
2234     // first segment - second sequence missing
2235     denseg.SetStarts().push_back(20);
2236     denseg.SetStarts().push_back(-1);
2237     denseg.SetStarts().push_back(0);
2238     denseg.SetStarts().push_back(0);
2239     // second segment - third sequence is gap
2240     denseg.SetStarts().push_back(40);
2241     denseg.SetStarts().push_back(0);
2242     denseg.SetStarts().push_back(-1);
2243     denseg.SetStarts().push_back(20);
2244     // third segment - fourth sequence is gap
2245     denseg.SetStarts().push_back(60);
2246     denseg.SetStarts().push_back(20);
2247     denseg.SetStarts().push_back(20);
2248     denseg.SetStarts().push_back(-1);
2249 
2250     CRef<CSeq_annot> annot(new CSeq_annot());
2251     annot->SetData().SetAlign().push_back(align);
2252     entry->SetSet().SetAnnot().push_back(annot);
2253     return entry;
2254 }
2255 
2256 
BOOST_AUTO_TEST_CASE(Test_DoNotPropagateToGap_RW_887)2257 BOOST_AUTO_TEST_CASE(Test_DoNotPropagateToGap_RW_887)
2258 {
2259     CRef<CSeq_entry> entry = BuildAlignmentWithInternalGap();
2260     CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();
2261     CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
2262 
2263     // before alignment
2264     CRef<CSeq_feat> gene1 = unit_test_util::AddMiscFeature(first);
2265     gene1->SetData().SetGene().SetLocus("gene locus");
2266     gene1->SetLocation().SetInt().SetFrom(0);
2267     gene1->SetLocation().SetInt().SetTo(19);
2268 
2269     // first gap
2270     CRef<CSeq_feat> gene2 = unit_test_util::AddMiscFeature(first);
2271     gene2->SetData().SetGene().SetLocus("gene locus");
2272     gene2->SetLocation().SetInt().SetFrom(20);
2273     gene2->SetLocation().SetInt().SetTo(39);
2274 
2275     // second gap
2276     CRef<CSeq_feat> gene3 = unit_test_util::AddMiscFeature(first);
2277     gene3->SetData().SetGene().SetLocus("gene locus");
2278     gene3->SetLocation().SetInt().SetFrom(40);
2279     gene3->SetLocation().SetInt().SetTo(59);
2280 
2281     // third gap
2282     CRef<CSeq_feat> gene4 = unit_test_util::AddMiscFeature(first);
2283     gene4->SetData().SetGene().SetLocus("gene locus");
2284     gene4->SetLocation().SetInt().SetFrom(60);
2285     gene4->SetLocation().SetInt().SetTo(79);
2286 
2287     // after alignment
2288     CRef<CSeq_feat> gene5 = unit_test_util::AddMiscFeature(first);
2289     gene5->SetData().SetGene().SetLocus("gene locus");
2290     gene5->SetLocation().SetInt().SetFrom(80);
2291     gene5->SetLocation().SetInt().SetTo(99);
2292 
2293     vector<CConstRef<CSeq_feat>> feat_list{ gene1, gene2, gene3, gene4, gene5 };
2294 
2295     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2296     CRef<CScope> scope(new CScope(*object_manager));
2297     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2298 
2299     CObject_id::TId maxFeatId = s_FindHighestFeatId(seh);
2300 
2301 
2302     CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
2303     CBioseq_Handle src_bseq = *b_iter;
2304 
2305     ++b_iter;
2306 
2307     CMessageListener_Basic listener;
2308     edit::CFeaturePropagator propagator_to_2(src_bseq, *b_iter, *align, true, true, true, true, &listener);
2309     vector<CRef<CSeq_feat>> propagated_feats = propagator_to_2.PropagateFeatureList(feat_list);
2310     BOOST_CHECK_EQUAL(listener.Count(), 3);
2311     BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good1:1-20 to lcl|good2");
2312     BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good1:21-40 to lcl|good2");
2313     BOOST_CHECK_EQUAL(listener.GetMessage(2).GetText(), "Unable to propagate location of feature lcl|good1:81-100 to lcl|good2");
2314     listener.Clear();
2315 
2316     ++b_iter;
2317     edit::CFeaturePropagator propagator_to_3(src_bseq, *b_iter, *align, true, true, true, true, &listener);
2318     propagated_feats = propagator_to_3.PropagateFeatureList(feat_list);
2319     BOOST_CHECK_EQUAL(listener.Count(), 3);
2320     BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good1:1-20 to lcl|good3");
2321     BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good1:41-60 to lcl|good3");
2322     BOOST_CHECK_EQUAL(listener.GetMessage(2).GetText(), "Unable to propagate location of feature lcl|good1:81-100 to lcl|good3");
2323     listener.Clear();
2324 
2325     ++b_iter;
2326     edit::CFeaturePropagator propagator_to_4(src_bseq, *b_iter, *align, true, true, true, true, &listener);
2327     propagated_feats = propagator_to_4.PropagateFeatureList(feat_list);
2328     BOOST_CHECK_EQUAL(listener.Count(), 3);
2329     BOOST_CHECK_EQUAL(listener.GetMessage(0).GetText(), "Unable to propagate location of feature lcl|good1:1-20 to lcl|good4");
2330     BOOST_CHECK_EQUAL(listener.GetMessage(1).GetText(), "Unable to propagate location of feature lcl|good1:61-80 to lcl|good4");
2331     BOOST_CHECK_EQUAL(listener.GetMessage(2).GetText(), "Unable to propagate location of feature lcl|good1:81-100 to lcl|good4");
2332     listener.Clear();
2333 
2334 }
2335 
2336 
#if 0
// checked in by mistake
//
// Disabled test.  It gives a misc feature a two-part mix location (its
// original interval followed by an adjacent 16-base interval), propagates it
// to every other sequence of the aligned set, and compares each result with a
// single expected interval on that target.
BOOST_AUTO_TEST_CASE(Test_MergeIntervals)
{
    const size_t front_insert = 5;
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSetWithAlign(front_insert);
    CRef<CSeq_align> align = entry->SetSet().SetAnnot().front()->SetData().SetAlign().front();

    CRef<CSeq_entry> first = entry->SetSet().SetSeq_set().front();
    CRef<CSeq_feat> misc = unit_test_util::AddMiscFeature(first);

    // First part: copy of the feature's current location.
    CRef<CSeq_loc> head_part(new CSeq_loc());
    head_part->Assign(misc->GetLocation());

    // Second part: starts right after the first one and spans 16 bases.
    CRef<CSeq_loc> tail_part(new CSeq_loc());
    tail_part->Assign(misc->GetLocation());
    tail_part->SetInt().SetFrom(head_part->GetStop(eExtreme_Biological) + 1);
    tail_part->SetInt().SetTo(tail_part->GetInt().GetFrom() + 15);

    auto& mix_parts = misc->SetLocation().SetMix().Set();
    mix_parts.push_back(head_part);
    mix_parts.push_back(tail_part);

    // Builds one expected interval with both partial flags cleared.
    auto make_expected = [](size_t from, size_t to, const char* local_id) {
        CRef<CSeq_loc> loc(new CSeq_loc());
        loc->SetInt().SetFrom(from);
        loc->SetInt().SetTo(to);
        loc->SetInt().SetId().SetLocal().SetStr(local_id);
        loc->SetPartialStart(false, eExtreme_Biological);
        loc->SetPartialStop(false, eExtreme_Biological);
        return loc;
    };

    vector<CRef<CSeq_loc> > expected_loc;
    expected_loc.push_back(make_expected(front_insert, 30 + front_insert, "good2"));
    expected_loc.push_back(make_expected(front_insert * 2, 30 + front_insert * 2, "good3"));

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
    CMessageListener_Basic listener;

    CBioseq_CI b(seh);
    CBioseq_Handle src = *b;
    ++b;
    // Every remaining sequence is a propagation target; expected_loc is
    // indexed in the same order.
    for (size_t offset = 0; b; ++offset, ++b) {
        edit::CFeaturePropagator propagator(src, *b, *align, false, false, true, true, &listener);

        CRef<CSeq_feat> new_feat = propagator.Propagate(*misc);
        CheckPropagatedLocation(*(expected_loc[offset]), new_feat->GetLocation());
        BOOST_CHECK_EQUAL(listener.Count(), 0);
        listener.Clear();
    }

}
#endif
2398 
2399 
2400 END_SCOPE(objects)
2401 END_NCBI_SCOPE
2402 
2403