1 /* $Id: unit_test_alnmgr.cpp 601341 2020-02-05 20:27:33Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko
27 *
28 * File Description:
29 * Alignment Manager unit test.
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35
36 #include <corelib/ncbiapp.hpp>
37 #include <corelib/test_boost.hpp>
38
39 #include <serial/objistr.hpp>
40 #include <objects/seqloc/Seq_id.hpp>
41 #include <objects/general/Object_id.hpp>
42
43 #include <objtools/alnmgr/aln_user_options.hpp>
44 #include <objtools/alnmgr/aln_asn_reader.hpp>
45 #include <objtools/alnmgr/aln_container.hpp>
46 #include <objtools/alnmgr/aln_tests.hpp>
47 #include <objtools/alnmgr/aln_stats.hpp>
48 #include <objtools/alnmgr/pairwise_aln.hpp>
49 #include <objtools/alnmgr/aln_serial.hpp>
50 #include <objtools/alnmgr/sparse_aln.hpp>
51 #include <objtools/alnmgr/aln_converters.hpp>
52 #include <objtools/alnmgr/aln_builders.hpp>
53 #include <objtools/alnmgr/sparse_ci.hpp>
54 #include <objtools/alnmgr/aln_generators.hpp>
55
56 #include <objtools/data_loaders/genbank/gbloader.hpp>
57 #include <objmgr/object_manager.hpp>
58 #include <objmgr/scope.hpp>
59
60 #include <common/test_assert.h> /* This header must go last */
61
62 USING_NCBI_SCOPE;
63 USING_SCOPE(objects);
64
65
66
NCBITEST_INIT_CMDLINE(descrs)67 NCBITEST_INIT_CMDLINE(descrs)
68 {
69 descrs->AddFlag("print_exp", "Print expected values instead of testing");
70 descrs->AddFlag("verbose", "Print detailed test progress");
71 }
72
73
DumpExpected(void)74 bool DumpExpected(void)
75 {
76 return CNcbiApplication::Instance()->GetArgs()["print_exp"];
77 }
78
79
Verbose(void)80 bool Verbose(void)
81 {
82 return CNcbiApplication::Instance()->GetArgs()["verbose"];
83 }
84
85
GetScope(void)86 CScope& GetScope(void)
87 {
88 static CRef<CScope> s_Scope;
89 if ( !s_Scope ) {
90 CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
91 CGBDataLoader::RegisterInObjectManager(*objmgr);
92
93 s_Scope.Reset(new CScope(*objmgr));
94 s_Scope->AddDefaults();
95 }
96 return *s_Scope;
97 }
98
99
100 // Helper function for loading alignments. Returns number of alignments loaded
101 // into the container. The expected format is:
102 // <int-number-of-alignments>
103 // <Seq-align>+
LoadAligns(CNcbiIstream & in,CAlnContainer & aligns,size_t limit=0)104 size_t LoadAligns(CNcbiIstream& in,
105 CAlnContainer& aligns,
106 size_t limit = 0)
107 {
108 size_t num_aligns = 0;
109 while ( !in.eof() ) {
110 try {
111 CRef<CSeq_align> align(new CSeq_align);
112 in >> MSerial_AsnText >> *align;
113 align->Validate(true);
114 aligns.insert(*align);
115 num_aligns++;
116 if (limit > 0 && num_aligns >= limit) break;
117 }
118 catch (CIOException e) {
119 break;
120 }
121 }
122 return num_aligns;
123 }
124
125
126 // The aln_id_map argument is required because TScopeAlnStat keeps const
127 // reference to the id-map. So, the map needs to be returned to the caller.
InitAlnStats(const CAlnContainer & aligns,auto_ptr<TScopeAlnIdMap> & aln_id_map)128 CRef<TScopeAlnStats> InitAlnStats(const CAlnContainer& aligns,
129 auto_ptr<TScopeAlnIdMap>& aln_id_map)
130 {
131 TScopeAlnSeqIdConverter id_conv(&GetScope());
132 TScopeIdExtract id_extract(id_conv);
133 aln_id_map.reset(new TScopeAlnIdMap(id_extract, aligns.size()));
134 ITERATE(CAlnContainer, aln_it, aligns) {
135 aln_id_map->push_back(**aln_it);
136 }
137 CRef<TScopeAlnStats> aln_stats(new TScopeAlnStats(*aln_id_map));
138 return aln_stats;
139 }
140
141
CheckPairwiseAln(CNcbiIstream & in_exp,const CPairwiseAln & pw,size_t aln_idx,int anchor,int row)142 void CheckPairwiseAln(CNcbiIstream& in_exp,
143 const CPairwiseAln& pw,
144 size_t aln_idx,
145 int anchor,
146 int row)
147 {
148 // For each alignment/anchor/row:
149 // <int-align-index> <int-anchor_index> <int-row-index> <int-pairwise-flags>
150 // <int-align-first-from> <int-align-first-to> <int-base-width> <Seq-id>
151 // <int-align-second-from> <int-align-second-to> <int-base-width> <Seq-id>
152 // For each CPairwiseAln element:
153 // <int-first-from> <int-second-from> <int-len> <int-is-direct>
154 // <int-insertions-count>
155 // For each insertion:
156 // <int-first-from> <int-second-from> <int-len> <int-is-direct>
157 // For each iterator:
158 // <int-first-from> <int-first-to> <int-second-from> <int-second-to> ...
159 // ... <int-direct> <int-first-direct> <int-aligned>
160
161 TSignedSeqPos first_from, first_to, second_from, second_to, len;
162 int flags;
163
164 if ( DumpExpected() ) {
165 cout << aln_idx << " " << anchor << " " << row << " " << pw.GetFlags() << endl;
166 cout << " " << pw.GetFirstFrom() << " "
167 << pw.GetFirstTo() << " "
168 << pw.GetFirstBaseWidth() << " "
169 << MSerial_AsnText << pw.GetFirstId()->GetSeqId();
170 cout << " " << pw.GetSecondPosByFirstPos(pw.GetFirstFrom()) << " "
171 << pw.GetSecondPosByFirstPos(pw.GetFirstTo()) << " "
172 << pw.GetSecondBaseWidth() << " "
173 << MSerial_AsnText << pw.GetSecondId()->GetSeqId();
174 }
175 else {
176 if ( Verbose() ) {
177 cout << " aln=" << aln_idx << ", anchor=" << anchor << ", row=" << row << endl;
178 }
179 size_t expected_aln_idx;
180 int expected_row, expected_anchor;
181 int first_width, second_width;
182 CSeq_id first_id, second_id;
183 in_exp >> expected_aln_idx >> expected_anchor >> expected_row >> flags;
184 in_exp >> first_from >> first_to >> first_width;
185 in_exp >> MSerial_AsnText >> first_id;
186 in_exp >> second_from >> second_to >> second_width;
187 in_exp >> MSerial_AsnText >> second_id;
188 BOOST_CHECK(in_exp.good());
189 BOOST_CHECK(aln_idx == expected_aln_idx);
190 BOOST_CHECK(anchor == expected_anchor);
191 BOOST_CHECK(row == expected_row);
192 BOOST_CHECK(pw.GetFlags() == flags);
193 BOOST_CHECK(pw.GetFirstId()->GetSeqId().Equals(first_id));
194 BOOST_CHECK(pw.GetSecondId()->GetSeqId().Equals(second_id));
195 BOOST_CHECK(pw.GetFirstBaseWidth() == first_width);
196 BOOST_CHECK(pw.GetSecondBaseWidth() == second_width);
197 BOOST_CHECK(pw.GetFirstFrom() == first_from);
198 BOOST_CHECK(pw.GetFirstTo() == first_to);
199 BOOST_CHECK(pw.GetSecondPosByFirstPos(pw.GetFirstFrom()) == second_from);
200 BOOST_CHECK(pw.GetSecondPosByFirstPos(pw.GetFirstTo()) == second_to);
201 }
202
203 ITERATE(CPairwiseAln, rg_it, pw) {
204 CAlignRange<TSignedSeqPos> rg = *rg_it;
205
206 if ( DumpExpected() ) {
207 cout << " " << rg_it->GetFirstFrom() << " "
208 << rg_it->GetSecondFrom() << " "
209 << rg_it->GetLength() << " "
210 << (rg.IsDirect() ? 1 : 0) << endl;
211 }
212 else {
213 in_exp >> first_from >> second_from >> len >> flags;
214 BOOST_CHECK(in_exp.good());
215 BOOST_CHECK(rg_it->GetFirstFrom() == first_from);
216 BOOST_CHECK(rg_it->GetSecondFrom() == second_from);
217 BOOST_CHECK(rg_it->GetLength() == len);
218 BOOST_CHECK(rg_it->IsDirect() == (flags != 0));
219 }
220 }
221
222 if ( DumpExpected() ) {
223 cout << " " << pw.GetInsertions().size() << endl;
224 }
225 else {
226 size_t ins_count;
227 in_exp >> ins_count;
228 BOOST_CHECK(in_exp.good());
229 BOOST_CHECK(pw.GetInsertions().size() == ins_count);
230 }
231
232 if (!pw.GetInsertions().empty()) {
233 ITERATE(CPairwiseAln::TInsertions, gap, pw.GetInsertions()) {
234 if ( DumpExpected() ) {
235 cout << " " << gap->GetFirstFrom() << " "
236 << gap->GetSecondFrom() << " "
237 << gap->GetLength() << " "
238 << (gap->IsDirect() ? 1 : 0) << endl;
239 }
240 else {
241 in_exp >> first_from >> second_from >> len >> flags;
242 BOOST_CHECK(in_exp.good());
243 BOOST_CHECK(gap->GetFirstFrom() == first_from);
244 BOOST_CHECK(gap->GetSecondFrom() == second_from);
245 BOOST_CHECK(gap->GetLength() == len);
246 BOOST_CHECK(gap->IsDirect() == (flags != 0));
247 }
248 }
249 }
250 }
251
252
CheckSparseAln(CNcbiIstream & in_exp,const CSparseAln & sparse,bool check_sequence)253 void CheckSparseAln(CNcbiIstream& in_exp,
254 const CSparseAln& sparse,
255 bool check_sequence)
256 {
257 // Format:
258 // <dim> <numrows> <anchor row>
259 // <aln-from> <aln-to>
260 // For each row:
261 // <row> <width> <seq-id>
262 // <aln-from> <aln_to> <seq-from> <seq-to> <native-from> <native-to>
263 // [<aln-sequence>]
264 // [<row-sequence>]
265
266 static const char* kGap = "<GAP>";
267 static const char* kNoData = "<NO SEQUENCE DATA>";
268
269 TSignedSeqPos expected_aln_from, expected_aln_to;
270
271 if ( DumpExpected() ) {
272 cout << sparse.GetDim() << " "
273 << sparse.GetNumRows() << " "
274 << sparse.GetAnchor() << endl;
275 cout << sparse.GetAlnRange().GetFrom() << " "
276 << sparse.GetAlnRange().GetTo() << endl;
277 }
278 else {
279 int expected_dim, expected_numrows, expected_anchor;
280 in_exp >> expected_dim >> expected_numrows >> expected_anchor;
281 in_exp >> expected_aln_from >> expected_aln_to;
282 BOOST_CHECK(in_exp.good());
283 BOOST_CHECK(sparse.GetDim() == expected_dim);
284 BOOST_CHECK(sparse.GetNumRows() == expected_numrows);
285 BOOST_CHECK(sparse.GetAnchor() == expected_anchor);
286 BOOST_CHECK(sparse.GetAlnRange().GetFrom() == expected_aln_from);
287 BOOST_CHECK(sparse.GetAlnRange().GetTo() == expected_aln_to);
288 }
289
290 for (CSparseAln::TDim row = 0; row < sparse.GetDim(); ++row) {
291 if ( DumpExpected() ) {
292 cout << row << " "
293 << sparse.GetBaseWidth(row) << " "
294 << MSerial_AsnText << sparse.GetSeqId(row);
295 }
296 else {
297 int expected_row, expected_width;
298 CSeq_id expected_id;
299 in_exp >> expected_row >> expected_width >> MSerial_AsnText >> expected_id;
300 BOOST_CHECK(in_exp.good());
301 BOOST_CHECK(row == expected_row);
302 BOOST_CHECK(sparse.GetBaseWidth(row) == expected_width);
303 BOOST_CHECK(sparse.GetSeqId(row).Equals(expected_id));
304 }
305
306 CSparseAln::TRange rg = sparse.GetSeqRange(row);
307 CSparseAln::TRange native_rg = sparse.AlnRangeToNativeSeqRange(row, rg);
308
309 TSeqPos expected_seq_from, expected_seq_to,
310 expected_native_from, expected_native_to;
311
312 if ( DumpExpected() ) {
313 cout << sparse.GetSeqAlnStart(row) << " " << sparse.GetSeqAlnStop(row) << " "
314 << rg.GetFrom() << " " << rg.GetTo() << " "
315 << native_rg.GetFrom() << " " << native_rg.GetTo() << endl;
316 }
317 else {
318 in_exp >> expected_aln_from >> expected_aln_to
319 >> expected_seq_from >> expected_seq_to
320 >> expected_native_from >> expected_native_to;
321 BOOST_CHECK(in_exp.good());
322 BOOST_CHECK(sparse.GetSeqAlnStart(row) == expected_aln_from);
323 BOOST_CHECK(sparse.GetSeqAlnStop(row) == expected_aln_to);
324 BOOST_CHECK(rg.GetFrom() == expected_seq_from);
325 BOOST_CHECK(rg.GetTo() == expected_seq_to);
326 BOOST_CHECK(native_rg.GetFrom() == expected_native_from);
327 BOOST_CHECK(native_rg.GetTo() == expected_native_to);
328 }
329
330 if ( check_sequence ) {
331 string aln_sequence, row_sequence;
332 sparse.GetAlnSeqString(row, aln_sequence, CSparseAln::TSignedRange::GetWhole());
333 sparse.GetSeqString(row, row_sequence, CSparseAln::TRange::GetWhole());
334 if ( aln_sequence.empty() ) {
335 aln_sequence = kNoData;
336 }
337 if ( row_sequence.empty() ) {
338 row_sequence = kNoData;
339 }
340
341 if ( DumpExpected() ) {
342 cout << aln_sequence << endl;
343 cout << row_sequence << endl;
344 }
345 else {
346 if ( Verbose() ) {
347 cout << " whole row=" << row << endl;
348 }
349 string expected_aln_sequence, expected_row_sequence;
350 in_exp >> ws;
351 getline(in_exp, expected_aln_sequence);
352 getline(in_exp, expected_row_sequence);
353 BOOST_CHECK(in_exp.good());
354 BOOST_CHECK(aln_sequence == expected_aln_sequence);
355 BOOST_CHECK(row_sequence == expected_row_sequence);
356 }
357 }
358
359 CSparse_CI sparse_ci(sparse, row, CSparse_CI::eAllSegments);
360 for (; sparse_ci; ++sparse_ci) {
361 const IAlnSegment& seg = *sparse_ci;
362 CSparse_CI::TSignedRange aln_rg = seg.GetAlnRange();
363 CSparse_CI::TSignedRange seq_rg = seg.GetRange();
364
365 if ( DumpExpected() ) {
366 cout << " " << seg.GetType() << " "
367 << aln_rg.GetFrom() << " " << aln_rg.GetTo() << " "
368 << seq_rg.GetFrom() << " " << seq_rg.GetTo() << endl;
369 }
370 else {
371 if ( Verbose() ) {
372 cout << " segment: row=" << row << ", range=" <<
373 aln_rg.GetFrom() << ".." << aln_rg.GetTo() << endl;
374 }
375 unsigned expected_seg_type;
376 TSignedSeqPos expected_seq_from, expected_seq_to;
377
378 in_exp >> expected_seg_type
379 >> expected_aln_from >> expected_aln_to
380 >> expected_seq_from >> expected_seq_to;
381 BOOST_CHECK(in_exp.good());
382 BOOST_CHECK(seg.GetType() == expected_seg_type);
383 BOOST_CHECK(aln_rg.GetFrom() == expected_aln_from);
384 BOOST_CHECK(aln_rg.GetTo() == expected_aln_to);
385 BOOST_CHECK(seq_rg.GetFrom() == expected_seq_from);
386 BOOST_CHECK(seq_rg.GetTo() == expected_seq_to);
387 }
388
389 if ( check_sequence ) {
390 string aln_sequence, row_sequence;
391 string expected_aln_sequence, expected_row_sequence;
392
393 if ( !aln_rg.Empty() ) {
394 sparse.GetAlnSeqString(row, aln_sequence, aln_rg);
395 if ( aln_sequence.empty() ) {
396 aln_sequence = kNoData;
397 }
398 }
399 else {
400 aln_sequence = kGap;
401 }
402 if ( !seq_rg.Empty() ) {
403 sparse.GetSeqString(row, row_sequence,
404 IAlnExplorer::TRange(seq_rg.GetFrom(), seq_rg.GetTo()));
405 if ( row_sequence.empty() ) {
406 row_sequence = kNoData;
407 }
408 }
409 else {
410 row_sequence = kGap;
411 }
412
413 if ( DumpExpected() ) {
414 cout << aln_sequence << endl;
415 cout << row_sequence << endl;
416 }
417 else {
418 in_exp >> ws;
419 getline(in_exp, expected_aln_sequence);
420 getline(in_exp, expected_row_sequence);
421 BOOST_CHECK(in_exp.good());
422 BOOST_CHECK(aln_sequence == expected_aln_sequence);
423 BOOST_CHECK(row_sequence == expected_row_sequence);
424 }
425 }
426 }
427 }
428 }
429
430
BOOST_AUTO_TEST_CASE(s_TestLoadAlign)431 BOOST_AUTO_TEST_CASE(s_TestLoadAlign)
432 {
433 cout << "Test CAlnContainer and CAlnStats... (aligns1.asn / expected01.txt)" << endl;
434
435 CNcbiIfstream in_data("data/aligns1.asn");
436 CNcbiIfstream in_exp("data/expected01.txt");
437 // File format:
438 // <int-number-of-alignments>
439 // For each seq-id:
440 // <int-base-width> <Seq-id>
441
442 CAlnContainer aligns;
443 size_t num_aligns = LoadAligns(in_data, aligns);
444
445 if ( DumpExpected() ) {
446 cout << num_aligns << endl;
447 }
448 else {
449 size_t expected_num_aligns;
450 in_exp >> expected_num_aligns;
451 BOOST_CHECK(num_aligns == expected_num_aligns);
452 }
453
454 auto_ptr<TScopeAlnIdMap> aln_id_map;
455 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
456
457 if ( !DumpExpected() ) {
458 BOOST_CHECK(aln_stats->GetAlnCount() == num_aligns);
459 BOOST_CHECK(aln_stats->CanBeAnchored());
460 }
461
462 ITERATE(TScopeAlnStats::TIdVec, id_it, aln_stats->GetIdVec()) {
463 if ( DumpExpected() ) {
464 cout << (*id_it)->GetBaseWidth() << " ";
465 cout << MSerial_AsnText << (*id_it)->GetSeqId();
466 }
467 else {
468 int width;
469 CSeq_id id;
470 in_exp >> width;
471 in_exp >> MSerial_AsnText >> id;
472 BOOST_CHECK(id.Equals((*id_it)->GetSeqId()));
473 BOOST_CHECK((*id_it)->GetBaseWidth() == width);
474 }
475 }
476 }
477
478
BOOST_AUTO_TEST_CASE(s_TestPairwiseAln)479 BOOST_AUTO_TEST_CASE(s_TestPairwiseAln)
480 {
481 cout << "Test CPairwiseAln... (aligns1.asn / expected02.txt)" << endl;
482
483 CNcbiIfstream in_data("data/aligns1.asn");
484 CNcbiIfstream in_exp("data/expected02.txt");
485
486 // File format:
487 // <int-number-of-alignments>
488 // see CheckPairwiseAln
489
490 CAlnContainer aligns;
491 size_t num_aligns = LoadAligns(in_data, aligns);
492
493 if ( DumpExpected() ) {
494 cout << num_aligns << endl;
495 }
496 else {
497 size_t expected_num_aligns;
498 in_exp >> expected_num_aligns;
499 BOOST_CHECK(num_aligns == expected_num_aligns);
500 }
501
502 auto_ptr<TScopeAlnIdMap> aln_id_map;
503 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
504
505 const TScopeAlnStats::TAlnVec& aln_vec = aln_stats->GetAlnVec();
506
507 // For each source alignment create a set of pairwise alignments.
508 for (size_t aln_idx = 0; aln_idx < aln_vec.size(); ++aln_idx) {
509 const CSeq_align& aln = *aln_vec[aln_idx];
510 // Use row 0 as anchor, get pairwise alignments with all other rows.
511 const TScopeAlnStats::TIdVec& aln_ids = aln_stats->GetSeqIdsForAln(aln_idx);
512 // Test two different anchor values.
513 for (int anchor = 0; anchor < 2; anchor++) {
514 TAlnSeqIdIRef anchor_id = aln_ids[anchor];
515 for (int row = 0; row < aln_stats->GetDimForAln(aln_idx); ++row) {
516 if (row == anchor) continue;
517 TAlnSeqIdIRef row_id = aln_ids[row];
518 CPairwiseAln pw(anchor_id, row_id, CPairwiseAln::fDefaultPolicy);
519 ConvertSeqAlignToPairwiseAln(pw, aln, anchor, row,
520 CAlnUserOptions::eBothDirections, &aln_ids);
521
522 CheckPairwiseAln(in_exp, pw, aln_idx, anchor, row);
523
524 for (CPairwise_CI seg(pw); seg; ++seg) {
525 if ( DumpExpected() ) {
526 cout << " " << seg.GetFirstRange().GetFrom() << " "
527 << seg.GetFirstRange().GetTo() << " "
528 << seg.GetSecondRange().GetFrom() << " "
529 << seg.GetSecondRange().GetTo() << " "
530 << (seg.IsDirect() ? 1 : 0) << " "
531 << (seg.IsFirstDirect() ? 1 : 0) << " "
532 << (seg.GetSegType() == CPairwise_CI::eAligned ? 1 : 0) << endl;
533 }
534 else {
535 int direct, first_direct, aligned;
536 TSignedSeqPos first_from, first_to, second_from, second_to;
537 in_exp >> first_from >> first_to >> second_from >> second_to >>
538 direct >> first_direct >> aligned;
539 BOOST_CHECK(in_exp.good());
540 BOOST_CHECK(seg.GetFirstRange().GetFrom() == first_from);
541 BOOST_CHECK(seg.GetFirstRange().GetTo() == first_to);
542 BOOST_CHECK(seg.GetSecondRange().GetFrom() == second_from);
543 BOOST_CHECK(seg.GetSecondRange().GetTo() == second_to);
544 BOOST_CHECK(seg.IsDirect() == (direct != 0));
545 BOOST_CHECK(seg.IsFirstDirect() == (first_direct != 0));
546 BOOST_CHECK((seg.GetSegType() == CPairwise_CI::eAligned) == (aligned != 0));
547 }
548 }
549 }
550 }
551 }
552
553 }
554
555
BOOST_AUTO_TEST_CASE(s_TestPairwiseAlnRanged)556 BOOST_AUTO_TEST_CASE(s_TestPairwiseAlnRanged)
557 {
558 cout << "Test ranged CPairwise_CI... (aligns1.asn / expected03.txt)" << endl;
559
560 CNcbiIfstream in_data("data/aligns1.asn");
561 CNcbiIfstream in_exp("data/expected03.txt");
562
563 // File format:
564 // <int-number-of-alignments>
565 //
566 // For each alignment/anchor/row:
567 // <int-align-index> <int-anchor_index> <int-row-index> <int-pairwise-flags>
568 // <int-align-first-from> <int-align-first-to> <int-base-width> <Seq-id>
569 // <int-align-second-from> <int-align-second-to> <int-base-width> <Seq-id>
570 // For each CPairwiseAln element:
571 // <int-first-from> <int-second-from> <int-len> <int-is-direct>
572 // <int-insertions-count>
573 // For each insertion:
574 // <int-first-from> <int-second-from> <int-len> <int-is-direct>
575 // For each iterator:
576 // <int-first-from> <int-first-to> <int-second-from> <int-second-to> ...
577 // ... <int-direct> <int-first-direct> <int-aligned>
578
579 CAlnContainer aligns;
580 size_t num_aligns = LoadAligns(in_data, aligns);
581
582 if ( DumpExpected() ) {
583 cout << num_aligns << endl;
584 }
585 else {
586 size_t expected_num_aligns;
587 in_exp >> expected_num_aligns;
588 BOOST_CHECK(num_aligns == expected_num_aligns);
589 }
590
591 auto_ptr<TScopeAlnIdMap> aln_id_map;
592 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
593
594 const TScopeAlnStats::TAlnVec& aln_vec = aln_stats->GetAlnVec();
595
596 // For each source alignment create a set of pairwise alignments.
597 for (size_t aln_idx = 0; aln_idx < aln_vec.size(); ++aln_idx) {
598 const CSeq_align& aln = *aln_vec[aln_idx];
599 // Use row 0 as anchor, get pairwise alignments with all other rows.
600 const TScopeAlnStats::TIdVec& aln_ids = aln_stats->GetSeqIdsForAln(aln_idx);
601 // Test two different anchor values.
602 for (int anchor = 0; anchor < 2; anchor++) {
603 TAlnSeqIdIRef anchor_id = aln_ids[anchor];
604 for (int row = 0; row < aln_stats->GetDimForAln(aln_idx); ++row) {
605 if (row == anchor) continue;
606 TAlnSeqIdIRef row_id = aln_ids[row];
607 CPairwiseAln pw(anchor_id, row_id, CPairwiseAln::fDefaultPolicy);
608 ConvertSeqAlignToPairwiseAln(pw, aln, anchor, row,
609 CAlnUserOptions::eBothDirections, &aln_ids);
610
611 TSignedSeqPos first_from, first_to, second_from, second_to;
612
613 if ( DumpExpected() ) {
614 cout << aln_idx << " " << anchor << " " << row << " " << pw.GetFlags() << endl;
615 cout << " " << pw.GetFirstFrom() << " "
616 << pw.GetFirstTo() << " "
617 << pw.GetFirstBaseWidth() << " "
618 << MSerial_AsnText << pw.GetFirstId()->GetSeqId();
619 cout << " " << pw.GetSecondPosByFirstPos(pw.GetFirstFrom()) << " "
620 << pw.GetSecondPosByFirstPos(pw.GetFirstTo()) << " "
621 << pw.GetSecondBaseWidth() << " "
622 << MSerial_AsnText << pw.GetSecondId()->GetSeqId();
623 }
624 else {
625 if ( Verbose() ) {
626 cout << " aln=" << aln_idx << ", anchor=" << anchor << ", row=" << row << endl;
627 }
628 size_t expected_aln_idx;
629 int expected_row, expected_anchor;
630 int first_width, second_width, flags;
631 CSeq_id first_id, second_id;
632 in_exp >> expected_aln_idx >> expected_anchor >> expected_row >> flags;
633 in_exp >> first_from >> first_to >> first_width;
634 in_exp >> MSerial_AsnText >> first_id;
635 in_exp >> second_from >> second_to >> second_width;
636 in_exp >> MSerial_AsnText >> second_id;
637 BOOST_CHECK(in_exp.good());
638 BOOST_CHECK(aln_idx == expected_aln_idx);
639 BOOST_CHECK(anchor == expected_anchor);
640 BOOST_CHECK(row == expected_row);
641 BOOST_CHECK(pw.GetFlags() == flags);
642 BOOST_CHECK(pw.GetFirstId()->GetSeqId().Equals(first_id));
643 BOOST_CHECK(pw.GetSecondId()->GetSeqId().Equals(second_id));
644 BOOST_CHECK(pw.GetFirstBaseWidth() == first_width);
645 BOOST_CHECK(pw.GetSecondBaseWidth() == second_width);
646 BOOST_CHECK(pw.GetFirstFrom() == first_from);
647 BOOST_CHECK(pw.GetFirstTo() == first_to);
648 BOOST_CHECK(pw.GetSecondPosByFirstPos(pw.GetFirstFrom()) == second_from);
649 BOOST_CHECK(pw.GetSecondPosByFirstPos(pw.GetFirstTo()) == second_to);
650 }
651
652 if (pw.size() < 2) continue;
653
654 first_from = (pw.begin()+1)->GetFirstFrom() + 1;
655 first_to = pw.rbegin()->GetFirstFrom() - 2;
656 CPairwise_CI::TSignedRange rg(first_from, first_to);
657
658 for (CPairwise_CI seg(pw, rg); seg; ++seg) {
659 if ( DumpExpected() ) {
660 cout << " " << seg.GetFirstRange().GetFrom() << " "
661 << seg.GetFirstRange().GetTo() << " "
662 << seg.GetSecondRange().GetFrom() << " "
663 << seg.GetSecondRange().GetTo() << " "
664 << (seg.IsDirect() ? 1 : 0) << " "
665 << (seg.IsFirstDirect() ? 1 : 0) << " "
666 << (seg.GetSegType() == CPairwise_CI::eAligned ? 1 : 0) << endl;
667 }
668 else {
669 int direct, first_direct, aligned;
670 in_exp >> first_from >> first_to >> second_from >> second_to >>
671 direct >> first_direct >> aligned;
672 BOOST_CHECK(in_exp.good());
673 BOOST_CHECK(seg.GetFirstRange().GetFrom() == first_from);
674 BOOST_CHECK(seg.GetFirstRange().GetTo() == first_to);
675 BOOST_CHECK(seg.GetSecondRange().GetFrom() == second_from);
676 BOOST_CHECK(seg.GetSecondRange().GetTo() == second_to);
677 BOOST_CHECK(seg.IsDirect() == (direct != 0));
678 BOOST_CHECK(seg.IsFirstDirect() == (first_direct != 0));
679 BOOST_CHECK((seg.GetSegType() == CPairwise_CI::eAligned) == (aligned != 0));
680 }
681 }
682 }
683 }
684 }
685 }
686
687
BOOST_AUTO_TEST_CASE(s_TestAnchoredAln)688 BOOST_AUTO_TEST_CASE(s_TestAnchoredAln)
689 {
690 cout << "Test CAnchoredAln... (aligns1.asn / expected04.txt)" << endl;
691
692 CNcbiIfstream in_data("data/aligns1.asn");
693 CNcbiIfstream in_exp("data/expected04.txt");
694
695 CAlnContainer aligns;
696 size_t num_aligns = LoadAligns(in_data, aligns);
697
698 if ( DumpExpected() ) {
699 cout << num_aligns << endl;
700 }
701 else {
702 size_t expected_num_aligns;
703 in_exp >> expected_num_aligns;
704 BOOST_CHECK(num_aligns == expected_num_aligns);
705 }
706
707 auto_ptr<TScopeAlnIdMap> aln_id_map;
708 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
709
710 for (size_t aln_idx = 0; aln_idx < aligns.size(); ++aln_idx) {
711 for (int anchor = 0; anchor < aln_stats->GetDimForAln(aln_idx); ++anchor) {
712 CAlnUserOptions user_options;
713 CRef<CAnchoredAln> anchored_aln = CreateAnchoredAlnFromAln(*aln_stats,
714 aln_idx, user_options, anchor);
715
716 for (int row = 0; row < (int)anchored_aln->GetPairwiseAlns().size(); ++row) {
717 CheckPairwiseAln(in_exp,
718 *anchored_aln->GetPairwiseAlns()[row],
719 aln_idx, anchored_aln->GetAnchorRow(), row);
720 }
721 }
722 }
723 }
724
725
BOOST_AUTO_TEST_CASE(s_TestAnchoredAlnBuilt)726 BOOST_AUTO_TEST_CASE(s_TestAnchoredAlnBuilt)
727 {
728 static const int kMergeFlags[] = {
729 0,
730 CAlnUserOptions::fTruncateOverlaps,
731 CAlnUserOptions::fAllowTranslocation,
732 CAlnUserOptions::fUseAnchorAsAlnSeq,
733 CAlnUserOptions::fAnchorRowFirst,
734 CAlnUserOptions::fIgnoreInsertions
735 };
736
737 cout << "Test built CAnchoredAln... (aligns2.asn / expected05.txt)" << endl;
738
739 CNcbiIfstream in_data("data/aligns2.asn");
740 CNcbiIfstream in_exp("data/expected05.txt");
741
742 int setnum = 0;
743 while (!in_data.eof() && in_data.good()) {
744 ++setnum;
745 size_t num_to_merge;
746 in_data >> num_to_merge;
747 if (num_to_merge == 0 || !in_data.good()) break;
748
749 CAlnContainer aligns;
750 size_t num_aligns = LoadAligns(in_data, aligns, num_to_merge);
751
752 if ( DumpExpected() ) {
753 cout << num_aligns << endl;
754 }
755 else {
756 size_t expected_num_aligns;
757 in_exp >> expected_num_aligns;
758 BOOST_CHECK(num_aligns == expected_num_aligns);
759 }
760
761 auto_ptr<TScopeAlnIdMap> aln_id_map;
762 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
763
764 const TScopeAlnStats::TIdVec& aln_ids = aln_stats->GetAnchorIdVec();
765 for (size_t anchor_idx = 0; anchor_idx < aln_ids.size(); anchor_idx++) {
766 TAlnSeqIdIRef anchor_id = aln_ids[anchor_idx];
767 CAlnUserOptions user_options;
768 user_options.SetAnchorId(anchor_id);
769
770 for (size_t flags_idx = 0; flags_idx < sizeof(kMergeFlags)/sizeof(kMergeFlags[0]); flags_idx++) {
771 user_options.m_MergeFlags = kMergeFlags[flags_idx];
772
773 if ( DumpExpected() ) {
774 cout << anchor_idx << " "
775 << user_options.m_MergeFlags << endl;
776 }
777 else {
778 if ( Verbose() ) {
779 cout << " set=" << setnum << ", anchor=" << anchor_idx << " (" <<
780 anchor_id->GetSeqId().AsFastaString() << ")" <<
781 ", flags_idx=" << flags_idx << endl;
782 }
783 size_t expected_anchor_idx;
784 int expected_flags;
785 in_exp >> expected_anchor_idx >> expected_flags;
786 BOOST_CHECK(in_exp.good());
787 BOOST_CHECK(anchor_idx == expected_anchor_idx);
788 BOOST_CHECK(user_options.m_MergeFlags == expected_flags);
789 }
790
791 TAnchoredAlnVec anchored_aln_vec;
792 CreateAnchoredAlnVec(*aln_stats, anchored_aln_vec, user_options);
793
794 CRef<CSeq_id> id(new CSeq_id);
795 id->SetLocal().SetStr("pseudo-id");
796 TAlnSeqIdIRef pseudo_id(Ref(new CAlnSeqId(*id)));
797 CAnchoredAln built_aln;
798 BuildAln(anchored_aln_vec, built_aln, user_options, pseudo_id);
799
800 for (int row = 0; row < (int)built_aln.GetPairwiseAlns().size(); ++row) {
801 CheckPairwiseAln(in_exp,
802 *built_aln.GetPairwiseAlns()[row],
803 0, built_aln.GetAnchorRow(), row);
804 }
805 }
806 }
807 }
808 }
809
810
BOOST_AUTO_TEST_CASE(s_TestSparseAlnCoords)811 BOOST_AUTO_TEST_CASE(s_TestSparseAlnCoords)
812 {
813 cout << "Test CSparseAln coordinates... (aligns2.asn / expected06.txt)" << endl;
814
815 CNcbiIfstream in_data("data/aligns2.asn");
816 CNcbiIfstream in_exp("data/expected06.txt");
817
818 int setnum = 0;
819 while (!in_data.eof() && in_data.good()) {
820 ++setnum;
821 size_t num_to_merge;
822 in_data >> num_to_merge;
823 if (num_to_merge == 0 || !in_data.good()) break;
824
825 CAlnContainer aligns;
826 size_t num_aligns = LoadAligns(in_data, aligns, num_to_merge);
827
828 if ( DumpExpected() ) {
829 cout << num_aligns << endl;
830 }
831 else {
832 size_t expected_num_aligns;
833 in_exp >> expected_num_aligns;
834 BOOST_CHECK(num_aligns == expected_num_aligns);
835 }
836
837 auto_ptr<TScopeAlnIdMap> aln_id_map;
838 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
839
840 const TScopeAlnStats::TIdVec& aln_ids = aln_stats->GetAnchorIdVec();
841 for (size_t anchor_idx = 0; anchor_idx < aln_ids.size(); anchor_idx++) {
842 TAlnSeqIdIRef anchor_id = aln_ids[anchor_idx];
843 CAlnUserOptions user_options;
844 user_options.SetAnchorId(anchor_id);
845 user_options.m_MergeFlags = CAlnUserOptions::fTruncateOverlaps;
846
847 if ( DumpExpected() ) {
848 cout << anchor_idx << " "
849 << MSerial_AsnText << anchor_id->GetSeqId();
850 }
851 else {
852 if ( Verbose() ) {
853 cout << " set=" << setnum << ", anchor=" << anchor_idx << " (" <<
854 anchor_id->GetSeqId().AsFastaString() << ")" << endl;
855 }
856 size_t expected_anchor_idx;
857 in_exp >> expected_anchor_idx;
858 CSeq_id expected_anchor_id;
859 in_exp >> MSerial_AsnText >> expected_anchor_id;
860 BOOST_CHECK(in_exp.good());
861 BOOST_CHECK(anchor_idx == expected_anchor_idx);
862 BOOST_CHECK(anchor_id->GetSeqId().Equals(expected_anchor_id));
863 }
864
865 TAnchoredAlnVec anchored_aln_vec;
866 CreateAnchoredAlnVec(*aln_stats, anchored_aln_vec, user_options);
867
868 CRef<CSeq_id> id(new CSeq_id);
869 id->SetLocal().SetStr("pseudo-id");
870 TAlnSeqIdIRef pseudo_id(Ref(new CAlnSeqId(*id)));
871 CAnchoredAln built_aln;
872 BuildAln(anchored_aln_vec, built_aln, user_options, pseudo_id);
873
874 CSparseAln sparse_aln(built_aln, GetScope());
875
876 CheckSparseAln(in_exp, sparse_aln, false);
877 }
878 }
879 }
880
881
BOOST_AUTO_TEST_CASE(s_TestSparseAlnData)882 BOOST_AUTO_TEST_CASE(s_TestSparseAlnData)
883 {
884 cout << "Test CSparseAln sequence... (aligns3.asn / expected07.txt)" << endl;
885
886 CNcbiIfstream in_data("data/aligns3.asn");
887 CNcbiIfstream in_exp("data/expected07.txt");
888
889 int setnum = 0;
890 while (!in_data.eof() && in_data.good()) {
891 ++setnum;
892 size_t num_to_merge;
893 in_data >> num_to_merge;
894 if (num_to_merge == 0 || !in_data.good()) break;
895
896 CAlnContainer aligns;
897 size_t num_aligns = LoadAligns(in_data, aligns, num_to_merge);
898
899 if ( DumpExpected() ) {
900 cout << num_aligns << endl;
901 }
902 else {
903 size_t expected_num_aligns;
904 in_exp >> expected_num_aligns;
905 BOOST_CHECK(num_aligns == expected_num_aligns);
906 }
907
908 auto_ptr<TScopeAlnIdMap> aln_id_map;
909 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
910
911 const TScopeAlnStats::TIdVec& aln_ids = aln_stats->GetAnchorIdVec();
912 for (size_t anchor_idx = 0; anchor_idx < aln_ids.size(); anchor_idx++) {
913 TAlnSeqIdIRef anchor_id = aln_ids[anchor_idx];
914 CAlnUserOptions user_options;
915 user_options.SetAnchorId(anchor_id);
916 user_options.m_MergeFlags = CAlnUserOptions::fTruncateOverlaps;
917
918 if ( DumpExpected() ) {
919 cout << anchor_idx << " "
920 << MSerial_AsnText << anchor_id->GetSeqId();
921 }
922 else {
923 if ( Verbose() ) {
924 cout << " set=" << setnum << ", anchor=" << anchor_idx << " (" <<
925 anchor_id->GetSeqId().AsFastaString() << ")" << endl;
926 }
927 size_t expected_anchor_idx;
928 in_exp >> expected_anchor_idx;
929 CSeq_id expected_anchor_id;
930 in_exp >> MSerial_AsnText >> expected_anchor_id;
931 BOOST_CHECK(in_exp.good());
932 BOOST_CHECK(anchor_idx == expected_anchor_idx);
933 BOOST_CHECK(anchor_id->GetSeqId().Equals(expected_anchor_id));
934 }
935
936 TAnchoredAlnVec anchored_aln_vec;
937 CreateAnchoredAlnVec(*aln_stats, anchored_aln_vec, user_options);
938
939 CRef<CSeq_id> id(new CSeq_id);
940 id->SetLocal().SetStr("pseudo-id");
941 TAlnSeqIdIRef pseudo_id(Ref(new CAlnSeqId(*id)));
942 CAnchoredAln built_aln;
943 BuildAln(anchored_aln_vec, built_aln, user_options, pseudo_id);
944
945 CSparseAln sparse_aln(built_aln, GetScope());
946
947 CheckSparseAln(in_exp, sparse_aln, true);
948
949 CRef<CSeq_align> aln = CreateSeqAlignFromAnchoredAln(built_aln, CSeq_align::TSegs::e_Denseg, &GetScope());
950 BOOST_CHECK(aln);
951
952 if ( DumpExpected() ) {
953 cout << MSerial_AsnText << *aln;
954 }
955 else {
956 CSeq_align expected_aln;
957 in_exp >> MSerial_AsnText >> expected_aln;
958 BOOST_CHECK(aln->Equals(expected_aln));
959 }
960 }
961 }
962 }
963
BOOST_AUTO_TEST_CASE(s_TestAlnTypes)964 BOOST_AUTO_TEST_CASE(s_TestAlnTypes)
965 {
966 cout << "Test seq-align types... (aligns4.asn / expected08.txt)" << endl;
967
968 CNcbiIfstream in_data("data/aligns4.asn");
969 CNcbiIfstream in_exp("data/expected08.txt");
970
971 CAlnContainer aligns;
972 size_t num_aligns = LoadAligns(in_data, aligns);
973
974 if ( DumpExpected() ) {
975 cout << num_aligns << endl;
976 }
977 else {
978 size_t expected_num_aligns;
979 in_exp >> expected_num_aligns;
980 BOOST_CHECK(num_aligns == expected_num_aligns);
981 }
982
983 auto_ptr<TScopeAlnIdMap> aln_id_map;
984 CRef<TScopeAlnStats> aln_stats = InitAlnStats(aligns, aln_id_map);
985
986 for (size_t aln_idx = 0; aln_idx < aligns.size(); ++aln_idx) {
987 for (int anchor = 0; anchor < aln_stats->GetDimForAln(aln_idx); ++anchor) {
988
989 // Special case - sparse-segs can be anchored only to the first row.
990 if (aln_stats->GetAlnVec()[aln_idx]->GetSegs().IsSparse() && anchor > 0) {
991 break;
992 }
993
994 CAlnUserOptions user_options;
995 CRef<CAnchoredAln> anchored_aln = CreateAnchoredAlnFromAln(*aln_stats,
996 aln_idx, user_options, anchor);
997
998 for (int row = 0; row < (int)anchored_aln->GetPairwiseAlns().size(); ++row) {
999 CheckPairwiseAln(in_exp,
1000 *anchored_aln->GetPairwiseAlns()[row],
1001 aln_idx, anchored_aln->GetAnchorRow(), row);
1002 }
1003 }
1004 }
1005 }
1006