1 /*  $Id: queryinfo_unit_test.cpp 506956 2016-07-13 16:19:54Z boratyng $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Tom Madden
27 *
28 * File Description:
29 *   Unit tests for QueryInfo setup
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 #include <corelib/ncbi_limits.hpp>
36 #include <objmgr/object_manager.hpp>
37 #include <objmgr/util/sequence.hpp>
38 #include <objects/seq/Bioseq.hpp>
39 #include <objects/seqloc/Seq_id.hpp>
40 #include <algo/blast/api/blast_types.hpp>
41 #include <algo/blast/api/blast_aux.hpp>
42 #include "test_objmgr.hpp"
43 
44 #include <algo/blast/api/bl2seq.hpp>
45 #include <blast_objmgr_priv.hpp>
46 
47 #include <algo/blast/api/blast_options_handle.hpp>
48 
49 using namespace ncbi::objects;
50 using namespace ncbi::blast;
51 
52 BOOST_AUTO_TEST_SUITE(QueryInfo)
53 
/// Builds query info for a single query (gi|3091) using blastp options and
/// verifies the query count, the first/last context indices, and the offset
/// and length recorded for context 0.
BOOST_AUTO_TEST_CASE(ProteinGetQueryInfo)
{
    // One query location, created via CTestObjMgr.
    CSeq_id seq_id("gi|3091");
    auto_ptr<SSeqLoc> seq_loc(CTestObjMgr::Instance().CreateSSeqLoc(seq_id));
    TSeqLocVector queries;
    queries.push_back(*seq_loc);

    // Program type and strand are both read from the blastp options.
    CRef<CBlastOptionsHandle> handle(CBlastOptionsFactory::Create(eBlastp));
    const CBlastOptions& options = handle->GetOptions();
    const EBlastProgramType program = options.GetProgramType();
    const ENa_strand strand = options.GetStrandOption();

    CBlastQueryInfo query_info;
    SetupQueryInfo(queries, program, strand, &query_info);

    // The test expects one query whose only context is index 0, starting at
    // offset 0 with a length of 607.
    const int kNumQueries = 1;
    BOOST_REQUIRE_EQUAL(kNumQueries, query_info->num_queries);
    BOOST_REQUIRE_EQUAL(0, query_info->first_context);
    BOOST_REQUIRE_EQUAL(0, query_info->last_context);
    BOOST_REQUIRE_EQUAL(0, query_info->contexts[0].query_offset);
    BOOST_REQUIRE_EQUAL(607, query_info->contexts[0].query_length);
}
75 
/// Builds query info for a blastx search over gi|3090 restricted to the
/// range (11, 10) on both strands, and verifies that one query spanning
/// contexts 0..5 is reported, with offset 0 for the first context and
/// length 0 for the last.
BOOST_AUTO_TEST_CASE(EmptyBlastxGetQueryInfo) {
    const int kNumQueries=1;
    CSeq_id id("gi|3090");
    // The first coordinate is larger than the second; going by the test
    // name this is meant to describe an empty query range.
    pair<TSeqPos, TSeqPos> range(11, 10);
    auto_ptr<SSeqLoc> qsl(
        CTestObjMgr::Instance().CreateSSeqLoc(id, range, eNa_strand_both));

    TSeqLocVector query_v;
    query_v.push_back(*qsl);
    // Default-constructed, matching the other test cases in this suite,
    // instead of copy-initializing the wrapper from NULL.
    CBlastQueryInfo query_info;
    CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(eBlastx));

    // Program type and strand both come from the blastx options.
    const CBlastOptions& kOpts = opts->GetOptions();
    EBlastProgramType prog = kOpts.GetProgramType();
    ENa_strand strand_opt = kOpts.GetStrandOption();

    SetupQueryInfo(query_v, prog, strand_opt, &query_info);

    // Six contexts (0..5) are expected even though the range is empty.
    BOOST_REQUIRE_EQUAL(kNumQueries, query_info->num_queries);
    BOOST_REQUIRE_EQUAL(0, query_info->first_context);
    BOOST_REQUIRE_EQUAL(5, query_info->last_context);
    BOOST_REQUIRE_EQUAL(0, query_info->contexts[0].query_offset);
    BOOST_REQUIRE_EQUAL(0, query_info->contexts[5].query_length);
}
99 
/// Builds query info for two queries (gi|3090 and gi|555) using blastn
/// options and verifies the query count, the context range (0..3), and the
/// offset and length recorded for context 0.
BOOST_AUTO_TEST_CASE(BlastnGetQueryInfo)
{
    // Two query locations, added to the vector in creation order.
    CSeq_id first_id("gi|3090");
    CSeq_id second_id("gi|555");
    auto_ptr<SSeqLoc> first_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(first_id));
    auto_ptr<SSeqLoc> second_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(second_id));
    TSeqLocVector queries;
    queries.push_back(*first_loc);
    queries.push_back(*second_loc);

    // Program type and strand are both read from the blastn options.
    CRef<CBlastOptionsHandle> handle(CBlastOptionsFactory::Create(eBlastn));
    const CBlastOptions& options = handle->GetOptions();
    const EBlastProgramType program = options.GetProgramType();
    const ENa_strand strand = options.GetStrandOption();

    CBlastQueryInfo query_info;
    SetupQueryInfo(queries, program, strand, &query_info);

    // The test expects two queries spread over contexts 0..3, with the
    // first context starting at offset 0 and being 2338 long.
    const int kNumQueries = 2;
    BOOST_REQUIRE_EQUAL(kNumQueries, query_info->num_queries);
    BOOST_REQUIRE_EQUAL(0, query_info->first_context);
    BOOST_REQUIRE_EQUAL(3, query_info->last_context);
    BOOST_REQUIRE_EQUAL(0, query_info->contexts[0].query_offset);
    BOOST_REQUIRE_EQUAL(2338, query_info->contexts[0].query_length);
}
124 
/// Sets up blastn query info for two queries (gi|3090 and gi|555) and checks
/// the query index that Blast_GetQueryIndexFromQueryOffset reports for two
/// different offsets.
BOOST_AUTO_TEST_CASE(BlastnGetQueryIndex)
{
    // Two query locations, added to the vector in creation order.
    CSeq_id first_id("gi|3090");
    CSeq_id second_id("gi|555");
    auto_ptr<SSeqLoc> first_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(first_id));
    auto_ptr<SSeqLoc> second_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(second_id));
    TSeqLocVector queries;
    queries.push_back(*first_loc);
    queries.push_back(*second_loc);

    // Program type and strand are both read from the blastn options; the
    // program type is needed again for the index lookups below.
    CRef<CBlastOptionsHandle> handle(CBlastOptionsFactory::Create(eBlastn));
    const CBlastOptions& options = handle->GetOptions();
    const EBlastProgramType program = options.GetProgramType();
    const ENa_strand strand = options.GetStrandOption();

    CBlastQueryInfo query_info;
    SetupQueryInfo(queries, program, strand, &query_info);

    // Offset 3000 is expected to resolve to query index 0 ...
    int query_index =
        Blast_GetQueryIndexFromQueryOffset(3000, program, query_info);
    BOOST_REQUIRE_EQUAL(0, query_index);

    // ... and offset 5010 to query index 1.
    query_index =
        Blast_GetQueryIndexFromQueryOffset(5010, program, query_info);
    BOOST_REQUIRE_EQUAL(1, query_index);
}
147 
/// Sets up blastn query info for two queries (gi|3090 and gi|555), then
/// verifies the recorded minimum/maximum query lengths and the context
/// index that BSearchContextInfo returns for four offsets derived from the
/// sequence lengths.
BOOST_AUTO_TEST_CASE(BlastnSearchContextInfo)
{
    // Two query locations, added to the vector in creation order.
    CSeq_id first_id("gi|3090");
    CSeq_id second_id("gi|555");
    auto_ptr<SSeqLoc> first_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(first_id));
    auto_ptr<SSeqLoc> second_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(second_id));
    TSeqLocVector queries;
    queries.push_back(*first_loc);
    queries.push_back(*second_loc);

    // Program type and strand are both read from the blastn options.
    CRef<CBlastOptionsHandle> handle(CBlastOptionsFactory::Create(eBlastn));
    const CBlastOptions& options = handle->GetOptions();
    const EBlastProgramType program = options.GetProgramType();
    const ENa_strand strand = options.GetStrandOption();

    CBlastQueryInfo query_info;
    SetupQueryInfo(queries, program, strand, &query_info);

    // Full sequence lengths as reported by each location's scope.
    int length_1 = first_loc->scope->GetSequenceLength(first_id);
    int length_2 = second_loc->scope->GetSequenceLength(second_id);

    // Shortest and longest query length stored in the query info.
    BOOST_REQUIRE_EQUAL(query_info->min_length, min(length_1, length_2));
    BOOST_REQUIRE_EQUAL(query_info->max_length, max(length_1, length_2));

    // Offsets expected to be the first position of contexts 0, 1, 2 and 3.
    // Each preceding context accounts for its length plus one extra
    // position (presumably a separator between contexts -- confirm).
    BOOST_REQUIRE_EQUAL(0, BSearchContextInfo(0, query_info.Get()));
    BOOST_REQUIRE_EQUAL(1, BSearchContextInfo(length_1 + 1, query_info.Get()));
    BOOST_REQUIRE_EQUAL(2, BSearchContextInfo(2 * (length_1 + 1),
                                              query_info.Get()));
    BOOST_REQUIRE_EQUAL(3, BSearchContextInfo(2 * (length_1 + 1) + length_2 + 1,
                                              query_info.Get()));
}
182 
/// Sets up blastn query info for two queries where the first one (gi|555)
/// is restricted to the plus strand, then verifies that context 1 is empty,
/// checks the recorded minimum/maximum query lengths, and checks the
/// context index that BSearchContextInfo returns for three offsets.
BOOST_AUTO_TEST_CASE(BlastnSearchContextInfoSingleStrand)
{
    CSeq_id first_id("gi|555");
    CSeq_id second_id("gi|3090");

    // The first location is plus-strand only; the second uses the default
    // arguments of CreateSSeqLoc.
    auto_ptr<SSeqLoc> first_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(first_id, eNa_strand_plus));
    auto_ptr<SSeqLoc> second_loc(
        CTestObjMgr::Instance().CreateSSeqLoc(second_id));
    TSeqLocVector queries;
    queries.push_back(*first_loc);
    queries.push_back(*second_loc);

    // Program type and strand are both read from the blastn options.
    CRef<CBlastOptionsHandle> handle(CBlastOptionsFactory::Create(eBlastn));
    const CBlastOptions& options = handle->GetOptions();
    const EBlastProgramType program = options.GetProgramType();
    const ENa_strand strand = options.GetStrandOption();

    CBlastQueryInfo query_info;
    SetupQueryInfo(queries, program, strand, &query_info);

    // Precondition for the checks below: context 1 must be empty.
    BOOST_REQUIRE_EQUAL(query_info->contexts[1].query_length, 0);

    // Full sequence lengths as reported by each location's scope.
    int length_1 = first_loc->scope->GetSequenceLength(first_id);
    int length_2 = second_loc->scope->GetSequenceLength(second_id);

    // The minimum length is expected to be 0 (presumably because of the
    // empty context); the maximum is the longer of the two sequences.
    BOOST_REQUIRE_EQUAL(query_info->min_length, 0u);
    BOOST_REQUIRE_EQUAL(query_info->max_length, max(length_1, length_2));

    // Offsets expected to be the first position of contexts 0, 2 and 3;
    // no offset is expected to resolve to the empty context 1.
    BOOST_REQUIRE_EQUAL(0, BSearchContextInfo(0, query_info.Get()));
    BOOST_REQUIRE_EQUAL(2, BSearchContextInfo(length_1 + 1, query_info.Get()));
    BOOST_REQUIRE_EQUAL(3, BSearchContextInfo(length_1 + length_2 + 2,
                                              query_info.Get()));
}
219 
220 
221 BOOST_AUTO_TEST_SUITE_END()
222