1 /* $Id: magicblast_thread.cpp 600515 2020-01-21 18:10:49Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Greg Boratyn
27 * Class for MT MagicBlast searches
28 *
29 */
30
31
32 #include <ncbi_pch.hpp>
33 #include <algo/blast/api/magicblast.hpp>
34 #include <algo/blast/api/objmgrfree_query_data.hpp>
35 #include <objects/seqset/Seq_entry.hpp>
36 #include "magicblast_util.hpp"
37 #include "magicblast_thread.hpp"
38 #include <sstream>
39
40 #ifndef SKIP_DOXYGEN_PROCESSING
41 USING_NCBI_SCOPE;
42 USING_SCOPE(blast);
43 USING_SCOPE(objects);
44 #endif
45
46 CFastMutex input_mutex;
47 CFastMutex output_mutex;
48
CMagicBlastThread(CBlastInputOMF & input,CRef<CMagicBlastOptionsHandle> options,CRef<CMapperQueryOptionsArgs> query_opts,CRef<CBlastDatabaseArgs> db_args,CRef<CMapperFormattingArgs> fmt_args,CNcbiOstream & out,CNcbiOstream * unaligned_stream)49 CMagicBlastThread::CMagicBlastThread(CBlastInputOMF& input,
50 CRef<CMagicBlastOptionsHandle> options,
51 CRef<CMapperQueryOptionsArgs> query_opts,
52 CRef<CBlastDatabaseArgs> db_args,
53 CRef<CMapperFormattingArgs> fmt_args,
54 CNcbiOstream& out,
55 CNcbiOstream* unaligned_stream)
56 : m_Input(input),
57 m_Options(options),
58 m_QueryOptions(query_opts),
59 m_DatabaseArgs(db_args),
60 m_FormattingArgs(fmt_args),
61 m_OutStream(out),
62 m_OutUnalignedStream(unaligned_stream)
63 {}
64
Main(void)65 void* CMagicBlastThread::Main(void)
66 {
67 const bool kTrimReadIdForSAM =
68 m_QueryOptions->IsPaired() && m_FormattingArgs->TrimReadIds();
69
70 const bool kPrintUnaligned = m_FormattingArgs->PrintUnaligned();
71 const bool kNoDiscordant = m_FormattingArgs->NoDiscordant();
72 const bool kPrintMdTag = m_FormattingArgs->PrintMdTag();
73
74 // Is either strand-specificity flag set? (mutually exclusive)
75 const bool only_specific = m_FormattingArgs->SelectOnlyStrandSpecific();
76 const bool fr = m_FormattingArgs->SelectFwdRev();
77 const bool rf = m_FormattingArgs->SelectRevFwd();
78
79 // One or both MUST be false. (enforced by command-line processing)
80 _ASSERT(fr == false || rf == false);
81 // "-fr" and "-rf" flags can only be used without
82 // "-only_strand_specific" for SAM output. Return an error if this
83 // condition is not met.
84 if (m_FormattingArgs->GetFormattedOutputChoice() != CFormattingArgs::eSAM) {
85 if (!only_specific && (fr || rf)) {
86 NCBI_THROW(CArgException, eNoValue,
87 "-fr or -rf can only be used with SAM format."
88 " Use -oufmt sam option.");
89 }
90 }
91 // "-only_strand_specific" without "-fr" or "-rf" (or in the future,
92 // "-f" or "-r") is not meaningful.
93 // FIXME: should this be a warning?
94 if (only_specific && !(fr || rf)) {
95 NCBI_THROW(CArgException, eNoValue,
96 "-only_strand_specific without either -fr or -rf "
97 "is not valid.");
98 }
99
100 E_StrandSpecificity kStrandSpecific = eNonSpecific;
101 if (fr) {
102 kStrandSpecific = eFwdRev;
103 } else if (rf) {
104 kStrandSpecific = eRevFwd;
105 }
106
107 bool isDone = false;
108 while (!isDone) {
109 CRef<CBioseq_set> query_batch(new CBioseq_set);
110 const string kDbName = m_DatabaseArgs->GetDatabaseName();
111
112 {
113 CFastMutexGuard guard(input_mutex);
114 isDone = m_Input.End();
115 if (isDone) {
116 break;
117 }
118
119 m_Input.GetNextSeqBatch(*query_batch);
120 }
121
122 if (query_batch->IsSetSeq_set() &&
123 !query_batch->GetSeq_set().empty()) {
124
125 CRef<IQueryFactory> queries(
126 new CObjMgrFree_QueryFactory(query_batch));
127
128 CRef<CMagicBlastResultSet> results;
129 CRef<CSearchDatabase> search_db;
130 CRef<CLocalDbAdapter> thread_db_adapter;
131
132 if (!kDbName.empty()) {
133
134 search_db.Reset(new CSearchDatabase(kDbName,
135 CSearchDatabase::eBlastDbIsNucleotide));
136
137 CRef<CSeqDBGiList> gilist =
138 m_DatabaseArgs->GetSearchDatabase()->GetGiList();
139
140 CRef<CSeqDBGiList> neg_gilist =
141 m_DatabaseArgs->GetSearchDatabase()->GetNegativeGiList();
142
143
144 if (gilist.NotEmpty()) {
145 search_db->SetGiList(gilist.GetNonNullPointer());
146 }
147 else if (neg_gilist.NotEmpty()) {
148 search_db->SetNegativeGiList(
149 neg_gilist.GetNonNullPointer());
150 }
151
152 // this must be the last operation on searh_db, because
153 // CSearchDatabase::GetSeqDb initializes CSeqDB with
154 // whatever information it currently has
155 search_db->GetSeqDb()->SetNumberOfThreads(1, true);
156
157 thread_db_adapter.Reset(new CLocalDbAdapter(*search_db));
158 }
159 else {
160 CRef<CScope> scope;
161 scope.Reset(new CScope(*CObjectManager::GetInstance()));
162 CRef<IQueryFactory> subjects;
163 subjects = m_DatabaseArgs->GetSubjects(scope);
164 thread_db_adapter.Reset(
165 new CLocalDbAdapter(subjects, m_Options, true));
166 }
167
168 // do mapping
169 CMagicBlast magicblast(queries, thread_db_adapter, m_Options);
170 results = magicblast.RunEx();
171
172 // use a single stream when reporting to one file, or two streams
173 // when reporting unaligned reads separately
174 ostringstream ostr;
175 ostringstream the_unaligned_ostr;
176 ostringstream& unaligned_ostr =
177 m_OutUnalignedStream ? the_unaligned_ostr : ostr;
178
179 // format ouput
180 if (m_FormattingArgs->GetFormattedOutputChoice() ==
181 CFormattingArgs::eTabular) {
182
183 CRef<ILocalQueryData> query_data =
184 queries->MakeLocalQueryData(&m_Options->GetOptions());
185
186 PrintTabular(ostr,
187 unaligned_ostr,
188 m_FormattingArgs->GetUnalignedOutputFormat(),
189 *results,
190 *query_batch,
191 m_Options->GetPaired(),
192 /*thread_batch_number*/ 1,
193 kTrimReadIdForSAM,
194 kPrintUnaligned,
195 kNoDiscordant);
196 }
197 else if (m_FormattingArgs->GetFormattedOutputChoice() ==
198 CFormattingArgs::eAsnText) {
199
200 PrintASN1(ostr, *query_batch,
201 *results->GetFlatResults(kNoDiscordant));
202 }
203 else {
204
205 CRef<ILocalQueryData> query_data =
206 queries->MakeLocalQueryData(&m_Options->GetOptions());
207
208 PrintSAM(ostr,
209 unaligned_ostr,
210 m_FormattingArgs->GetUnalignedOutputFormat(),
211 *results,
212 *query_batch,
213 query_data->GetQueryInfo(),
214 m_Options->GetSpliceAlignments(),
215 /*thread_batch_number*/ 1,
216 kTrimReadIdForSAM,
217 kPrintUnaligned,
218 kNoDiscordant,
219 kStrandSpecific,
220 only_specific,
221 kPrintMdTag);
222 }
223
224
225 // write formatted ouput to stream
226 {
227 CFastMutexGuard guard(output_mutex);
228 m_OutStream << ostr.str();
229 // flush string
230 ostr.str("");
231
232 // report unaligned reads to a separate stream if requested
233 if (m_OutUnalignedStream) {
234 *m_OutUnalignedStream << unaligned_ostr.str();
235 unaligned_ostr.str("");
236 }
237 }
238
239 query_batch.Reset();
240 }
241 }
242
243 return nullptr;
244 }
245