1 /*  $Id: magicblast_thread.cpp 600515 2020-01-21 18:10:49Z boratyng $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Greg Boratyn
27  *    Class for MT MagicBlast searches
28  *
29  */
30 
31 
32 #include <ncbi_pch.hpp>
33 #include <algo/blast/api/magicblast.hpp>
34 #include <algo/blast/api/objmgrfree_query_data.hpp>
35 #include <objects/seqset/Seq_entry.hpp>
36 #include "magicblast_util.hpp"
37 #include "magicblast_thread.hpp"
38 #include <sstream>
39 
40 #ifndef SKIP_DOXYGEN_PROCESSING
41 USING_NCBI_SCOPE;
42 USING_SCOPE(blast);
43 USING_SCOPE(objects);
44 #endif
45 
46 CFastMutex input_mutex;
47 CFastMutex output_mutex;
48 
CMagicBlastThread(CBlastInputOMF & input,CRef<CMagicBlastOptionsHandle> options,CRef<CMapperQueryOptionsArgs> query_opts,CRef<CBlastDatabaseArgs> db_args,CRef<CMapperFormattingArgs> fmt_args,CNcbiOstream & out,CNcbiOstream * unaligned_stream)49 CMagicBlastThread::CMagicBlastThread(CBlastInputOMF& input,
50                                      CRef<CMagicBlastOptionsHandle> options,
51                                      CRef<CMapperQueryOptionsArgs> query_opts,
52                                      CRef<CBlastDatabaseArgs> db_args,
53                                      CRef<CMapperFormattingArgs> fmt_args,
54                                      CNcbiOstream& out,
55                                      CNcbiOstream* unaligned_stream)
56  : m_Input(input),
57    m_Options(options),
58    m_QueryOptions(query_opts),
59    m_DatabaseArgs(db_args),
60    m_FormattingArgs(fmt_args),
61    m_OutStream(out),
62    m_OutUnalignedStream(unaligned_stream)
63 {}
64 
Main(void)65 void* CMagicBlastThread::Main(void)
66 {
67     const bool kTrimReadIdForSAM =
68             m_QueryOptions->IsPaired() && m_FormattingArgs->TrimReadIds();
69 
70     const bool kPrintUnaligned = m_FormattingArgs->PrintUnaligned();
71     const bool kNoDiscordant = m_FormattingArgs->NoDiscordant();
72     const bool kPrintMdTag = m_FormattingArgs->PrintMdTag();
73 
74     // Is either strand-specificity flag set? (mutually exclusive)
75     const bool only_specific = m_FormattingArgs->SelectOnlyStrandSpecific();
76     const bool fr = m_FormattingArgs->SelectFwdRev();
77     const bool rf = m_FormattingArgs->SelectRevFwd();
78 
79     // One or both MUST be false. (enforced by command-line processing)
80     _ASSERT(fr == false  ||  rf == false);
81     // "-fr" and "-rf" flags can only be used without
82     // "-only_strand_specific" for SAM output.  Return an error if this
83     // condition is not met.
84     if (m_FormattingArgs->GetFormattedOutputChoice() != CFormattingArgs::eSAM) {
85         if (!only_specific  &&  (fr || rf)) {
86             NCBI_THROW(CArgException, eNoValue,
87                        "-fr or -rf can only be used with SAM format."
88                        " Use -oufmt sam option.");
89         }
90     }
91     // "-only_strand_specific" without "-fr" or "-rf" (or in the future,
92     // "-f" or "-r") is not meaningful.
93     // FIXME: should this be a warning?
94     if (only_specific  &&  !(fr || rf)) {
95         NCBI_THROW(CArgException, eNoValue,
96                    "-only_strand_specific without either -fr or -rf "
97                    "is not valid.");
98     }
99 
100     E_StrandSpecificity kStrandSpecific = eNonSpecific;
101     if (fr) {
102         kStrandSpecific = eFwdRev;
103     } else if (rf) {
104         kStrandSpecific = eRevFwd;
105     }
106 
107     bool isDone = false;
108     while (!isDone) {
109         CRef<CBioseq_set> query_batch(new CBioseq_set);
110         const string kDbName = m_DatabaseArgs->GetDatabaseName();
111 
112         {
113             CFastMutexGuard guard(input_mutex);
114             isDone = m_Input.End();
115             if (isDone) {
116                 break;
117             }
118 
119             m_Input.GetNextSeqBatch(*query_batch);
120         }
121 
122         if (query_batch->IsSetSeq_set() &&
123             !query_batch->GetSeq_set().empty()) {
124 
125             CRef<IQueryFactory> queries(
126                                  new CObjMgrFree_QueryFactory(query_batch));
127 
128             CRef<CMagicBlastResultSet> results;
129             CRef<CSearchDatabase> search_db;
130             CRef<CLocalDbAdapter> thread_db_adapter;
131 
132             if (!kDbName.empty()) {
133 
134                 search_db.Reset(new CSearchDatabase(kDbName,
135                                        CSearchDatabase::eBlastDbIsNucleotide));
136 
137                 CRef<CSeqDBGiList> gilist =
138                     m_DatabaseArgs->GetSearchDatabase()->GetGiList();
139 
140                 CRef<CSeqDBGiList> neg_gilist =
141                     m_DatabaseArgs->GetSearchDatabase()->GetNegativeGiList();
142 
143 
144                 if (gilist.NotEmpty()) {
145                     search_db->SetGiList(gilist.GetNonNullPointer());
146                 }
147                 else if (neg_gilist.NotEmpty()) {
148                     search_db->SetNegativeGiList(
149                                           neg_gilist.GetNonNullPointer());
150                 }
151 
152                 // this must be the last operation on searh_db, because
153                 // CSearchDatabase::GetSeqDb initializes CSeqDB with
154                 // whatever information it currently has
155                 search_db->GetSeqDb()->SetNumberOfThreads(1, true);
156 
157                 thread_db_adapter.Reset(new CLocalDbAdapter(*search_db));
158             }
159             else {
160                 CRef<CScope> scope;
161                 scope.Reset(new CScope(*CObjectManager::GetInstance()));
162                 CRef<IQueryFactory> subjects;
163                 subjects = m_DatabaseArgs->GetSubjects(scope);
164                 thread_db_adapter.Reset(
165                               new CLocalDbAdapter(subjects, m_Options, true));
166             }
167 
168             // do mapping
169             CMagicBlast magicblast(queries, thread_db_adapter, m_Options);
170             results = magicblast.RunEx();
171 
172             // use a single stream when reporting to one file, or two streams
173             // when reporting unaligned reads separately
174             ostringstream ostr;
175             ostringstream the_unaligned_ostr;
176             ostringstream& unaligned_ostr =
177                 m_OutUnalignedStream ? the_unaligned_ostr : ostr;
178 
179             // format ouput
180             if (m_FormattingArgs->GetFormattedOutputChoice() ==
181                 CFormattingArgs::eTabular) {
182 
183                 CRef<ILocalQueryData> query_data =
184                     queries->MakeLocalQueryData(&m_Options->GetOptions());
185 
186                 PrintTabular(ostr,
187                              unaligned_ostr,
188                              m_FormattingArgs->GetUnalignedOutputFormat(),
189                              *results,
190                              *query_batch,
191                              m_Options->GetPaired(),
192                              /*thread_batch_number*/ 1,
193                              kTrimReadIdForSAM,
194                              kPrintUnaligned,
195                              kNoDiscordant);
196             }
197             else if (m_FormattingArgs->GetFormattedOutputChoice() ==
198                      CFormattingArgs::eAsnText) {
199 
200                 PrintASN1(ostr, *query_batch,
201                           *results->GetFlatResults(kNoDiscordant));
202             }
203             else {
204 
205                 CRef<ILocalQueryData> query_data =
206                     queries->MakeLocalQueryData(&m_Options->GetOptions());
207 
208                 PrintSAM(ostr,
209                          unaligned_ostr,
210                          m_FormattingArgs->GetUnalignedOutputFormat(),
211                          *results,
212                          *query_batch,
213                          query_data->GetQueryInfo(),
214                          m_Options->GetSpliceAlignments(),
215                          /*thread_batch_number*/ 1,
216                          kTrimReadIdForSAM,
217                          kPrintUnaligned,
218                          kNoDiscordant,
219                          kStrandSpecific,
220                          only_specific,
221                          kPrintMdTag);
222             }
223 
224 
225             // write formatted ouput to stream
226             {
227                 CFastMutexGuard guard(output_mutex);
228                 m_OutStream << ostr.str();
229                 // flush string
230                 ostr.str("");
231 
232                 // report unaligned reads to a separate stream if requested
233                 if (m_OutUnalignedStream) {
234                     *m_OutUnalignedStream << unaligned_ostr.str();
235                     unaligned_ostr.str("");
236                 }
237             }
238 
239             query_batch.Reset();
240         }
241     }
242 
243     return nullptr;
244 }
245