1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifndef _U2_BWA_TASK_H_
23 #define _U2_BWA_TASK_H_
24 
25 #include <U2Algorithm/DnaAssemblyTask.h>
26 
27 #include <U2Core/ExternalToolRunTask.h>
28 
29 namespace U2 {
30 
31 class MultiTask;
32 
33 class BwaBuildIndexTask : public ExternalToolSupportTask {
34     Q_OBJECT
35 public:
36     BwaBuildIndexTask(const QString &referencePath, const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings);
37 
38     void prepare();
39 
40 private:
41     class LogParser : public ExternalToolLogParser {
42     public:
43         LogParser();
44 
45         void parseOutput(const QString &partOfLog);
46         void parseErrOutput(const QString &partOfLog);
47     };
48 
49     QString referencePath;
50     QString indexPath;
51     DnaAssemblyToRefTaskSettings settings;
52 };
53 
54 class BwaAlignTask : public ExternalToolSupportTask {
55     Q_OBJECT
56 public:
57     BwaAlignTask(const QString &indexPath, const QList<ShortReadSet> &shortReadSets, const QString &resultPath, const DnaAssemblyToRefTaskSettings &settings);
58     void prepare();
59 
60     class LogParser : public ExternalToolLogParser {
61     public:
62         LogParser();
63         void parseOutput(const QString &partOfLog);
64         void parseErrOutput(const QString &partOfLog);
65     };
66 
67 protected slots:
68     QList<Task *> onSubTaskFinished(Task *subTask);
69 
70 private:
71     QList<ShortReadSet> downStreamList;
72     QList<ShortReadSet> upStreamList;
73     MultiTask *samMultiTask;
74     QStringList urlsToMerge;
75     MultiTask *alignMultiTask;
76     Task *mergeTask;
77     QString indexPath;
78     QList<ShortReadSet> readSets;
79     QString resultPath;
80     DnaAssemblyToRefTaskSettings settings;
81     inline QString getSAIPath(const QString &pathToReads);
82 };
83 
84 class BwaSwAlignTask : public ExternalToolSupportTask {
85     Q_OBJECT
86 public:
87     BwaSwAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings);
88     void prepare();
89 
90 private:
91     const QString indexPath;
92     DnaAssemblyToRefTaskSettings settings;
93 };
94 
95 class BwaMemAlignTask : public ExternalToolSupportTask {
96     Q_OBJECT
97 public:
98     BwaMemAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings);
99     void prepare();
100 
101 protected slots:
102     QList<Task *> onSubTaskFinished(Task *subTask);
103 
104 private:
105     MultiTask *alignMultiTask;
106     Task *mergeTask;
107     const QString indexPath;
108     QString resultPath;
109     QStringList bamUrlstoMerge;
110     DnaAssemblyToRefTaskSettings settings;
111 };
112 
113 class BwaTask : public DnaAssemblyToReferenceTask {
114     Q_OBJECT
115     DNA_ASSEMBLEY_TO_REF_TASK_FACTORY(BwaTask)
116 public:
117     BwaTask(const DnaAssemblyToRefTaskSettings &settings, bool justBuildIndex = false);
118 
119     void prepare();
120     ReportResult report();
121 protected slots:
122     QList<Task *> onSubTaskFinished(Task *subTask);
123 
124 public:
125     static const QString OPTION_INDEX_ALGORITHM;
126     static const QString OPTION_N;
127     static const QString OPTION_MAX_GAP_OPENS;
128     static const QString OPTION_MAX_GAP_EXTENSIONS;
129     static const QString OPTION_INDEL_OFFSET;
130     static const QString OPTION_MAX_LONG_DELETION_EXTENSIONS;
131     static const QString OPTION_SEED_LENGTH;
132     static const QString OPTION_MAX_SEED_DIFFERENCES;
133     static const QString OPTION_MAX_QUEUE_ENTRIES;
134     static const QString OPTION_BEST_HITS;
135     static const QString OPTION_QUALITY_THRESHOLD;
136     static const QString OPTION_BARCODE_LENGTH;
137     static const QString OPTION_LONG_SCALED_GAP_PENALTY_FOR_LONG_DELETIONS;
138     static const QString OPTION_NON_ITERATIVE_MODE;
139     static const QString OPTION_SW_ALIGNMENT;
140     static const QString OPTION_MEM_ALIGNMENT;
141     static const QString OPTION_MASK_LEVEL;
142     static const QString OPTION_CHUNK_SIZE;
143     static const QString OPTION_Z_BEST;
144     static const QString OPTION_REV_ALGN_THRESHOLD;
145     static const QString OPTION_PREFER_HARD_CLIPPING;
146     static const QString ALGORITHM_BWA_ALN, ALGORITHM_BWA_SW, ALGORITHM_BWA_MEM;
147 
148     //bwa-mem
149     static const QString OPTION_THREADS;
150     static const QString OPTION_MIN_SEED;
151     static const QString OPTION_BAND_WIDTH;
152     static const QString OPTION_DROPOFF;
153     static const QString OPTION_INTERNAL_SEED_LOOKUP;
154     static const QString OPTION_SKIP_SEED_THRESHOLD;
155     static const QString OPTION_DROP_CHAINS_THRESHOLD;
156     static const QString OPTION_MAX_MATE_RESCUES;
157     static const QString OPTION_SKIP_MATE_RESCUES;
158     static const QString OPTION_SKIP_PAIRING;
159     static const QString OPTION_MATCH_SCORE;
160     static const QString OPTION_MISMATCH_PENALTY;
161     static const QString OPTION_GAP_OPEN_PENALTY;
162     static const QString OPTION_GAP_EXTENSION_PENALTY;
163     static const QString OPTION_CLIPPING_PENALTY;
164     static const QString OPTION_UNPAIRED_PENALTY;
165     static const QString OPTION_SCORE_THRESHOLD;
166 
167     static const QStringList indexSuffixes;
168 
169     /*
170     -k INT     minimum seed length [19]
171           -w INT     band width for banded alignment [100]
172           -d INT     off-diagonal X-dropoff [100]
173           -r FLOAT   look for internal seeds inside a seed longer than {-k} * FLOAT [1.5]
174           -c INT     skip seeds with more than INT occurrences [10000]
175           -D FLOAT   drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50]
176           -m INT     perform at most INT rounds of mate rescues for each read [100]
177           -S         skip mate rescue
178           -P         skip pairing; mate rescue performed unless -S also in use
179           -A INT     score for a sequence match [1]
180           -B INT     penalty for a mismatch [4]
181           -O INT     gap open penalty [6]
182           -E INT     gap extension penalty; a gap of size k cost {-O} + {-E}*k [1]
183           -L INT     penalty for clipping [5]
184           -U INT     penalty for an unpaired read pair [17]
185 
186    Input/output options:
187 
188           -p         first query file consists of interleaved paired-end sequences
189           -R STR     read group header line such as '@RG\tID:foo\tSM:bar' [null]
190 
191           -v INT     verbose level: 1=error, 2=warning, 3=message, 4+=debugging [3]
192           -T INT     minimum score to output [30]
193           -a         output all alignments for SE or unpaired PE
194           -C         append FASTA/FASTQ comment to SAM output
195           -M         mark shorter split hits as secondary (for Picard/GATK compatibility)
196     */
197 
198 private:
199     BwaBuildIndexTask *buildIndexTask;
200     ExternalToolSupportTask *alignTask;
201 };
202 
203 class BwaTaskFactory : public DnaAssemblyToRefTaskFactory {
204 public:
205     DnaAssemblyToReferenceTask *createTaskInstance(const DnaAssemblyToRefTaskSettings &settings, bool justBuildIndex = false);
206 
207 protected:
208 };
209 
210 }    // namespace U2
211 
212 #endif    // _U2_BWA_TASK_H_
213