1 /** 2 * UGENE - Integrated Bioinformatics Tools. 3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru> 4 * http://ugene.net 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 19 * MA 02110-1301, USA. 20 */ 21 22 #ifndef _U2_BWA_TASK_H_ 23 #define _U2_BWA_TASK_H_ 24 25 #include <U2Algorithm/DnaAssemblyTask.h> 26 27 #include <U2Core/ExternalToolRunTask.h> 28 29 namespace U2 { 30 31 class MultiTask; 32 33 class BwaBuildIndexTask : public ExternalToolSupportTask { 34 Q_OBJECT 35 public: 36 BwaBuildIndexTask(const QString &referencePath, const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings); 37 38 void prepare(); 39 40 private: 41 class LogParser : public ExternalToolLogParser { 42 public: 43 LogParser(); 44 45 void parseOutput(const QString &partOfLog); 46 void parseErrOutput(const QString &partOfLog); 47 }; 48 49 QString referencePath; 50 QString indexPath; 51 DnaAssemblyToRefTaskSettings settings; 52 }; 53 54 class BwaAlignTask : public ExternalToolSupportTask { 55 Q_OBJECT 56 public: 57 BwaAlignTask(const QString &indexPath, const QList<ShortReadSet> &shortReadSets, const QString &resultPath, const DnaAssemblyToRefTaskSettings &settings); 58 void prepare(); 59 60 class LogParser : public ExternalToolLogParser { 61 public: 62 LogParser(); 63 void parseOutput(const QString &partOfLog); 64 void parseErrOutput(const QString &partOfLog); 65 }; 66 67 protected slots: 68 QList<Task *> onSubTaskFinished(Task *subTask); 69 70 private: 71 QList<ShortReadSet> downStreamList; 72 QList<ShortReadSet> upStreamList; 73 MultiTask *samMultiTask; 74 QStringList urlsToMerge; 75 MultiTask *alignMultiTask; 76 Task *mergeTask; 77 QString indexPath; 78 QList<ShortReadSet> readSets; 79 QString resultPath; 80 DnaAssemblyToRefTaskSettings settings; 81 inline QString getSAIPath(const QString &pathToReads); 82 }; 83 84 class BwaSwAlignTask : public ExternalToolSupportTask { 85 Q_OBJECT 86 public: 87 BwaSwAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings); 88 void prepare(); 89 90 private: 91 const QString indexPath; 92 DnaAssemblyToRefTaskSettings settings; 93 }; 94 95 class BwaMemAlignTask : public ExternalToolSupportTask { 96 Q_OBJECT 97 public: 98 BwaMemAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings); 99 void prepare(); 100 101 protected slots: 102 QList<Task *> onSubTaskFinished(Task *subTask); 103 104 private: 105 MultiTask *alignMultiTask; 106 Task *mergeTask; 107 const QString indexPath; 108 QString resultPath; 109 QStringList bamUrlstoMerge; 110 DnaAssemblyToRefTaskSettings settings; 111 }; 112 113 class BwaTask : public DnaAssemblyToReferenceTask { 114 Q_OBJECT 115 DNA_ASSEMBLEY_TO_REF_TASK_FACTORY(BwaTask) 116 public: 117 BwaTask(const DnaAssemblyToRefTaskSettings &settings, bool justBuildIndex = false); 118 119 void prepare(); 120 ReportResult report(); 121 protected slots: 122 QList<Task *> onSubTaskFinished(Task *subTask); 123 124 public: 125 static const QString OPTION_INDEX_ALGORITHM; 126 static const QString OPTION_N; 127 static const QString OPTION_MAX_GAP_OPENS; 128 static const QString OPTION_MAX_GAP_EXTENSIONS; 129 static const QString OPTION_INDEL_OFFSET; 130 static const QString OPTION_MAX_LONG_DELETION_EXTENSIONS; 131 static const QString OPTION_SEED_LENGTH; 132 static const QString OPTION_MAX_SEED_DIFFERENCES; 133 static const QString OPTION_MAX_QUEUE_ENTRIES; 134 static const QString OPTION_BEST_HITS; 135 static const QString OPTION_QUALITY_THRESHOLD; 136 static const QString OPTION_BARCODE_LENGTH; 137 static const QString OPTION_LONG_SCALED_GAP_PENALTY_FOR_LONG_DELETIONS; 138 static const QString OPTION_NON_ITERATIVE_MODE; 139 static const QString OPTION_SW_ALIGNMENT; 140 static const QString OPTION_MEM_ALIGNMENT; 141 static const QString OPTION_MASK_LEVEL; 142 static const QString OPTION_CHUNK_SIZE; 143 static const QString OPTION_Z_BEST; 144 static const QString OPTION_REV_ALGN_THRESHOLD; 145 static const QString OPTION_PREFER_HARD_CLIPPING; 146 static const QString ALGORITHM_BWA_ALN, ALGORITHM_BWA_SW, ALGORITHM_BWA_MEM; 147 148 //bwa-mem 149 static const QString OPTION_THREADS; 150 static const QString OPTION_MIN_SEED; 151 static const QString OPTION_BAND_WIDTH; 152 static const QString OPTION_DROPOFF; 153 static const QString OPTION_INTERNAL_SEED_LOOKUP; 154 static const QString OPTION_SKIP_SEED_THRESHOLD; 155 static const QString OPTION_DROP_CHAINS_THRESHOLD; 156 static const QString OPTION_MAX_MATE_RESCUES; 157 static const QString OPTION_SKIP_MATE_RESCUES; 158 static const QString OPTION_SKIP_PAIRING; 159 static const QString OPTION_MATCH_SCORE; 160 static const QString OPTION_MISMATCH_PENALTY; 161 static const QString OPTION_GAP_OPEN_PENALTY; 162 static const QString OPTION_GAP_EXTENSION_PENALTY; 163 static const QString OPTION_CLIPPING_PENALTY; 164 static const QString OPTION_UNPAIRED_PENALTY; 165 static const QString OPTION_SCORE_THRESHOLD; 166 167 static const QStringList indexSuffixes; 168 169 /* 170 -k INT minimum seed length [19] 171 -w INT band width for banded alignment [100] 172 -d INT off-diagonal X-dropoff [100] 173 -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] 174 -c INT skip seeds with more than INT occurrences [10000] 175 -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] 176 -m INT perform at most INT rounds of mate rescues for each read [100] 177 -S skip mate rescue 178 -P skip pairing; mate rescue performed unless -S also in use 179 -A INT score for a sequence match [1] 180 -B INT penalty for a mismatch [4] 181 -O INT gap open penalty [6] 182 -E INT gap extension penalty; a gap of size k cost {-O} + {-E}*k [1] 183 -L INT penalty for clipping [5] 184 -U INT penalty for an unpaired read pair [17] 185 186 Input/output options: 187 188 -p first query file consists of interleaved paired-end sequences 189 -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null] 190 191 -v INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [3] 192 -T INT minimum score to output [30] 193 -a output all alignments for SE or unpaired PE 194 -C append FASTA/FASTQ comment to SAM output 195 -M mark shorter split hits as secondary (for Picard/GATK compatibility) 196 */ 197 198 private: 199 BwaBuildIndexTask *buildIndexTask; 200 ExternalToolSupportTask *alignTask; 201 }; 202 203 class BwaTaskFactory : public DnaAssemblyToRefTaskFactory { 204 public: 205 DnaAssemblyToReferenceTask *createTaskInstance(const DnaAssemblyToRefTaskSettings &settings, bool justBuildIndex = false); 206 207 protected: 208 }; 209 210 } // namespace U2 211 212 #endif // _U2_BWA_TASK_H_ 213