1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #ifdef SW2_BUILD_WITH_CUDA
23 # include <cuda_runtime.h>
24 #endif
25
26 #include <QMap>
27 #include <QMutexLocker>
28 #include <QVariant>
29
30 #include <U2Algorithm/CudaGpuRegistry.h>
31 #include <U2Algorithm/SmithWatermanResult.h>
32 #include <U2Algorithm/SubstMatrixRegistry.h>
33
34 #include <U2Core/AppContext.h>
35 #include <U2Core/AppResources.h>
36 #include <U2Core/AppSettings.h>
37 #include <U2Core/Counter.h>
38 #include <U2Core/Log.h>
39 #include <U2Core/Timer.h>
40 #include <U2Core/U2DbiUtils.h>
41 #include <U2Core/U2OpStatusUtils.h>
42 #include <U2Core/U2SafePoints.h>
43 #include <U2Core/U2SequenceDbi.h>
44
45 #include "SWAlgorithmTask.h"
46 #include "SmithWatermanAlgorithmCUDA.h"
47 #include "SmithWatermanAlgorithmOPENCL.h"
48 #include "SmithWatermanAlgorithmSSE2.h"
49 #include "sw_cuda_cpp.h"
50
51 using namespace std;
52
53 const double B_TO_MB_FACTOR = 1048576.0;
54
55 namespace U2 {
56
57 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_GAP_OPEN("SW_gapOpen");
58 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_GAP_EXTD("SW_gapExtd");
59 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_SCORING_MATRIX_NAME("SW_scoringMatrix");
60 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_REALIZATION_NAME("SW_realizationName");
61 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_DEFAULT_RESULT_FILTER("filter-intersections");
62
SWAlgorithmTask(const SmithWatermanSettings & s,const QString & taskName,SW_AlgType _algType)63 SWAlgorithmTask::SWAlgorithmTask(const SmithWatermanSettings &s,
64 const QString &taskName,
65 SW_AlgType _algType)
66 : Task(taskName, TaskFlag_NoRun),
67 sWatermanConfig(s) {
68 GCOUNTER(cvar, "SWAlgorithmTask");
69
70 algType = _algType;
71 if (algType == SW_sse2) {
72 if (sWatermanConfig.ptrn.length() < 8) {
73 algType = SW_classic;
74 }
75 }
76
77 int maxScore = calculateMaxScore(s.ptrn, s.pSm);
78
79 minScore = (maxScore * s.percentOfScore) / 100;
80 if ((maxScore * (int)s.percentOfScore) % 100 != 0)
81 minScore += 1;
82
83 // acquiring resources for GPU computations
84 if (SW_cuda == algType) {
85 addTaskResource(TaskResourceUsage(RESOURCE_CUDA_GPU, 1, true /*prepareStage*/));
86 } else if (SW_opencl == algType) {
87 addTaskResource(TaskResourceUsage(RESOURCE_OPENCL_GPU, 1, true /*prepareStage*/));
88 }
89
90 setupTask(maxScore);
91 }
92
~SWAlgorithmTask()93 SWAlgorithmTask::~SWAlgorithmTask() {
94 delete sWatermanConfig.resultListener;
95 delete sWatermanConfig.resultCallback;
96 // we do not delete resultFilter here, because filters are stored in special registry
97 }
98
setupTask(int maxScore)99 void SWAlgorithmTask::setupTask(int maxScore) {
100 SequenceWalkerConfig c;
101 c.seq = sWatermanConfig.sqnc.constData();
102 c.seqSize = sWatermanConfig.sqnc.size();
103 c.range = sWatermanConfig.globalRegion;
104 c.complTrans = sWatermanConfig.complTT;
105 c.aminoTrans = sWatermanConfig.aminoTT;
106 c.strandToWalk = sWatermanConfig.strand;
107 algoLog.details(QString("Strand: %1 ").arg(c.strandToWalk));
108
109 quint64 overlapSize = calculateMatrixLength(sWatermanConfig.sqnc.length(),
110 sWatermanConfig.ptrn.length() * (sWatermanConfig.aminoTT == nullptr ? 1 : 3),
111 sWatermanConfig.gapModel.scoreGapOpen,
112 sWatermanConfig.gapModel.scoreGapExtd,
113 maxScore,
114 minScore);
115
116 // divide sequence by PARTS_NUMBER parts
117 int idealThreadCount = AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount();
118
119 qint64 partsNumber = 0;
120 double computationMatrixSquare = 0.0;
121
122 switch (algType) {
123 case SW_sse2:
124 computationMatrixSquare = 1619582300.0; // this constant is considered to be optimal computation matrix square (square = localSequence.length * pattern.length) for given algorithm realization and the least minimum score value
125 c.nThreads = idealThreadCount * 2.5;
126 break;
127 case SW_classic:
128 computationMatrixSquare = 751948900.29; // the same as previous
129 c.nThreads = idealThreadCount;
130 break;
131 case SW_cuda:
132 case SW_opencl:
133 computationMatrixSquare = 58484916.67; // the same as previous
134 c.nThreads = 1;
135 break;
136 default:
137 assert(0);
138 }
139
140 c.walkCircular = sWatermanConfig.searchCircular;
141 c.walkCircularDistance = c.walkCircular ? sWatermanConfig.ptrn.size() - 1 : 0;
142
143 partsNumber = static_cast<qint64>((sWatermanConfig.sqnc.size() + c.walkCircularDistance) / (computationMatrixSquare / sWatermanConfig.ptrn.size()) + 1.0);
144 if (partsNumber < c.nThreads) {
145 c.nThreads = partsNumber;
146 }
147
148 c.chunkSize = (c.seqSize + c.walkCircularDistance + overlapSize * (partsNumber - 1)) / partsNumber;
149 if (c.chunkSize <= overlapSize) {
150 c.chunkSize = overlapSize + 1;
151 }
152 if (c.chunkSize < (quint64)sWatermanConfig.ptrn.length() * (sWatermanConfig.aminoTT == nullptr ? 1 : 3)) {
153 c.chunkSize = sWatermanConfig.ptrn.length() * (sWatermanConfig.aminoTT == nullptr ? 1 : 3);
154 }
155
156 c.overlapSize = overlapSize;
157
158 c.lastChunkExtraLen = partsNumber - 1;
159
160 // acquiring memory resources for computations
161 switch (algType) {
162 case SW_cuda:
163 #ifdef SW2_BUILD_WITH_CUDA
164 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
165 SmithWatermanAlgorithmCUDA::estimateNeededRamAmount(sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads), sWatermanConfig.resultView),
166 true));
167 #endif
168 break;
169 case SW_opencl:
170 #ifdef SW2_BUILD_WITH_OPENCL
171 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
172 SmithWatermanAlgorithmOPENCL::estimateNeededRamAmount(sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads), sWatermanConfig.resultView),
173 true));
174 #endif
175 break;
176 case SW_classic:
177 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
178 SmithWatermanAlgorithm::estimateNeededRamAmount(sWatermanConfig.gapModel.scoreGapOpen,
179 sWatermanConfig.gapModel.scoreGapExtd,
180 minScore,
181 maxScore,
182 sWatermanConfig.ptrn,
183 sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads),
184 sWatermanConfig.resultView),
185 true));
186 break;
187 case SW_sse2:
188 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
189 SmithWatermanAlgorithmSSE2::estimateNeededRamAmount(sWatermanConfig.ptrn,
190 sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads),
191 sWatermanConfig.gapModel.scoreGapOpen,
192 sWatermanConfig.gapModel.scoreGapExtd,
193 minScore,
194 maxScore,
195 sWatermanConfig.resultView),
196 true));
197 break;
198 default:
199 assert(0);
200 }
201
202 t = new SequenceWalkerTask(c, this, tr("Smith Waterman2 SequenceWalker"));
203 addSubTask(t);
204 }
205
prepare()206 void SWAlgorithmTask::prepare() {
207 if (SW_cuda == algType) {
208 cudaGpu = AppContext::getCudaGpuRegistry()->acquireAnyReadyGpu();
209 assert(cudaGpu);
210 #ifdef SW2_BUILD_WITH_CUDA
211 const SequenceWalkerConfig &config = t->getConfig();
212 quint64 needMemBytes = SmithWatermanAlgorithmCUDA::estimateNeededGpuMemory(
213 sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(config.chunkSize * config.nThreads), sWatermanConfig.resultView);
214 quint64 gpuMemBytes = cudaGpu->getGlobalMemorySizeBytes();
215 if (gpuMemBytes < needMemBytes) {
216 stateInfo.setError(tr("Not enough memory on CUDA-enabled device. "
217 "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
218 .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number(cudaGpu->getId()), QString(cudaGpu->getName())));
219 return;
220 } else {
221 algoLog.details(tr("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on CUDA device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
222 }
223
224 coreLog.details(QString("GPU model: %1").arg(cudaGpu->getName()));
225
226 cudaSetDevice(cudaGpu->getId());
227 #else
228 assert(false);
229 #endif
230 } else if (SW_opencl == algType) {
231 #ifdef SW2_BUILD_WITH_OPENCL
232 openClGpu = AppContext::getOpenCLGpuRegistry()->acquireEnabledGpuIfReady();
233 SAFE_POINT(nullptr != openClGpu, "GPU isn't ready, abort.", );
234
235 const SequenceWalkerConfig &config = t->getConfig();
236 const quint64 needMemBytes = SmithWatermanAlgorithmOPENCL::estimateNeededGpuMemory(
237 sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(config.chunkSize * config.nThreads));
238 const quint64 gpuMemBytes = openClGpu->getGlobalMemorySizeBytes();
239
240 if (gpuMemBytes < needMemBytes) {
241 stateInfo.setError(QString("Not enough memory on OpenCL-enabled device. "
242 "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
243 .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number((qlonglong)openClGpu->getId()), QString(openClGpu->getName())));
244 return;
245 } else {
246 algoLog.details(QString("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on OpenCL device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
247 }
248
249 coreLog.details(QString("GPU model: %1").arg(openClGpu->getName()));
250 #else
251 assert(0);
252 #endif
253 }
254 }
255
getResult()256 QList<PairAlignSequences> &SWAlgorithmTask::getResult() {
257 removeResultFromOverlap(pairAlignSequences);
258 SmithWatermanAlgorithm::sortByScore(pairAlignSequences);
259
260 return pairAlignSequences;
261 }
262
onRegion(SequenceWalkerSubtask * t,TaskStateInfo & ti)263 void SWAlgorithmTask::onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti) {
264 Q_UNUSED(ti);
265
266 int regionLen = t->getRegionSequenceLen();
267 QByteArray localSeq(t->getRegionSequence(), regionLen);
268
269 SmithWatermanAlgorithm *sw = nullptr;
270 if (algType == SW_sse2) {
271 sw = new SmithWatermanAlgorithmSSE2;
272 } else if (algType == SW_cuda) {
273 #ifdef SW2_BUILD_WITH_CUDA
274 sw = new SmithWatermanAlgorithmCUDA;
275 #else
276 coreLog.error("CUDA was not enabled in this build");
277 return;
278 #endif // SW2_BUILD_WITH_CUDA
279 } else if (algType == SW_opencl) {
280 #ifdef SW2_BUILD_WITH_OPENCL
281 sw = new SmithWatermanAlgorithmOPENCL;
282 #else
283 coreLog.error("OPENCL was not enabled in this build");
284 return;
285 #endif // SW2_BUILD_WITH_OPENCL
286 } else {
287 assert(algType == SW_classic);
288 sw = new SmithWatermanAlgorithm;
289 }
290
291 // this substitution is needed for the case when annotation are required as result
292 // as well as pattern subsequence
293 const SmithWatermanSettings::SWResultView resultView =
294 (SmithWatermanSettings::ANNOTATIONS == sWatermanConfig.resultView && sWatermanConfig.includePatternContent) ? SmithWatermanSettings::MULTIPLE_ALIGNMENT : sWatermanConfig.resultView;
295
296 quint64 t1 = GTimer::currentTimeMicros();
297 sw->launch(sWatermanConfig.pSm, sWatermanConfig.ptrn, localSeq, sWatermanConfig.gapModel.scoreGapOpen + sWatermanConfig.gapModel.scoreGapExtd, sWatermanConfig.gapModel.scoreGapExtd, minScore, resultView);
298 QString algName;
299 if (algType == SW_cuda) {
300 algName = "CUDA";
301 } else {
302 algName = "Classic";
303 }
304 QString testName;
305 if (getParentTask() != nullptr) {
306 testName = getParentTask()->getTaskName();
307 } else {
308 testName = "SW alg";
309 }
310 perfLog.details(QString("\n%1 %2 run time is %3\n").arg(testName).arg(algName).arg(GTimer::secsBetween(t1, GTimer::currentTimeMicros())));
311
312 QList<PairAlignSequences> res = sw->getResults();
313
314 for (int i = 0; i < res.size(); i++) {
315 res[i].isDNAComplemented = t->isDNAComplemented();
316 res[i].isAminoTranslated = t->isAminoTranslated();
317
318 if (t->isAminoTranslated()) {
319 res[i].refSubseqInterval.startPos *= 3;
320 res[i].refSubseqInterval.length *= 3;
321 }
322
323 if (t->isDNAComplemented()) {
324 const U2Region &wr = t->getGlobalRegion();
325 res[i].refSubseqInterval.startPos =
326 wr.endPos() - res[i].refSubseqInterval.endPos() - sWatermanConfig.globalRegion.startPos;
327 } else {
328 res[i].refSubseqInterval.startPos +=
329 (t->getGlobalRegion().startPos - sWatermanConfig.globalRegion.startPos);
330 }
331 }
332
333 addResult(res);
334
335 /////////////////////
336 delete sw;
337 }
338
removeResultFromOverlap(QList<PairAlignSequences> & res)339 void SWAlgorithmTask::removeResultFromOverlap(QList<PairAlignSequences> &res) {
340 for (int i = 0; i < res.size() - 1; i++) {
341 for (int j = i + 1; j < res.size(); j++) {
342 if (res.at(i).refSubseqInterval == res.at(j).refSubseqInterval && res.at(i).isDNAComplemented == res.at(j).isDNAComplemented) {
343 if (res.at(i).score > res.at(j).score) {
344 res.removeAt(j);
345 j--;
346 } else {
347 res.removeAt(i);
348 i--;
349 j = res.size();
350 }
351 }
352 }
353 }
354 }
355
addResult(QList<PairAlignSequences> & res)356 void SWAlgorithmTask::addResult(QList<PairAlignSequences> &res) {
357 QMutexLocker ml(&lock);
358 pairAlignSequences += res;
359 pairAlignSequences += res;
360 }
361
calculateMatrixLength(int searchSeqLen,int patternSeqLen,int gapOpen,int gapExtension,int maxScore,int minScore)362 int SWAlgorithmTask::calculateMatrixLength(int searchSeqLen, int patternSeqLen, int gapOpen, int gapExtension, int maxScore, int minScore) {
363 int matrixLength = 0;
364
365 int gap = gapOpen;
366 if (gapOpen < gapExtension)
367 gap = gapExtension;
368
369 matrixLength = patternSeqLen + (maxScore - minScore) / gap * (-1) + 1;
370
371 if (searchSeqLen + 1 < matrixLength)
372 matrixLength = searchSeqLen + 1;
373
374 matrixLength += 1;
375
376 return matrixLength;
377 }
378
calculateMaxScore(const QByteArray & seq,const SMatrix & substitutionMatrix)379 int SWAlgorithmTask::calculateMaxScore(const QByteArray &seq, const SMatrix &substitutionMatrix) {
380 int maxScore = 0;
381 int max;
382 int substValue = 0;
383
384 QByteArray alphaChars = substitutionMatrix.getAlphabet()->getAlphabetChars();
385 for (int i = 0; i < seq.length(); i++) {
386 max = 0;
387 for (int j = 0; j < alphaChars.size(); j++) {
388 // TODO: use raw pointers!
389 char c1 = seq.at(i);
390 char c2 = alphaChars.at(j);
391 substValue = substitutionMatrix.getScore(c1, c2);
392 if (max < substValue)
393 max = substValue;
394 }
395 maxScore += max;
396 }
397 return maxScore;
398 }
399
report()400 Task::ReportResult SWAlgorithmTask::report() {
401 if (SW_cuda == algType) {
402 cudaGpu->setAcquired(false);
403 } else if (SW_opencl == algType) {
404 #ifdef SW2_BUILD_WITH_OPENCL
405 openClGpu->setAcquired(false);
406 #endif
407 }
408
409 SmithWatermanResultListener *rl = sWatermanConfig.resultListener;
410 QList<SmithWatermanResult> resultList = rl->getResults();
411
412 int resultsNum = resultList.size();
413 algoLog.details(tr("%1 results found").arg(resultsNum));
414
415 if (0 != sWatermanConfig.resultCallback) {
416 SmithWatermanReportCallback *rcb = sWatermanConfig.resultCallback;
417 QString res = rcb->report(resultList);
418 if (!res.isEmpty()) {
419 stateInfo.setError(res);
420 }
421 }
422
423 return ReportResult_Finished;
424 }
425
onSubTaskFinished(Task * subTask)426 QList<Task *> SWAlgorithmTask::onSubTaskFinished(Task *subTask) {
427 QList<Task *> res;
428 if (hasError() || isCanceled()) {
429 return res;
430 }
431
432 if (subTask == t) {
433 res.append(new SWResultsPostprocessingTask(sWatermanConfig, resultList, getResult()));
434 }
435 return res;
436 }
437
SWResultsPostprocessingTask(SmithWatermanSettings & _sWatermanConfig,QList<SmithWatermanResult> & _resultList,QList<PairAlignSequences> & _resPAS)438 SWResultsPostprocessingTask::SWResultsPostprocessingTask(SmithWatermanSettings &_sWatermanConfig,
439 QList<SmithWatermanResult> &_resultList,
440 QList<PairAlignSequences> &_resPAS)
441 : Task("SWResultsPostprocessing", TaskFlag_None), sWatermanConfig(_sWatermanConfig), resultList(_resultList), resPAS(_resPAS) {
442 }
443
prepare()444 void SWResultsPostprocessingTask::prepare() {
445 }
446
run()447 void SWResultsPostprocessingTask::run() {
448 for (QList<PairAlignSequences>::const_iterator i = resPAS.constBegin(); i != resPAS.constEnd(); ++i) {
449 SmithWatermanResult r;
450 r.strand = (*i).isDNAComplemented ? U2Strand::Complementary : U2Strand::Direct;
451 r.trans = (*i).isAminoTranslated;
452
453 r.refSubseq = (*i).refSubseqInterval;
454 r.refSubseq.startPos += sWatermanConfig.globalRegion.startPos;
455 r.isJoined = false;
456 if ((*i).refSubseqInterval.endPos() > sWatermanConfig.sqnc.size() && sWatermanConfig.searchCircular) {
457 int t = (*i).refSubseqInterval.endPos() - sWatermanConfig.sqnc.size();
458 r.refSubseq.length -= t;
459 r.isJoined = true;
460 r.refJoinedSubseq = U2Region(0, t);
461 }
462 r.ptrnSubseq = (*i).ptrnSubseqInterval;
463 r.score = (*i).score;
464 r.pairAlignment = (*i).pairAlignment;
465
466 resultList.append(r);
467 }
468
469 if (sWatermanConfig.resultFilter != 0) {
470 SmithWatermanResultFilter *rf = sWatermanConfig.resultFilter;
471 rf->applyFilter(&resultList);
472 }
473 for (const SmithWatermanResult &r : qAsConst(resultList)) { /* push results after filters */
474 sWatermanConfig.resultListener->pushResult(r);
475 }
476 }
477
PairwiseAlignmentSmithWatermanTaskSettings(const PairwiseAlignmentTaskSettings & s)478 PairwiseAlignmentSmithWatermanTaskSettings::PairwiseAlignmentSmithWatermanTaskSettings(const PairwiseAlignmentTaskSettings &s)
479 : PairwiseAlignmentTaskSettings(s),
480 reportCallback(nullptr),
481 resultListener(nullptr),
482 resultFilter(nullptr),
483 gapOpen(0),
484 gapExtd(0),
485 percentOfScore(0) {
486 }
487
~PairwiseAlignmentSmithWatermanTaskSettings()488 PairwiseAlignmentSmithWatermanTaskSettings::~PairwiseAlignmentSmithWatermanTaskSettings() {
489 // all dynamic objects in the world will be destroyed by the task
490 }
491
convertCustomSettings()492 bool PairwiseAlignmentSmithWatermanTaskSettings::convertCustomSettings() {
493 if ((customSettings.contains(PA_SW_GAP_OPEN) == false) ||
494 (customSettings.contains(PA_SW_GAP_EXTD) == false) ||
495 (customSettings.contains(PA_SW_SCORING_MATRIX_NAME) == false)) {
496 return false;
497 }
498 gapOpen = customSettings.value(PA_SW_GAP_OPEN).toInt();
499 gapExtd = customSettings.value(PA_SW_GAP_EXTD).toInt();
500 sMatrixName = customSettings.value(PA_SW_SCORING_MATRIX_NAME).toString();
501 sMatrix = AppContext::getSubstMatrixRegistry()->getMatrix(sMatrixName);
502 SAFE_POINT(!sMatrix.isEmpty(), "No matrix found", false);
503
504 PairwiseAlignmentTaskSettings::convertCustomSettings();
505 return true;
506 }
507
PairwiseAlignmentSmithWatermanTask(PairwiseAlignmentSmithWatermanTaskSettings * _settings,SW_AlgType _algType)508 PairwiseAlignmentSmithWatermanTask::PairwiseAlignmentSmithWatermanTask(PairwiseAlignmentSmithWatermanTaskSettings *_settings, SW_AlgType _algType)
509 : PairwiseAlignmentTask(TaskFlag_NoRun), settings(_settings) {
510 GCOUNTER(cvar, "SWAlgorithmTask");
511
512 assert(settings != nullptr);
513 bool isValid = settings->convertCustomSettings();
514 assert(isValid == true);
515 Q_UNUSED(isValid);
516
517 U2OpStatus2Log os;
518 DbiConnection con(settings->msaRef.dbiRef, os);
519 CHECK_OP(os, );
520 U2Sequence sequence = con.dbi->getSequenceDbi()->getSequenceObject(settings->firstSequenceRef.entityId, os);
521 CHECK_OP(os, );
522 first = con.dbi->getSequenceDbi()->getSequenceData(sequence.id, U2Region(0, sequence.length), os);
523 CHECK_OP(os, );
524
525 sequence = con.dbi->getSequenceDbi()->getSequenceObject(settings->secondSequenceRef.entityId, os);
526 CHECK_OP(os, );
527 second = con.dbi->getSequenceDbi()->getSequenceData(sequence.id, U2Region(0, sequence.length), os);
528 CHECK_OP(os, );
529 con.close(os);
530
531 sqnc = nullptr;
532 ptrn = nullptr;
533 if (first.length() < second.length()) {
534 sqnc = &second;
535 ptrn = &first;
536 } else {
537 sqnc = &first;
538 ptrn = &second;
539 }
540
541 algType = _algType;
542 if (algType == SW_sse2) {
543 if (ptrn->length() < 8) {
544 algType = SW_classic;
545 settings->setCustomValue("realizationName", "SW_classic");
546 settings->realizationName = SW_classic;
547 }
548 }
549 SAFE_POINT(!settings->sMatrix.isEmpty(), tr("Substitution matrix is empty"), );
550 maxScore = calculateMaxScore(*ptrn, settings->sMatrix);
551
552 minScore = (maxScore * settings->percentOfScore) / 100;
553 if ((maxScore * settings->percentOfScore) % 100 != 0) {
554 minScore += 1;
555 }
556
557 // acquiring resources for GPU computations
558 if (SW_cuda == algType) {
559 addTaskResource(TaskResourceUsage(RESOURCE_CUDA_GPU, 1, true /*prepareStage*/));
560 } else if (SW_opencl == algType) {
561 addTaskResource(TaskResourceUsage(RESOURCE_OPENCL_GPU, 1, true /*prepareStage*/));
562 }
563
564 setupTask();
565 }
566
~PairwiseAlignmentSmithWatermanTask()567 PairwiseAlignmentSmithWatermanTask::~PairwiseAlignmentSmithWatermanTask() {
568 delete settings->reportCallback;
569 delete settings->resultListener;
570 // result filter stored in registry, don`t delete it here
571 delete settings;
572 }
573
onRegion(SequenceWalkerSubtask * t,TaskStateInfo & ti)574 void PairwiseAlignmentSmithWatermanTask::onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti) {
575 Q_UNUSED(ti);
576
577 int regionLen = t->getRegionSequenceLen();
578 QByteArray localSeq(t->getRegionSequence(), regionLen);
579
580 SmithWatermanAlgorithm *sw = nullptr;
581 if (algType == SW_sse2) {
582 sw = new SmithWatermanAlgorithmSSE2;
583 } else if (algType == SW_cuda) {
584 #ifdef SW2_BUILD_WITH_CUDA
585 sw = new SmithWatermanAlgorithmCUDA;
586 #else
587 coreLog.error("CUDA was not enabled in this build");
588 return;
589 #endif // SW2_BUILD_WITH_CUDA
590 } else if (algType == SW_opencl) {
591 #ifdef SW2_BUILD_WITH_OPENCL
592 sw = new SmithWatermanAlgorithmOPENCL;
593 #else
594 coreLog.error("OPENCL was not enabled in this build");
595 return;
596 #endif // SW2_BUILD_WITH_OPENCL
597 } else {
598 assert(algType == SW_classic);
599 sw = new SmithWatermanAlgorithm;
600 }
601
602 quint64 t1 = GTimer::currentTimeMicros();
603 sw->launch(settings->sMatrix, *ptrn, localSeq, settings->gapOpen + settings->gapExtd, settings->gapExtd, minScore, SmithWatermanSettings::MULTIPLE_ALIGNMENT);
604 QString algName;
605 if (algType == SW_cuda) {
606 algName = "CUDA";
607 } else {
608 algName = "Classic";
609 }
610 QString testName;
611 if (getParentTask() != nullptr) {
612 testName = getParentTask()->getTaskName();
613 } else {
614 testName = "SW alg";
615 }
616 perfLog.details(QString("\n%1 %2 run time is %3\n").arg(testName).arg(algName).arg(GTimer::secsBetween(t1, GTimer::currentTimeMicros())));
617
618 QList<PairAlignSequences> res = sw->getResults();
619 res = expandResults(res);
620
621 for (int i = 0; i < res.size(); i++) {
622 res[i].isDNAComplemented = t->isDNAComplemented();
623 res[i].isAminoTranslated = t->isAminoTranslated();
624
625 if (t->isAminoTranslated()) {
626 res[i].refSubseqInterval.startPos *= 3;
627 res[i].refSubseqInterval.length *= 3;
628 }
629
630 if (t->isDNAComplemented()) {
631 const U2Region &wr = t->getGlobalRegion();
632 res[i].refSubseqInterval.startPos =
633 wr.endPos() - res[i].refSubseqInterval.endPos();
634 } else {
635 res[i].refSubseqInterval.startPos +=
636 (t->getGlobalRegion().startPos);
637 }
638 }
639
640 addResult(res);
641
642 /////////////////////
643 delete sw;
644 }
645
calculateMaxScore(const QByteArray & seq,const SMatrix & substitutionMatrix)646 int PairwiseAlignmentSmithWatermanTask::calculateMaxScore(const QByteArray &seq, const SMatrix &substitutionMatrix) {
647 int maxScore = 0;
648 int max;
649 int substValue = 0;
650
651 QByteArray alphaChars = substitutionMatrix.getAlphabet()->getAlphabetChars();
652 for (int i = 0; i < seq.length(); i++) {
653 max = 0;
654 for (int j = 0; j < alphaChars.size(); j++) {
655 // TODO: use raw pointers!
656 char c1 = seq.at(i);
657 char c2 = alphaChars.at(j);
658 substValue = substitutionMatrix.getScore(c1, c2);
659 if (max < substValue)
660 max = substValue;
661 }
662 maxScore += max;
663 }
664 return maxScore;
665 }
666
setupTask()667 void PairwiseAlignmentSmithWatermanTask::setupTask() {
668 SequenceWalkerConfig c;
669 c.seq = *sqnc;
670 c.seqSize = sqnc->size();
671 c.range = U2Region(0, sqnc->size());
672 c.complTrans = nullptr;
673 c.aminoTrans = nullptr;
674 c.strandToWalk = StrandOption_DirectOnly;
675
676 quint64 overlapSize = calculateMatrixLength(*sqnc, *ptrn, settings->gapOpen, settings->gapExtd, maxScore, minScore);
677
678 // divide sequence by PARTS_NUMBER parts
679 int idealThreadCount = AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount();
680
681 qint64 partsNumber = 0;
682 double computationMatrixSquare = 0.0;
683
684 switch (algType) {
685 case SW_sse2:
686 computationMatrixSquare = 16195823.0; // this constant is considered to be optimal computation matrix square (square = localSequence.length * pattern.length) for given algorithm realization and the least minimum score value
687 c.nThreads = idealThreadCount * 2.5;
688 break;
689 case SW_classic:
690 computationMatrixSquare = 7519489.29; // the same as previous
691 c.nThreads = idealThreadCount;
692 break;
693 case SW_cuda:
694 case SW_opencl:
695 computationMatrixSquare = 58484916.67; // the same as previous
696 c.nThreads = 1;
697 break;
698 default:
699 assert(0);
700 }
701
702 partsNumber = static_cast<qint64>(sqnc->size() / (computationMatrixSquare / ptrn->size()) + 1.0);
703 if (partsNumber < c.nThreads) {
704 c.nThreads = partsNumber;
705 }
706
707 c.chunkSize = (c.seqSize + overlapSize * (partsNumber - 1)) / partsNumber;
708 if (c.chunkSize <= overlapSize) {
709 c.chunkSize = overlapSize + 1;
710 }
711 c.overlapSize = overlapSize;
712
713 c.lastChunkExtraLen = partsNumber - 1;
714
715 // acquiring memory resources for computations
716 switch (algType) {
717 case SW_cuda:
718 #ifdef SW2_BUILD_WITH_CUDA
719 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
720 SmithWatermanAlgorithmCUDA::estimateNeededRamAmount(settings->sMatrix, *ptrn, sqnc->left(c.chunkSize * c.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT),
721 true));
722 #endif
723 break;
724 case SW_opencl:
725 #ifdef SW2_BUILD_WITH_OPENCL
726 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
727 SmithWatermanAlgorithmOPENCL::estimateNeededRamAmount(settings->sMatrix, *ptrn, sqnc->left(c.chunkSize * c.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT),
728 true));
729 #endif
730 break;
731 case SW_classic:
732 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
733 SmithWatermanAlgorithm::estimateNeededRamAmount(settings->gapOpen, settings->gapExtd, minScore, maxScore, *ptrn, sqnc->left(c.chunkSize * c.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT),
734 true));
735 break;
736 case SW_sse2:
737 addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
738 SmithWatermanAlgorithmSSE2::estimateNeededRamAmount(*ptrn,
739 sqnc->left(c.chunkSize * c.nThreads),
740 settings->gapOpen,
741 settings->gapExtd,
742 minScore,
743 maxScore,
744 SmithWatermanSettings::MULTIPLE_ALIGNMENT),
745 true));
746 break;
747 default:
748 assert(0);
749 }
750
751 t = new SequenceWalkerTask(c, this, tr("Smith Waterman2 SequenceWalker"));
752 addSubTask(t);
753 }
754
calculateMatrixLength(const QByteArray & searchSeq,const QByteArray & patternSeq,int gapOpen,int gapExtension,int maxScore,int minScore)755 int PairwiseAlignmentSmithWatermanTask::calculateMatrixLength(const QByteArray &searchSeq, const QByteArray &patternSeq, int gapOpen, int gapExtension, int maxScore, int minScore) {
756 int matrixLength = 0;
757
758 int gap = gapOpen;
759 if (gapOpen < gapExtension)
760 gap = gapExtension;
761
762 matrixLength = patternSeq.length() + (maxScore - minScore) / gap * (-1) + 1;
763
764 if (searchSeq.length() + 1 < matrixLength)
765 matrixLength = searchSeq.length() + 1;
766
767 matrixLength += 1;
768
769 return matrixLength;
770 }
771
prepare()772 void PairwiseAlignmentSmithWatermanTask::prepare() {
773 if (SW_cuda == algType) {
774 cudaGpu = AppContext::getCudaGpuRegistry()->acquireAnyReadyGpu();
775 assert(cudaGpu);
776 #ifdef SW2_BUILD_WITH_CUDA
777 const SequenceWalkerConfig &config = t->getConfig();
778 quint64 needMemBytes = SmithWatermanAlgorithmCUDA::estimateNeededGpuMemory(
779 settings->sMatrix, *ptrn, sqnc->left(config.chunkSize * config.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT);
780 quint64 gpuMemBytes = cudaGpu->getGlobalMemorySizeBytes();
781 if (gpuMemBytes < needMemBytes) {
782 stateInfo.setError(tr("Not enough memory on CUDA-enabled device. "
783 "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
784 .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number(cudaGpu->getId()), QString(cudaGpu->getName())));
785 return;
786 } else {
787 algoLog.details(tr("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on CUDA device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
788 }
789
790 coreLog.details(QString("GPU model: %1").arg(cudaGpu->getName()));
791
792 cudaSetDevice(cudaGpu->getId());
793 #else
794 assert(false);
795 #endif
796 } else if (SW_opencl == algType) {
797 #ifdef SW2_BUILD_WITH_OPENCL
798 openClGpu = AppContext::getOpenCLGpuRegistry()->acquireEnabledGpuIfReady();
799 SAFE_POINT(nullptr != openClGpu, "GPU isn't ready, abort.", );
800
801 const SequenceWalkerConfig &config = t->getConfig();
802 const quint64 needMemBytes = SmithWatermanAlgorithmOPENCL::estimateNeededGpuMemory(
803 settings->sMatrix, *ptrn, sqnc->left(config.chunkSize * config.nThreads));
804 const quint64 gpuMemBytes = openClGpu->getGlobalMemorySizeBytes();
805
806 if (gpuMemBytes < needMemBytes) {
807 stateInfo.setError(QString("Not enough memory on OpenCL-enabled device. "
808 "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
809 .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number((qlonglong)openClGpu->getId()), QString(openClGpu->getName())));
810 return;
811 } else {
812 algoLog.details(QString("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on OpenCL device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
813 }
814
815 coreLog.details(QString("GPU model: %1").arg(openClGpu->getName()));
816 #else
817 assert(0);
818 #endif
819 }
820 }
821
getResult()822 QList<PairAlignSequences> &PairwiseAlignmentSmithWatermanTask::getResult() {
823 removeResultFromOverlap(pairAlignSequences);
824 SmithWatermanAlgorithm::sortByScore(pairAlignSequences);
825
826 return pairAlignSequences;
827 }
828
report()829 Task::ReportResult PairwiseAlignmentSmithWatermanTask::report() {
830 if (SW_cuda == algType) {
831 cudaGpu->setAcquired(false);
832 } else if (SW_opencl == algType) {
833 #ifdef SW2_BUILD_WITH_OPENCL
834 openClGpu->setAcquired(false);
835 #endif
836 }
837
838 assert(settings->resultListener != nullptr);
839 QList<SmithWatermanResult> resultList = settings->resultListener->getResults();
840
841 int resultsNum = resultList.size();
842 algoLog.details(tr("%1 results found").arg(resultsNum));
843
844 if (0 != settings->reportCallback) {
845 QString res = settings->reportCallback->report(resultList);
846 if (!res.isEmpty()) {
847 stateInfo.setError(res);
848 }
849 }
850
851 return ReportResult_Finished;
852 }
853
addResult(QList<PairAlignSequences> & res)854 void PairwiseAlignmentSmithWatermanTask::addResult(QList<PairAlignSequences> &res) {
855 QMutexLocker ml(&lock);
856 pairAlignSequences += res;
857 }
858
onSubTaskFinished(Task * subTask)859 QList<Task *> PairwiseAlignmentSmithWatermanTask::onSubTaskFinished(Task *subTask) {
860 QList<Task *> res;
861 if (hasError() || isCanceled()) {
862 return res;
863 }
864
865 if (subTask == t) {
866 res.append(new PairwiseAlignmentSWResultsPostprocessingTask(settings->resultFilter, settings->resultListener, resultList, getResult()));
867 }
868 return res;
869 }
870
removeResultFromOverlap(QList<PairAlignSequences> & res)871 void PairwiseAlignmentSmithWatermanTask::removeResultFromOverlap(QList<PairAlignSequences> &res) {
872 for (int i = 0; i < res.size() - 1; i++) {
873 for (int j = i + 1; j < res.size(); j++) {
874 if (res.at(i).refSubseqInterval == res.at(j).refSubseqInterval && res.at(i).isDNAComplemented == res.at(j).isDNAComplemented /*&&
875 res.at(i).pairAlignment == res.at(j).pairAlignment*/
876 ) {
877 if (res.at(i).score > res.at(j).score) {
878 res.removeAt(j);
879 j--;
880 } else {
881 res.removeAt(i);
882 i--;
883 j = res.size();
884 }
885 }
886 }
887 }
888 }
889
expandResults(QList<PairAlignSequences> & res)890 QList<PairAlignSequences> PairwiseAlignmentSmithWatermanTask::expandResults(QList<PairAlignSequences> &res) {
891 bool resIsEmpty = false;
892 if (res.size() == 0) {
893 PairAlignSequences p;
894 p.refSubseqInterval.startPos = sqnc->length();
895 p.refSubseqInterval.length = 0;
896 p.ptrnSubseqInterval.startPos = ptrn->length();
897 p.ptrnSubseqInterval.length = 0;
898 res += p;
899 resIsEmpty = true;
900 }
901 for (int i = 0; i < res.size(); i++) {
902 if (res[i].ptrnSubseqInterval.length < ptrn->length() && res[i].refSubseqInterval.length < sqnc->length() &&
903 resIsEmpty == false) {
904 for (int j = 0; j < res[i].ptrnSubseqInterval.startPos && j < res[i].refSubseqInterval.startPos;) {
905 res[i].pairAlignment.append(PairAlignSequences::DIAG);
906 res[i].ptrnSubseqInterval.startPos--;
907 res[i].ptrnSubseqInterval.length++;
908 res[i].refSubseqInterval.startPos--;
909 res[i].refSubseqInterval.length++;
910 }
911 for (int j = ptrn->length(); j > res[i].ptrnSubseqInterval.endPos() && j > res[i].refSubseqInterval.endPos();) {
912 res[i].pairAlignment.prepend(PairAlignSequences::DIAG);
913 res[i].ptrnSubseqInterval.length++;
914 res[i].refSubseqInterval.length++;
915 }
916 }
917 if (res[i].ptrnSubseqInterval.length < ptrn->length()) {
918 for (int j = 0; j < res[i].ptrnSubseqInterval.startPos;) {
919 res[i].pairAlignment.append(PairAlignSequences::LEFT);
920 res[i].ptrnSubseqInterval.startPos--;
921 res[i].ptrnSubseqInterval.length++;
922 }
923 for (int j = ptrn->length(); j > res[i].ptrnSubseqInterval.endPos();) {
924 res[i].pairAlignment.prepend(PairAlignSequences::LEFT);
925 res[i].ptrnSubseqInterval.length++;
926 }
927 }
928 if (res[i].refSubseqInterval.length < sqnc->length()) {
929 for (int j = 0; j < res[i].refSubseqInterval.startPos;) {
930 res[i].pairAlignment.append(PairAlignSequences::UP);
931 res[i].refSubseqInterval.startPos--;
932 res[i].refSubseqInterval.length++;
933 }
934 for (int j = sqnc->length(); j > res[i].refSubseqInterval.endPos();) {
935 res[i].pairAlignment.prepend(PairAlignSequences::UP);
936 res[i].refSubseqInterval.length++;
937 }
938 }
939 }
940 return res;
941 }
942
PairwiseAlignmentSWResultsPostprocessingTask(SmithWatermanResultFilter * _rf,SmithWatermanResultListener * _rl,QList<SmithWatermanResult> & _resultList,QList<PairAlignSequences> & _resPAS)943 PairwiseAlignmentSWResultsPostprocessingTask::PairwiseAlignmentSWResultsPostprocessingTask(SmithWatermanResultFilter *_rf,
944 SmithWatermanResultListener *_rl,
945 QList<SmithWatermanResult> &_resultList,
946 QList<PairAlignSequences> &_resPAS)
947 : Task("PairwiseAlignmentSWResultsPostprocessing", TaskFlag_None), rf(_rf), rl(_rl), resultList(_resultList), resPAS(_resPAS) {
948 }
949
run()950 void PairwiseAlignmentSWResultsPostprocessingTask::run() {
951 for (QList<PairAlignSequences>::const_iterator i = resPAS.constBegin(); i != resPAS.constEnd(); ++i) {
952 SmithWatermanResult r;
953 r.strand = (*i).isDNAComplemented ? U2Strand::Complementary : U2Strand::Direct;
954 r.trans = (*i).isAminoTranslated;
955 r.refSubseq = (*i).refSubseqInterval;
956 r.refSubseq.startPos = 0;
957 r.ptrnSubseq = (*i).ptrnSubseqInterval;
958 r.score = (*i).score;
959 r.pairAlignment = (*i).pairAlignment;
960
961 resultList.append(r);
962 }
963
964 if (rf != 0) {
965 rf->applyFilter(&resultList);
966 }
967 foreach (const SmithWatermanResult &r, resultList) { /* push results after filters */
968 rl->pushResult(r);
969 }
970 }
971
prepare()972 void PairwiseAlignmentSWResultsPostprocessingTask::prepare() {
973 }
974
975 } // namespace U2
976