1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifdef SW2_BUILD_WITH_CUDA
23 #    include <cuda_runtime.h>
24 #endif
25 
26 #include <QMap>
27 #include <QMutexLocker>
28 #include <QVariant>
29 
30 #include <U2Algorithm/CudaGpuRegistry.h>
31 #include <U2Algorithm/SmithWatermanResult.h>
32 #include <U2Algorithm/SubstMatrixRegistry.h>
33 
34 #include <U2Core/AppContext.h>
35 #include <U2Core/AppResources.h>
36 #include <U2Core/AppSettings.h>
37 #include <U2Core/Counter.h>
38 #include <U2Core/Log.h>
39 #include <U2Core/Timer.h>
40 #include <U2Core/U2DbiUtils.h>
41 #include <U2Core/U2OpStatusUtils.h>
42 #include <U2Core/U2SafePoints.h>
43 #include <U2Core/U2SequenceDbi.h>
44 
45 #include "SWAlgorithmTask.h"
46 #include "SmithWatermanAlgorithmCUDA.h"
47 #include "SmithWatermanAlgorithmOPENCL.h"
48 #include "SmithWatermanAlgorithmSSE2.h"
49 #include "sw_cuda_cpp.h"
50 
51 using namespace std;
52 
53 const double B_TO_MB_FACTOR = 1048576.0;
54 
55 namespace U2 {
56 
57 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_GAP_OPEN("SW_gapOpen");
58 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_GAP_EXTD("SW_gapExtd");
59 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_SCORING_MATRIX_NAME("SW_scoringMatrix");
60 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_REALIZATION_NAME("SW_realizationName");
61 const QString PairwiseAlignmentSmithWatermanTaskSettings::PA_SW_DEFAULT_RESULT_FILTER("filter-intersections");
62 
SWAlgorithmTask(const SmithWatermanSettings & s,const QString & taskName,SW_AlgType _algType)63 SWAlgorithmTask::SWAlgorithmTask(const SmithWatermanSettings &s,
64                                  const QString &taskName,
65                                  SW_AlgType _algType)
66     : Task(taskName, TaskFlag_NoRun),
67       sWatermanConfig(s) {
68     GCOUNTER(cvar, "SWAlgorithmTask");
69 
70     algType = _algType;
71     if (algType == SW_sse2) {
72         if (sWatermanConfig.ptrn.length() < 8) {
73             algType = SW_classic;
74         }
75     }
76 
77     int maxScore = calculateMaxScore(s.ptrn, s.pSm);
78 
79     minScore = (maxScore * s.percentOfScore) / 100;
80     if ((maxScore * (int)s.percentOfScore) % 100 != 0)
81         minScore += 1;
82 
83     // acquiring resources for GPU computations
84     if (SW_cuda == algType) {
85         addTaskResource(TaskResourceUsage(RESOURCE_CUDA_GPU, 1, true /*prepareStage*/));
86     } else if (SW_opencl == algType) {
87         addTaskResource(TaskResourceUsage(RESOURCE_OPENCL_GPU, 1, true /*prepareStage*/));
88     }
89 
90     setupTask(maxScore);
91 }
92 
~SWAlgorithmTask()93 SWAlgorithmTask::~SWAlgorithmTask() {
94     delete sWatermanConfig.resultListener;
95     delete sWatermanConfig.resultCallback;
96     // we do not delete resultFilter here, because filters are stored in special registry
97 }
98 
setupTask(int maxScore)99 void SWAlgorithmTask::setupTask(int maxScore) {
100     SequenceWalkerConfig c;
101     c.seq = sWatermanConfig.sqnc.constData();
102     c.seqSize = sWatermanConfig.sqnc.size();
103     c.range = sWatermanConfig.globalRegion;
104     c.complTrans = sWatermanConfig.complTT;
105     c.aminoTrans = sWatermanConfig.aminoTT;
106     c.strandToWalk = sWatermanConfig.strand;
107     algoLog.details(QString("Strand: %1 ").arg(c.strandToWalk));
108 
109     quint64 overlapSize = calculateMatrixLength(sWatermanConfig.sqnc.length(),
110                                                 sWatermanConfig.ptrn.length() * (sWatermanConfig.aminoTT == nullptr ? 1 : 3),
111                                                 sWatermanConfig.gapModel.scoreGapOpen,
112                                                 sWatermanConfig.gapModel.scoreGapExtd,
113                                                 maxScore,
114                                                 minScore);
115 
116     // divide sequence by PARTS_NUMBER parts
117     int idealThreadCount = AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount();
118 
119     qint64 partsNumber = 0;
120     double computationMatrixSquare = 0.0;
121 
122     switch (algType) {
123         case SW_sse2:
124             computationMatrixSquare = 1619582300.0;  // this constant is considered to be optimal computation matrix square (square = localSequence.length * pattern.length) for given algorithm realization and the least minimum score value
125             c.nThreads = idealThreadCount * 2.5;
126             break;
127         case SW_classic:
128             computationMatrixSquare = 751948900.29;  // the same as previous
129             c.nThreads = idealThreadCount;
130             break;
131         case SW_cuda:
132         case SW_opencl:
133             computationMatrixSquare = 58484916.67;  // the same as previous
134             c.nThreads = 1;
135             break;
136         default:
137             assert(0);
138     }
139 
140     c.walkCircular = sWatermanConfig.searchCircular;
141     c.walkCircularDistance = c.walkCircular ? sWatermanConfig.ptrn.size() - 1 : 0;
142 
143     partsNumber = static_cast<qint64>((sWatermanConfig.sqnc.size() + c.walkCircularDistance) / (computationMatrixSquare / sWatermanConfig.ptrn.size()) + 1.0);
144     if (partsNumber < c.nThreads) {
145         c.nThreads = partsNumber;
146     }
147 
148     c.chunkSize = (c.seqSize + c.walkCircularDistance + overlapSize * (partsNumber - 1)) / partsNumber;
149     if (c.chunkSize <= overlapSize) {
150         c.chunkSize = overlapSize + 1;
151     }
152     if (c.chunkSize < (quint64)sWatermanConfig.ptrn.length() * (sWatermanConfig.aminoTT == nullptr ? 1 : 3)) {
153         c.chunkSize = sWatermanConfig.ptrn.length() * (sWatermanConfig.aminoTT == nullptr ? 1 : 3);
154     }
155 
156     c.overlapSize = overlapSize;
157 
158     c.lastChunkExtraLen = partsNumber - 1;
159 
160     // acquiring memory resources for computations
161     switch (algType) {
162         case SW_cuda:
163 #ifdef SW2_BUILD_WITH_CUDA
164             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
165                                               SmithWatermanAlgorithmCUDA::estimateNeededRamAmount(sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads), sWatermanConfig.resultView),
166                                               true));
167 #endif
168             break;
169         case SW_opencl:
170 #ifdef SW2_BUILD_WITH_OPENCL
171             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
172                                               SmithWatermanAlgorithmOPENCL::estimateNeededRamAmount(sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads), sWatermanConfig.resultView),
173                                               true));
174 #endif
175             break;
176         case SW_classic:
177             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
178                                               SmithWatermanAlgorithm::estimateNeededRamAmount(sWatermanConfig.gapModel.scoreGapOpen,
179                                                                                               sWatermanConfig.gapModel.scoreGapExtd,
180                                                                                               minScore,
181                                                                                               maxScore,
182                                                                                               sWatermanConfig.ptrn,
183                                                                                               sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads),
184                                                                                               sWatermanConfig.resultView),
185                                               true));
186             break;
187         case SW_sse2:
188             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
189                                               SmithWatermanAlgorithmSSE2::estimateNeededRamAmount(sWatermanConfig.ptrn,
190                                                                                                   sWatermanConfig.sqnc.left(c.chunkSize * c.nThreads),
191                                                                                                   sWatermanConfig.gapModel.scoreGapOpen,
192                                                                                                   sWatermanConfig.gapModel.scoreGapExtd,
193                                                                                                   minScore,
194                                                                                                   maxScore,
195                                                                                                   sWatermanConfig.resultView),
196                                               true));
197             break;
198         default:
199             assert(0);
200     }
201 
202     t = new SequenceWalkerTask(c, this, tr("Smith Waterman2 SequenceWalker"));
203     addSubTask(t);
204 }
205 
prepare()206 void SWAlgorithmTask::prepare() {
207     if (SW_cuda == algType) {
208         cudaGpu = AppContext::getCudaGpuRegistry()->acquireAnyReadyGpu();
209         assert(cudaGpu);
210 #ifdef SW2_BUILD_WITH_CUDA
211         const SequenceWalkerConfig &config = t->getConfig();
212         quint64 needMemBytes = SmithWatermanAlgorithmCUDA::estimateNeededGpuMemory(
213             sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(config.chunkSize * config.nThreads), sWatermanConfig.resultView);
214         quint64 gpuMemBytes = cudaGpu->getGlobalMemorySizeBytes();
215         if (gpuMemBytes < needMemBytes) {
216             stateInfo.setError(tr("Not enough memory on CUDA-enabled device. "
217                                   "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
218                                    .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number(cudaGpu->getId()), QString(cudaGpu->getName())));
219             return;
220         } else {
221             algoLog.details(tr("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on CUDA device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
222         }
223 
224         coreLog.details(QString("GPU model: %1").arg(cudaGpu->getName()));
225 
226         cudaSetDevice(cudaGpu->getId());
227 #else
228         assert(false);
229 #endif
230     } else if (SW_opencl == algType) {
231 #ifdef SW2_BUILD_WITH_OPENCL
232         openClGpu = AppContext::getOpenCLGpuRegistry()->acquireEnabledGpuIfReady();
233         SAFE_POINT(nullptr != openClGpu, "GPU isn't ready, abort.", );
234 
235         const SequenceWalkerConfig &config = t->getConfig();
236         const quint64 needMemBytes = SmithWatermanAlgorithmOPENCL::estimateNeededGpuMemory(
237             sWatermanConfig.pSm, sWatermanConfig.ptrn, sWatermanConfig.sqnc.left(config.chunkSize * config.nThreads));
238         const quint64 gpuMemBytes = openClGpu->getGlobalMemorySizeBytes();
239 
240         if (gpuMemBytes < needMemBytes) {
241             stateInfo.setError(QString("Not enough memory on OpenCL-enabled device. "
242                                        "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
243                                    .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number((qlonglong)openClGpu->getId()), QString(openClGpu->getName())));
244             return;
245         } else {
246             algoLog.details(QString("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on OpenCL device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
247         }
248 
249         coreLog.details(QString("GPU model: %1").arg(openClGpu->getName()));
250 #else
251         assert(0);
252 #endif
253     }
254 }
255 
getResult()256 QList<PairAlignSequences> &SWAlgorithmTask::getResult() {
257     removeResultFromOverlap(pairAlignSequences);
258     SmithWatermanAlgorithm::sortByScore(pairAlignSequences);
259 
260     return pairAlignSequences;
261 }
262 
onRegion(SequenceWalkerSubtask * t,TaskStateInfo & ti)263 void SWAlgorithmTask::onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti) {
264     Q_UNUSED(ti);
265 
266     int regionLen = t->getRegionSequenceLen();
267     QByteArray localSeq(t->getRegionSequence(), regionLen);
268 
269     SmithWatermanAlgorithm *sw = nullptr;
270     if (algType == SW_sse2) {
271         sw = new SmithWatermanAlgorithmSSE2;
272     } else if (algType == SW_cuda) {
273 #ifdef SW2_BUILD_WITH_CUDA
274         sw = new SmithWatermanAlgorithmCUDA;
275 #else
276         coreLog.error("CUDA was not enabled in this build");
277         return;
278 #endif  // SW2_BUILD_WITH_CUDA
279     } else if (algType == SW_opencl) {
280 #ifdef SW2_BUILD_WITH_OPENCL
281         sw = new SmithWatermanAlgorithmOPENCL;
282 #else
283         coreLog.error("OPENCL was not enabled in this build");
284         return;
285 #endif  // SW2_BUILD_WITH_OPENCL
286     } else {
287         assert(algType == SW_classic);
288         sw = new SmithWatermanAlgorithm;
289     }
290 
291     // this substitution is needed for the case when annotation are required as result
292     // as well as pattern subsequence
293     const SmithWatermanSettings::SWResultView resultView =
294         (SmithWatermanSettings::ANNOTATIONS == sWatermanConfig.resultView && sWatermanConfig.includePatternContent) ? SmithWatermanSettings::MULTIPLE_ALIGNMENT : sWatermanConfig.resultView;
295 
296     quint64 t1 = GTimer::currentTimeMicros();
297     sw->launch(sWatermanConfig.pSm, sWatermanConfig.ptrn, localSeq, sWatermanConfig.gapModel.scoreGapOpen + sWatermanConfig.gapModel.scoreGapExtd, sWatermanConfig.gapModel.scoreGapExtd, minScore, resultView);
298     QString algName;
299     if (algType == SW_cuda) {
300         algName = "CUDA";
301     } else {
302         algName = "Classic";
303     }
304     QString testName;
305     if (getParentTask() != nullptr) {
306         testName = getParentTask()->getTaskName();
307     } else {
308         testName = "SW alg";
309     }
310     perfLog.details(QString("\n%1 %2 run time is %3\n").arg(testName).arg(algName).arg(GTimer::secsBetween(t1, GTimer::currentTimeMicros())));
311 
312     QList<PairAlignSequences> res = sw->getResults();
313 
314     for (int i = 0; i < res.size(); i++) {
315         res[i].isDNAComplemented = t->isDNAComplemented();
316         res[i].isAminoTranslated = t->isAminoTranslated();
317 
318         if (t->isAminoTranslated()) {
319             res[i].refSubseqInterval.startPos *= 3;
320             res[i].refSubseqInterval.length *= 3;
321         }
322 
323         if (t->isDNAComplemented()) {
324             const U2Region &wr = t->getGlobalRegion();
325             res[i].refSubseqInterval.startPos =
326                 wr.endPos() - res[i].refSubseqInterval.endPos() - sWatermanConfig.globalRegion.startPos;
327         } else {
328             res[i].refSubseqInterval.startPos +=
329                 (t->getGlobalRegion().startPos - sWatermanConfig.globalRegion.startPos);
330         }
331     }
332 
333     addResult(res);
334 
335     /////////////////////
336     delete sw;
337 }
338 
removeResultFromOverlap(QList<PairAlignSequences> & res)339 void SWAlgorithmTask::removeResultFromOverlap(QList<PairAlignSequences> &res) {
340     for (int i = 0; i < res.size() - 1; i++) {
341         for (int j = i + 1; j < res.size(); j++) {
342             if (res.at(i).refSubseqInterval == res.at(j).refSubseqInterval && res.at(i).isDNAComplemented == res.at(j).isDNAComplemented) {
343                 if (res.at(i).score > res.at(j).score) {
344                     res.removeAt(j);
345                     j--;
346                 } else {
347                     res.removeAt(i);
348                     i--;
349                     j = res.size();
350                 }
351             }
352         }
353     }
354 }
355 
addResult(QList<PairAlignSequences> & res)356 void SWAlgorithmTask::addResult(QList<PairAlignSequences> &res) {
357     QMutexLocker ml(&lock);
358     pairAlignSequences += res;
359     pairAlignSequences += res;
360 }
361 
calculateMatrixLength(int searchSeqLen,int patternSeqLen,int gapOpen,int gapExtension,int maxScore,int minScore)362 int SWAlgorithmTask::calculateMatrixLength(int searchSeqLen, int patternSeqLen, int gapOpen, int gapExtension, int maxScore, int minScore) {
363     int matrixLength = 0;
364 
365     int gap = gapOpen;
366     if (gapOpen < gapExtension)
367         gap = gapExtension;
368 
369     matrixLength = patternSeqLen + (maxScore - minScore) / gap * (-1) + 1;
370 
371     if (searchSeqLen + 1 < matrixLength)
372         matrixLength = searchSeqLen + 1;
373 
374     matrixLength += 1;
375 
376     return matrixLength;
377 }
378 
calculateMaxScore(const QByteArray & seq,const SMatrix & substitutionMatrix)379 int SWAlgorithmTask::calculateMaxScore(const QByteArray &seq, const SMatrix &substitutionMatrix) {
380     int maxScore = 0;
381     int max;
382     int substValue = 0;
383 
384     QByteArray alphaChars = substitutionMatrix.getAlphabet()->getAlphabetChars();
385     for (int i = 0; i < seq.length(); i++) {
386         max = 0;
387         for (int j = 0; j < alphaChars.size(); j++) {
388             // TODO: use raw pointers!
389             char c1 = seq.at(i);
390             char c2 = alphaChars.at(j);
391             substValue = substitutionMatrix.getScore(c1, c2);
392             if (max < substValue)
393                 max = substValue;
394         }
395         maxScore += max;
396     }
397     return maxScore;
398 }
399 
report()400 Task::ReportResult SWAlgorithmTask::report() {
401     if (SW_cuda == algType) {
402         cudaGpu->setAcquired(false);
403     } else if (SW_opencl == algType) {
404 #ifdef SW2_BUILD_WITH_OPENCL
405         openClGpu->setAcquired(false);
406 #endif
407     }
408 
409     SmithWatermanResultListener *rl = sWatermanConfig.resultListener;
410     QList<SmithWatermanResult> resultList = rl->getResults();
411 
412     int resultsNum = resultList.size();
413     algoLog.details(tr("%1 results found").arg(resultsNum));
414 
415     if (0 != sWatermanConfig.resultCallback) {
416         SmithWatermanReportCallback *rcb = sWatermanConfig.resultCallback;
417         QString res = rcb->report(resultList);
418         if (!res.isEmpty()) {
419             stateInfo.setError(res);
420         }
421     }
422 
423     return ReportResult_Finished;
424 }
425 
onSubTaskFinished(Task * subTask)426 QList<Task *> SWAlgorithmTask::onSubTaskFinished(Task *subTask) {
427     QList<Task *> res;
428     if (hasError() || isCanceled()) {
429         return res;
430     }
431 
432     if (subTask == t) {
433         res.append(new SWResultsPostprocessingTask(sWatermanConfig, resultList, getResult()));
434     }
435     return res;
436 }
437 
SWResultsPostprocessingTask(SmithWatermanSettings & _sWatermanConfig,QList<SmithWatermanResult> & _resultList,QList<PairAlignSequences> & _resPAS)438 SWResultsPostprocessingTask::SWResultsPostprocessingTask(SmithWatermanSettings &_sWatermanConfig,
439                                                          QList<SmithWatermanResult> &_resultList,
440                                                          QList<PairAlignSequences> &_resPAS)
441     : Task("SWResultsPostprocessing", TaskFlag_None), sWatermanConfig(_sWatermanConfig), resultList(_resultList), resPAS(_resPAS) {
442 }
443 
prepare()444 void SWResultsPostprocessingTask::prepare() {
445 }
446 
run()447 void SWResultsPostprocessingTask::run() {
448     for (QList<PairAlignSequences>::const_iterator i = resPAS.constBegin(); i != resPAS.constEnd(); ++i) {
449         SmithWatermanResult r;
450         r.strand = (*i).isDNAComplemented ? U2Strand::Complementary : U2Strand::Direct;
451         r.trans = (*i).isAminoTranslated;
452 
453         r.refSubseq = (*i).refSubseqInterval;
454         r.refSubseq.startPos += sWatermanConfig.globalRegion.startPos;
455         r.isJoined = false;
456         if ((*i).refSubseqInterval.endPos() > sWatermanConfig.sqnc.size() && sWatermanConfig.searchCircular) {
457             int t = (*i).refSubseqInterval.endPos() - sWatermanConfig.sqnc.size();
458             r.refSubseq.length -= t;
459             r.isJoined = true;
460             r.refJoinedSubseq = U2Region(0, t);
461         }
462         r.ptrnSubseq = (*i).ptrnSubseqInterval;
463         r.score = (*i).score;
464         r.pairAlignment = (*i).pairAlignment;
465 
466         resultList.append(r);
467     }
468 
469     if (sWatermanConfig.resultFilter != 0) {
470         SmithWatermanResultFilter *rf = sWatermanConfig.resultFilter;
471         rf->applyFilter(&resultList);
472     }
473     for (const SmithWatermanResult &r : qAsConst(resultList)) { /* push results after filters */
474         sWatermanConfig.resultListener->pushResult(r);
475     }
476 }
477 
PairwiseAlignmentSmithWatermanTaskSettings(const PairwiseAlignmentTaskSettings & s)478 PairwiseAlignmentSmithWatermanTaskSettings::PairwiseAlignmentSmithWatermanTaskSettings(const PairwiseAlignmentTaskSettings &s)
479     : PairwiseAlignmentTaskSettings(s),
480       reportCallback(nullptr),
481       resultListener(nullptr),
482       resultFilter(nullptr),
483       gapOpen(0),
484       gapExtd(0),
485       percentOfScore(0) {
486 }
487 
~PairwiseAlignmentSmithWatermanTaskSettings()488 PairwiseAlignmentSmithWatermanTaskSettings::~PairwiseAlignmentSmithWatermanTaskSettings() {
489     // all dynamic objects in the world will be destroyed by the task
490 }
491 
convertCustomSettings()492 bool PairwiseAlignmentSmithWatermanTaskSettings::convertCustomSettings() {
493     if ((customSettings.contains(PA_SW_GAP_OPEN) == false) ||
494         (customSettings.contains(PA_SW_GAP_EXTD) == false) ||
495         (customSettings.contains(PA_SW_SCORING_MATRIX_NAME) == false)) {
496         return false;
497     }
498     gapOpen = customSettings.value(PA_SW_GAP_OPEN).toInt();
499     gapExtd = customSettings.value(PA_SW_GAP_EXTD).toInt();
500     sMatrixName = customSettings.value(PA_SW_SCORING_MATRIX_NAME).toString();
501     sMatrix = AppContext::getSubstMatrixRegistry()->getMatrix(sMatrixName);
502     SAFE_POINT(!sMatrix.isEmpty(), "No matrix found", false);
503 
504     PairwiseAlignmentTaskSettings::convertCustomSettings();
505     return true;
506 }
507 
PairwiseAlignmentSmithWatermanTask(PairwiseAlignmentSmithWatermanTaskSettings * _settings,SW_AlgType _algType)508 PairwiseAlignmentSmithWatermanTask::PairwiseAlignmentSmithWatermanTask(PairwiseAlignmentSmithWatermanTaskSettings *_settings, SW_AlgType _algType)
509     : PairwiseAlignmentTask(TaskFlag_NoRun), settings(_settings) {
510     GCOUNTER(cvar, "SWAlgorithmTask");
511 
512     assert(settings != nullptr);
513     bool isValid = settings->convertCustomSettings();
514     assert(isValid == true);
515     Q_UNUSED(isValid);
516 
517     U2OpStatus2Log os;
518     DbiConnection con(settings->msaRef.dbiRef, os);
519     CHECK_OP(os, );
520     U2Sequence sequence = con.dbi->getSequenceDbi()->getSequenceObject(settings->firstSequenceRef.entityId, os);
521     CHECK_OP(os, );
522     first = con.dbi->getSequenceDbi()->getSequenceData(sequence.id, U2Region(0, sequence.length), os);
523     CHECK_OP(os, );
524 
525     sequence = con.dbi->getSequenceDbi()->getSequenceObject(settings->secondSequenceRef.entityId, os);
526     CHECK_OP(os, );
527     second = con.dbi->getSequenceDbi()->getSequenceData(sequence.id, U2Region(0, sequence.length), os);
528     CHECK_OP(os, );
529     con.close(os);
530 
531     sqnc = nullptr;
532     ptrn = nullptr;
533     if (first.length() < second.length()) {
534         sqnc = &second;
535         ptrn = &first;
536     } else {
537         sqnc = &first;
538         ptrn = &second;
539     }
540 
541     algType = _algType;
542     if (algType == SW_sse2) {
543         if (ptrn->length() < 8) {
544             algType = SW_classic;
545             settings->setCustomValue("realizationName", "SW_classic");
546             settings->realizationName = SW_classic;
547         }
548     }
549     SAFE_POINT(!settings->sMatrix.isEmpty(), tr("Substitution matrix is empty"), );
550     maxScore = calculateMaxScore(*ptrn, settings->sMatrix);
551 
552     minScore = (maxScore * settings->percentOfScore) / 100;
553     if ((maxScore * settings->percentOfScore) % 100 != 0) {
554         minScore += 1;
555     }
556 
557     // acquiring resources for GPU computations
558     if (SW_cuda == algType) {
559         addTaskResource(TaskResourceUsage(RESOURCE_CUDA_GPU, 1, true /*prepareStage*/));
560     } else if (SW_opencl == algType) {
561         addTaskResource(TaskResourceUsage(RESOURCE_OPENCL_GPU, 1, true /*prepareStage*/));
562     }
563 
564     setupTask();
565 }
566 
~PairwiseAlignmentSmithWatermanTask()567 PairwiseAlignmentSmithWatermanTask::~PairwiseAlignmentSmithWatermanTask() {
568     delete settings->reportCallback;
569     delete settings->resultListener;
570     // result filter stored in registry, don`t delete it here
571     delete settings;
572 }
573 
onRegion(SequenceWalkerSubtask * t,TaskStateInfo & ti)574 void PairwiseAlignmentSmithWatermanTask::onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti) {
575     Q_UNUSED(ti);
576 
577     int regionLen = t->getRegionSequenceLen();
578     QByteArray localSeq(t->getRegionSequence(), regionLen);
579 
580     SmithWatermanAlgorithm *sw = nullptr;
581     if (algType == SW_sse2) {
582         sw = new SmithWatermanAlgorithmSSE2;
583     } else if (algType == SW_cuda) {
584 #ifdef SW2_BUILD_WITH_CUDA
585         sw = new SmithWatermanAlgorithmCUDA;
586 #else
587         coreLog.error("CUDA was not enabled in this build");
588         return;
589 #endif  // SW2_BUILD_WITH_CUDA
590     } else if (algType == SW_opencl) {
591 #ifdef SW2_BUILD_WITH_OPENCL
592         sw = new SmithWatermanAlgorithmOPENCL;
593 #else
594         coreLog.error("OPENCL was not enabled in this build");
595         return;
596 #endif  // SW2_BUILD_WITH_OPENCL
597     } else {
598         assert(algType == SW_classic);
599         sw = new SmithWatermanAlgorithm;
600     }
601 
602     quint64 t1 = GTimer::currentTimeMicros();
603     sw->launch(settings->sMatrix, *ptrn, localSeq, settings->gapOpen + settings->gapExtd, settings->gapExtd, minScore, SmithWatermanSettings::MULTIPLE_ALIGNMENT);
604     QString algName;
605     if (algType == SW_cuda) {
606         algName = "CUDA";
607     } else {
608         algName = "Classic";
609     }
610     QString testName;
611     if (getParentTask() != nullptr) {
612         testName = getParentTask()->getTaskName();
613     } else {
614         testName = "SW alg";
615     }
616     perfLog.details(QString("\n%1 %2 run time is %3\n").arg(testName).arg(algName).arg(GTimer::secsBetween(t1, GTimer::currentTimeMicros())));
617 
618     QList<PairAlignSequences> res = sw->getResults();
619     res = expandResults(res);
620 
621     for (int i = 0; i < res.size(); i++) {
622         res[i].isDNAComplemented = t->isDNAComplemented();
623         res[i].isAminoTranslated = t->isAminoTranslated();
624 
625         if (t->isAminoTranslated()) {
626             res[i].refSubseqInterval.startPos *= 3;
627             res[i].refSubseqInterval.length *= 3;
628         }
629 
630         if (t->isDNAComplemented()) {
631             const U2Region &wr = t->getGlobalRegion();
632             res[i].refSubseqInterval.startPos =
633                 wr.endPos() - res[i].refSubseqInterval.endPos();
634         } else {
635             res[i].refSubseqInterval.startPos +=
636                 (t->getGlobalRegion().startPos);
637         }
638     }
639 
640     addResult(res);
641 
642     /////////////////////
643     delete sw;
644 }
645 
calculateMaxScore(const QByteArray & seq,const SMatrix & substitutionMatrix)646 int PairwiseAlignmentSmithWatermanTask::calculateMaxScore(const QByteArray &seq, const SMatrix &substitutionMatrix) {
647     int maxScore = 0;
648     int max;
649     int substValue = 0;
650 
651     QByteArray alphaChars = substitutionMatrix.getAlphabet()->getAlphabetChars();
652     for (int i = 0; i < seq.length(); i++) {
653         max = 0;
654         for (int j = 0; j < alphaChars.size(); j++) {
655             // TODO: use raw pointers!
656             char c1 = seq.at(i);
657             char c2 = alphaChars.at(j);
658             substValue = substitutionMatrix.getScore(c1, c2);
659             if (max < substValue)
660                 max = substValue;
661         }
662         maxScore += max;
663     }
664     return maxScore;
665 }
666 
setupTask()667 void PairwiseAlignmentSmithWatermanTask::setupTask() {
668     SequenceWalkerConfig c;
669     c.seq = *sqnc;
670     c.seqSize = sqnc->size();
671     c.range = U2Region(0, sqnc->size());
672     c.complTrans = nullptr;
673     c.aminoTrans = nullptr;
674     c.strandToWalk = StrandOption_DirectOnly;
675 
676     quint64 overlapSize = calculateMatrixLength(*sqnc, *ptrn, settings->gapOpen, settings->gapExtd, maxScore, minScore);
677 
678     // divide sequence by PARTS_NUMBER parts
679     int idealThreadCount = AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount();
680 
681     qint64 partsNumber = 0;
682     double computationMatrixSquare = 0.0;
683 
684     switch (algType) {
685         case SW_sse2:
686             computationMatrixSquare = 16195823.0;  // this constant is considered to be optimal computation matrix square (square = localSequence.length * pattern.length) for given algorithm realization and the least minimum score value
687             c.nThreads = idealThreadCount * 2.5;
688             break;
689         case SW_classic:
690             computationMatrixSquare = 7519489.29;  // the same as previous
691             c.nThreads = idealThreadCount;
692             break;
693         case SW_cuda:
694         case SW_opencl:
695             computationMatrixSquare = 58484916.67;  // the same as previous
696             c.nThreads = 1;
697             break;
698         default:
699             assert(0);
700     }
701 
702     partsNumber = static_cast<qint64>(sqnc->size() / (computationMatrixSquare / ptrn->size()) + 1.0);
703     if (partsNumber < c.nThreads) {
704         c.nThreads = partsNumber;
705     }
706 
707     c.chunkSize = (c.seqSize + overlapSize * (partsNumber - 1)) / partsNumber;
708     if (c.chunkSize <= overlapSize) {
709         c.chunkSize = overlapSize + 1;
710     }
711     c.overlapSize = overlapSize;
712 
713     c.lastChunkExtraLen = partsNumber - 1;
714 
715     // acquiring memory resources for computations
716     switch (algType) {
717         case SW_cuda:
718 #ifdef SW2_BUILD_WITH_CUDA
719             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
720                                               SmithWatermanAlgorithmCUDA::estimateNeededRamAmount(settings->sMatrix, *ptrn, sqnc->left(c.chunkSize * c.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT),
721                                               true));
722 #endif
723             break;
724         case SW_opencl:
725 #ifdef SW2_BUILD_WITH_OPENCL
726             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
727                                               SmithWatermanAlgorithmOPENCL::estimateNeededRamAmount(settings->sMatrix, *ptrn, sqnc->left(c.chunkSize * c.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT),
728                                               true));
729 #endif
730             break;
731         case SW_classic:
732             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
733                                               SmithWatermanAlgorithm::estimateNeededRamAmount(settings->gapOpen, settings->gapExtd, minScore, maxScore, *ptrn, sqnc->left(c.chunkSize * c.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT),
734                                               true));
735             break;
736         case SW_sse2:
737             addTaskResource(TaskResourceUsage(RESOURCE_MEMORY,
738                                               SmithWatermanAlgorithmSSE2::estimateNeededRamAmount(*ptrn,
739                                                                                                   sqnc->left(c.chunkSize * c.nThreads),
740                                                                                                   settings->gapOpen,
741                                                                                                   settings->gapExtd,
742                                                                                                   minScore,
743                                                                                                   maxScore,
744                                                                                                   SmithWatermanSettings::MULTIPLE_ALIGNMENT),
745                                               true));
746             break;
747         default:
748             assert(0);
749     }
750 
751     t = new SequenceWalkerTask(c, this, tr("Smith Waterman2 SequenceWalker"));
752     addSubTask(t);
753 }
754 
calculateMatrixLength(const QByteArray & searchSeq,const QByteArray & patternSeq,int gapOpen,int gapExtension,int maxScore,int minScore)755 int PairwiseAlignmentSmithWatermanTask::calculateMatrixLength(const QByteArray &searchSeq, const QByteArray &patternSeq, int gapOpen, int gapExtension, int maxScore, int minScore) {
756     int matrixLength = 0;
757 
758     int gap = gapOpen;
759     if (gapOpen < gapExtension)
760         gap = gapExtension;
761 
762     matrixLength = patternSeq.length() + (maxScore - minScore) / gap * (-1) + 1;
763 
764     if (searchSeq.length() + 1 < matrixLength)
765         matrixLength = searchSeq.length() + 1;
766 
767     matrixLength += 1;
768 
769     return matrixLength;
770 }
771 
prepare()772 void PairwiseAlignmentSmithWatermanTask::prepare() {
773     if (SW_cuda == algType) {
774         cudaGpu = AppContext::getCudaGpuRegistry()->acquireAnyReadyGpu();
775         assert(cudaGpu);
776 #ifdef SW2_BUILD_WITH_CUDA
777         const SequenceWalkerConfig &config = t->getConfig();
778         quint64 needMemBytes = SmithWatermanAlgorithmCUDA::estimateNeededGpuMemory(
779             settings->sMatrix, *ptrn, sqnc->left(config.chunkSize * config.nThreads), SmithWatermanSettings::MULTIPLE_ALIGNMENT);
780         quint64 gpuMemBytes = cudaGpu->getGlobalMemorySizeBytes();
781         if (gpuMemBytes < needMemBytes) {
782             stateInfo.setError(tr("Not enough memory on CUDA-enabled device. "
783                                   "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
784                                    .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number(cudaGpu->getId()), QString(cudaGpu->getName())));
785             return;
786         } else {
787             algoLog.details(tr("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on CUDA device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
788         }
789 
790         coreLog.details(QString("GPU model: %1").arg(cudaGpu->getName()));
791 
792         cudaSetDevice(cudaGpu->getId());
793 #else
794         assert(false);
795 #endif
796     } else if (SW_opencl == algType) {
797 #ifdef SW2_BUILD_WITH_OPENCL
798         openClGpu = AppContext::getOpenCLGpuRegistry()->acquireEnabledGpuIfReady();
799         SAFE_POINT(nullptr != openClGpu, "GPU isn't ready, abort.", );
800 
801         const SequenceWalkerConfig &config = t->getConfig();
802         const quint64 needMemBytes = SmithWatermanAlgorithmOPENCL::estimateNeededGpuMemory(
803             settings->sMatrix, *ptrn, sqnc->left(config.chunkSize * config.nThreads));
804         const quint64 gpuMemBytes = openClGpu->getGlobalMemorySizeBytes();
805 
806         if (gpuMemBytes < needMemBytes) {
807             stateInfo.setError(QString("Not enough memory on OpenCL-enabled device. "
808                                        "The space required is %1 bytes, but only %2 bytes are available. Device id: %3, device name: %4")
809                                    .arg(QString::number(needMemBytes), QString::number(gpuMemBytes), QString::number((qlonglong)openClGpu->getId()), QString(openClGpu->getName())));
810             return;
811         } else {
812             algoLog.details(QString("The Smith-Waterman search allocates ~%1 bytes (%2 Mb) on OpenCL device").arg(QString::number(needMemBytes), QString::number(needMemBytes / B_TO_MB_FACTOR)));
813         }
814 
815         coreLog.details(QString("GPU model: %1").arg(openClGpu->getName()));
816 #else
817         assert(0);
818 #endif
819     }
820 }
821 
getResult()822 QList<PairAlignSequences> &PairwiseAlignmentSmithWatermanTask::getResult() {
823     removeResultFromOverlap(pairAlignSequences);
824     SmithWatermanAlgorithm::sortByScore(pairAlignSequences);
825 
826     return pairAlignSequences;
827 }
828 
report()829 Task::ReportResult PairwiseAlignmentSmithWatermanTask::report() {
830     if (SW_cuda == algType) {
831         cudaGpu->setAcquired(false);
832     } else if (SW_opencl == algType) {
833 #ifdef SW2_BUILD_WITH_OPENCL
834         openClGpu->setAcquired(false);
835 #endif
836     }
837 
838     assert(settings->resultListener != nullptr);
839     QList<SmithWatermanResult> resultList = settings->resultListener->getResults();
840 
841     int resultsNum = resultList.size();
842     algoLog.details(tr("%1 results found").arg(resultsNum));
843 
844     if (0 != settings->reportCallback) {
845         QString res = settings->reportCallback->report(resultList);
846         if (!res.isEmpty()) {
847             stateInfo.setError(res);
848         }
849     }
850 
851     return ReportResult_Finished;
852 }
853 
addResult(QList<PairAlignSequences> & res)854 void PairwiseAlignmentSmithWatermanTask::addResult(QList<PairAlignSequences> &res) {
855     QMutexLocker ml(&lock);
856     pairAlignSequences += res;
857 }
858 
onSubTaskFinished(Task * subTask)859 QList<Task *> PairwiseAlignmentSmithWatermanTask::onSubTaskFinished(Task *subTask) {
860     QList<Task *> res;
861     if (hasError() || isCanceled()) {
862         return res;
863     }
864 
865     if (subTask == t) {
866         res.append(new PairwiseAlignmentSWResultsPostprocessingTask(settings->resultFilter, settings->resultListener, resultList, getResult()));
867     }
868     return res;
869 }
870 
removeResultFromOverlap(QList<PairAlignSequences> & res)871 void PairwiseAlignmentSmithWatermanTask::removeResultFromOverlap(QList<PairAlignSequences> &res) {
872     for (int i = 0; i < res.size() - 1; i++) {
873         for (int j = i + 1; j < res.size(); j++) {
874             if (res.at(i).refSubseqInterval == res.at(j).refSubseqInterval && res.at(i).isDNAComplemented == res.at(j).isDNAComplemented /*&&
875                     res.at(i).pairAlignment == res.at(j).pairAlignment*/
876             ) {
877                 if (res.at(i).score > res.at(j).score) {
878                     res.removeAt(j);
879                     j--;
880                 } else {
881                     res.removeAt(i);
882                     i--;
883                     j = res.size();
884                 }
885             }
886         }
887     }
888 }
889 
expandResults(QList<PairAlignSequences> & res)890 QList<PairAlignSequences> PairwiseAlignmentSmithWatermanTask::expandResults(QList<PairAlignSequences> &res) {
891     bool resIsEmpty = false;
892     if (res.size() == 0) {
893         PairAlignSequences p;
894         p.refSubseqInterval.startPos = sqnc->length();
895         p.refSubseqInterval.length = 0;
896         p.ptrnSubseqInterval.startPos = ptrn->length();
897         p.ptrnSubseqInterval.length = 0;
898         res += p;
899         resIsEmpty = true;
900     }
901     for (int i = 0; i < res.size(); i++) {
902         if (res[i].ptrnSubseqInterval.length < ptrn->length() && res[i].refSubseqInterval.length < sqnc->length() &&
903             resIsEmpty == false) {
904             for (int j = 0; j < res[i].ptrnSubseqInterval.startPos && j < res[i].refSubseqInterval.startPos;) {
905                 res[i].pairAlignment.append(PairAlignSequences::DIAG);
906                 res[i].ptrnSubseqInterval.startPos--;
907                 res[i].ptrnSubseqInterval.length++;
908                 res[i].refSubseqInterval.startPos--;
909                 res[i].refSubseqInterval.length++;
910             }
911             for (int j = ptrn->length(); j > res[i].ptrnSubseqInterval.endPos() && j > res[i].refSubseqInterval.endPos();) {
912                 res[i].pairAlignment.prepend(PairAlignSequences::DIAG);
913                 res[i].ptrnSubseqInterval.length++;
914                 res[i].refSubseqInterval.length++;
915             }
916         }
917         if (res[i].ptrnSubseqInterval.length < ptrn->length()) {
918             for (int j = 0; j < res[i].ptrnSubseqInterval.startPos;) {
919                 res[i].pairAlignment.append(PairAlignSequences::LEFT);
920                 res[i].ptrnSubseqInterval.startPos--;
921                 res[i].ptrnSubseqInterval.length++;
922             }
923             for (int j = ptrn->length(); j > res[i].ptrnSubseqInterval.endPos();) {
924                 res[i].pairAlignment.prepend(PairAlignSequences::LEFT);
925                 res[i].ptrnSubseqInterval.length++;
926             }
927         }
928         if (res[i].refSubseqInterval.length < sqnc->length()) {
929             for (int j = 0; j < res[i].refSubseqInterval.startPos;) {
930                 res[i].pairAlignment.append(PairAlignSequences::UP);
931                 res[i].refSubseqInterval.startPos--;
932                 res[i].refSubseqInterval.length++;
933             }
934             for (int j = sqnc->length(); j > res[i].refSubseqInterval.endPos();) {
935                 res[i].pairAlignment.prepend(PairAlignSequences::UP);
936                 res[i].refSubseqInterval.length++;
937             }
938         }
939     }
940     return res;
941 }
942 
PairwiseAlignmentSWResultsPostprocessingTask(SmithWatermanResultFilter * _rf,SmithWatermanResultListener * _rl,QList<SmithWatermanResult> & _resultList,QList<PairAlignSequences> & _resPAS)943 PairwiseAlignmentSWResultsPostprocessingTask::PairwiseAlignmentSWResultsPostprocessingTask(SmithWatermanResultFilter *_rf,
944                                                                                            SmithWatermanResultListener *_rl,
945                                                                                            QList<SmithWatermanResult> &_resultList,
946                                                                                            QList<PairAlignSequences> &_resPAS)
947     : Task("PairwiseAlignmentSWResultsPostprocessing", TaskFlag_None), rf(_rf), rl(_rl), resultList(_resultList), resPAS(_resPAS) {
948 }
949 
run()950 void PairwiseAlignmentSWResultsPostprocessingTask::run() {
951     for (QList<PairAlignSequences>::const_iterator i = resPAS.constBegin(); i != resPAS.constEnd(); ++i) {
952         SmithWatermanResult r;
953         r.strand = (*i).isDNAComplemented ? U2Strand::Complementary : U2Strand::Direct;
954         r.trans = (*i).isAminoTranslated;
955         r.refSubseq = (*i).refSubseqInterval;
956         r.refSubseq.startPos = 0;
957         r.ptrnSubseq = (*i).ptrnSubseqInterval;
958         r.score = (*i).score;
959         r.pairAlignment = (*i).pairAlignment;
960 
961         resultList.append(r);
962     }
963 
964     if (rf != 0) {
965         rf->applyFilter(&resultList);
966     }
967     foreach (const SmithWatermanResult &r, resultList) { /* push results after filters */
968         rl->pushResult(r);
969     }
970 }
971 
prepare()972 void PairwiseAlignmentSWResultsPostprocessingTask::prepare() {
973 }
974 
975 }  // namespace U2
976