1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include <QDir>
23 #include <QtMath>
24 
25 #include <U2Core/AppContext.h>
26 #include <U2Core/BaseDocumentFormats.h>
27 #include <U2Core/DNASequenceObject.h>
28 #include <U2Core/DocumentModel.h>
29 #include <U2Core/GObjectTypes.h>
30 #include <U2Core/IOAdapter.h>
31 #include <U2Core/IOAdapterUtils.h>
32 #include <U2Core/LoadDocumentTask.h>
33 #include <U2Core/Log.h>
34 #include <U2Core/MultipleSequenceAlignmentImporter.h>
35 #include <U2Core/MultipleSequenceAlignmentObject.h>
36 #include <U2Core/SaveDocumentTask.h>
37 #include <U2Core/U2OpStatusUtils.h>
38 #include <U2Core/U2SafePoints.h>
39 
40 #include "umuscleTests.h"
41 #include "MuscleConstants.h"
42 #include "MuscleTask.h"
43 #include "MuscleParallel.h"
44 
45 /* TRANSLATOR U2::GTest*/
46 
47 namespace U2 {
48 
// Declared here, defined in another translation unit. In this file it is called with the
// aligned test alignment and the reference alignment, and its result is compared with the expected score.
49 extern double QScore(const MultipleSequenceAlignment& maTest, const MultipleSequenceAlignment& maRef, TaskStateInfo& ti);
50 
// Names of the XML attributes read from the test element by the init() methods in this file.
51 #define OUT_FILE_NAME_ATTR "out"
52 #define IN_FILE_NAME_ATTR "in"
// Expected quality score and the allowed absolute deviation from it.
53 #define QSCORE_ATTR "qscr"
54 #define QSCORE_DELTA_ATTR "dqscr"
55 #define INDEX_ATTR "index"
56 #define DOC1_ATTR "doc1"
57 #define DOC2_ATTR "doc2"
58 #define IN_DIR_ATTR "indir"
59 #define PAT_DIR_ATTR "refdir"
// NOTE(review): PARALLEL_FLAG_ATTR is not referenced in the visible part of this file.
60 #define PARALLEL_FLAG_ATTR "parallel"
61 #define MAX_ITERS_ATTR "maxiters"
62 #define REFINE_ONLY_ATTR "refine"
63 #define REGION_ATTR "region"
64 #define STABLE_ATTR "stable"
// Name of the test-suite environment variable that holds the MUSCLE thread count (read with env->getVar()).
65 #define ENV_MUSCLE_N_THREADS "MUSCLE_N_THREADS"
// NOTE(review): MACHINE_PATH is not referenced in the visible part of this file.
66 #define MACHINE_PATH "MACHINE"
67 
68 struct GTestBoolProperty {
getU2::GTestBoolProperty69     static bool get(QString attr, bool &value, const QDomElement &el) {
70         QString value_str = el.attribute(attr);
71         if(!value_str.isEmpty()) {
72             bool ok = false;
73             value = value_str.toInt(&ok);
74             return ok;
75         }
76         return true;
77     }
78 };
79 
// Reads the optional boolean attribute ATTR of the local QDomElement `el` into VAL.
// When the value cannot be parsed, the failure is reported through failMissingValue()
// and the enclosing (void) function returns immediately.
#define GET_BOOL_PROP(ATTR, VAL) \
    if (!GTestBoolProperty::get((ATTR), (VAL), (el))) { \
        failMissingValue((ATTR)); \
        return; \
    }
83 
init(XMLTestFormat *,const QDomElement & el)84 void GTest_uMuscle::init(XMLTestFormat *, const QDomElement& el) {
85     ctxAdded = false;
86     ma_result = nullptr;
87     refineOnly = false;
88     maxIters = -1;
89     alignRegion = false;
90     stable = false;
91 
92     inputDocCtxName = el.attribute(IN_FILE_NAME_ATTR);
93     if (inputDocCtxName.isEmpty()) {
94         failMissingValue(IN_FILE_NAME_ATTR);
95         return;
96     }
97 
98     QString refineOnly_str = el.attribute(REFINE_ONLY_ATTR);
99     if (!refineOnly_str.isEmpty()) {
100         bool ok = false;
101         refineOnly = refineOnly_str.toInt(&ok);
102         if (!ok) {
103             failMissingValue(REFINE_ONLY_ATTR);
104             return;
105         }
106     }
107 
108     QString maxIters_str = el.attribute(MAX_ITERS_ATTR);
109     if(!maxIters_str.isEmpty()) {
110         bool ok = false;
111         maxIters = maxIters_str.toInt(&ok);
112         if (!ok) {
113             failMissingValue(MAX_ITERS_ATTR);
114             return;
115         }
116     }
117 
118     QString region_str = el.attribute(REGION_ATTR);
119     if(!region_str.isEmpty()) {
120         QRegExp rx("([0123456789]+)..([0123456789]+)");
121         if(rx.indexIn(region_str, 0) != -1) {
122             bool ok1,ok2;
123             int start = rx.cap(1).toInt(&ok1) - 1;
124             int end = rx.cap(2).toInt(&ok2);
125             if (ok1 && ok2) {
126                 region = U2Region(start, end - start);
127                 alignRegion = true;
128             } else {
129                 failMissingValue(REGION_ATTR);
130             }
131         } else {
132             failMissingValue(REGION_ATTR);
133         }
134     }
135 
136     GET_BOOL_PROP(STABLE_ATTR,stable)
137 
138     resultCtxName = el.attribute(INDEX_ATTR);
139 }
140 
prepare()141 void GTest_uMuscle::prepare() {
142     mTask = nullptr;
143     ma_result = nullptr;
144 
145     doc = getContext<Document>(this, inputDocCtxName);
146     if (doc == nullptr) {
147         stateInfo.setError(  QString("context not found %1").arg(inputDocCtxName) );
148         return;
149     }
150 
151     QList<GObject*> list = doc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
152     if (list.size() == 0) {
153         stateInfo.setError(  QString("container of object with type \"%1\" is empty").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT) );
154         return;
155     }
156 
157     GObject *obj = list.first();
158     if(obj==nullptr){
159         stateInfo.setError(  QString("object with type \"%1\" not found").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT) );
160         return;
161     }
162     assert(obj!=nullptr);
163     MultipleSequenceAlignmentObject* ma = qobject_cast<MultipleSequenceAlignmentObject*>(obj);
164     if(ma==nullptr){
165         stateInfo.setError(  QString("error can't cast to multiple alignment from GObject") );
166         return;
167     }
168 
169     MuscleTaskSettings s;
170     bool ok = false;
171     s.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
172     if(!ok) {
173         stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
174         return;
175     }
176     if(maxIters != -1) {
177         s.maxIterations = maxIters;
178     }
179     if(refineOnly) {
180         s.op = MuscleTaskOp_Refine;
181     }
182     s.stableMode = stable; //default mode is 'group' like in MUSCLE
183     s.alignRegion = alignRegion;
184     if(alignRegion) {
185         s.regionToAlign = region;
186     }
187     ma_result = ma;
188     mTask = new MuscleGObjectTask(ma_result,s);
189     addSubTask(mTask);
190 }
191 
report()192 Task::ReportResult GTest_uMuscle::report() {
193     if (!hasError()) {
194         if(mTask->hasError()) {
195             stateInfo.setError(  mTask->getError() );
196             return ReportResult_Finished;
197         }
198         if(!resultCtxName.isEmpty()) {
199             ctxAdded = true;
200             addContext(resultCtxName, ma_result);
201         }
202     }
203     return ReportResult_Finished;
204 }
205 
cleanup()206 void GTest_uMuscle::cleanup() {
207     //if(ma_result!=NULL)
208     //    delete ma_result;
209     if(ctxAdded) {
210         removeContext(resultCtxName);
211     }
212 
213     XmlTest::cleanup();
214 }
215 
init(XMLTestFormat *,const QDomElement & el)216 void GTest_CompareMAlignment::init(XMLTestFormat *, const QDomElement& el) {
217     doc1CtxName = el.attribute(DOC1_ATTR);
218     if (doc1CtxName.isEmpty()) {
219         failMissingValue(DOC1_ATTR);
220         return;
221     }
222     doc2CtxName = el.attribute(DOC2_ATTR);
223     if (doc2CtxName.isEmpty()) {
224         failMissingValue(DOC2_ATTR);
225         return;
226     }
227 }
228 
report()229 Task::ReportResult GTest_CompareMAlignment::report() {
230     Document* doc1 = getContext<Document>(this, doc1CtxName);
231     if (doc1 == nullptr) {
232         stateInfo.setError(  QString("document not found %1").arg(doc1CtxName) );
233         return ReportResult_Finished;
234     }
235     Document* doc2 = getContext<Document>(this, doc2CtxName);
236     if (doc2 == nullptr) {
237         stateInfo.setError(  QString("document not found %1").arg(doc2CtxName) );
238         return ReportResult_Finished;
239     }
240 
241     QList<GObject*> objs1 = doc1->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
242     QList<GObject*> objs2 = doc2->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
243 
244     if(objs1.size()!=objs2.size()) {
245         stateInfo.setError(  QString("MAlignmentObjects count not matched %1, expected %2").arg(objs1.size()).arg(objs2.size()) );
246         return ReportResult_Finished;
247     }
248 
249     int listSize = objs1.size();
250     for (int i=0;i<listSize;i++) {
251         MultipleSequenceAlignmentObject* ma1 = qobject_cast<MultipleSequenceAlignmentObject*>(objs1.at(i));
252         MultipleSequenceAlignmentObject* ma2 = qobject_cast<MultipleSequenceAlignmentObject*>(objs2.at(i));
253         const QList<MultipleSequenceAlignmentRow> alignedSeqs1 = ma1->getMsa()->getMsaRows();
254         const QList<MultipleSequenceAlignmentRow> alignedSeqs2 = ma2->getMsa()->getMsaRows();
255         if(ma1->objectName()!=ma2->objectName()) {
256             stateInfo.setError(  QString("MAlignmentObjects name not matched \"%1\", expected \"%2\"").arg(ma1->objectName()).arg(ma2->objectName()) );
257             return ReportResult_Finished;
258         }
259         foreach(const MultipleSequenceAlignmentRow &maItem1, alignedSeqs1) {
260             bool nameFound = false;
261             foreach(const MultipleSequenceAlignmentRow &maItem2, alignedSeqs2) {
262                 if (maItem1->getName() == maItem2->getName()) {
263                     nameFound = true;
264                     int l1 = maItem1->getCoreEnd();
265                     int l2 = maItem2->getCoreEnd();
266                     if(l1!=l2) {
267                         stateInfo.setError(  QString("Aligned sequences \"%1\" length not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(l1).arg(l2) );
268                         return ReportResult_Finished;
269                     }
270                     if (*maItem1 != *maItem2) {
271                         stateInfo.setError(  QString("Aligned sequences \"%1\" not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(QString(maItem1->getCore())).arg(QString(maItem2->getCore())) );
272                         return ReportResult_Finished;
273                     }
274                 }
275             }
276             if (!nameFound) {
277                 stateInfo.setError(  QString("aligned sequence not found \"%1\"").arg(maItem1->getName()) );
278             }
279 
280         }
281 
282     }
283     return ReportResult_Finished;
284 }
285 
init(XMLTestFormat *,const QDomElement & el)286 void GTest_uMuscleAddUnalignedSequenceToProfile::init(XMLTestFormat */*tf*/, const QDomElement& el) {
287     origAliSeqs = 0;
288     aliObj = nullptr;
289     resultAliSeqs = 0;
290     aliDocName = el.attribute("ali-doc");
291     if (aliDocName.isEmpty()) {
292         stateInfo.setError(  QString("value not set %1").arg("ali-doc") );
293         return;
294     }
295     seqDocName = el.attribute("seq-doc");
296     if (seqDocName.isEmpty()) {
297         stateInfo.setError(  QString("value not set %1").arg("seq-doc") );
298         return;
299     }
300     QString gaps = el.attribute("gap-map");
301     QStringList gapsPerSeq = gaps.split('|');
302     //gapsPerSeq.removeAll(QString());
303     foreach (const QString& s, gapsPerSeq) {
304         QList<int> seqGaps;
305         QStringList nums = s.split(',');
306         foreach (const QString& n, nums) {
307             if (n.isEmpty()) {
308                 continue;
309             }
310             bool ok = false;
311             int gapPos = n.toInt(&ok);
312             if (!ok) {
313                 stateInfo.setError(  QString("error parsing gap value '%1', line %2").arg(n).arg(s) );
314                 return;
315             }
316             seqGaps.append(gapPos);
317         }
318         gapPositionsForSeqs.append(seqGaps);
319     }
320     QString resultLen = el.attribute("result-ali-len");
321     bool ok = false;
322     resultAliLen = resultLen.toInt(&ok);
323     if (!ok) {
324         stateInfo.setError(  QString("error result-ali-len '%1'").arg(resultLen) );
325         return;
326     }
327 }
328 
prepare()329 void GTest_uMuscleAddUnalignedSequenceToProfile::prepare() {
330     if (hasError()) {
331         return;
332     }
333     Document* aliDoc = getContext<Document>(this, aliDocName);
334     if (aliDoc == nullptr) {
335         stateInfo.setError(  QString("alignment document not found in context: %1").arg(aliDocName) );
336         return;
337     }
338     Document* seqDoc = getContext<Document>(this, seqDocName);
339     if (seqDoc == nullptr) {
340         stateInfo.setError(  QString("sequence document not found in context: %1").arg(seqDocName) );
341         return;
342     }
343     QList<GObject*> aliObjs = aliDoc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
344     if (aliObjs.isEmpty()) {
345         stateInfo.setError(  QString("no alignment object found in doc: %1").arg(aliDoc->getURLString()) );
346         return;
347     }
348     aliObj = qobject_cast<MultipleSequenceAlignmentObject*>(aliObjs[0]);
349     origAliSeqs = aliObj->getNumRows();
350 
351     QList<GObject*> seqObjs = seqDoc->findGObjectByType(GObjectTypes::SEQUENCE);
352     if (seqObjs.isEmpty()) {
353         stateInfo.setError(  QString("no sequence objects found in doc: %1").arg(seqDoc->getURLString()) );
354         return;
355     }
356     MultipleSequenceAlignment unalignedMA;
357     unalignedMA->setAlphabet(aliObj->getAlphabet());
358     foreach (GObject* obj, seqObjs) {
359         U2SequenceObject* dnaObj = qobject_cast<U2SequenceObject*>(obj);
360         QByteArray seqData = dnaObj->getWholeSequenceData(stateInfo);
361         CHECK_OP(stateInfo, );
362         unalignedMA->addRow(dnaObj->getSequenceName(), seqData);
363     }
364     if (unalignedMA->getNumRows()!=gapPositionsForSeqs.size()) {
365         stateInfo.setError( QString("number of sequences not matches number of gaps in test: %1 sequences and %2 gap lines")
366             .arg(unalignedMA->getNumRows()).arg(gapPositionsForSeqs.size()) );
367         return;
368     }
369     resultAliSeqs = origAliSeqs + unalignedMA->getNumRows();
370 
371     MuscleTaskSettings s;
372     s.op = MuscleTaskOp_AddUnalignedToProfile;
373     s.profile = unalignedMA;
374     bool ok = false;
375     s.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
376     if(!ok) {
377         stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
378         return;
379     }
380     addSubTask(new MuscleGObjectTask(aliObj, s));
381 }
382 
report()383 Task::ReportResult GTest_uMuscleAddUnalignedSequenceToProfile::report() {
384     propagateSubtaskError();
385     if (hasError()) {
386         return ReportResult_Finished;
387     }
388     const MultipleSequenceAlignment msa = aliObj->getMultipleAlignment();
389     if (msa->getLength()!=resultAliLen) {
390         stateInfo.setError(  QString("result alignment length notmatches: %1, expected: %2").arg(msa->getLength()).arg(resultAliLen) );
391         return ReportResult_Finished;
392     }
393 
394     if (resultAliSeqs!=msa->getNumRows()) {
395         stateInfo.setError(  QString("unexpected number of sequences in result: %1, expected: %2").arg(msa->getNumRows()).arg(resultAliSeqs) );
396         return ReportResult_Finished;
397     }
398 
399     U2OpStatus2Log os;
400     for (int i = origAliSeqs, j = 0; i < msa->getNumRows(); i++, j++) {
401         const MultipleSequenceAlignmentRow row = msa->getMsaRow(i);
402         QByteArray seq = row->toByteArray(os, msa->getLength());
403         QList<int> seqGaps = gapPositionsForSeqs[j];
404         for (int pos = 0; pos < seq.size(); pos++) {
405             char c = seq[pos];
406             if (c == U2Msa::GAP_CHAR) {
407                 bool found = seqGaps.contains(pos);
408                 if (!found) {
409                     stateInfo.setError(  QString("illegal gap found! pos: %1, sequence: %2").arg(pos).arg(row->getName()) );
410                     return ReportResult_Finished;
411                 }
412             }
413         }
414         for (int gap = 0; gap < seqGaps.size(); gap++) {
415             int pos  = seqGaps[gap];
416             char c = seq[pos];
417             if (c != U2Msa::GAP_CHAR) {
418                 stateInfo.setError(  QString("gap not found! pos: %1, sequence: %2").arg(pos).arg(row->getName()) );
419                 return ReportResult_Finished;
420             }
421         }
422     }
423     return ReportResult_Finished;
424 }
425 
init(XMLTestFormat *,const QDomElement & el)426 void GTest_Muscle_Load_Align_QScore::init(XMLTestFormat *, const QDomElement& el) {
427     inFileURL = el.attribute(IN_FILE_NAME_ATTR);
428     stateInfo.progress = 0;
429     loadTask1 = nullptr;
430     loadTask2 = nullptr;
431     muscleTask = nullptr;
432     ma1 = nullptr;
433     ma2 = nullptr;
434 
435     if (inFileURL.isEmpty()) {
436         failMissingValue(IN_FILE_NAME_ATTR);
437         return;
438     }
439     patFileURL = el.attribute(OUT_FILE_NAME_ATTR);
440     if (patFileURL.isEmpty()) {
441         failMissingValue(OUT_FILE_NAME_ATTR);
442         return;
443     }
444 
445     QString str_qscore = el.attribute(QSCORE_ATTR);
446     if (str_qscore.isEmpty()) {
447         failMissingValue(QSCORE_ATTR);
448         return;
449     }
450     bool ok = false;
451     qscore = str_qscore.toFloat(&ok);
452     if (!ok) {
453         failMissingValue(QSCORE_ATTR);
454         return;
455     }
456 
457     this->dqscore = 0.01;
458     QString str_dqscore = el.attribute(QSCORE_DELTA_ATTR);
459     if (!str_dqscore.isEmpty()) {
460         dqscore = str_dqscore.toFloat(&ok);
461         if (!ok) {
462             failMissingValue(QSCORE_DELTA_ATTR);
463             return;
464         }
465     }
466     setUseDescriptionFromSubtask(true);
467 }
468 
prepare()469 void GTest_Muscle_Load_Align_QScore::prepare() {
470 
471     config.stableMode = false; //default mode is 'group' like in MUSCLE
472     bool ok = false;
473     config.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
474     if(!ok) {
475         stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
476         return;
477     }
478     QFileInfo inFile(env->getVar("COMMON_DATA_DIR")+"/"+inFileURL);
479     if(!inFile.exists()) {
480         stateInfo.setError(  QString("file not exist %1").arg(inFile.absoluteFilePath()) );
481         return;
482     }
483     QFileInfo patFile(env->getVar("COMMON_DATA_DIR")+"/"+patFileURL);
484     if(!patFile.exists()) {
485         stateInfo.setError(  QString("file not exist %1").arg(patFile.absoluteFilePath()) );
486         return;
487     }
488 
489     IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(env->getVar("COMMON_DATA_DIR")+"/"+inFileURL));
490     loadTask1 = new LoadDocumentTask(BaseDocumentFormats::FASTA,env->getVar("COMMON_DATA_DIR")+"/"+inFileURL,iof);
491     loadTask1->setSubtaskProgressWeight(0);
492     addSubTask(loadTask1);
493     iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(env->getVar("COMMON_DATA_DIR")+"/"+patFileURL));
494 
495     loadTask2 = new LoadDocumentTask(BaseDocumentFormats::FASTA,env->getVar("COMMON_DATA_DIR")+"/"+patFileURL,iof);
496 
497     addSubTask(loadTask2);
498     loadTask1->setSubtaskProgressWeight(0);
499 }
500 
dna_to_ma(QList<GObject * > dnaSeqs)501 MultipleSequenceAlignment GTest_Muscle_Load_Align_QScore::dna_to_ma(QList<GObject*> dnaSeqs) {
502 
503     int seqCount = dnaSeqs.count();
504     U2SequenceObject *seq = qobject_cast<U2SequenceObject *>(dnaSeqs[0]);
505     MultipleSequenceAlignment ma("Alignment", seq->getAlphabet());
506     for(int i=0; i<seqCount; i++) {
507         seq = qobject_cast<U2SequenceObject *>(dnaSeqs[i]);
508         if(seq == nullptr) {
509             stateInfo.setError(  QString("Can't cast GObject to U2SequenceObject") );
510             return ma;
511         }
512         QByteArray seqData = seq->getWholeSequenceData(stateInfo);
513         SAFE_POINT_OP(stateInfo, MultipleSequenceAlignment());
514         ma->addRow(seq->getSequenceName(), seqData);
515     }
516     return ma;
517 }
518 
onSubTaskFinished(Task * subTask)519 QList<Task*> GTest_Muscle_Load_Align_QScore::onSubTaskFinished(Task* subTask) {
520     Q_UNUSED(subTask);
521     QList<Task*> res;
522     if (hasError() || isCanceled()) {
523         return res;
524     }
525 
526     if (subTask == loadTask1) {
527         Document *doc = loadTask1->getDocument();
528         if(loadTask1->hasError()) {
529             return res;
530         }
531         assert(doc!=nullptr);
532 
533         QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
534 
535         if (list.size() == 0) {
536             stateInfo.setError(  QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
537             return res;
538         }
539 
540         MultipleSequenceAlignment malign = dna_to_ma(list);
541         if(hasError()) {
542             return res;
543         }
544 
545         ma1 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
546         CHECK_OP(stateInfo, res);
547 
548         if(ma1 == nullptr){
549             stateInfo.setError(  QString("can't convert dna sequences to MultipleSequenceAlignment") );
550             return res;
551         }
552 
553         muscleTask = new MuscleTask(ma1->getMultipleAlignment(), config);
554         res << muscleTask;
555         this->connect(muscleTask,SIGNAL(si_progressChanged()),SLOT(sl_muscleProgressChg()));
556     }
557     else if (subTask == muscleTask) {
558         if(muscleTask->hasError()) {
559             setError( muscleTask->getError() );
560             return res;
561         }
562 
563         MuscleTask * localMuscle = qobject_cast<MuscleTask*>( subTask );
564         assert( nullptr != localMuscle );
565         ma1->setMultipleAlignment( localMuscle->resultMA );
566 
567     }
568     else if (subTask == loadTask2) {
569         if (loadTask2->hasError()) {
570             return res;
571         }
572         Document *doc = loadTask2->getDocument();
573         if(loadTask2->hasError()) {
574             return res;
575         }
576         assert(doc!=nullptr);
577 
578         QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
579 
580         if (list.size() == 0) {
581             stateInfo.setError(  QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
582             return res;
583         }
584 
585         MultipleSequenceAlignment malign = dna_to_ma(list);
586         if(hasError()) {
587             return res;
588         }
589 
590         ma2 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
591         CHECK_OP(stateInfo, res);
592 
593         if(ma2 == nullptr){
594             stateInfo.setError(  QString("can't convert dna sequences to MultipleSequenceAlignment") );
595             return res;
596         }
597     }
598     return res;
599 }
600 
run()601 void GTest_Muscle_Load_Align_QScore::run() {
602     double qscore = QScore(ma1->getMultipleAlignment(), ma2->getMultipleAlignment(), stateInfo);
603     if(stateInfo.hasError()) {
604         return;
605     }
606 
607     bool match = fabsl(this->qscore - qscore) < dqscore;
608 
609     if(!match) stateInfo.setError(  QString("qscore not matched: %1, expected %2").arg(qscore).arg(this->qscore));
610 }
611 
report()612 Task::ReportResult GTest_Muscle_Load_Align_QScore::report() {
613     propagateSubtaskError();
614     return ReportResult_Finished;
615 }
616 
~GTest_Muscle_Load_Align_QScore()617 GTest_Muscle_Load_Align_QScore::~GTest_Muscle_Load_Align_QScore() {
618 }
619 
Muscle_Load_Align_Compare_Task(QString inFileURL,QString patFileURL,MuscleTaskSettings & _config,QString _name)620 Muscle_Load_Align_Compare_Task::Muscle_Load_Align_Compare_Task( QString inFileURL, QString patFileURL,
621                                                                 MuscleTaskSettings& _config, QString _name)
622 : Task(_name, TaskFlags_FOSCOE), str_inFileURL(inFileURL), str_patFileURL(patFileURL),muscleTask(nullptr), config(_config)
623 {
624     //QFileInfo fInf(inFileURL);
625     //setTaskName("Muscle_Load_Align_Compare_Task: " + fInf.fileName());
626     setUseDescriptionFromSubtask(true);
627     stateInfo.progress = 0;
628     loadTask1 = nullptr;
629     loadTask2 = nullptr;
630     muscleTask = nullptr;
631     ma1 = nullptr;
632     ma2 = nullptr;
633 }
634 
prepare()635 void Muscle_Load_Align_Compare_Task::prepare() {
636     IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(str_inFileURL));
637     loadTask1 = new LoadDocumentTask(BaseDocumentFormats::FASTA,str_inFileURL,iof);
638     loadTask1->setSubtaskProgressWeight(0);
639     addSubTask(loadTask1);
640     iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(str_patFileURL));
641     loadTask2 = new LoadDocumentTask(BaseDocumentFormats::FASTA,str_patFileURL,iof);
642     addSubTask(loadTask2);
643     loadTask1->setSubtaskProgressWeight(0);
644 }
645 
dna_to_ma(QList<GObject * > dnaSeqs)646 MultipleSequenceAlignment Muscle_Load_Align_Compare_Task::dna_to_ma(QList<GObject*> dnaSeqs) {
647 
648     int seqCount = dnaSeqs.count();
649     U2SequenceObject *seq = qobject_cast<U2SequenceObject *>(dnaSeqs[0]);
650     MultipleSequenceAlignment ma("Alignment",seq->getAlphabet());
651     for(int i=0; i<seqCount; i++) {
652         seq = qobject_cast<U2SequenceObject *>(dnaSeqs[i]);
653         if(seq == nullptr) {
654             stateInfo.setError(  QString("Can't cast GObject to U2SequenceObject") );
655             return ma;
656         }
657         QByteArray seqData = seq->getWholeSequenceData(stateInfo);
658         SAFE_POINT_OP(stateInfo, MultipleSequenceAlignment());
659         ma->addRow(seq->getSequenceName(), seqData);
660     }
661     return ma;
662 }
663 
onSubTaskFinished(Task * subTask)664 QList<Task*> Muscle_Load_Align_Compare_Task::onSubTaskFinished(Task* subTask) {
665     QList<Task*> res;
666     if (hasError() || isCanceled()) {
667         return res;
668     }
669 
670     if (subTask == loadTask1) {
671         Document *doc = loadTask1->getDocument();
672         if(loadTask1->hasError()) {
673             return res;
674         }
675         assert(doc!=nullptr);
676 
677         QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
678 
679         if (list.size() == 0) {
680             stateInfo.setError(  QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
681             return res;
682         }
683 
684         MultipleSequenceAlignment malign = dna_to_ma(list);
685         if(hasError()) {
686             return res;
687         }
688 
689         ma1 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
690         CHECK_OP(stateInfo, res);
691 
692         if(ma1 == nullptr){
693             stateInfo.setError(  QString("can't convert dna sequences to MultipleSequenceAlignment") );
694             return res;
695         }
696 
697         muscleTask = new MuscleTask(ma1->getMultipleAlignment(), config);
698 
699         res << muscleTask;
700         this->connect(muscleTask,SIGNAL(si_progressChanged()),SLOT(sl_muscleProgressChg()));
701     }
702     else if (subTask == muscleTask) {
703         if(muscleTask->hasError()) {
704             setError( muscleTask->getError() );
705             return res;
706         }
707         MuscleTask * localMuscle = qobject_cast<MuscleTask*>( subTask );
708         assert( nullptr != localMuscle );
709         ma1->setMultipleAlignment( localMuscle->resultMA );
710 
711     }
712     else if (subTask == loadTask2) {
713         if (loadTask2->hasError()) {
714             return res;
715         }
716         Document *doc = loadTask2->getDocument();
717         if(loadTask2->hasError()) {
718             return res;
719         }
720         assert(doc!=nullptr);
721 
722         QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
723 
724         if (list.size() == 0) {
725             stateInfo.setError(  QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
726             return res;
727         }
728 
729         MultipleSequenceAlignment malign = dna_to_ma(list);
730         if(hasError()) {
731             return res;
732         }
733 
734         ma2 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
735         CHECK_OP(stateInfo, res);
736 
737         if(ma2 == nullptr){
738             stateInfo.setError(  QString("can't convert dna sequences to MultipleSequenceAlignment") );
739             return res;
740         }
741     }
742     return res;
743 }
744 
run()745 void Muscle_Load_Align_Compare_Task::run() {
746 
747     const QList<MultipleSequenceAlignmentRow> alignedSeqs1 = ma1->getMsa()->getMsaRows();
748     const QList<MultipleSequenceAlignmentRow> alignedSeqs2 = ma2->getMsa()->getMsaRows();
749 
750     foreach(const MultipleSequenceAlignmentRow &maItem1, alignedSeqs1) {
751         bool nameFound = false;
752         foreach(const MultipleSequenceAlignmentRow&maItem2, alignedSeqs2) {
753             if (maItem1->getName()== maItem2->getName()) {
754                 nameFound = true;
755                 int l1 = maItem1->getCoreLength();
756                 int l2 = maItem2->getCoreLength();
757                 if (l1!= l2) {
758                     stateInfo.setError(  QString("Aligned sequences \"%1\" length not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(l1).arg(l2) );
759                     return;
760                 }
761                 if (*maItem1 != *maItem2) {
762                     stateInfo.setError(  QString("Aligned sequences \"%1\" not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(QString(maItem1->getCore())).arg(QString(maItem2->getCore())) );
763                     return;
764                 }
765             }
766         }
767         if (!nameFound) {
768             stateInfo.setError(  QString("aligned sequence not found \"%1\"").arg(maItem1->getName()) );
769         }
770     }
771 }
772 
cleanup()773 void Muscle_Load_Align_Compare_Task::cleanup() {
774     delete ma1;
775     delete ma2;
776     ma1 = nullptr;
777     ma2 = nullptr;
778 }
779 
report()780 Task::ReportResult Muscle_Load_Align_Compare_Task::report() {
781     propagateSubtaskError();
782     if(hasError()) {
783         stateInfo.setError(  QString("input file \"%1\", pattern file \"%2\":\n").arg(str_inFileURL).arg(str_patFileURL) + stateInfo.getError() );
784     }
785     return ReportResult_Finished;
786 }
787 
init(XMLTestFormat *,const QDomElement & el)788 void GTest_Muscle_Load_Align_Compare::init(XMLTestFormat *, const QDomElement& el) {
789     inFileURL = el.attribute(IN_FILE_NAME_ATTR);
790     if (inFileURL.isEmpty()) {
791         failMissingValue(IN_FILE_NAME_ATTR);
792         return;
793     }
794     patFileURL = el.attribute(OUT_FILE_NAME_ATTR);
795     if (inFileURL.isEmpty()) {
796         failMissingValue(OUT_FILE_NAME_ATTR);
797         return;
798     }
799 
800 }
801 
prepare()802 void GTest_Muscle_Load_Align_Compare::prepare() {
803 
804     MuscleTaskSettings mSettings;
805     mSettings.stableMode = false; //default mode is 'group' like in MUSCLE
806     bool ok = false;
807     mSettings.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
808     if(!ok) {
809         stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
810         return;
811     }
812     QFileInfo inFile(env->getVar("COMMON_DATA_DIR")+"/"+inFileURL);
813     if(!inFile.exists()) {
814         stateInfo.setError(  QString("file not exist %1").arg(inFile.absoluteFilePath()) );
815         return;
816     }
817     QFileInfo patFile(env->getVar("COMMON_DATA_DIR")+"/"+patFileURL);
818     if(!patFile.exists()) {
819         stateInfo.setError(  QString("file not exist %1").arg(patFile.absoluteFilePath()) );
820         return;
821     }
822 
823     worker = new Muscle_Load_Align_Compare_Task(inFile.absoluteFilePath(),patFile.absoluteFilePath(),mSettings,inFile.fileName());
824     addSubTask(worker);
825 }
826 
report()827 Task::ReportResult GTest_Muscle_Load_Align_Compare::report() {
828     propagateSubtaskError();
829     return ReportResult_Finished;
830 }
831 
~GTest_Muscle_Load_Align_Compare()832 GTest_Muscle_Load_Align_Compare::~GTest_Muscle_Load_Align_Compare() {
833 }
834 
init(U2::XMLTestFormat *,const QDomElement & el)835 void GTest_uMusclePacketTest::init(U2::XMLTestFormat *, const QDomElement &el) {
836     int nThread = qMax(0, getEnv()->getVar("NUM_THREADS").toInt());
837     setMaxParallelSubtasks(nThread);
838 
839     tpm = Task::Progress_SubTasksBased;
840 
841     inDirName = el.attribute(IN_DIR_ATTR);
842     if (inDirName.isEmpty()) {
843         failMissingValue(IN_DIR_ATTR);
844         return;
845     }
846     patDirName = el.attribute(PAT_DIR_ATTR);
847     if (patDirName.isEmpty()) {
848         failMissingValue(PAT_DIR_ATTR);
849         return;
850     }
851 }
prepare()852 void GTest_uMusclePacketTest::prepare() {
853     QDir inDir(env->getVar("COMMON_DATA_DIR")+"/"+inDirName);
854     QDir refDir(env->getVar("COMMON_DATA_DIR")+"/"+patDirName);
855     QFileInfoList allFilesInfoList = inDir.entryInfoList();
856     QFileInfoList inFileInfoList;
857     QFileInfoList patFileInfoList;
858     if (allFilesInfoList.isEmpty()) {
859         stateInfo.setError(  QString("no files in dir %1").arg(inDir.absolutePath()) );
860         return;
861     }
862 
863     foreach(const QFileInfo &infoLisItem, allFilesInfoList) {
864         if(infoLisItem.isFile()) {
865             if(!infoLisItem.exists()) {
866                 stateInfo.setError(  QString("no not exist %1").arg(infoLisItem.absoluteFilePath()) );
867             }
868             QDir patternDir(env->getVar("COMMON_DATA_DIR")+"/"+patDirName);
869             QFileInfo patFile(patternDir.absolutePath() + "/" + infoLisItem.fileName());
870             if(!patFile.exists()) {
871                 stateInfo.setError(  QString("file not exist %1").arg(patFile.absoluteFilePath()) );
872                 return;
873             }
874             inFileInfoList << infoLisItem;
875             patFileInfoList << patFile;
876         }
877     }
878 
879     MuscleTaskSettings mSettings;
880     mSettings.stableMode = false; //default mode is 'group' like in MUSCLE
881     bool ok = false;
882     mSettings.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
883     if(!ok) {
884         stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
885         return;
886     }
887 
888     for (int i = 0; i < inFileInfoList.count(); i++) {
889         Task *task = new Muscle_Load_Align_Compare_Task(inFileInfoList[i].absoluteFilePath(),patFileInfoList[i].absoluteFilePath(),mSettings, "MusclePacketTestSubtask: "+inFileInfoList[i].fileName());
890         addSubTask(task);
891     }
892     timer.start();
893 }
894 
report()895 Task::ReportResult GTest_uMusclePacketTest::report() {
896     propagateSubtaskError();
897     if (!hasError()) {
898         algoLog.trace(QString("uMusclePacketTest: \"%1\" accomplished. Time elapsed: %2 ms").arg(inDirName).arg(timer.elapsed()));
899     }
900     return ReportResult_Finished;
901 }
902 
createTestFactories()903 QList<XMLTestFactory*> UMUSCLETests::createTestFactories() {
904     QList<XMLTestFactory*> res;
905     res.append(GTest_uMuscle::createFactory());
906     res.append(GTest_CompareMAlignment::createFactory());
907     res.append(GTest_uMuscleAddUnalignedSequenceToProfile::createFactory());
908     res.append(GTest_uMusclePacketTest::createFactory());
909     res.append(GTest_Muscle_Load_Align_Compare::createFactory());
910     res.append(GTest_Muscle_Load_Align_QScore::createFactory());
911     return res;
912 }
913 
914 }//namespace
915