1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include <QDir>
23 #include <QtMath>
24
25 #include <U2Core/AppContext.h>
26 #include <U2Core/BaseDocumentFormats.h>
27 #include <U2Core/DNASequenceObject.h>
28 #include <U2Core/DocumentModel.h>
29 #include <U2Core/GObjectTypes.h>
30 #include <U2Core/IOAdapter.h>
31 #include <U2Core/IOAdapterUtils.h>
32 #include <U2Core/LoadDocumentTask.h>
33 #include <U2Core/Log.h>
34 #include <U2Core/MultipleSequenceAlignmentImporter.h>
35 #include <U2Core/MultipleSequenceAlignmentObject.h>
36 #include <U2Core/SaveDocumentTask.h>
37 #include <U2Core/U2OpStatusUtils.h>
38 #include <U2Core/U2SafePoints.h>
39
40 #include "umuscleTests.h"
41 #include "MuscleConstants.h"
42 #include "MuscleTask.h"
43 #include "MuscleParallel.h"
44
45 /* TRANSLATOR U2::GTest*/
46
47 namespace U2 {
48
// Defined outside this file. GTest_Muscle_Load_Align_QScore::run() calls it with the aligned
// input (maTest) and the reference alignment (maRef) and checks ti for an error afterwards.
extern double QScore(const MultipleSequenceAlignment& maTest, const MultipleSequenceAlignment& maRef, TaskStateInfo& ti);

// Names of the XML attributes and environment variables read by the tests in this file.
#define OUT_FILE_NAME_ATTR "out"            // reference ("pattern") file, relative to COMMON_DATA_DIR
#define IN_FILE_NAME_ATTR "in"              // input file, or input document context name for GTest_uMuscle
#define QSCORE_ATTR "qscr"                  // expected qscore value
#define QSCORE_DELTA_ATTR "dqscr"           // allowed qscore deviation (0.01 when the attribute is absent)
#define INDEX_ATTR "index"                  // context name under which GTest_uMuscle publishes its result
#define DOC1_ATTR "doc1"                    // first document compared by GTest_CompareMAlignment
#define DOC2_ATTR "doc2"                    // second (expected) document compared by GTest_CompareMAlignment
#define IN_DIR_ATTR "indir"                 // input directory of GTest_uMusclePacketTest
#define PAT_DIR_ATTR "refdir"               // reference directory of GTest_uMusclePacketTest
#define PARALLEL_FLAG_ATTR "parallel"       // not referenced in the visible part of this file
#define MAX_ITERS_ATTR "maxiters"           // optional iteration limit for GTest_uMuscle
#define REFINE_ONLY_ATTR "refine"           // optional integer flag: run MUSCLE in refine mode
#define REGION_ATTR "region"                // optional "start..end" region to align
#define STABLE_ATTR "stable"                // optional integer flag: MUSCLE stable mode
#define ENV_MUSCLE_N_THREADS "MUSCLE_N_THREADS" // test environment variable holding the thread count
#define MACHINE_PATH "MACHINE"              // not referenced in the visible part of this file
67
68 struct GTestBoolProperty {
getU2::GTestBoolProperty69 static bool get(QString attr, bool &value, const QDomElement &el) {
70 QString value_str = el.attribute(attr);
71 if(!value_str.isEmpty()) {
72 bool ok = false;
73 value = value_str.toInt(&ok);
74 return ok;
75 }
76 return true;
77 }
78 };
79
// Reads the boolean attribute ATTR of the QDomElement named `el` in the enclosing scope into VAL
// via GTestBoolProperty::get(). When the attribute is present but malformed it calls
// failMissingValue(ATTR) and returns from the enclosing void function.
// The macro expands to a complete if-statement, so call sites do not need a trailing semicolon.
#define GET_BOOL_PROP(ATTR,VAL) if(GTestBoolProperty::get((ATTR),(VAL),(el))==false) {\
    failMissingValue((ATTR));\
    return;}
83
init(XMLTestFormat *,const QDomElement & el)84 void GTest_uMuscle::init(XMLTestFormat *, const QDomElement& el) {
85 ctxAdded = false;
86 ma_result = nullptr;
87 refineOnly = false;
88 maxIters = -1;
89 alignRegion = false;
90 stable = false;
91
92 inputDocCtxName = el.attribute(IN_FILE_NAME_ATTR);
93 if (inputDocCtxName.isEmpty()) {
94 failMissingValue(IN_FILE_NAME_ATTR);
95 return;
96 }
97
98 QString refineOnly_str = el.attribute(REFINE_ONLY_ATTR);
99 if (!refineOnly_str.isEmpty()) {
100 bool ok = false;
101 refineOnly = refineOnly_str.toInt(&ok);
102 if (!ok) {
103 failMissingValue(REFINE_ONLY_ATTR);
104 return;
105 }
106 }
107
108 QString maxIters_str = el.attribute(MAX_ITERS_ATTR);
109 if(!maxIters_str.isEmpty()) {
110 bool ok = false;
111 maxIters = maxIters_str.toInt(&ok);
112 if (!ok) {
113 failMissingValue(MAX_ITERS_ATTR);
114 return;
115 }
116 }
117
118 QString region_str = el.attribute(REGION_ATTR);
119 if(!region_str.isEmpty()) {
120 QRegExp rx("([0123456789]+)..([0123456789]+)");
121 if(rx.indexIn(region_str, 0) != -1) {
122 bool ok1,ok2;
123 int start = rx.cap(1).toInt(&ok1) - 1;
124 int end = rx.cap(2).toInt(&ok2);
125 if (ok1 && ok2) {
126 region = U2Region(start, end - start);
127 alignRegion = true;
128 } else {
129 failMissingValue(REGION_ATTR);
130 }
131 } else {
132 failMissingValue(REGION_ATTR);
133 }
134 }
135
136 GET_BOOL_PROP(STABLE_ATTR,stable)
137
138 resultCtxName = el.attribute(INDEX_ATTR);
139 }
140
prepare()141 void GTest_uMuscle::prepare() {
142 mTask = nullptr;
143 ma_result = nullptr;
144
145 doc = getContext<Document>(this, inputDocCtxName);
146 if (doc == nullptr) {
147 stateInfo.setError( QString("context not found %1").arg(inputDocCtxName) );
148 return;
149 }
150
151 QList<GObject*> list = doc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
152 if (list.size() == 0) {
153 stateInfo.setError( QString("container of object with type \"%1\" is empty").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT) );
154 return;
155 }
156
157 GObject *obj = list.first();
158 if(obj==nullptr){
159 stateInfo.setError( QString("object with type \"%1\" not found").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT) );
160 return;
161 }
162 assert(obj!=nullptr);
163 MultipleSequenceAlignmentObject* ma = qobject_cast<MultipleSequenceAlignmentObject*>(obj);
164 if(ma==nullptr){
165 stateInfo.setError( QString("error can't cast to multiple alignment from GObject") );
166 return;
167 }
168
169 MuscleTaskSettings s;
170 bool ok = false;
171 s.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
172 if(!ok) {
173 stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
174 return;
175 }
176 if(maxIters != -1) {
177 s.maxIterations = maxIters;
178 }
179 if(refineOnly) {
180 s.op = MuscleTaskOp_Refine;
181 }
182 s.stableMode = stable; //default mode is 'group' like in MUSCLE
183 s.alignRegion = alignRegion;
184 if(alignRegion) {
185 s.regionToAlign = region;
186 }
187 ma_result = ma;
188 mTask = new MuscleGObjectTask(ma_result,s);
189 addSubTask(mTask);
190 }
191
report()192 Task::ReportResult GTest_uMuscle::report() {
193 if (!hasError()) {
194 if(mTask->hasError()) {
195 stateInfo.setError( mTask->getError() );
196 return ReportResult_Finished;
197 }
198 if(!resultCtxName.isEmpty()) {
199 ctxAdded = true;
200 addContext(resultCtxName, ma_result);
201 }
202 }
203 return ReportResult_Finished;
204 }
205
cleanup()206 void GTest_uMuscle::cleanup() {
207 //if(ma_result!=NULL)
208 // delete ma_result;
209 if(ctxAdded) {
210 removeContext(resultCtxName);
211 }
212
213 XmlTest::cleanup();
214 }
215
init(XMLTestFormat *,const QDomElement & el)216 void GTest_CompareMAlignment::init(XMLTestFormat *, const QDomElement& el) {
217 doc1CtxName = el.attribute(DOC1_ATTR);
218 if (doc1CtxName.isEmpty()) {
219 failMissingValue(DOC1_ATTR);
220 return;
221 }
222 doc2CtxName = el.attribute(DOC2_ATTR);
223 if (doc2CtxName.isEmpty()) {
224 failMissingValue(DOC2_ATTR);
225 return;
226 }
227 }
228
report()229 Task::ReportResult GTest_CompareMAlignment::report() {
230 Document* doc1 = getContext<Document>(this, doc1CtxName);
231 if (doc1 == nullptr) {
232 stateInfo.setError( QString("document not found %1").arg(doc1CtxName) );
233 return ReportResult_Finished;
234 }
235 Document* doc2 = getContext<Document>(this, doc2CtxName);
236 if (doc2 == nullptr) {
237 stateInfo.setError( QString("document not found %1").arg(doc2CtxName) );
238 return ReportResult_Finished;
239 }
240
241 QList<GObject*> objs1 = doc1->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
242 QList<GObject*> objs2 = doc2->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
243
244 if(objs1.size()!=objs2.size()) {
245 stateInfo.setError( QString("MAlignmentObjects count not matched %1, expected %2").arg(objs1.size()).arg(objs2.size()) );
246 return ReportResult_Finished;
247 }
248
249 int listSize = objs1.size();
250 for (int i=0;i<listSize;i++) {
251 MultipleSequenceAlignmentObject* ma1 = qobject_cast<MultipleSequenceAlignmentObject*>(objs1.at(i));
252 MultipleSequenceAlignmentObject* ma2 = qobject_cast<MultipleSequenceAlignmentObject*>(objs2.at(i));
253 const QList<MultipleSequenceAlignmentRow> alignedSeqs1 = ma1->getMsa()->getMsaRows();
254 const QList<MultipleSequenceAlignmentRow> alignedSeqs2 = ma2->getMsa()->getMsaRows();
255 if(ma1->objectName()!=ma2->objectName()) {
256 stateInfo.setError( QString("MAlignmentObjects name not matched \"%1\", expected \"%2\"").arg(ma1->objectName()).arg(ma2->objectName()) );
257 return ReportResult_Finished;
258 }
259 foreach(const MultipleSequenceAlignmentRow &maItem1, alignedSeqs1) {
260 bool nameFound = false;
261 foreach(const MultipleSequenceAlignmentRow &maItem2, alignedSeqs2) {
262 if (maItem1->getName() == maItem2->getName()) {
263 nameFound = true;
264 int l1 = maItem1->getCoreEnd();
265 int l2 = maItem2->getCoreEnd();
266 if(l1!=l2) {
267 stateInfo.setError( QString("Aligned sequences \"%1\" length not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(l1).arg(l2) );
268 return ReportResult_Finished;
269 }
270 if (*maItem1 != *maItem2) {
271 stateInfo.setError( QString("Aligned sequences \"%1\" not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(QString(maItem1->getCore())).arg(QString(maItem2->getCore())) );
272 return ReportResult_Finished;
273 }
274 }
275 }
276 if (!nameFound) {
277 stateInfo.setError( QString("aligned sequence not found \"%1\"").arg(maItem1->getName()) );
278 }
279
280 }
281
282 }
283 return ReportResult_Finished;
284 }
285
init(XMLTestFormat *,const QDomElement & el)286 void GTest_uMuscleAddUnalignedSequenceToProfile::init(XMLTestFormat */*tf*/, const QDomElement& el) {
287 origAliSeqs = 0;
288 aliObj = nullptr;
289 resultAliSeqs = 0;
290 aliDocName = el.attribute("ali-doc");
291 if (aliDocName.isEmpty()) {
292 stateInfo.setError( QString("value not set %1").arg("ali-doc") );
293 return;
294 }
295 seqDocName = el.attribute("seq-doc");
296 if (seqDocName.isEmpty()) {
297 stateInfo.setError( QString("value not set %1").arg("seq-doc") );
298 return;
299 }
300 QString gaps = el.attribute("gap-map");
301 QStringList gapsPerSeq = gaps.split('|');
302 //gapsPerSeq.removeAll(QString());
303 foreach (const QString& s, gapsPerSeq) {
304 QList<int> seqGaps;
305 QStringList nums = s.split(',');
306 foreach (const QString& n, nums) {
307 if (n.isEmpty()) {
308 continue;
309 }
310 bool ok = false;
311 int gapPos = n.toInt(&ok);
312 if (!ok) {
313 stateInfo.setError( QString("error parsing gap value '%1', line %2").arg(n).arg(s) );
314 return;
315 }
316 seqGaps.append(gapPos);
317 }
318 gapPositionsForSeqs.append(seqGaps);
319 }
320 QString resultLen = el.attribute("result-ali-len");
321 bool ok = false;
322 resultAliLen = resultLen.toInt(&ok);
323 if (!ok) {
324 stateInfo.setError( QString("error result-ali-len '%1'").arg(resultLen) );
325 return;
326 }
327 }
328
prepare()329 void GTest_uMuscleAddUnalignedSequenceToProfile::prepare() {
330 if (hasError()) {
331 return;
332 }
333 Document* aliDoc = getContext<Document>(this, aliDocName);
334 if (aliDoc == nullptr) {
335 stateInfo.setError( QString("alignment document not found in context: %1").arg(aliDocName) );
336 return;
337 }
338 Document* seqDoc = getContext<Document>(this, seqDocName);
339 if (seqDoc == nullptr) {
340 stateInfo.setError( QString("sequence document not found in context: %1").arg(seqDocName) );
341 return;
342 }
343 QList<GObject*> aliObjs = aliDoc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
344 if (aliObjs.isEmpty()) {
345 stateInfo.setError( QString("no alignment object found in doc: %1").arg(aliDoc->getURLString()) );
346 return;
347 }
348 aliObj = qobject_cast<MultipleSequenceAlignmentObject*>(aliObjs[0]);
349 origAliSeqs = aliObj->getNumRows();
350
351 QList<GObject*> seqObjs = seqDoc->findGObjectByType(GObjectTypes::SEQUENCE);
352 if (seqObjs.isEmpty()) {
353 stateInfo.setError( QString("no sequence objects found in doc: %1").arg(seqDoc->getURLString()) );
354 return;
355 }
356 MultipleSequenceAlignment unalignedMA;
357 unalignedMA->setAlphabet(aliObj->getAlphabet());
358 foreach (GObject* obj, seqObjs) {
359 U2SequenceObject* dnaObj = qobject_cast<U2SequenceObject*>(obj);
360 QByteArray seqData = dnaObj->getWholeSequenceData(stateInfo);
361 CHECK_OP(stateInfo, );
362 unalignedMA->addRow(dnaObj->getSequenceName(), seqData);
363 }
364 if (unalignedMA->getNumRows()!=gapPositionsForSeqs.size()) {
365 stateInfo.setError( QString("number of sequences not matches number of gaps in test: %1 sequences and %2 gap lines")
366 .arg(unalignedMA->getNumRows()).arg(gapPositionsForSeqs.size()) );
367 return;
368 }
369 resultAliSeqs = origAliSeqs + unalignedMA->getNumRows();
370
371 MuscleTaskSettings s;
372 s.op = MuscleTaskOp_AddUnalignedToProfile;
373 s.profile = unalignedMA;
374 bool ok = false;
375 s.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
376 if(!ok) {
377 stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
378 return;
379 }
380 addSubTask(new MuscleGObjectTask(aliObj, s));
381 }
382
report()383 Task::ReportResult GTest_uMuscleAddUnalignedSequenceToProfile::report() {
384 propagateSubtaskError();
385 if (hasError()) {
386 return ReportResult_Finished;
387 }
388 const MultipleSequenceAlignment msa = aliObj->getMultipleAlignment();
389 if (msa->getLength()!=resultAliLen) {
390 stateInfo.setError( QString("result alignment length notmatches: %1, expected: %2").arg(msa->getLength()).arg(resultAliLen) );
391 return ReportResult_Finished;
392 }
393
394 if (resultAliSeqs!=msa->getNumRows()) {
395 stateInfo.setError( QString("unexpected number of sequences in result: %1, expected: %2").arg(msa->getNumRows()).arg(resultAliSeqs) );
396 return ReportResult_Finished;
397 }
398
399 U2OpStatus2Log os;
400 for (int i = origAliSeqs, j = 0; i < msa->getNumRows(); i++, j++) {
401 const MultipleSequenceAlignmentRow row = msa->getMsaRow(i);
402 QByteArray seq = row->toByteArray(os, msa->getLength());
403 QList<int> seqGaps = gapPositionsForSeqs[j];
404 for (int pos = 0; pos < seq.size(); pos++) {
405 char c = seq[pos];
406 if (c == U2Msa::GAP_CHAR) {
407 bool found = seqGaps.contains(pos);
408 if (!found) {
409 stateInfo.setError( QString("illegal gap found! pos: %1, sequence: %2").arg(pos).arg(row->getName()) );
410 return ReportResult_Finished;
411 }
412 }
413 }
414 for (int gap = 0; gap < seqGaps.size(); gap++) {
415 int pos = seqGaps[gap];
416 char c = seq[pos];
417 if (c != U2Msa::GAP_CHAR) {
418 stateInfo.setError( QString("gap not found! pos: %1, sequence: %2").arg(pos).arg(row->getName()) );
419 return ReportResult_Finished;
420 }
421 }
422 }
423 return ReportResult_Finished;
424 }
425
init(XMLTestFormat *,const QDomElement & el)426 void GTest_Muscle_Load_Align_QScore::init(XMLTestFormat *, const QDomElement& el) {
427 inFileURL = el.attribute(IN_FILE_NAME_ATTR);
428 stateInfo.progress = 0;
429 loadTask1 = nullptr;
430 loadTask2 = nullptr;
431 muscleTask = nullptr;
432 ma1 = nullptr;
433 ma2 = nullptr;
434
435 if (inFileURL.isEmpty()) {
436 failMissingValue(IN_FILE_NAME_ATTR);
437 return;
438 }
439 patFileURL = el.attribute(OUT_FILE_NAME_ATTR);
440 if (patFileURL.isEmpty()) {
441 failMissingValue(OUT_FILE_NAME_ATTR);
442 return;
443 }
444
445 QString str_qscore = el.attribute(QSCORE_ATTR);
446 if (str_qscore.isEmpty()) {
447 failMissingValue(QSCORE_ATTR);
448 return;
449 }
450 bool ok = false;
451 qscore = str_qscore.toFloat(&ok);
452 if (!ok) {
453 failMissingValue(QSCORE_ATTR);
454 return;
455 }
456
457 this->dqscore = 0.01;
458 QString str_dqscore = el.attribute(QSCORE_DELTA_ATTR);
459 if (!str_dqscore.isEmpty()) {
460 dqscore = str_dqscore.toFloat(&ok);
461 if (!ok) {
462 failMissingValue(QSCORE_DELTA_ATTR);
463 return;
464 }
465 }
466 setUseDescriptionFromSubtask(true);
467 }
468
prepare()469 void GTest_Muscle_Load_Align_QScore::prepare() {
470
471 config.stableMode = false; //default mode is 'group' like in MUSCLE
472 bool ok = false;
473 config.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
474 if(!ok) {
475 stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
476 return;
477 }
478 QFileInfo inFile(env->getVar("COMMON_DATA_DIR")+"/"+inFileURL);
479 if(!inFile.exists()) {
480 stateInfo.setError( QString("file not exist %1").arg(inFile.absoluteFilePath()) );
481 return;
482 }
483 QFileInfo patFile(env->getVar("COMMON_DATA_DIR")+"/"+patFileURL);
484 if(!patFile.exists()) {
485 stateInfo.setError( QString("file not exist %1").arg(patFile.absoluteFilePath()) );
486 return;
487 }
488
489 IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(env->getVar("COMMON_DATA_DIR")+"/"+inFileURL));
490 loadTask1 = new LoadDocumentTask(BaseDocumentFormats::FASTA,env->getVar("COMMON_DATA_DIR")+"/"+inFileURL,iof);
491 loadTask1->setSubtaskProgressWeight(0);
492 addSubTask(loadTask1);
493 iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(env->getVar("COMMON_DATA_DIR")+"/"+patFileURL));
494
495 loadTask2 = new LoadDocumentTask(BaseDocumentFormats::FASTA,env->getVar("COMMON_DATA_DIR")+"/"+patFileURL,iof);
496
497 addSubTask(loadTask2);
498 loadTask1->setSubtaskProgressWeight(0);
499 }
500
dna_to_ma(QList<GObject * > dnaSeqs)501 MultipleSequenceAlignment GTest_Muscle_Load_Align_QScore::dna_to_ma(QList<GObject*> dnaSeqs) {
502
503 int seqCount = dnaSeqs.count();
504 U2SequenceObject *seq = qobject_cast<U2SequenceObject *>(dnaSeqs[0]);
505 MultipleSequenceAlignment ma("Alignment", seq->getAlphabet());
506 for(int i=0; i<seqCount; i++) {
507 seq = qobject_cast<U2SequenceObject *>(dnaSeqs[i]);
508 if(seq == nullptr) {
509 stateInfo.setError( QString("Can't cast GObject to U2SequenceObject") );
510 return ma;
511 }
512 QByteArray seqData = seq->getWholeSequenceData(stateInfo);
513 SAFE_POINT_OP(stateInfo, MultipleSequenceAlignment());
514 ma->addRow(seq->getSequenceName(), seqData);
515 }
516 return ma;
517 }
518
onSubTaskFinished(Task * subTask)519 QList<Task*> GTest_Muscle_Load_Align_QScore::onSubTaskFinished(Task* subTask) {
520 Q_UNUSED(subTask);
521 QList<Task*> res;
522 if (hasError() || isCanceled()) {
523 return res;
524 }
525
526 if (subTask == loadTask1) {
527 Document *doc = loadTask1->getDocument();
528 if(loadTask1->hasError()) {
529 return res;
530 }
531 assert(doc!=nullptr);
532
533 QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
534
535 if (list.size() == 0) {
536 stateInfo.setError( QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
537 return res;
538 }
539
540 MultipleSequenceAlignment malign = dna_to_ma(list);
541 if(hasError()) {
542 return res;
543 }
544
545 ma1 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
546 CHECK_OP(stateInfo, res);
547
548 if(ma1 == nullptr){
549 stateInfo.setError( QString("can't convert dna sequences to MultipleSequenceAlignment") );
550 return res;
551 }
552
553 muscleTask = new MuscleTask(ma1->getMultipleAlignment(), config);
554 res << muscleTask;
555 this->connect(muscleTask,SIGNAL(si_progressChanged()),SLOT(sl_muscleProgressChg()));
556 }
557 else if (subTask == muscleTask) {
558 if(muscleTask->hasError()) {
559 setError( muscleTask->getError() );
560 return res;
561 }
562
563 MuscleTask * localMuscle = qobject_cast<MuscleTask*>( subTask );
564 assert( nullptr != localMuscle );
565 ma1->setMultipleAlignment( localMuscle->resultMA );
566
567 }
568 else if (subTask == loadTask2) {
569 if (loadTask2->hasError()) {
570 return res;
571 }
572 Document *doc = loadTask2->getDocument();
573 if(loadTask2->hasError()) {
574 return res;
575 }
576 assert(doc!=nullptr);
577
578 QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
579
580 if (list.size() == 0) {
581 stateInfo.setError( QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
582 return res;
583 }
584
585 MultipleSequenceAlignment malign = dna_to_ma(list);
586 if(hasError()) {
587 return res;
588 }
589
590 ma2 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
591 CHECK_OP(stateInfo, res);
592
593 if(ma2 == nullptr){
594 stateInfo.setError( QString("can't convert dna sequences to MultipleSequenceAlignment") );
595 return res;
596 }
597 }
598 return res;
599 }
600
run()601 void GTest_Muscle_Load_Align_QScore::run() {
602 double qscore = QScore(ma1->getMultipleAlignment(), ma2->getMultipleAlignment(), stateInfo);
603 if(stateInfo.hasError()) {
604 return;
605 }
606
607 bool match = fabsl(this->qscore - qscore) < dqscore;
608
609 if(!match) stateInfo.setError( QString("qscore not matched: %1, expected %2").arg(qscore).arg(this->qscore));
610 }
611
report()612 Task::ReportResult GTest_Muscle_Load_Align_QScore::report() {
613 propagateSubtaskError();
614 return ReportResult_Finished;
615 }
616
~GTest_Muscle_Load_Align_QScore()617 GTest_Muscle_Load_Align_QScore::~GTest_Muscle_Load_Align_QScore() {
618 }
619
Muscle_Load_Align_Compare_Task(QString inFileURL,QString patFileURL,MuscleTaskSettings & _config,QString _name)620 Muscle_Load_Align_Compare_Task::Muscle_Load_Align_Compare_Task( QString inFileURL, QString patFileURL,
621 MuscleTaskSettings& _config, QString _name)
622 : Task(_name, TaskFlags_FOSCOE), str_inFileURL(inFileURL), str_patFileURL(patFileURL),muscleTask(nullptr), config(_config)
623 {
624 //QFileInfo fInf(inFileURL);
625 //setTaskName("Muscle_Load_Align_Compare_Task: " + fInf.fileName());
626 setUseDescriptionFromSubtask(true);
627 stateInfo.progress = 0;
628 loadTask1 = nullptr;
629 loadTask2 = nullptr;
630 muscleTask = nullptr;
631 ma1 = nullptr;
632 ma2 = nullptr;
633 }
634
prepare()635 void Muscle_Load_Align_Compare_Task::prepare() {
636 IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(str_inFileURL));
637 loadTask1 = new LoadDocumentTask(BaseDocumentFormats::FASTA,str_inFileURL,iof);
638 loadTask1->setSubtaskProgressWeight(0);
639 addSubTask(loadTask1);
640 iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(str_patFileURL));
641 loadTask2 = new LoadDocumentTask(BaseDocumentFormats::FASTA,str_patFileURL,iof);
642 addSubTask(loadTask2);
643 loadTask1->setSubtaskProgressWeight(0);
644 }
645
dna_to_ma(QList<GObject * > dnaSeqs)646 MultipleSequenceAlignment Muscle_Load_Align_Compare_Task::dna_to_ma(QList<GObject*> dnaSeqs) {
647
648 int seqCount = dnaSeqs.count();
649 U2SequenceObject *seq = qobject_cast<U2SequenceObject *>(dnaSeqs[0]);
650 MultipleSequenceAlignment ma("Alignment",seq->getAlphabet());
651 for(int i=0; i<seqCount; i++) {
652 seq = qobject_cast<U2SequenceObject *>(dnaSeqs[i]);
653 if(seq == nullptr) {
654 stateInfo.setError( QString("Can't cast GObject to U2SequenceObject") );
655 return ma;
656 }
657 QByteArray seqData = seq->getWholeSequenceData(stateInfo);
658 SAFE_POINT_OP(stateInfo, MultipleSequenceAlignment());
659 ma->addRow(seq->getSequenceName(), seqData);
660 }
661 return ma;
662 }
663
onSubTaskFinished(Task * subTask)664 QList<Task*> Muscle_Load_Align_Compare_Task::onSubTaskFinished(Task* subTask) {
665 QList<Task*> res;
666 if (hasError() || isCanceled()) {
667 return res;
668 }
669
670 if (subTask == loadTask1) {
671 Document *doc = loadTask1->getDocument();
672 if(loadTask1->hasError()) {
673 return res;
674 }
675 assert(doc!=nullptr);
676
677 QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
678
679 if (list.size() == 0) {
680 stateInfo.setError( QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
681 return res;
682 }
683
684 MultipleSequenceAlignment malign = dna_to_ma(list);
685 if(hasError()) {
686 return res;
687 }
688
689 ma1 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
690 CHECK_OP(stateInfo, res);
691
692 if(ma1 == nullptr){
693 stateInfo.setError( QString("can't convert dna sequences to MultipleSequenceAlignment") );
694 return res;
695 }
696
697 muscleTask = new MuscleTask(ma1->getMultipleAlignment(), config);
698
699 res << muscleTask;
700 this->connect(muscleTask,SIGNAL(si_progressChanged()),SLOT(sl_muscleProgressChg()));
701 }
702 else if (subTask == muscleTask) {
703 if(muscleTask->hasError()) {
704 setError( muscleTask->getError() );
705 return res;
706 }
707 MuscleTask * localMuscle = qobject_cast<MuscleTask*>( subTask );
708 assert( nullptr != localMuscle );
709 ma1->setMultipleAlignment( localMuscle->resultMA );
710
711 }
712 else if (subTask == loadTask2) {
713 if (loadTask2->hasError()) {
714 return res;
715 }
716 Document *doc = loadTask2->getDocument();
717 if(loadTask2->hasError()) {
718 return res;
719 }
720 assert(doc!=nullptr);
721
722 QList<GObject*> list = doc->findGObjectByType(GObjectTypes::SEQUENCE);
723
724 if (list.size() == 0) {
725 stateInfo.setError( QString("container of object with type \"%1\" is empty").arg(GObjectTypes::SEQUENCE) );
726 return res;
727 }
728
729 MultipleSequenceAlignment malign = dna_to_ma(list);
730 if(hasError()) {
731 return res;
732 }
733
734 ma2 = MultipleSequenceAlignmentImporter::createAlignment(doc->getDbiRef(), malign, stateInfo);
735 CHECK_OP(stateInfo, res);
736
737 if(ma2 == nullptr){
738 stateInfo.setError( QString("can't convert dna sequences to MultipleSequenceAlignment") );
739 return res;
740 }
741 }
742 return res;
743 }
744
run()745 void Muscle_Load_Align_Compare_Task::run() {
746
747 const QList<MultipleSequenceAlignmentRow> alignedSeqs1 = ma1->getMsa()->getMsaRows();
748 const QList<MultipleSequenceAlignmentRow> alignedSeqs2 = ma2->getMsa()->getMsaRows();
749
750 foreach(const MultipleSequenceAlignmentRow &maItem1, alignedSeqs1) {
751 bool nameFound = false;
752 foreach(const MultipleSequenceAlignmentRow&maItem2, alignedSeqs2) {
753 if (maItem1->getName()== maItem2->getName()) {
754 nameFound = true;
755 int l1 = maItem1->getCoreLength();
756 int l2 = maItem2->getCoreLength();
757 if (l1!= l2) {
758 stateInfo.setError( QString("Aligned sequences \"%1\" length not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(l1).arg(l2) );
759 return;
760 }
761 if (*maItem1 != *maItem2) {
762 stateInfo.setError( QString("Aligned sequences \"%1\" not matched \"%2\", expected \"%3\"").arg(maItem1->getName()).arg(QString(maItem1->getCore())).arg(QString(maItem2->getCore())) );
763 return;
764 }
765 }
766 }
767 if (!nameFound) {
768 stateInfo.setError( QString("aligned sequence not found \"%1\"").arg(maItem1->getName()) );
769 }
770 }
771 }
772
cleanup()773 void Muscle_Load_Align_Compare_Task::cleanup() {
774 delete ma1;
775 delete ma2;
776 ma1 = nullptr;
777 ma2 = nullptr;
778 }
779
report()780 Task::ReportResult Muscle_Load_Align_Compare_Task::report() {
781 propagateSubtaskError();
782 if(hasError()) {
783 stateInfo.setError( QString("input file \"%1\", pattern file \"%2\":\n").arg(str_inFileURL).arg(str_patFileURL) + stateInfo.getError() );
784 }
785 return ReportResult_Finished;
786 }
787
init(XMLTestFormat *,const QDomElement & el)788 void GTest_Muscle_Load_Align_Compare::init(XMLTestFormat *, const QDomElement& el) {
789 inFileURL = el.attribute(IN_FILE_NAME_ATTR);
790 if (inFileURL.isEmpty()) {
791 failMissingValue(IN_FILE_NAME_ATTR);
792 return;
793 }
794 patFileURL = el.attribute(OUT_FILE_NAME_ATTR);
795 if (inFileURL.isEmpty()) {
796 failMissingValue(OUT_FILE_NAME_ATTR);
797 return;
798 }
799
800 }
801
prepare()802 void GTest_Muscle_Load_Align_Compare::prepare() {
803
804 MuscleTaskSettings mSettings;
805 mSettings.stableMode = false; //default mode is 'group' like in MUSCLE
806 bool ok = false;
807 mSettings.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
808 if(!ok) {
809 stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
810 return;
811 }
812 QFileInfo inFile(env->getVar("COMMON_DATA_DIR")+"/"+inFileURL);
813 if(!inFile.exists()) {
814 stateInfo.setError( QString("file not exist %1").arg(inFile.absoluteFilePath()) );
815 return;
816 }
817 QFileInfo patFile(env->getVar("COMMON_DATA_DIR")+"/"+patFileURL);
818 if(!patFile.exists()) {
819 stateInfo.setError( QString("file not exist %1").arg(patFile.absoluteFilePath()) );
820 return;
821 }
822
823 worker = new Muscle_Load_Align_Compare_Task(inFile.absoluteFilePath(),patFile.absoluteFilePath(),mSettings,inFile.fileName());
824 addSubTask(worker);
825 }
826
report()827 Task::ReportResult GTest_Muscle_Load_Align_Compare::report() {
828 propagateSubtaskError();
829 return ReportResult_Finished;
830 }
831
~GTest_Muscle_Load_Align_Compare()832 GTest_Muscle_Load_Align_Compare::~GTest_Muscle_Load_Align_Compare() {
833 }
834
init(U2::XMLTestFormat *,const QDomElement & el)835 void GTest_uMusclePacketTest::init(U2::XMLTestFormat *, const QDomElement &el) {
836 int nThread = qMax(0, getEnv()->getVar("NUM_THREADS").toInt());
837 setMaxParallelSubtasks(nThread);
838
839 tpm = Task::Progress_SubTasksBased;
840
841 inDirName = el.attribute(IN_DIR_ATTR);
842 if (inDirName.isEmpty()) {
843 failMissingValue(IN_DIR_ATTR);
844 return;
845 }
846 patDirName = el.attribute(PAT_DIR_ATTR);
847 if (patDirName.isEmpty()) {
848 failMissingValue(PAT_DIR_ATTR);
849 return;
850 }
851 }
prepare()852 void GTest_uMusclePacketTest::prepare() {
853 QDir inDir(env->getVar("COMMON_DATA_DIR")+"/"+inDirName);
854 QDir refDir(env->getVar("COMMON_DATA_DIR")+"/"+patDirName);
855 QFileInfoList allFilesInfoList = inDir.entryInfoList();
856 QFileInfoList inFileInfoList;
857 QFileInfoList patFileInfoList;
858 if (allFilesInfoList.isEmpty()) {
859 stateInfo.setError( QString("no files in dir %1").arg(inDir.absolutePath()) );
860 return;
861 }
862
863 foreach(const QFileInfo &infoLisItem, allFilesInfoList) {
864 if(infoLisItem.isFile()) {
865 if(!infoLisItem.exists()) {
866 stateInfo.setError( QString("no not exist %1").arg(infoLisItem.absoluteFilePath()) );
867 }
868 QDir patternDir(env->getVar("COMMON_DATA_DIR")+"/"+patDirName);
869 QFileInfo patFile(patternDir.absolutePath() + "/" + infoLisItem.fileName());
870 if(!patFile.exists()) {
871 stateInfo.setError( QString("file not exist %1").arg(patFile.absoluteFilePath()) );
872 return;
873 }
874 inFileInfoList << infoLisItem;
875 patFileInfoList << patFile;
876 }
877 }
878
879 MuscleTaskSettings mSettings;
880 mSettings.stableMode = false; //default mode is 'group' like in MUSCLE
881 bool ok = false;
882 mSettings.nThreads = env->getVar(ENV_MUSCLE_N_THREADS).toInt(&ok);
883 if(!ok) {
884 stateInfo.setError(QString("Invalid test suite environment variable \"%1\"").arg(ENV_MUSCLE_N_THREADS));
885 return;
886 }
887
888 for (int i = 0; i < inFileInfoList.count(); i++) {
889 Task *task = new Muscle_Load_Align_Compare_Task(inFileInfoList[i].absoluteFilePath(),patFileInfoList[i].absoluteFilePath(),mSettings, "MusclePacketTestSubtask: "+inFileInfoList[i].fileName());
890 addSubTask(task);
891 }
892 timer.start();
893 }
894
report()895 Task::ReportResult GTest_uMusclePacketTest::report() {
896 propagateSubtaskError();
897 if (!hasError()) {
898 algoLog.trace(QString("uMusclePacketTest: \"%1\" accomplished. Time elapsed: %2 ms").arg(inDirName).arg(timer.elapsed()));
899 }
900 return ReportResult_Finished;
901 }
902
createTestFactories()903 QList<XMLTestFactory*> UMUSCLETests::createTestFactories() {
904 QList<XMLTestFactory*> res;
905 res.append(GTest_uMuscle::createFactory());
906 res.append(GTest_CompareMAlignment::createFactory());
907 res.append(GTest_uMuscleAddUnalignedSequenceToProfile::createFactory());
908 res.append(GTest_uMusclePacketTest::createFactory());
909 res.append(GTest_Muscle_Load_Align_Compare::createFactory());
910 res.append(GTest_Muscle_Load_Align_QScore::createFactory());
911 return res;
912 }
913
914 }//namespace
915