1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "SiteconAlgorithmTests.h"
23
24 #include <QDomElement>
25 #include <QFileInfo>
26
27 #include <U2Core/AppContext.h>
28 #include <U2Core/BaseDocumentFormats.h>
29 #include <U2Core/DNASequenceObject.h>
30 #include <U2Core/DocumentModel.h>
31 #include <U2Core/GObjectTypes.h>
32 #include <U2Core/GObjectUtils.h>
33 #include <U2Core/IOAdapter.h>
34 #include <U2Core/MultipleSequenceAlignmentObject.h>
35 #include <U2Core/U2SafePoints.h>
36
37 #include "DIPropertiesSitecon.h"
38 #include "SiteconIO.h"
39
40 /* TRANSLATOR U2::GTest */
41
42 namespace U2 {
43
// Names of the XML attributes read by the init() methods of the tests in this file.

// Context name of the document that holds the alignment.
#define DOC_ATTR "doc"
// NOTE(review): not referenced anywhere in the visible part of this file.
#define DOC_URL "url"
// Context names of the two documents compared by GTest_CompareSiteconModels.
#define DOC1_ATTR "model1"
#define DOC2_ATTR "model2"
// Context name of the sequence object searched by GTest_SiteconSearchTask.
#define SEQNAME_ATTR "sequence"
// Expected values; the exact format depends on the test that reads it.
#define EXPECTED_RESULTS_ATTR "expected_results"
// Comma-separated integer lists used by GTest_CalculateDispersionAndAverage.
#define DINUCLEOTIDE_POSITIONS "di_positions"
#define PROPERTIES_INDEXES "props_indexes"
// Integer offset; the error tests start comparing results at index offset + 1.
#define OFFSET_ATTR "offset"
// Model file path, appended to the COMMON_DATA_DIR environment variable.
#define MODEL_ATTR "model"
// Strand selector: "direct", "compliment" or "both".
#define STRAND_ATTR "strand"
// Integer threshold passed to the search configuration as minPSUM.
#define TRESH_ATTR "treshhold"
56
init(XMLTestFormat *,const QDomElement & el)57 void GTest_CalculateACGTContent::init(XMLTestFormat *, const QDomElement &el) {
58 docName = el.attribute(DOC_ATTR);
59 if (docName.isEmpty()) {
60 failMissingValue(DOC_ATTR);
61 return;
62 }
63
64 QString expected = el.attribute(EXPECTED_RESULTS_ATTR);
65 QStringList expectedList = expected.split(QRegExp("\\,")); // may be QRegExp("\\,")
66 if (expectedList.size() != 4) {
67 stateInfo.setError(QString("here must be 4 items in %1").arg(EXPECTED_RESULTS_ATTR));
68 return;
69 }
70 int i = 0, sum = 0;
71 foreach (QString str, expectedList) {
72 bool isOk;
73 int m = str.toInt(&isOk);
74 if (!isOk) {
75 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(EXPECTED_RESULTS_ATTR));
76 return;
77 }
78 expectedACGT[i++] = m;
79 sum += m;
80 }
81 if (sum < 100 || sum > 102) {
82 stateInfo.setError(QString("Wrong %1 values").arg(EXPECTED_RESULTS_ATTR));
83 return;
84 }
85 }
86
prepare()87 void GTest_CalculateACGTContent::prepare() {
88 Document *doc = getContext<Document>(this, docName);
89 if (doc == nullptr) {
90 stateInfo.setError(QString("context not found %1").arg(docName));
91 return;
92 }
93 QList<GObject *> list = doc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
94 if (list.size() == 0) {
95 stateInfo.setError(QString("container of object with type \"%1\" is empty").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
96 return;
97 }
98 GObject *obj = list.first();
99 if (obj == nullptr) {
100 stateInfo.setError(QString("object with type \"%1\" not found").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
101 return;
102 }
103 MultipleSequenceAlignmentObject *mao = qobject_cast<MultipleSequenceAlignmentObject *>(obj);
104 if (mao == nullptr) {
105 stateInfo.setError(QString("error can't cast to MultipleSequenceAlignmentObject from GObject"));
106 return;
107 }
108 ma = mao->getMsaCopy();
109 }
110
run()111 void GTest_CalculateACGTContent::run() {
112 SiteconAlgorithm::calculateACGTContent(ma, s);
113 }
114
report()115 Task::ReportResult GTest_CalculateACGTContent::report() {
116 for (int i = 0; i < 4; i++) {
117 if (expectedACGT[i] != s.acgtContent[i]) {
118 stateInfo.setError(QString("Actual results not equal with expected"));
119 return ReportResult_Finished;
120 }
121 }
122 return ReportResult_Finished;
123 }
124
init(XMLTestFormat *,const QDomElement & el)125 void GTest_CalculateDispersionAndAverage::init(XMLTestFormat *, const QDomElement &el) {
126 QStringList propsList = el.attribute(PROPERTIES_INDEXES).split(QRegExp("\\,")),
127 diPosStrList = el.attribute(DINUCLEOTIDE_POSITIONS).split(QRegExp("\\,")),
128 expectedStrList = el.attribute(EXPECTED_RESULTS_ATTR).split(QRegExp("\\,"));
129 QStringList::Iterator expResIt;
130 expResIt = expectedStrList.begin();
131 foreach (QString posStr, diPosStrList) {
132 bool isOk;
133 int pos = posStr.toInt(&isOk);
134 if (!isOk) {
135 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(DINUCLEOTIDE_POSITIONS));
136 return;
137 }
138 foreach (QString propStr, propsList) {
139 int propIndex = propStr.toInt(&isOk);
140 if (!isOk) {
141 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(PROPERTIES_INDEXES));
142 return;
143 }
144 ResultVector r;
145 r.push_back(pos);
146 r.push_back(propIndex);
147 int exp = qRound((*expResIt).toFloat(&isOk) * 10000);
148 if (!isOk) {
149 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(EXPECTED_RESULTS_ATTR));
150 return;
151 }
152 r.push_back(exp);
153 if (expResIt == expectedStrList.end()) {
154 stateInfo.setError(QString("Too less items in %1").arg(EXPECTED_RESULTS_ATTR));
155 return;
156 }
157 expResIt++;
158 exp = qRound((*expResIt).toFloat(&isOk) * 10000);
159 if (!isOk) {
160 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(EXPECTED_RESULTS_ATTR));
161 return;
162 }
163 r.push_back(exp);
164 expectedResults.push_back(ResultVector(r));
165 if (expResIt == expectedStrList.end()) {
166 stateInfo.setError(QString("Too less items in %1").arg(EXPECTED_RESULTS_ATTR));
167 return;
168 }
169 expResIt++;
170 }
171 }
172
173 docName = el.attribute(DOC_ATTR);
174 if (docName.isEmpty()) {
175 failMissingValue(DOC_ATTR);
176 return;
177 }
178 }
179
prepare()180 void GTest_CalculateDispersionAndAverage::prepare() {
181 Document *doc = getContext<Document>(this, docName);
182 if (doc == nullptr) {
183 stateInfo.setError(QString("context not found %1").arg(docName));
184 return;
185 }
186 QList<GObject *> list = doc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
187 if (list.size() == 0) {
188 stateInfo.setError(QString("container of object with type \"%1\" is empty").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
189 return;
190 }
191 GObject *obj = list.first();
192 if (obj == nullptr) {
193 stateInfo.setError(QString("object with type \"%1\" not found").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
194 return;
195 }
196 MultipleSequenceAlignmentObject *mao = qobject_cast<MultipleSequenceAlignmentObject *>(obj);
197 if (mao == nullptr) {
198 stateInfo.setError(QString("error can't cast to MultipleSequenceAlignmentObject from GObject"));
199 return;
200 }
201 ma = mao->getMsaCopy();
202 }
203
run()204 void GTest_CalculateDispersionAndAverage::run() {
205 DinucleotitePropertyRegistry di;
206 s.props = di.getProperties();
207 SiteconAlgorithm::calculateACGTContent(ma, s);
208 s.numSequencesInAlignment = ma->getNumRows();
209 TaskStateInfo stub;
210 result = SiteconAlgorithm::calculateDispersionAndAverage(ma, s, stub);
211 }
212
report()213 Task::ReportResult GTest_CalculateDispersionAndAverage::report() {
214 foreach (ResultVector rv, expectedResults) {
215 int i = rv[0];
216 int j = rv[1];
217 PositionStats vec = result[i];
218 DiStat stat = vec[j];
219 int sdev = qRound(stat.sdeviation * 10000),
220 average = qRound(stat.average * 10000),
221 expAve = rv[2],
222 expSdev = rv[3];
223 if (sdev != expSdev) {
224 stateInfo.setError(QString("Expected and Actual 'SDev' values are different: %1 %2").arg(expSdev / 10000).arg(sdev / 10000));
225 return ReportResult_Finished;
226 }
227 if (average != expAve) {
228 stateInfo.setError(QString("Expected and Actual 'Average' values are different: %1 %2").arg(expAve / 10000).arg(average / 10000));
229 return ReportResult_Finished;
230 }
231 }
232 return ReportResult_Finished;
233 }
234
init(XMLTestFormat *,const QDomElement & el)235 void GTest_CalculateFirstTypeError::init(XMLTestFormat *, const QDomElement &el) {
236 docName = el.attribute(DOC_ATTR);
237 if (docName.isEmpty()) {
238 failMissingValue(DOC_ATTR);
239 return;
240 }
241
242 QString windowSizeStr = el.attribute(OFFSET_ATTR);
243 if (docName.isEmpty()) {
244 failMissingValue(OFFSET_ATTR);
245 return;
246 }
247 bool isOk;
248 offset = windowSizeStr.toInt(&isOk);
249 if (!isOk) {
250 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(OFFSET_ATTR));
251 return;
252 }
253
254 QStringList expectedStrList = el.attribute(EXPECTED_RESULTS_ATTR).split(QRegExp("\\,"));
255 foreach (QString str, expectedStrList) {
256 int exp = qRound(str.toFloat(&isOk) * 10000);
257 if (!isOk) {
258 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(EXPECTED_RESULTS_ATTR));
259 return;
260 }
261 expectedResult.push_back(exp);
262 }
263 }
264
prepare()265 void GTest_CalculateFirstTypeError::prepare() {
266 Document *doc = getContext<Document>(this, docName);
267 if (doc == nullptr) {
268 stateInfo.setError(QString("context not found %1").arg(docName));
269 return;
270 }
271 QList<GObject *> list = doc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
272 if (list.size() == 0) {
273 stateInfo.setError(QString("container of object with type \"%1\" is empty").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
274 return;
275 }
276 GObject *obj = list.first();
277 if (obj == nullptr) {
278 stateInfo.setError(QString("object with type \"%1\" not found").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
279 return;
280 }
281 MultipleSequenceAlignmentObject *mao = qobject_cast<MultipleSequenceAlignmentObject *>(obj);
282 if (mao == nullptr) {
283 stateInfo.setError(QString("error can't cast to MultipleSequenceAlignmentObject from GObject"));
284 return;
285 }
286 ma = mao->getMsaCopy();
287 }
288
run()289 void GTest_CalculateFirstTypeError::run() {
290 DinucleotitePropertyRegistry di;
291 s.props = di.getProperties();
292 SiteconAlgorithm::calculateACGTContent(ma, s);
293 s.numSequencesInAlignment = ma->getNumRows();
294 s.windowSize = ma->getLength();
295 TaskStateInfo stub;
296 result = SiteconAlgorithm::calculateFirstTypeError(ma, s, stub);
297 }
298
report()299 Task::ReportResult GTest_CalculateFirstTypeError::report() {
300 int i = offset + 1;
301 foreach (int exp, expectedResult) {
302 int act = qRound(result[i] * 10000);
303 // printf("Expected: %i", exp);
304 // printf(" Actual: %i \r\n", act);
305 if (act != exp) {
306 stateInfo.setError(QString("Expected and Actual values are different: %1 %2").arg(exp).arg(act));
307 return ReportResult_Finished;
308 }
309 i++;
310 }
311 return ReportResult_Finished;
312 }
313
init(XMLTestFormat *,const QDomElement & el)314 void GTest_CalculateSecondTypeError::init(XMLTestFormat *, const QDomElement &el) {
315 docName = el.attribute(DOC_ATTR);
316 if (docName.isEmpty()) {
317 failMissingValue(DOC_ATTR);
318 return;
319 }
320
321 QString windowSizeStr = el.attribute(OFFSET_ATTR);
322 if (docName.isEmpty()) {
323 failMissingValue(OFFSET_ATTR);
324 return;
325 }
326 bool isOk;
327 offset = windowSizeStr.toInt(&isOk);
328 if (!isOk) {
329 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(OFFSET_ATTR));
330 return;
331 }
332
333 QStringList expectedStrList = el.attribute(EXPECTED_RESULTS_ATTR).split(QRegExp("\\,"));
334 foreach (QString str, expectedStrList) {
335 int exp = str.toInt(&isOk);
336 if (!isOk) {
337 stateInfo.setError(QString("Wrong conversion to the integer for one of the %1").arg(EXPECTED_RESULTS_ATTR));
338 return;
339 }
340 expectedResult.push_back(exp);
341 }
342 }
343
prepare()344 void GTest_CalculateSecondTypeError::prepare() {
345 Document *doc = getContext<Document>(this, docName);
346 if (doc == nullptr) {
347 stateInfo.setError(QString("context not found %1").arg(docName));
348 return;
349 }
350 QList<GObject *> list = doc->findGObjectByType(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
351 if (list.size() == 0) {
352 stateInfo.setError(QString("container of object with type \"%1\" is empty").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
353 return;
354 }
355 GObject *obj = list.first();
356 if (obj == nullptr) {
357 stateInfo.setError(QString("object with type \"%1\" not found").arg(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT));
358 return;
359 }
360 MultipleSequenceAlignmentObject *mao = qobject_cast<MultipleSequenceAlignmentObject *>(obj);
361 if (mao == nullptr) {
362 stateInfo.setError(QString("error can't cast to MultipleSequenceAlignmentObject from GObject"));
363 return;
364 }
365 ma = mao->getMsaCopy();
366 }
367
run()368 void GTest_CalculateSecondTypeError::run() {
369 DinucleotitePropertyRegistry di;
370 s.props = di.getProperties();
371 SiteconAlgorithm::calculateACGTContent(ma, s);
372 s.numSequencesInAlignment = ma->getNumRows();
373 s.windowSize = ma->getLength();
374 SiteconModel m;
375 m.aliURL = (getContext<Document>(this, docName))->getURLString();
376 m.modelName = QFileInfo(m.aliURL).baseName();
377 m.settings = s;
378 m.matrix = SiteconAlgorithm::calculateDispersionAndAverage(ma, s, stateInfo);
379 SiteconAlgorithm::calculateWeights(ma, m.matrix, m.settings, false, stateInfo);
380 TaskStateInfo stub1;
381 m.err1 = SiteconAlgorithm::calculateFirstTypeError(ma, s, stub1);
382 TaskStateInfo stub2;
383 result = SiteconAlgorithm::calculateSecondTypeError(m.matrix, s, stub2);
384 }
385
report()386 Task::ReportResult GTest_CalculateSecondTypeError::report() {
387 int i = offset + 1;
388 foreach (int exp, expectedResult) {
389 int act = qRound(1 / result[i]);
390 printf("Expected: %i", exp);
391 printf(" Actual: %i \r\n", act);
392 if (act != exp) {
393 stateInfo.setError(QString("Expected and Actual values are different: %1 %2").arg(exp).arg(act));
394 return ReportResult_Finished;
395 }
396 i++;
397 }
398 return ReportResult_Finished;
399 }
400
init(XMLTestFormat *,const QDomElement & el)401 void GTest_SiteconSearchTask::init(XMLTestFormat *, const QDomElement &el) {
402 seqName = el.attribute(SEQNAME_ATTR);
403 if (seqName.isEmpty()) {
404 failMissingValue(SEQNAME_ATTR);
405 return;
406 }
407
408 QString modelPath = el.attribute(MODEL_ATTR);
409 IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(BaseIOAdapters::LOCAL_FILE);
410 QString url = env->getVar("COMMON_DATA_DIR") + "/" + modelPath;
411 model = SiteconIO::readModel(iof, url, stateInfo);
412
413 QString strandStr = el.attribute(STRAND_ATTR);
414 if (strandStr.isEmpty()) {
415 failMissingValue(STRAND_ATTR);
416 return;
417 }
418 if (strandStr == "direct") {
419 complOnly = false;
420 isNeedCompliment = false;
421 } else if (strandStr == "compliment") {
422 complOnly = true;
423 isNeedCompliment = true;
424 } else if (strandStr == "both") {
425 complOnly = false;
426 isNeedCompliment = true;
427 } else {
428 stateInfo.setError(QString("%1 has incorrect value").arg(STRAND_ATTR));
429 return;
430 }
431
432 bool isOk;
433 QString tre = el.attribute(TRESH_ATTR);
434 if (tre.isEmpty()) {
435 failMissingValue(TRESH_ATTR);
436 return;
437 }
438 tresh = tre.toInt(&isOk);
439 if (!isOk) {
440 stateInfo.setError(QString("unable to convert %1 to integer").arg(TRESH_ATTR));
441 return;
442 }
443
444 QString expected = el.attribute(EXPECTED_RESULTS_ATTR);
445 if (!expected.isEmpty()) {
446 QStringList expectedList = expected.split(QRegExp("\\;"));
447 foreach (QString propsArray, expectedList) {
448 QStringList props = propsArray.split(QRegExp("\\,"));
449 QString middleStr = props[0], scoreStr = props[1], strStr = props[2];
450 int middle = middleStr.toInt(&isOk);
451 if (!isOk) {
452 stateInfo.setError(QString("unable to convert %1 to integer").arg(EXPECTED_RESULTS_ATTR));
453 return;
454 }
455 U2Strand strand;
456 U2Region reg;
457 reg.length = model.settings.windowSize;
458 if (strStr == "direct") {
459 strand = U2Strand::Direct;
460 reg.startPos = middle - (int)(model.settings.windowSize / 2);
461 } else if (strStr == "compliment") {
462 strand = U2Strand::Complementary;
463 reg.startPos = middle - (int)(model.settings.windowSize / 2) + 1;
464 /*
465 if(model.settings.weightAlg == SiteconWeightAlg_Alg2){
466 reg.startPos++;
467 }
468 */
469 } else {
470 stateInfo.setError(QString("%1 has incorrect value").arg(STRAND_ATTR));
471 return;
472 }
473 float psum = scoreStr.toFloat(&isOk);
474 if (!isOk) {
475 stateInfo.setError(QString("unable to convert %1 to float").arg(EXPECTED_RESULTS_ATTR));
476 return;
477 }
478 SiteconSearchResult ssr;
479 ssr.psum = psum;
480 ssr.region = reg;
481 ssr.strand = strand;
482 expectedResults.append(ssr);
483 }
484 }
485 }
486
prepare()487 void GTest_SiteconSearchTask::prepare() {
488 U2SequenceObject *mySequence = getContext<U2SequenceObject>(this, seqName);
489 CHECK_EXT(mySequence != nullptr, setError(QString("error can't cast to sequence from GObject")), );
490
491 SiteconSearchCfg cfg;
492 cfg.complOnly = complOnly;
493 cfg.minPSUM = tresh;
494 if (isNeedCompliment) {
495 cfg.complTT = GObjectUtils::findComplementTT(mySequence->getAlphabet());
496 }
497 QByteArray seqData = mySequence->getWholeSequenceData(stateInfo);
498 CHECK_OP(stateInfo, );
499 task = new SiteconSearchTask(model, seqData, cfg, 0);
500 addSubTask(task);
501 }
502
report()503 Task::ReportResult GTest_SiteconSearchTask::report() {
504 results = task->takeResults();
505 int matchesCount = 0;
506 /*
507 printf("Actual: \r\n");
508 foreach(SiteconSearchResult r, results) {
509 printf("%i ", r.region.startPos + (int)(model.settings.windowSize/2));
510 if(r.complement){
511 printf("C \r\n");
512 }else{
513 printf("D \r\n");
514 }
515 }
516 printf("Expected: \r\n");
517 foreach(SiteconSearchResult r, expectedResults) {
518 printf("%i ", r.region.startPos + (int)(model.settings.windowSize/2));
519 if(r.complement){
520 printf("C \r\n");
521 }else{
522 printf("D \r\n");
523 }
524 }
525 */
526 if (results.size() != expectedResults.size()) {
527 stateInfo.setError(QString("expected and equal result lists not equal by size, expected: %1, actual: %2").arg(expectedResults.size()).arg(results.size()));
528 return ReportResult_Finished;
529 }
530 /**/
531 foreach (SiteconSearchResult exp, expectedResults) {
532 foreach (SiteconSearchResult act, results) {
533 int ePsum = qRound(exp.psum * 10), aPsum = qRound(act.psum * 10);
534 if (exp.region == act.region && aPsum == ePsum && exp.strand == act.strand) {
535 matchesCount++;
536 }
537 }
538 }
539 if (matchesCount != expectedResults.size()) {
540 stateInfo.setError(QString("expected and equal result lists not equal"));
541 return ReportResult_Finished;
542 }
543 return ReportResult_Finished;
544 }
545
init(XMLTestFormat *,const QDomElement & el)546 void GTest_CompareSiteconModels::init(XMLTestFormat *, const QDomElement &el) {
547 doc1ContextName = el.attribute(DOC1_ATTR);
548 if (doc1ContextName.isEmpty()) {
549 failMissingValue(DOC1_ATTR);
550 return;
551 }
552
553 doc2ContextName = el.attribute(DOC2_ATTR);
554 if (doc2ContextName.isEmpty()) {
555 failMissingValue(DOC2_ATTR);
556 return;
557 }
558 }
559
report()560 Task::ReportResult GTest_CompareSiteconModels::report() {
561 // SiteconModel model1 = getContext<SiteconModel>(doc1ContextName);
562 // SiteconModel model2 = getContext<SiteconModel>(doc2ContextName);
563 Document *doc1 = getContext<Document>(this, doc1ContextName);
564 if (doc1 == nullptr) {
565 stateInfo.setError(QString("document not found %1").arg(doc1ContextName));
566 return ReportResult_Finished;
567 }
568 Document *doc2 = getContext<Document>(this, doc2ContextName);
569 if (doc2 == nullptr) {
570 stateInfo.setError(QString("document not found %1").arg(doc2ContextName));
571 return ReportResult_Finished;
572 }
573 SiteconModel model1 = SiteconIO::readModel(doc1->getIOAdapterFactory(), doc1->getURLString(), stateInfo);
574 SiteconModel model2 = SiteconIO::readModel(doc2->getIOAdapterFactory(), doc2->getURLString(), stateInfo);
575 if (model1 != model2) {
576 stateInfo.setError(tr("Models not equal"));
577 }
578 return ReportResult_Finished;
579 }
580
581 } // namespace U2
582