1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
#include "SmithWatermanTests.h"

#include <algorithm>

#include <U2Algorithm/SmithWatermanSettings.h>
#include <U2Algorithm/SmithWatermanTaskFactoryRegistry.h>
#include <U2Algorithm/SubstMatrixRegistry.h>

#include <U2Core/AppContext.h>
#include <U2Core/DNASequenceObject.h>
#include <U2Core/SMatrix.h>
#include <U2Core/SequenceWalkerTask.h>
#include <U2Core/U2SafePoints.h>
33 
// XML attributes naming the input documents: the sequence to search in and the pattern.
#define FILE_FASTA_CONTAIN_SEQUENCE_ATTR "seq_f"
#define FILE_FASTA_CONTAIN_PATTERN_ATTR "pattern_f"

// XML attribute with the substitution matrix file; prepare() appends it to COMMON_DATA_DIR.
#define FILE_SUBSTITUTION_MATRIX_ATTR "subst_f"

// Optional numeric XML attributes: gap open score, gap extension score, score percentage.
#define GAP_OPEN_ATTR "g_o"
#define GAP_EXT_ATTR "g_e"
#define PERCENT_OF_SCORE_ATTR "percent_of_score"

// XML attribute with the expected results, parsed by parseExpected_res().
#define EXPECTED_RESULT_ATTR "expected_res"

// Implementation selection: test environment variable (GTest_SmithWatermnan)
// and XML attribute (GTest_SmithWatermnanPerf).
#define ENV_IMPL_ATTR "IMPL"
#define IMPL_ATTR "impl"
43 
44 using namespace std;
45 namespace U2 {
46 
sortByScore(QList<SmithWatermanResult> & resultsForSort)47 void GTest_SmithWatermnan::sortByScore(QList<SmithWatermanResult> &resultsForSort) {
48     for (int i = 0; i < resultsForSort.size(); i++) {
49         for (int j = i + 1; j < resultsForSort.size(); j++) {
50             if (resultsForSort.at(i).score < resultsForSort.at(j).score) {
51                 SmithWatermanResult buf = resultsForSort.at(i);
52                 resultsForSort[i] = resultsForSort.at(j);
53                 resultsForSort[j] = buf;
54             }
55             if (resultsForSort.at(i).score == resultsForSort.at(j).score &&
56                 resultsForSort.at(i).refSubseq.startPos > resultsForSort.at(j).refSubseq.startPos) {
57                 SmithWatermanResult buf = resultsForSort.at(i);
58                 resultsForSort[i] = resultsForSort.at(j);
59                 resultsForSort[j] = buf;
60             }
61             if (resultsForSort.at(i).score == resultsForSort.at(j).score &&
62                 resultsForSort.at(i).refSubseq.startPos == resultsForSort.at(j).refSubseq.startPos &&
63                 resultsForSort.at(i).refSubseq.length > resultsForSort.at(j).refSubseq.length) {
64                 SmithWatermanResult buf = resultsForSort.at(i);
65                 resultsForSort[i] = resultsForSort.at(j);
66                 resultsForSort[j] = buf;
67             }
68         }
69     }
70 }
71 
init(XMLTestFormat *,const QDomElement & el)72 void GTest_SmithWatermnan::init(XMLTestFormat *, const QDomElement &el) {
73     searchSeqDocName = el.attribute(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
74     if (searchSeqDocName.isEmpty()) {
75         failMissingValue(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
76         return;
77     }
78 
79     patternSeqDocName = el.attribute(FILE_FASTA_CONTAIN_PATTERN_ATTR);
80     if (patternSeqDocName.isEmpty()) {
81         failMissingValue(FILE_FASTA_CONTAIN_PATTERN_ATTR);
82         return;
83     }
84 
85     pathToSubst = el.attribute(FILE_SUBSTITUTION_MATRIX_ATTR);
86     if (pathToSubst.isEmpty()) {
87         failMissingValue(FILE_SUBSTITUTION_MATRIX_ATTR);
88         return;
89     }
90 
91     QString buffer = el.attribute(GAP_OPEN_ATTR);
92     bool ok = false;
93 
94     if (!buffer.isEmpty()) {
95         ok = false;
96         gapOpen = buffer.toInt(&ok);
97         if (!ok) {
98             failMissingValue(GAP_OPEN_ATTR);
99             return;
100         }
101     }
102 
103     buffer = el.attribute(GAP_EXT_ATTR);
104     if (!buffer.isEmpty()) {
105         ok = false;
106         gapExtension = buffer.toInt(&ok);
107         if (!ok) {
108             failMissingValue(GAP_EXT_ATTR);
109             return;
110         }
111     }
112 
113     buffer = el.attribute(PERCENT_OF_SCORE_ATTR);
114     if (!buffer.isEmpty()) {
115         ok = false;
116         percentOfScore = buffer.toFloat(&ok);
117         if (!ok) {
118             failMissingValue(PERCENT_OF_SCORE_ATTR);
119             return;
120         }
121     }
122 
123     expected_res = el.attribute(EXPECTED_RESULT_ATTR);
124     if (expected_res.isEmpty()) {
125         failMissingValue(EXPECTED_RESULT_ATTR);
126         return;
127     }
128 
129     if (!parseExpected_res()) {
130         stateInfo.setError(QString("value not correct %1").arg(EXPECTED_RESULT_ATTR));
131         return;
132     }
133 
134     impl = env->getVar(ENV_IMPL_ATTR);
135     if (impl.isEmpty()) {
136         failMissingValue(ENV_IMPL_ATTR);
137         return;
138     }
139 }
140 
prepare()141 void GTest_SmithWatermnan::prepare() {
142     // get search sequence
143     U2SequenceObject *searchSeqObj = getContext<U2SequenceObject>(this, searchSeqDocName);
144     if (searchSeqObj == nullptr) {
145         stateInfo.setError(QString("error can't cast to sequence from GObject"));
146         return;
147     }
148     searchSeq = searchSeqObj->getWholeSequenceData(stateInfo);
149     CHECK_OP(stateInfo, );
150 
151     // get pattern sequence
152     U2SequenceObject *patternSeqObj = getContext<U2SequenceObject>(this, patternSeqDocName);
153     if (patternSeqObj == nullptr) {
154         stateInfo.setError(QString("error can't cast to sequence from GObject"));
155         return;
156     }
157     patternSeq = patternSeqObj->getWholeSequenceData(stateInfo);
158     CHECK_OP(stateInfo, );
159 
160     // set subst matrix
161 
162     QString pathToCommonData = getEnv()->getVar("COMMON_DATA_DIR");
163     if (patternSeqObj == nullptr) {
164         stateInfo.setError(QString("error can't get path to common_data dir"));
165         return;
166     }
167     QString fullPathToSubst = pathToCommonData + "/" + pathToSubst;
168 
169     QString error;
170     SMatrix mtx = SubstMatrixRegistry::readMatrixFromFile(fullPathToSubst, error);
171     if (mtx.isEmpty()) {
172         stateInfo.setError(QString("value not set %1").arg(FILE_SUBSTITUTION_MATRIX_ATTR));
173         return;
174     }
175 
176     s.pSm = mtx;
177     s.sqnc = searchSeq;
178     s.ptrn = patternSeq;
179     s.globalRegion.startPos = 0;
180     s.globalRegion.length = searchSeq.length();
181     s.gapModel.scoreGapOpen = gapOpen;
182     s.gapModel.scoreGapExtd = gapExtension;
183     s.percentOfScore = percentOfScore;
184     s.aminoTT = nullptr;
185     s.complTT = nullptr;
186     s.strand = StrandOption_DirectOnly;
187     s.resultCallback = nullptr;
188     s.resultListener = nullptr;
189     s.resultFilter = 0;
190 
191     if (!machinePath.isEmpty()) { /* run smith-waterman on remote machine */
192         // TODO: BUG-001870
193         assert(0);
194         //         SmithWatermanLocalTaskSettings localTaskSettings( s );
195         //         RemoteMachine * machine = NULL;
196         //         if( !SerializeUtils::deserializeRemoteMachineSettingsFromFile( machinePath, &machine ) ) {
197         //             setError( QString( "Cannot create remote machine from '%1'" ).arg( machinePath ) );
198         //             return;
199         //         }
200         //         assert( NULL != machine );
201         //         swAlgorithmTask = new RemoteTask( SmithWatermanLocalTaskFactory::ID, localTaskSettings, machine );
202     } else { /* run on local machine */
203         s.resultListener = new SmithWatermanResultListener();
204         if (0 != AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)) {
205             swAlgorithmTask = (Task *)AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)->getTaskInstance(s, "tests SmithWaterman");
206         } else {
207             stateInfo.setError(QString("Not known impl of Smith-Waterman: %1").arg(impl));
208             return;
209         }
210     }
211     addSubTask(swAlgorithmTask);
212 }
213 
parseExpected_res()214 bool GTest_SmithWatermnan::parseExpected_res() {
215     SWresult swRes;
216     QStringList expectedList = expected_res.split(tr("**"));
217 
218     foreach (QString res, expectedList) {
219         QStringList resValues = res.split(tr(","));
220         if (resValues.size() != 2) {
221             stateInfo.setError(QString("wrong count values in expected result: %1").arg(resValues.size()));
222             return false;
223         }
224 
225         //////// first enterval
226         QStringList bounds = resValues.at(1).split(tr(".."));
227         if (bounds.size() != 2) {
228             stateInfo.setError(QString("wrong region in expected result %1").arg(resValues.at(1)));
229             return false;
230         }
231         bool startOk, finishOk;
232         int start = bounds.first().toInt(&startOk);
233         int finish = bounds.last().toInt(&finishOk);
234         if (startOk && finishOk != true) {
235             stateInfo.setError(QString("wrong region in expected result %1").arg(resValues.at(1)));
236             return false;
237         }
238         swRes.sInterval.startPos = start;
239         swRes.sInterval.length = finish - start;
240 
241         start = resValues.at(0).toInt(&startOk);
242         if (startOk != true) {
243             stateInfo.setError(QString("wrong scorein expected result %1").arg(resValues.at(0)));
244             return false;
245         }
246         swRes.score = start;
247 
248         expectedRes.append(swRes);
249     }
250     return true;
251 }
252 
toInt(QString & str,int & num)253 bool GTest_SmithWatermnan::toInt(QString &str, int &num) {
254     bool ok = false;
255     if (!str.isEmpty()) {
256         num = str.toInt(&ok);
257     }
258     return ok;
259 }
260 
report()261 Task::ReportResult GTest_SmithWatermnan::report() {
262     propagateSubtaskError();
263     if (hasError()) {
264         return ReportResult_Finished;
265     }
266 
267     QList<SmithWatermanResult> resultList;
268     if (!machinePath.isEmpty()) { /* remote task used */
269         // TODO: BUG-0001870
270         //          RemoteTask * remoteSW = qobject_cast<RemoteTask*>( swAlgorithmTask );
271         //          assert( NULL != remoteSW );
272         //          SmithWatermanLocalTaskResult * result = dynamic_cast<SmithWatermanLocalTaskResult*>( remoteSW->getResult() );
273         //          assert( NULL != result );
274         //          resultList = result->getResult();
275     } else { /* task on local machine */
276         resultList = s.resultListener->popResults();
277     }
278     sortByScore(resultList);
279 
280     if (expectedRes.size() != resultList.size()) {
281         stateInfo.setError(QString("Not expected result: count result not coincide, expected: %1, current: %2")
282                                .arg(expectedRes.size())
283                                .arg(resultList.size()));
284         return ReportResult_Finished;
285     }
286 
287     for (int i = 0; i < resultList.size(); i++) {
288         if (expectedRes.at(i).score != resultList.at(i).score ||
289             expectedRes.at(i).sInterval != resultList.at(i).refSubseq) {
290             stateInfo.setError(QString("Not expected result"));
291             return ReportResult_Finished;
292         }
293     }
294 
295     return ReportResult_Finished;
296 }
297 
init(XMLTestFormat *,const QDomElement & el)298 void GTest_SmithWatermnanPerf::init(XMLTestFormat *, const QDomElement &el) {
299     searchSeqDocName = el.attribute(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
300     if (searchSeqDocName.isEmpty()) {
301         failMissingValue(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
302         return;
303     }
304 
305     patternSeqDocName = el.attribute(FILE_FASTA_CONTAIN_PATTERN_ATTR);
306     if (patternSeqDocName.isEmpty()) {
307         failMissingValue(FILE_FASTA_CONTAIN_PATTERN_ATTR);
308         return;
309     }
310 
311     impl = el.attribute(IMPL_ATTR);
312     if (patternSeqDocName.isEmpty()) {
313         failMissingValue(IMPL_ATTR);
314         return;
315     }
316 
317     pathToSubst = "smith_waterman2/blosum62.txt";
318     gapOpen = -1;
319     gapExtension = -1;
320     percentOfScore = 100;
321 }
322 
prepare()323 void GTest_SmithWatermnanPerf::prepare() {
324     // get search sequence
325     U2SequenceObject *searchSeqObj = getContext<U2SequenceObject>(this, searchSeqDocName);
326     if (searchSeqObj == nullptr) {
327         stateInfo.setError(QString("error can't cast to sequence from GObject"));
328         return;
329     }
330     searchSeq = searchSeqObj->getWholeSequenceData(stateInfo);
331     CHECK_OP(stateInfo, );
332 
333     // get pattern sequence
334     U2SequenceObject *patternSeqObj = getContext<U2SequenceObject>(this, patternSeqDocName);
335     if (patternSeqObj == nullptr) {
336         stateInfo.setError(QString("error can't cast to sequence from GObject"));
337         return;
338     }
339     patternSeq = patternSeqObj->getWholeSequenceData(stateInfo);
340     CHECK_OP(stateInfo, );
341 
342     setTaskName(QString("Test seq size %1").arg(patternSeq.size()));
343 
344     // set subst matrix
345 
346     QString pathToCommonData = getEnv()->getVar("COMMON_DATA_DIR");
347     if (patternSeqObj == nullptr) {
348         stateInfo.setError(QString("error can't get path to common_data dir"));
349         return;
350     }
351     QString fullPathToSubst = pathToCommonData + "/" + pathToSubst;
352 
353     QString error;
354     SMatrix mtx = SubstMatrixRegistry::readMatrixFromFile(fullPathToSubst, error);
355     if (mtx.isEmpty()) {
356         stateInfo.setError(QString("value not set %1").arg(FILE_SUBSTITUTION_MATRIX_ATTR));
357         return;
358     }
359 
360     s.pSm = mtx;
361     s.sqnc = searchSeq;
362     s.ptrn = patternSeq;
363     s.globalRegion.startPos = 0;
364     s.globalRegion.length = searchSeq.length();
365     s.gapModel.scoreGapOpen = gapOpen;
366     s.gapModel.scoreGapExtd = gapExtension;
367     s.percentOfScore = percentOfScore;
368     s.aminoTT = nullptr;
369     s.complTT = nullptr;
370     s.strand = StrandOption_DirectOnly;
371     s.resultCallback = nullptr;
372     s.resultListener = nullptr;
373     s.resultFilter = 0;
374     s.resultListener = new SmithWatermanResultListener();
375     if (0 != AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)) {
376         swAlgorithmTask = (Task *)AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)->getTaskInstance(s, "test SW performance");
377     } else {
378         stateInfo.setError(QString("Not known impl of Smith-Waterman: %1").arg(impl));
379         return;
380     }
381     addSubTask(swAlgorithmTask);
382 }
383 
report()384 Task::ReportResult GTest_SmithWatermnanPerf::report() {
385     propagateSubtaskError();
386     if (hasError()) {
387         return ReportResult_Finished;
388     }
389 
390     return ReportResult_Finished;
391 }
392 
393 }  // namespace U2
394