1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "SmithWatermanTests.h"
23
24 #include <U2Algorithm/SmithWatermanSettings.h>
25 #include <U2Algorithm/SmithWatermanTaskFactoryRegistry.h>
26 #include <U2Algorithm/SubstMatrixRegistry.h>
27
28 #include <U2Core/AppContext.h>
29 #include <U2Core/DNASequenceObject.h>
30 #include <U2Core/SMatrix.h>
31 #include <U2Core/SequenceWalkerTask.h>
32 #include <U2Core/U2SafePoints.h>
33
// Names of the XML element attributes and environment variables read by the tests in this file.
//  - "subst_f":          mandatory; substitution matrix path, appended to the COMMON_DATA_DIR value in prepare().
//  - "seq_f":            mandatory; name passed to getContext<U2SequenceObject>() to obtain the search sequence.
//  - "pattern_f":        mandatory; name passed to getContext<U2SequenceObject>() to obtain the pattern sequence.
//  - "g_o" / "g_e":      optional; gap open / gap extension scores, parsed with QString::toInt().
//  - "percent_of_score": optional; parsed with QString::toFloat().
//  - "expected_res":     mandatory; "**"-separated entries of the form "score,start..end".
//  - "IMPL":             variable read through env->getVar() in GTest_SmithWatermnan::init().
//  - "impl":             element attribute read in GTest_SmithWatermnanPerf::init().
34 #define FILE_SUBSTITUTION_MATRIX_ATTR "subst_f"
35 #define FILE_FASTA_CONTAIN_SEQUENCE_ATTR "seq_f"
36 #define FILE_FASTA_CONTAIN_PATTERN_ATTR "pattern_f"
37 #define GAP_OPEN_ATTR "g_o"
38 #define GAP_EXT_ATTR "g_e"
39 #define PERCENT_OF_SCORE_ATTR "percent_of_score"
40 #define EXPECTED_RESULT_ATTR "expected_res"
41 #define ENV_IMPL_ATTR "IMPL"
42 #define IMPL_ATTR "impl"
43
44 using namespace std;
45 namespace U2 {
46
sortByScore(QList<SmithWatermanResult> & resultsForSort)47 void GTest_SmithWatermnan::sortByScore(QList<SmithWatermanResult> &resultsForSort) {
48 for (int i = 0; i < resultsForSort.size(); i++) {
49 for (int j = i + 1; j < resultsForSort.size(); j++) {
50 if (resultsForSort.at(i).score < resultsForSort.at(j).score) {
51 SmithWatermanResult buf = resultsForSort.at(i);
52 resultsForSort[i] = resultsForSort.at(j);
53 resultsForSort[j] = buf;
54 }
55 if (resultsForSort.at(i).score == resultsForSort.at(j).score &&
56 resultsForSort.at(i).refSubseq.startPos > resultsForSort.at(j).refSubseq.startPos) {
57 SmithWatermanResult buf = resultsForSort.at(i);
58 resultsForSort[i] = resultsForSort.at(j);
59 resultsForSort[j] = buf;
60 }
61 if (resultsForSort.at(i).score == resultsForSort.at(j).score &&
62 resultsForSort.at(i).refSubseq.startPos == resultsForSort.at(j).refSubseq.startPos &&
63 resultsForSort.at(i).refSubseq.length > resultsForSort.at(j).refSubseq.length) {
64 SmithWatermanResult buf = resultsForSort.at(i);
65 resultsForSort[i] = resultsForSort.at(j);
66 resultsForSort[j] = buf;
67 }
68 }
69 }
70 }
71
init(XMLTestFormat *,const QDomElement & el)72 void GTest_SmithWatermnan::init(XMLTestFormat *, const QDomElement &el) {
73 searchSeqDocName = el.attribute(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
74 if (searchSeqDocName.isEmpty()) {
75 failMissingValue(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
76 return;
77 }
78
79 patternSeqDocName = el.attribute(FILE_FASTA_CONTAIN_PATTERN_ATTR);
80 if (patternSeqDocName.isEmpty()) {
81 failMissingValue(FILE_FASTA_CONTAIN_PATTERN_ATTR);
82 return;
83 }
84
85 pathToSubst = el.attribute(FILE_SUBSTITUTION_MATRIX_ATTR);
86 if (pathToSubst.isEmpty()) {
87 failMissingValue(FILE_SUBSTITUTION_MATRIX_ATTR);
88 return;
89 }
90
91 QString buffer = el.attribute(GAP_OPEN_ATTR);
92 bool ok = false;
93
94 if (!buffer.isEmpty()) {
95 ok = false;
96 gapOpen = buffer.toInt(&ok);
97 if (!ok) {
98 failMissingValue(GAP_OPEN_ATTR);
99 return;
100 }
101 }
102
103 buffer = el.attribute(GAP_EXT_ATTR);
104 if (!buffer.isEmpty()) {
105 ok = false;
106 gapExtension = buffer.toInt(&ok);
107 if (!ok) {
108 failMissingValue(GAP_EXT_ATTR);
109 return;
110 }
111 }
112
113 buffer = el.attribute(PERCENT_OF_SCORE_ATTR);
114 if (!buffer.isEmpty()) {
115 ok = false;
116 percentOfScore = buffer.toFloat(&ok);
117 if (!ok) {
118 failMissingValue(PERCENT_OF_SCORE_ATTR);
119 return;
120 }
121 }
122
123 expected_res = el.attribute(EXPECTED_RESULT_ATTR);
124 if (expected_res.isEmpty()) {
125 failMissingValue(EXPECTED_RESULT_ATTR);
126 return;
127 }
128
129 if (!parseExpected_res()) {
130 stateInfo.setError(QString("value not correct %1").arg(EXPECTED_RESULT_ATTR));
131 return;
132 }
133
134 impl = env->getVar(ENV_IMPL_ATTR);
135 if (impl.isEmpty()) {
136 failMissingValue(ENV_IMPL_ATTR);
137 return;
138 }
139 }
140
prepare()141 void GTest_SmithWatermnan::prepare() {
142 // get search sequence
143 U2SequenceObject *searchSeqObj = getContext<U2SequenceObject>(this, searchSeqDocName);
144 if (searchSeqObj == nullptr) {
145 stateInfo.setError(QString("error can't cast to sequence from GObject"));
146 return;
147 }
148 searchSeq = searchSeqObj->getWholeSequenceData(stateInfo);
149 CHECK_OP(stateInfo, );
150
151 // get pattern sequence
152 U2SequenceObject *patternSeqObj = getContext<U2SequenceObject>(this, patternSeqDocName);
153 if (patternSeqObj == nullptr) {
154 stateInfo.setError(QString("error can't cast to sequence from GObject"));
155 return;
156 }
157 patternSeq = patternSeqObj->getWholeSequenceData(stateInfo);
158 CHECK_OP(stateInfo, );
159
160 // set subst matrix
161
162 QString pathToCommonData = getEnv()->getVar("COMMON_DATA_DIR");
163 if (patternSeqObj == nullptr) {
164 stateInfo.setError(QString("error can't get path to common_data dir"));
165 return;
166 }
167 QString fullPathToSubst = pathToCommonData + "/" + pathToSubst;
168
169 QString error;
170 SMatrix mtx = SubstMatrixRegistry::readMatrixFromFile(fullPathToSubst, error);
171 if (mtx.isEmpty()) {
172 stateInfo.setError(QString("value not set %1").arg(FILE_SUBSTITUTION_MATRIX_ATTR));
173 return;
174 }
175
176 s.pSm = mtx;
177 s.sqnc = searchSeq;
178 s.ptrn = patternSeq;
179 s.globalRegion.startPos = 0;
180 s.globalRegion.length = searchSeq.length();
181 s.gapModel.scoreGapOpen = gapOpen;
182 s.gapModel.scoreGapExtd = gapExtension;
183 s.percentOfScore = percentOfScore;
184 s.aminoTT = nullptr;
185 s.complTT = nullptr;
186 s.strand = StrandOption_DirectOnly;
187 s.resultCallback = nullptr;
188 s.resultListener = nullptr;
189 s.resultFilter = 0;
190
191 if (!machinePath.isEmpty()) { /* run smith-waterman on remote machine */
192 // TODO: BUG-001870
193 assert(0);
194 // SmithWatermanLocalTaskSettings localTaskSettings( s );
195 // RemoteMachine * machine = NULL;
196 // if( !SerializeUtils::deserializeRemoteMachineSettingsFromFile( machinePath, &machine ) ) {
197 // setError( QString( "Cannot create remote machine from '%1'" ).arg( machinePath ) );
198 // return;
199 // }
200 // assert( NULL != machine );
201 // swAlgorithmTask = new RemoteTask( SmithWatermanLocalTaskFactory::ID, localTaskSettings, machine );
202 } else { /* run on local machine */
203 s.resultListener = new SmithWatermanResultListener();
204 if (0 != AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)) {
205 swAlgorithmTask = (Task *)AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)->getTaskInstance(s, "tests SmithWaterman");
206 } else {
207 stateInfo.setError(QString("Not known impl of Smith-Waterman: %1").arg(impl));
208 return;
209 }
210 }
211 addSubTask(swAlgorithmTask);
212 }
213
parseExpected_res()214 bool GTest_SmithWatermnan::parseExpected_res() {
215 SWresult swRes;
216 QStringList expectedList = expected_res.split(tr("**"));
217
218 foreach (QString res, expectedList) {
219 QStringList resValues = res.split(tr(","));
220 if (resValues.size() != 2) {
221 stateInfo.setError(QString("wrong count values in expected result: %1").arg(resValues.size()));
222 return false;
223 }
224
225 //////// first enterval
226 QStringList bounds = resValues.at(1).split(tr(".."));
227 if (bounds.size() != 2) {
228 stateInfo.setError(QString("wrong region in expected result %1").arg(resValues.at(1)));
229 return false;
230 }
231 bool startOk, finishOk;
232 int start = bounds.first().toInt(&startOk);
233 int finish = bounds.last().toInt(&finishOk);
234 if (startOk && finishOk != true) {
235 stateInfo.setError(QString("wrong region in expected result %1").arg(resValues.at(1)));
236 return false;
237 }
238 swRes.sInterval.startPos = start;
239 swRes.sInterval.length = finish - start;
240
241 start = resValues.at(0).toInt(&startOk);
242 if (startOk != true) {
243 stateInfo.setError(QString("wrong scorein expected result %1").arg(resValues.at(0)));
244 return false;
245 }
246 swRes.score = start;
247
248 expectedRes.append(swRes);
249 }
250 return true;
251 }
252
toInt(QString & str,int & num)253 bool GTest_SmithWatermnan::toInt(QString &str, int &num) {
254 bool ok = false;
255 if (!str.isEmpty()) {
256 num = str.toInt(&ok);
257 }
258 return ok;
259 }
260
report()261 Task::ReportResult GTest_SmithWatermnan::report() {
262 propagateSubtaskError();
263 if (hasError()) {
264 return ReportResult_Finished;
265 }
266
267 QList<SmithWatermanResult> resultList;
268 if (!machinePath.isEmpty()) { /* remote task used */
269 // TODO: BUG-0001870
270 // RemoteTask * remoteSW = qobject_cast<RemoteTask*>( swAlgorithmTask );
271 // assert( NULL != remoteSW );
272 // SmithWatermanLocalTaskResult * result = dynamic_cast<SmithWatermanLocalTaskResult*>( remoteSW->getResult() );
273 // assert( NULL != result );
274 // resultList = result->getResult();
275 } else { /* task on local machine */
276 resultList = s.resultListener->popResults();
277 }
278 sortByScore(resultList);
279
280 if (expectedRes.size() != resultList.size()) {
281 stateInfo.setError(QString("Not expected result: count result not coincide, expected: %1, current: %2")
282 .arg(expectedRes.size())
283 .arg(resultList.size()));
284 return ReportResult_Finished;
285 }
286
287 for (int i = 0; i < resultList.size(); i++) {
288 if (expectedRes.at(i).score != resultList.at(i).score ||
289 expectedRes.at(i).sInterval != resultList.at(i).refSubseq) {
290 stateInfo.setError(QString("Not expected result"));
291 return ReportResult_Finished;
292 }
293 }
294
295 return ReportResult_Finished;
296 }
297
init(XMLTestFormat *,const QDomElement & el)298 void GTest_SmithWatermnanPerf::init(XMLTestFormat *, const QDomElement &el) {
299 searchSeqDocName = el.attribute(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
300 if (searchSeqDocName.isEmpty()) {
301 failMissingValue(FILE_FASTA_CONTAIN_SEQUENCE_ATTR);
302 return;
303 }
304
305 patternSeqDocName = el.attribute(FILE_FASTA_CONTAIN_PATTERN_ATTR);
306 if (patternSeqDocName.isEmpty()) {
307 failMissingValue(FILE_FASTA_CONTAIN_PATTERN_ATTR);
308 return;
309 }
310
311 impl = el.attribute(IMPL_ATTR);
312 if (patternSeqDocName.isEmpty()) {
313 failMissingValue(IMPL_ATTR);
314 return;
315 }
316
317 pathToSubst = "smith_waterman2/blosum62.txt";
318 gapOpen = -1;
319 gapExtension = -1;
320 percentOfScore = 100;
321 }
322
prepare()323 void GTest_SmithWatermnanPerf::prepare() {
324 // get search sequence
325 U2SequenceObject *searchSeqObj = getContext<U2SequenceObject>(this, searchSeqDocName);
326 if (searchSeqObj == nullptr) {
327 stateInfo.setError(QString("error can't cast to sequence from GObject"));
328 return;
329 }
330 searchSeq = searchSeqObj->getWholeSequenceData(stateInfo);
331 CHECK_OP(stateInfo, );
332
333 // get pattern sequence
334 U2SequenceObject *patternSeqObj = getContext<U2SequenceObject>(this, patternSeqDocName);
335 if (patternSeqObj == nullptr) {
336 stateInfo.setError(QString("error can't cast to sequence from GObject"));
337 return;
338 }
339 patternSeq = patternSeqObj->getWholeSequenceData(stateInfo);
340 CHECK_OP(stateInfo, );
341
342 setTaskName(QString("Test seq size %1").arg(patternSeq.size()));
343
344 // set subst matrix
345
346 QString pathToCommonData = getEnv()->getVar("COMMON_DATA_DIR");
347 if (patternSeqObj == nullptr) {
348 stateInfo.setError(QString("error can't get path to common_data dir"));
349 return;
350 }
351 QString fullPathToSubst = pathToCommonData + "/" + pathToSubst;
352
353 QString error;
354 SMatrix mtx = SubstMatrixRegistry::readMatrixFromFile(fullPathToSubst, error);
355 if (mtx.isEmpty()) {
356 stateInfo.setError(QString("value not set %1").arg(FILE_SUBSTITUTION_MATRIX_ATTR));
357 return;
358 }
359
360 s.pSm = mtx;
361 s.sqnc = searchSeq;
362 s.ptrn = patternSeq;
363 s.globalRegion.startPos = 0;
364 s.globalRegion.length = searchSeq.length();
365 s.gapModel.scoreGapOpen = gapOpen;
366 s.gapModel.scoreGapExtd = gapExtension;
367 s.percentOfScore = percentOfScore;
368 s.aminoTT = nullptr;
369 s.complTT = nullptr;
370 s.strand = StrandOption_DirectOnly;
371 s.resultCallback = nullptr;
372 s.resultListener = nullptr;
373 s.resultFilter = 0;
374 s.resultListener = new SmithWatermanResultListener();
375 if (0 != AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)) {
376 swAlgorithmTask = (Task *)AppContext::getSmithWatermanTaskFactoryRegistry()->getFactory(impl)->getTaskInstance(s, "test SW performance");
377 } else {
378 stateInfo.setError(QString("Not known impl of Smith-Waterman: %1").arg(impl));
379 return;
380 }
381 addSubTask(swAlgorithmTask);
382 }
383
report()384 Task::ReportResult GTest_SmithWatermnanPerf::report() {
385 propagateSubtaskError();
386 if (hasError()) {
387 return ReportResult_Finished;
388 }
389
390 return ReportResult_Finished;
391 }
392
393 } // namespace U2
394