1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "SequenceWalkerTask.h"
23 
24 #include <U2Core/DNATranslation.h>
25 #include <U2Core/TextUtils.h>
26 
27 namespace U2 {
28 
SequenceWalkerConfig()29 SequenceWalkerConfig::SequenceWalkerConfig()
30     : seq(nullptr), seqSize(0), complTrans(nullptr), aminoTrans(nullptr),
31       chunkSize(0), lastChunkExtraLen(0), overlapSize(0), nThreads(MAX_PARALLEL_SUBTASKS_SERIAL),
32       walkCircular(false), walkCircularDistance(0) {
33     strandToWalk = (complTrans != nullptr) ? StrandOption_Both : StrandOption_DirectOnly;
34 }
35 
SequenceWalkerTask(const SequenceWalkerConfig & c,SequenceWalkerCallback * cb,const QString & name,TaskFlags tf)36 SequenceWalkerTask::SequenceWalkerTask(const SequenceWalkerConfig &c, SequenceWalkerCallback *cb, const QString &name, TaskFlags tf)
37     : Task(name, tf),
38       config(c),
39       callback(cb),
40       tempBuffer(nullptr) {
41     assert(config.chunkSize > static_cast<uint>(config.overlapSize));  // if chunk == overlap -> infinite loop occurs
42     assert(cb != nullptr);
43     assert(config.strandToWalk == StrandOption_DirectOnly || config.complTrans != nullptr);
44 
45     maxParallelSubtasks = config.nThreads;
46     QList<SequenceWalkerSubtask *> subs = prepareSubtasks();
47     foreach (SequenceWalkerSubtask *sub, subs) {
48         addSubTask(sub);
49     }
50 }
51 
prepareSubtasks()52 QList<SequenceWalkerSubtask *> SequenceWalkerTask::prepareSubtasks() {
53     QList<SequenceWalkerSubtask *> res;
54 
55     if (config.range.isEmpty()) {
56         config.range.startPos = 0;
57         config.range.length = config.seqSize;
58     } else {
59         U2Region wholeSeqReg(0, config.seqSize);
60         assert(wholeSeqReg.contains(config.range));
61         config.range = wholeSeqReg.intersect(config.range);
62     }
63 
64     if (config.walkCircular && static_cast<quint64>(config.range.length) == config.seqSize) {
65         tempBuffer.clear();
66         tempBuffer.append(QByteArray(config.seq, config.seqSize));
67         tempBuffer.append(QByteArray(config.seq).left(config.walkCircularDistance * (config.aminoTrans == nullptr ? 1 : 3)));
68 
69         config.seq = tempBuffer.constData();
70         config.range.length += config.walkCircularDistance * (config.aminoTrans == nullptr ? 1 : 3);
71     }
72 
73     if (config.aminoTrans == nullptr) {
74         // try walk direct and complement strands
75         QVector<U2Region> chunks = splitRange(config.range, config.chunkSize, config.overlapSize, config.lastChunkExtraLen, false);
76 
77         if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_DirectOnly) {
78             QList<SequenceWalkerSubtask *> directTasks = createSubs(chunks, false, false);
79             res += directTasks;
80         }
81         if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_ComplementOnly) {
82             assert(config.complTrans != nullptr);
83             QList<SequenceWalkerSubtask *> complTasks = createSubs(chunks, true, false);
84             res += complTasks;
85         }
86     } else {
87         // in case of amino walk (chunk - overlap) should be devisible by 3
88         if ((config.chunkSize - config.overlapSize) % 3 != 0 && config.overlapSize != 0) {
89             config.chunkSize += 3 - (config.chunkSize - config.overlapSize) % 3;
90         }
91 
92         // try walk 3 direct and 3 complement translations
93         if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_DirectOnly) {
94             for (int i = 0; i < 3; i++) {
95                 U2Region strandRange(config.range.startPos + i, config.range.length - i);
96                 QVector<U2Region> chunks = splitRange(strandRange, config.chunkSize, config.overlapSize, config.lastChunkExtraLen, false);
97                 QList<SequenceWalkerSubtask *> directTasks = createSubs(chunks, false, true);
98                 res += directTasks;
99             }
100         }
101         if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_ComplementOnly) {
102             assert(config.complTrans != nullptr);
103             for (int i = 0; i < 3; i++) {
104                 U2Region strandRange(config.range.startPos, config.range.length - i);
105                 QVector<U2Region> chunks = splitRange(strandRange, config.chunkSize, config.overlapSize, config.lastChunkExtraLen, true);
106                 QList<SequenceWalkerSubtask *> complTasks = createSubs(chunks, true, true);
107                 res += complTasks;
108             }
109         }
110     }
111     return res;
112 }
113 
createSubs(const QVector<U2Region> & chunks,bool doCompl,bool doAmino)114 QList<SequenceWalkerSubtask *> SequenceWalkerTask::createSubs(const QVector<U2Region> &chunks, bool doCompl, bool doAmino) {
115     QList<SequenceWalkerSubtask *> res;
116     for (int i = 0, n = chunks.size(); i < n; i++) {
117         const U2Region &chunk = chunks[i];
118         bool lo = config.overlapSize > 0 && i > 0;
119         bool ro = config.overlapSize > 0 && i + 1 < n;
120         SequenceWalkerSubtask *t = new SequenceWalkerSubtask(this, chunk, lo, ro, config.seq + chunk.startPos, chunk.length, doCompl, doAmino);
121         res.append(t);
122     }
123     return res;
124 }
125 
splitRange(const U2Region & range,int chunkSize,int overlapSize,int lastChunkExtraLen,bool reverseMode)126 QVector<U2Region> SequenceWalkerTask::splitRange(const U2Region &range, int chunkSize, int overlapSize, int lastChunkExtraLen, bool reverseMode) {
127     assert(chunkSize > overlapSize);
128     int stepSize = chunkSize - overlapSize;
129 
130     QVector<U2Region> res;
131     for (int pos = range.startPos, end = range.endPos(), lastPos = range.startPos; lastPos < end; pos += stepSize) {
132         int chunkLen = qMin(pos + chunkSize, end) - pos;
133         if (end - chunkLen - pos <= lastChunkExtraLen) {
134             chunkLen = end - pos;
135         }
136         lastPos = pos + chunkLen;
137         res.append(U2Region(pos, chunkLen));
138     }
139 
140     if (reverseMode) {
141         QVector<U2Region> revertedRegions;
142         foreach (const U2Region &r, res) {
143             U2Region rr(range.startPos + (range.endPos() - r.endPos()), r.length);
144             revertedRegions.prepend(rr);
145         }
146         res = revertedRegions;
147     }
148     return res;
149 }
150 
151 //////////////////////////////////////////////////////////////////////////
152 // subtask
SequenceWalkerSubtask(SequenceWalkerTask * _t,const U2Region & glob,bool lo,bool ro,const char * _seq,int _len,bool _doCompl,bool _doAmino)153 SequenceWalkerSubtask::SequenceWalkerSubtask(SequenceWalkerTask *_t, const U2Region &glob, bool lo, bool ro, const char *_seq, int _len, bool _doCompl, bool _doAmino)
154     : Task(tr("Sequence walker subtask"), TaskFlag_None),
155       t(_t), globalRegion(glob), localSeq(_seq), originalLocalSeq(_seq),
156       localLen(_len), originalLocalLen(_len), doCompl(_doCompl), doAmino(_doAmino),
157       leftOverlap(lo), rightOverlap(ro) {
158     tpm = Task::Progress_Manual;
159 
160     // get resources
161     QList<TaskResourceUsage> resources = t->getCallback()->getResources(this);
162     foreach (const TaskResourceUsage &resource, resources) {
163         addTaskResource(resource);
164     }
165 }
166 
getRegionSequence()167 const char *SequenceWalkerSubtask::getRegionSequence() {
168     if (needLocalRegionProcessing()) {
169         prepareLocalRegion();
170     }
171     return localSeq;
172 }
173 
getRegionSequenceLen()174 int SequenceWalkerSubtask::getRegionSequenceLen() {
175     if (needLocalRegionProcessing()) {
176         prepareLocalRegion();
177     }
178     return localLen;
179 }
180 
prepareLocalRegion()181 void SequenceWalkerSubtask::prepareLocalRegion() {
182     assert(doAmino || doCompl);
183 
184     QByteArray res(localSeq, localLen);
185     if (doCompl) {
186         // do complement;
187         assert(t->getConfig().complTrans != nullptr);
188         const QByteArray &complementMap = t->getConfig().complTrans->getOne2OneMapper();
189         TextUtils::translate(complementMap, res.data(), res.length());
190         TextUtils::reverse(res.data(), res.length());
191     }
192     if (doAmino) {
193         assert(t->getConfig().aminoTrans != nullptr && t->getConfig().aminoTrans->isThree2One());
194         t->getConfig().aminoTrans->translate(res.data(), res.length(), res.data(), res.length());
195         int newLen = res.length() / 3;
196         res.resize(newLen);
197     }
198     processedSeqImage = res;
199     localLen = processedSeqImage.size();
200     localSeq = processedSeqImage.constData();
201 }
202 
run()203 void SequenceWalkerSubtask::run() {
204     assert(!t->hasError());
205     t->getCallback()->onRegion(this, stateInfo);
206 }
207 
intersectsWithOverlaps(const U2Region & reg) const208 bool SequenceWalkerSubtask::intersectsWithOverlaps(const U2Region &reg) const {
209     int overlap = getGlobalConfig().overlapSize;
210     if (overlap == 0) {
211         return false;
212     }
213     bool intersects = false;
214     if (leftOverlap) {
215         intersects = reg.intersects(U2Region(globalRegion.startPos, overlap));
216     }
217     if (!intersects && rightOverlap) {
218         intersects = reg.intersects(U2Region(globalRegion.endPos() - overlap, overlap));
219     }
220     return intersects;
221 }
222 
223 }  // namespace U2
224