1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "SequenceWalkerTask.h"
23
24 #include <U2Core/DNATranslation.h>
25 #include <U2Core/TextUtils.h>
26
27 namespace U2 {
28
SequenceWalkerConfig()29 SequenceWalkerConfig::SequenceWalkerConfig()
30 : seq(nullptr), seqSize(0), complTrans(nullptr), aminoTrans(nullptr),
31 chunkSize(0), lastChunkExtraLen(0), overlapSize(0), nThreads(MAX_PARALLEL_SUBTASKS_SERIAL),
32 walkCircular(false), walkCircularDistance(0) {
33 strandToWalk = (complTrans != nullptr) ? StrandOption_Both : StrandOption_DirectOnly;
34 }
35
SequenceWalkerTask(const SequenceWalkerConfig & c,SequenceWalkerCallback * cb,const QString & name,TaskFlags tf)36 SequenceWalkerTask::SequenceWalkerTask(const SequenceWalkerConfig &c, SequenceWalkerCallback *cb, const QString &name, TaskFlags tf)
37 : Task(name, tf),
38 config(c),
39 callback(cb),
40 tempBuffer(nullptr) {
41 assert(config.chunkSize > static_cast<uint>(config.overlapSize)); // if chunk == overlap -> infinite loop occurs
42 assert(cb != nullptr);
43 assert(config.strandToWalk == StrandOption_DirectOnly || config.complTrans != nullptr);
44
45 maxParallelSubtasks = config.nThreads;
46 QList<SequenceWalkerSubtask *> subs = prepareSubtasks();
47 foreach (SequenceWalkerSubtask *sub, subs) {
48 addSubTask(sub);
49 }
50 }
51
prepareSubtasks()52 QList<SequenceWalkerSubtask *> SequenceWalkerTask::prepareSubtasks() {
53 QList<SequenceWalkerSubtask *> res;
54
55 if (config.range.isEmpty()) {
56 config.range.startPos = 0;
57 config.range.length = config.seqSize;
58 } else {
59 U2Region wholeSeqReg(0, config.seqSize);
60 assert(wholeSeqReg.contains(config.range));
61 config.range = wholeSeqReg.intersect(config.range);
62 }
63
64 if (config.walkCircular && static_cast<quint64>(config.range.length) == config.seqSize) {
65 tempBuffer.clear();
66 tempBuffer.append(QByteArray(config.seq, config.seqSize));
67 tempBuffer.append(QByteArray(config.seq).left(config.walkCircularDistance * (config.aminoTrans == nullptr ? 1 : 3)));
68
69 config.seq = tempBuffer.constData();
70 config.range.length += config.walkCircularDistance * (config.aminoTrans == nullptr ? 1 : 3);
71 }
72
73 if (config.aminoTrans == nullptr) {
74 // try walk direct and complement strands
75 QVector<U2Region> chunks = splitRange(config.range, config.chunkSize, config.overlapSize, config.lastChunkExtraLen, false);
76
77 if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_DirectOnly) {
78 QList<SequenceWalkerSubtask *> directTasks = createSubs(chunks, false, false);
79 res += directTasks;
80 }
81 if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_ComplementOnly) {
82 assert(config.complTrans != nullptr);
83 QList<SequenceWalkerSubtask *> complTasks = createSubs(chunks, true, false);
84 res += complTasks;
85 }
86 } else {
87 // in case of amino walk (chunk - overlap) should be devisible by 3
88 if ((config.chunkSize - config.overlapSize) % 3 != 0 && config.overlapSize != 0) {
89 config.chunkSize += 3 - (config.chunkSize - config.overlapSize) % 3;
90 }
91
92 // try walk 3 direct and 3 complement translations
93 if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_DirectOnly) {
94 for (int i = 0; i < 3; i++) {
95 U2Region strandRange(config.range.startPos + i, config.range.length - i);
96 QVector<U2Region> chunks = splitRange(strandRange, config.chunkSize, config.overlapSize, config.lastChunkExtraLen, false);
97 QList<SequenceWalkerSubtask *> directTasks = createSubs(chunks, false, true);
98 res += directTasks;
99 }
100 }
101 if (config.strandToWalk == StrandOption_Both || config.strandToWalk == StrandOption_ComplementOnly) {
102 assert(config.complTrans != nullptr);
103 for (int i = 0; i < 3; i++) {
104 U2Region strandRange(config.range.startPos, config.range.length - i);
105 QVector<U2Region> chunks = splitRange(strandRange, config.chunkSize, config.overlapSize, config.lastChunkExtraLen, true);
106 QList<SequenceWalkerSubtask *> complTasks = createSubs(chunks, true, true);
107 res += complTasks;
108 }
109 }
110 }
111 return res;
112 }
113
createSubs(const QVector<U2Region> & chunks,bool doCompl,bool doAmino)114 QList<SequenceWalkerSubtask *> SequenceWalkerTask::createSubs(const QVector<U2Region> &chunks, bool doCompl, bool doAmino) {
115 QList<SequenceWalkerSubtask *> res;
116 for (int i = 0, n = chunks.size(); i < n; i++) {
117 const U2Region &chunk = chunks[i];
118 bool lo = config.overlapSize > 0 && i > 0;
119 bool ro = config.overlapSize > 0 && i + 1 < n;
120 SequenceWalkerSubtask *t = new SequenceWalkerSubtask(this, chunk, lo, ro, config.seq + chunk.startPos, chunk.length, doCompl, doAmino);
121 res.append(t);
122 }
123 return res;
124 }
125
splitRange(const U2Region & range,int chunkSize,int overlapSize,int lastChunkExtraLen,bool reverseMode)126 QVector<U2Region> SequenceWalkerTask::splitRange(const U2Region &range, int chunkSize, int overlapSize, int lastChunkExtraLen, bool reverseMode) {
127 assert(chunkSize > overlapSize);
128 int stepSize = chunkSize - overlapSize;
129
130 QVector<U2Region> res;
131 for (int pos = range.startPos, end = range.endPos(), lastPos = range.startPos; lastPos < end; pos += stepSize) {
132 int chunkLen = qMin(pos + chunkSize, end) - pos;
133 if (end - chunkLen - pos <= lastChunkExtraLen) {
134 chunkLen = end - pos;
135 }
136 lastPos = pos + chunkLen;
137 res.append(U2Region(pos, chunkLen));
138 }
139
140 if (reverseMode) {
141 QVector<U2Region> revertedRegions;
142 foreach (const U2Region &r, res) {
143 U2Region rr(range.startPos + (range.endPos() - r.endPos()), r.length);
144 revertedRegions.prepend(rr);
145 }
146 res = revertedRegions;
147 }
148 return res;
149 }
150
151 //////////////////////////////////////////////////////////////////////////
152 // subtask
SequenceWalkerSubtask(SequenceWalkerTask * _t,const U2Region & glob,bool lo,bool ro,const char * _seq,int _len,bool _doCompl,bool _doAmino)153 SequenceWalkerSubtask::SequenceWalkerSubtask(SequenceWalkerTask *_t, const U2Region &glob, bool lo, bool ro, const char *_seq, int _len, bool _doCompl, bool _doAmino)
154 : Task(tr("Sequence walker subtask"), TaskFlag_None),
155 t(_t), globalRegion(glob), localSeq(_seq), originalLocalSeq(_seq),
156 localLen(_len), originalLocalLen(_len), doCompl(_doCompl), doAmino(_doAmino),
157 leftOverlap(lo), rightOverlap(ro) {
158 tpm = Task::Progress_Manual;
159
160 // get resources
161 QList<TaskResourceUsage> resources = t->getCallback()->getResources(this);
162 foreach (const TaskResourceUsage &resource, resources) {
163 addTaskResource(resource);
164 }
165 }
166
getRegionSequence()167 const char *SequenceWalkerSubtask::getRegionSequence() {
168 if (needLocalRegionProcessing()) {
169 prepareLocalRegion();
170 }
171 return localSeq;
172 }
173
getRegionSequenceLen()174 int SequenceWalkerSubtask::getRegionSequenceLen() {
175 if (needLocalRegionProcessing()) {
176 prepareLocalRegion();
177 }
178 return localLen;
179 }
180
prepareLocalRegion()181 void SequenceWalkerSubtask::prepareLocalRegion() {
182 assert(doAmino || doCompl);
183
184 QByteArray res(localSeq, localLen);
185 if (doCompl) {
186 // do complement;
187 assert(t->getConfig().complTrans != nullptr);
188 const QByteArray &complementMap = t->getConfig().complTrans->getOne2OneMapper();
189 TextUtils::translate(complementMap, res.data(), res.length());
190 TextUtils::reverse(res.data(), res.length());
191 }
192 if (doAmino) {
193 assert(t->getConfig().aminoTrans != nullptr && t->getConfig().aminoTrans->isThree2One());
194 t->getConfig().aminoTrans->translate(res.data(), res.length(), res.data(), res.length());
195 int newLen = res.length() / 3;
196 res.resize(newLen);
197 }
198 processedSeqImage = res;
199 localLen = processedSeqImage.size();
200 localSeq = processedSeqImage.constData();
201 }
202
run()203 void SequenceWalkerSubtask::run() {
204 assert(!t->hasError());
205 t->getCallback()->onRegion(this, stateInfo);
206 }
207
intersectsWithOverlaps(const U2Region & reg) const208 bool SequenceWalkerSubtask::intersectsWithOverlaps(const U2Region ®) const {
209 int overlap = getGlobalConfig().overlapSize;
210 if (overlap == 0) {
211 return false;
212 }
213 bool intersects = false;
214 if (leftOverlap) {
215 intersects = reg.intersects(U2Region(globalRegion.startPos, overlap));
216 }
217 if (!intersects && rightOverlap) {
218 intersects = reg.intersects(U2Region(globalRegion.endPos() - overlap, overlap));
219 }
220 return intersects;
221 }
222
223 } // namespace U2
224