1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifndef _U2_SEQUENCE_WALKER_TASK_H_
23 #define _U2_SEQUENCE_WALKER_TASK_H_
24 
25 #include <U2Core/Task.h>
26 #include <U2Core/U2Region.h>
27 
28 namespace U2 {
29 
30 class DNATranslation;
31 class SequenceWalkerSubtask;
32 
/**
 * Selects which strand(s) a sequence walk covers.
 * Used as the type of SequenceWalkerConfig::strandToWalk.
 * The numeric values are the ones the compiler assigns implicitly; they are
 * written out here only to make them visible.
 */
enum StrandOption {
    StrandOption_DirectOnly = 0,  // direct strand only
    StrandOption_ComplementOnly = 1,  // complement strand only
    StrandOption_Both = 2  // both strands
};
/**
 * Plain bundle of settings passed (by const reference) to the
 * SequenceWalkerTask constructor and later exposed through
 * SequenceWalkerTask::getConfig().
 *
 * All fields are public. The constructor is defined out of line, so the
 * initial values of the fields are not visible from this header.
 */
class U2CORE_EXPORT SequenceWalkerConfig {
public:
    // TODO: allow selecting a custom strand only!

    SequenceWalkerConfig();

    const char *seq;  // sequence to split (raw pointer; NOTE(review): ownership is not established here - presumably not owned, confirm)
    quint64 seqSize;  // size of the sequence to split
    U2Region range;  // if not empty -> only this region is processed
    DNATranslation *complTrans;  // NOTE(review): presumably the translation used for the complement strand - confirm
    DNATranslation *aminoTrans;  // NOTE(review): presumably the translation used for amino translation - confirm

    quint64 chunkSize;  // optimal chunk size, used by default for all regions except the last one
    int lastChunkExtraLen;  // extra length allowed to be added to the last chunk
    int overlapSize;  // overlap between 2 neighboring regions
    int nThreads;  // NOTE(review): presumably a limit on parallel subtasks - confirm against the .cpp
    StrandOption strandToWalk;  // which strand(s) to walk, see StrandOption

    bool walkCircular;  // NOTE(review): presumably enables walking across the junction of a circular sequence - confirm
    quint64 walkCircularDistance;  // NOTE(review): presumably only meaningful when walkCircular is set - confirm
};
59 
60 class U2CORE_EXPORT SequenceWalkerCallback {
61 public:
~SequenceWalkerCallback()62     virtual ~SequenceWalkerCallback() {
63     }
64 
65     virtual void onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti) = 0;
66 
67     /* implement this to give SequenceWalkerSubtask required resources
68      * here are resources for ONE(!) SequenceWalkerSubtask execution e.g. for one execution of onRegion function
69      */
getResources(SequenceWalkerSubtask *)70     virtual QList<TaskResourceUsage> getResources(SequenceWalkerSubtask *) {
71         return QList<TaskResourceUsage>();
72     }
73 };
74 
75 class U2CORE_EXPORT SequenceWalkerTask : public Task {
76     Q_OBJECT
77 public:
78     SequenceWalkerTask(const SequenceWalkerConfig &config, SequenceWalkerCallback *callback, const QString &name, TaskFlags tf = TaskFlags_NR_FOSE_COSC);
79 
getCallback()80     SequenceWalkerCallback *getCallback() const {
81         return callback;
82     }
getConfig()83     const SequenceWalkerConfig &getConfig() const {
84         return config;
85     }
86 
87     // reverseMode - start splitting from the end of the range
88     static QVector<U2Region> splitRange(const U2Region &range, int chunkSize, int overlapSize, int lastChunkExtraLen, bool reverseMode);
89 
setError(const QString & err)90     void setError(const QString &err) {
91         stateInfo.setError(err);
92     }
93 
94 private:
95     QList<SequenceWalkerSubtask *> prepareSubtasks();
96     QList<SequenceWalkerSubtask *> createSubs(const QVector<U2Region> &chunks, bool doCompl, bool doAmino);
97 
98     SequenceWalkerConfig config;
99     SequenceWalkerCallback *callback;
100 
101     QByteArray tempBuffer;
102 };
103 
104 class U2CORE_EXPORT SequenceWalkerSubtask : public Task {
105     Q_OBJECT
106 public:
107     SequenceWalkerSubtask(SequenceWalkerTask *t, const U2Region &globalReg, bool lo, bool ro, const char *localSeq, int localLen, bool doCompl, bool doAmino);
108 
109     void run();
110 
111     const char *getRegionSequence();
112 
113     int getRegionSequenceLen();
114 
isDNAComplemented()115     bool isDNAComplemented() const {
116         return doCompl;
117     }
118 
isAminoTranslated()119     bool isAminoTranslated() const {
120         return doAmino;
121     }
122 
getGlobalRegion()123     U2Region getGlobalRegion() const {
124         return globalRegion;
125     }
126 
getGlobalConfig()127     const SequenceWalkerConfig &getGlobalConfig() const {
128         return t->getConfig();
129     }
130 
131     bool intersectsWithOverlaps(const U2Region &globalReg) const;
hasLeftOverlap()132     bool hasLeftOverlap() const {
133         return leftOverlap;
134     }
hasRightOverlap()135     bool hasRightOverlap() const {
136         return rightOverlap;
137     }
138 
139 private:
needLocalRegionProcessing()140     bool needLocalRegionProcessing() const {
141         return (doAmino || doCompl) && processedSeqImage.isEmpty();
142     }
143     void prepareLocalRegion();
144 
145     SequenceWalkerTask *t;
146     U2Region globalRegion;
147     const char *localSeq;
148     const char *originalLocalSeq;
149     int localLen;
150     int originalLocalLen;
151     bool doCompl;
152     bool doAmino;
153     bool leftOverlap;
154     bool rightOverlap;
155 
156     QByteArray processedSeqImage;
157 };
158 
159 }  // namespace U2
160 
161 #endif
162