1 /** 2 * UGENE - Integrated Bioinformatics Tools. 3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru> 4 * http://ugene.net 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 19 * MA 02110-1301, USA. 20 */ 21 22 #ifndef RF_SARRAYTANDEMFINDER_H 23 #define RF_SARRAYTANDEMFINDER_H 24 25 #include <QMap> 26 #include <QMutex> 27 28 #include <U2Algorithm/BitsTable.h> 29 #include <U2Algorithm/SArrayIndex.h> 30 31 #include <U2Core/AnnotationData.h> 32 #include <U2Core/AppResources.h> 33 #include <U2Core/DNASequence.h> 34 #include <U2Core/GObjectReference.h> 35 #include <U2Core/SequenceWalkerTask.h> 36 #include <U2Core/Task.h> 37 38 #include "RFBase.h" 39 #include "RF_SuffixArray.h" 40 41 namespace U2 { 42 43 namespace TSConstants { 44 enum TSAlgo { AlgoSuffix = 0, 45 AlgoSuffixBinary = 1 }; 46 enum TSPreset { PresetAll = 0, 47 PresetMicro = 1, 48 PresetMini = 2, 49 PresetBigPeriod = 3, 50 PresetCustom = 4 }; 51 } // namespace TSConstants 52 53 class FindTandemsTaskSettings { 54 public: 55 static const int DEFAULT_MIN_TANDEM_SIZE; 56 static const int DEFAULT_MIN_REPEAT_COUNT; 57 static const TSConstants::TSAlgo DEFAULT_ALGO = TSConstants::AlgoSuffixBinary; 58 59 public: FindTandemsTaskSettings()60 FindTandemsTaskSettings() 61 : minPeriod(1), maxPeriod(INT_MAX), minTandemSize(DEFAULT_MIN_TANDEM_SIZE), minRepeatCount(DEFAULT_MIN_REPEAT_COUNT), 62 accuracy(0), maxResults(10 * 1000 * 100), reportSeqShift(0), showOverlappedTandems(false), algo(DEFAULT_ALGO), nThreads(MAX_PARALLEL_SUBTASKS_AUTO) { 63 } 64 65 int minPeriod; 66 int maxPeriod; 67 int minTandemSize; 68 int minRepeatCount; 69 int accuracy; 70 int maxResults; 71 qint64 reportSeqShift; 72 U2Region seqRegion; 73 bool showOverlappedTandems; 74 75 TSConstants::TSAlgo algo; 76 int nThreads; 77 }; 78 79 class FindTandemsToAnnotationsTask : public Task { 80 Q_OBJECT 81 public: 82 FindTandemsToAnnotationsTask(const FindTandemsTaskSettings &s, const DNASequence &seq, const QString &annName, const QString &groupName, const QString &annDescription, const GObjectReference &annObjRef); 83 FindTandemsToAnnotationsTask(const FindTandemsTaskSettings &s, const DNASequence &seq); 84 85 QList<Task *> onSubTaskFinished(Task *subTask); 86 QList<SharedAnnotationData> importTandemAnnotations(const QList<Tandem> &tandems, qint64 seqStart, const bool showOverlapped); 87 getResult()88 QList<SharedAnnotationData> getResult() const { 89 return result; 90 } 91 92 private: 93 bool saveAnns; 94 DNASequence mainSeq; 95 QString annName; 96 QString annGroup; 97 const QString annDescription; 98 GObjectReference annObjRef; 99 100 QList<SharedAnnotationData> result; 101 const FindTandemsTaskSettings s; 102 }; 103 104 class TandemFinder : public Task, public SequenceWalkerCallback { 105 Q_OBJECT 106 public: 107 TandemFinder(const FindTandemsTaskSettings &s, const DNASequence &seq); 108 void prepare(); 109 void run(); getResults()110 const QList<Tandem> &getResults() { 111 return foundTandems; 112 } getSettings()113 const FindTandemsTaskSettings &getSettings() const { 114 return settings; 115 } 116 const static quint32 maxCheckPeriod = 1024; // max period is 1k 117 protected: 118 // main sequence 119 char *sequence; 120 FindTandemsTaskSettings settings; 121 QMutex tandemsAccessMutex; 122 QList<Tandem> foundTandems; 123 QList<Task *> onSubTaskFinished(Task *subTask); 124 void onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti); 125 126 private: 127 QMutex subtasksQueue; 128 quint32 regionCount; 129 quint64 startTime; 130 QList<Task *> regionTasks; 131 }; 132 133 class TandemFinder_Region : public Task { 134 Q_OBJECT 135 public: TandemFinder_Region(const int regionId,const char * _sequence,const quint32 _seqSize,const quint64 _regionOffset,const FindTandemsTaskSettings & _settings)136 TandemFinder_Region(const int regionId, const char *_sequence, const quint32 _seqSize, const quint64 _regionOffset, const FindTandemsTaskSettings &_settings) 137 : Task(tr("Find tandems in %1 region").arg(regionId), TaskFlags_NR_FOSCOE), 138 sequence(_sequence), seqSize(_seqSize), id(regionId), regionOffset(_regionOffset), settings(_settings) { 139 } 140 141 ~TandemFinder_Region(); 142 getResult()143 const QList<Tandem> getResult() { 144 QMutexLocker tandemsAccessLocker(&tandemsAccessMutex); 145 return foundTandems; 146 }; getRegionId()147 quint64 getRegionId() const { 148 return id; 149 } getRegionOffset()150 quint64 getRegionOffset() const { 151 return regionOffset; 152 } 153 void prepare(); 154 155 protected: 156 // main sequence 157 const char *sequence; 158 const long seqSize; 159 160 QList<Task *> onSubTaskFinished(Task *subTask); 161 162 private: 163 const int id; 164 const quint64 regionOffset; 165 166 const FindTandemsTaskSettings &settings; 167 QList<Tandem> foundTandems; 168 QMutex tandemsAccessMutex; 169 friend class ExactSizedTandemFinder; 170 friend class LargeSizedTandemFinder; 171 void addResult(const Tandem &tandem); 172 void addResults(const QMap<Tandem, Tandem> &tandems); 173 }; 174 175 class ConcreteTandemFinder : public Task { 176 Q_OBJECT 177 public: 178 ConcreteTandemFinder(QString taskName, const char *_sequence, const long _seqSize, const FindTandemsTaskSettings &_settings, const int _analysisSize); ~ConcreteTandemFinder()179 ~ConcreteTandemFinder() {}; 180 181 void prepare(); 182 void cleanup(); 183 184 protected: 185 // main sequence 186 const char *sequence; 187 const int seqSize; 188 189 SArrayIndex *index; 190 SuffixArray *suffixArray; 191 const FindTandemsTaskSettings &settings; 192 const int prefixLength; 193 194 const quint32 suffArrSize; 195 QList<Task *> onSubTaskFinished(Task *subTask); 196 197 private: 198 const BitsTable bitsTable; 199 200 protected: 201 QMap<Tandem, Tandem> rawTandems; 202 }; 203 204 class ExactSizedTandemFinder : public ConcreteTandemFinder { 205 Q_OBJECT 206 public: 207 ExactSizedTandemFinder(const char *_sequence, const long _seqSize, const FindTandemsTaskSettings &_settings, const int _analysisSize); 208 ~ExactSizedTandemFinder(); 209 210 void run(); 211 212 private: 213 quint32 *checkAndSpreadTandem(const quint32 *tandemStart, const quint32 *tandemLast, quint32 repeatLen); 214 quint32 *checkAndSpreadTandem_bv(const quint32 *tandemStart, const quint32 *tandemLast, quint32 repeatLen); 215 bool comparePrefixChars(const char *, const char *); 216 }; 217 218 class LargeSizedTandemFinder : public ConcreteTandemFinder { 219 Q_OBJECT 220 public: 221 LargeSizedTandemFinder(const char *_sequence, const long _seqSize, const FindTandemsTaskSettings &_settings, const int _analysisSize); 222 ~LargeSizedTandemFinder(); 223 224 void run(); 225 226 private: 227 quint32 *checkAndSpreadTandem(const quint32 *tandemStart, const quint32 *tandemLast, const unsigned repeatLen); 228 quint32 *checkAndSpreadTandem_bv(const quint32 *tandemStart, const quint32 *tandemLast, const unsigned repeatLen); 229 bool comparePrefixChars(const char *, const char *); 230 }; 231 232 } // namespace U2 233 234 #endif // RF_SARRAYTANDEMFINDER_H 235