1 /** 2 * UGENE - Integrated Bioinformatics Tools. 3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru> 4 * http://ugene.net 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 19 * MA 02110-1301, USA. 20 */ 21 22 #ifndef _U2_SITECON_ALG_H_ 23 #define _U2_SITECON_ALG_H_ 24 25 #include <QVector> 26 27 #include <U2Core/MultipleSequenceAlignment.h> 28 #include <U2Core/Task.h> 29 #include <U2Core/U2Region.h> 30 31 namespace U2 { 32 33 class DiPropertySitecon; 34 35 /** Average and deviation for one property. */ 36 class DiStat { 37 public: DiStat(DiPropertySitecon * p,qreal d,qreal a)38 DiStat(DiPropertySitecon *p, qreal d, qreal a) 39 : prop(p), sdeviation(d), average(a), weighted(false) { 40 } DiStat()41 DiStat() 42 : prop(nullptr), sdeviation(-1), average(-1), weighted(false) { 43 } 44 45 DiPropertySitecon *prop; 46 qreal sdeviation; 47 qreal average; 48 bool weighted; 49 }; 50 typedef QVector<DiStat> PositionStats; 51 52 enum SiteconWeightAlg { 53 SiteconWeightAlg_None, 54 SiteconWeightAlg_Alg2 55 }; 56 57 class SiteconBuildSettings { 58 public: SiteconBuildSettings()59 SiteconBuildSettings() 60 : windowSize(0), randomSeed(0), secondTypeErrorCalibrationLen(100 * 1000), 61 chisquare(0.95f), numSequencesInAlignment(0), weightAlg(SiteconWeightAlg_None) { 62 acgtContent[0] = acgtContent[1] = acgtContent[2] = acgtContent[3] = 25; 63 } 64 65 int windowSize; 66 int randomSeed; 67 int secondTypeErrorCalibrationLen; 68 qreal chisquare; 69 int numSequencesInAlignment; 70 SiteconWeightAlg weightAlg; 71 int acgtContent[4]; 72 QList<DiPropertySitecon *> props; 73 }; 74 75 class SiteconModel { 76 public: SiteconModel()77 SiteconModel() { 78 deviationThresh = -1; 79 } 80 QString aliURL; 81 QString modelName; 82 QString description; 83 SiteconBuildSettings settings; 84 QVector<PositionStats> matrix; 85 QVector<qreal> err1; 86 QVector<qreal> err2; 87 qreal deviationThresh; 88 bool checkState(bool doAssert = true) const; 89 bool operator!=(const SiteconModel &model) const; 90 }; 91 92 class DNATranslation; 93 class SiteconAlgorithm : public QObject { 94 Q_OBJECT 95 public: 96 static QVector<PositionStats> calculateDispersionAndAverage(const MultipleSequenceAlignment &ma, const SiteconBuildSettings &s, TaskStateInfo &ts); 97 98 static qreal calculatePSum(const char *seq, int len, const QVector<PositionStats> &normalizedMatrix, const SiteconBuildSettings &settings, qreal devThreshold, DNATranslation *complMap = nullptr); 99 100 static QVector<qreal> calculateFirstTypeError(const MultipleSequenceAlignment &ma, const SiteconBuildSettings &s, TaskStateInfo &ts); 101 102 static QVector<qreal> calculateSecondTypeError(const QVector<PositionStats> &matrix, const SiteconBuildSettings &s, TaskStateInfo &ts); 103 104 static QVector<PositionStats> normalize(const QVector<PositionStats> &matrix, const SiteconBuildSettings &s); 105 106 static int calculateWeights(const MultipleSequenceAlignment &ma, QVector<PositionStats> &matrix, const SiteconBuildSettings &settings, bool matrixIsNormalized, TaskStateInfo &s); 107 108 static void calculateACGTContent(const MultipleSequenceAlignment &ma, SiteconBuildSettings &bs); 109 110 static QByteArray generateRandomSequence(const int *actgContent, int seqLen, TaskStateInfo &ts); 111 }; 112 113 } // namespace U2 114 115 #endif 116