1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifndef _U2_SITECON_ALG_H_
23 #define _U2_SITECON_ALG_H_
24 
25 #include <QVector>
26 
27 #include <U2Core/MultipleSequenceAlignment.h>
28 #include <U2Core/Task.h>
29 #include <U2Core/U2Region.h>
30 
31 namespace U2 {
32 
33 class DiPropertySitecon;
34 
35 /** Average and deviation for one property. */
36 class DiStat {
37 public:
DiStat(DiPropertySitecon * p,qreal d,qreal a)38     DiStat(DiPropertySitecon *p, qreal d, qreal a)
39         : prop(p), sdeviation(d), average(a), weighted(false) {
40     }
DiStat()41     DiStat()
42         : prop(nullptr), sdeviation(-1), average(-1), weighted(false) {
43     }
44 
45     DiPropertySitecon *prop;
46     qreal sdeviation;
47     qreal average;
48     bool weighted;
49 };
50 typedef QVector<DiStat> PositionStats;
51 
/**
 * Selector for the weighting algorithm.
 * The numeric values are spelled out explicitly and match the implicit numbering (0, 1).
 */
enum SiteconWeightAlg {
    /** Default choice in SiteconBuildSettings. NOTE(review): presumably means "no weighting" - confirm. */
    SiteconWeightAlg_None = 0,
    /** NOTE(review): the algorithm behind "Alg2" is not visible in this header - confirm. */
    SiteconWeightAlg_Alg2 = 1
};
56 
57 class SiteconBuildSettings {
58 public:
SiteconBuildSettings()59     SiteconBuildSettings()
60         : windowSize(0), randomSeed(0), secondTypeErrorCalibrationLen(100 * 1000),
61           chisquare(0.95f), numSequencesInAlignment(0), weightAlg(SiteconWeightAlg_None) {
62         acgtContent[0] = acgtContent[1] = acgtContent[2] = acgtContent[3] = 25;
63     }
64 
65     int windowSize;
66     int randomSeed;
67     int secondTypeErrorCalibrationLen;
68     qreal chisquare;
69     int numSequencesInAlignment;
70     SiteconWeightAlg weightAlg;
71     int acgtContent[4];
72     QList<DiPropertySitecon *> props;
73 };
74 
75 class SiteconModel {
76 public:
SiteconModel()77     SiteconModel() {
78         deviationThresh = -1;
79     }
80     QString aliURL;
81     QString modelName;
82     QString description;
83     SiteconBuildSettings settings;
84     QVector<PositionStats> matrix;
85     QVector<qreal> err1;
86     QVector<qreal> err2;
87     qreal deviationThresh;
88     bool checkState(bool doAssert = true) const;
89     bool operator!=(const SiteconModel &model) const;
90 };
91 
92 class DNATranslation;
93 class SiteconAlgorithm : public QObject {
94     Q_OBJECT
95 public:
96     static QVector<PositionStats> calculateDispersionAndAverage(const MultipleSequenceAlignment &ma, const SiteconBuildSettings &s, TaskStateInfo &ts);
97 
98     static qreal calculatePSum(const char *seq, int len, const QVector<PositionStats> &normalizedMatrix, const SiteconBuildSettings &settings, qreal devThreshold, DNATranslation *complMap = nullptr);
99 
100     static QVector<qreal> calculateFirstTypeError(const MultipleSequenceAlignment &ma, const SiteconBuildSettings &s, TaskStateInfo &ts);
101 
102     static QVector<qreal> calculateSecondTypeError(const QVector<PositionStats> &matrix, const SiteconBuildSettings &s, TaskStateInfo &ts);
103 
104     static QVector<PositionStats> normalize(const QVector<PositionStats> &matrix, const SiteconBuildSettings &s);
105 
106     static int calculateWeights(const MultipleSequenceAlignment &ma, QVector<PositionStats> &matrix, const SiteconBuildSettings &settings, bool matrixIsNormalized, TaskStateInfo &s);
107 
108     static void calculateACGTContent(const MultipleSequenceAlignment &ma, SiteconBuildSettings &bs);
109 
110     static QByteArray generateRandomSequence(const int *actgContent, int seqLen, TaskStateInfo &ts);
111 };
112 
113 }  // namespace U2
114 
115 #endif
116