1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "CharOccurTask.h"
23 
24 #include <U2Core/DNAAlphabet.h>
25 #include <U2Core/U2DbiUtils.h>
26 #include <U2Core/U2OpStatusUtils.h>
27 #include <U2Core/U2Region.h>
28 #include <U2Core/U2SafePoints.h>
29 #include <U2Core/U2SequenceDbi.h>
30 
31 namespace U2 {
32 
/**
 * Creates a statistics entry for a single character.
 *
 * The arguments are stored in the corresponding members as is; no validation
 * or recalculation is performed here.
 *
 * @param _charInSequence     the character the entry describes
 * @param _numberOfOccurrence value stored as the number of occurrences of the character
 * @param _percentageOfOccur  value stored as the percentage of occurrences of the character
 */
CharOccurResult::CharOccurResult(char _charInSequence, qint64 _numberOfOccurrence, double _percentageOfOccur)
    : charInSequence(_charInSequence),
      numberOfOccurr(_numberOfOccurrence),
      percentageOfOccur(_percentageOfOccur) {
}
38 
/**
 * Creates a task that calculates the characters occurrence statistics.
 *
 * The task is constructed with the TaskFlag_None flag; 'tpm' is set to
 * Progress_Manual and the progress is reset to 0.
 *
 * @param _alphabet alphabet stored in the task; run() verifies the found characters against it
 * @param _seqRef   reference to the sequence entity; run() opens the DBI connection and
 *                  reads the sequence data through it
 * @param regions   sequence regions stored in the task; run() iterates over them
 */
CharOccurTask::CharOccurTask(const DNAAlphabet *_alphabet,
                             U2EntityRef _seqRef,
                             const QVector<U2Region> &regions)
    : BackgroundTask<QList<CharOccurResult>>(
          "Calculating characters occurrence",
          TaskFlag_None),
      alphabet(_alphabet),
      seqRef(_seqRef),
      // The parameter shadows the member here: the member is initialized from the argument
      regions(regions) {
    tpm = Progress_Manual;
    stateInfo.setProgress(0);
}
51 
run()52 void CharOccurTask::run() {
53     // Create the connection
54     U2OpStatus2Log os;
55     DbiConnection dbiConnection(seqRef.dbiRef, os);
56     CHECK_OP(os, );
57 
58     U2SequenceDbi *sequenceDbi = dbiConnection.dbi->getSequenceDbi();
59 
60     // Verify the alphabet
61     SAFE_POINT(0 != alphabet, "The alphabet is NULL!", )
62 
63     QByteArray alphabetChars = alphabet->getAlphabetChars();
64     SAFE_POINT(!alphabetChars.isEmpty(), "There are no characters in the alphabet!", );
65 
66     QVector<quint64> charactersOccurrence(256, 0);
67     qint64 totalLength = U2Region::sumLength(regions);
68     qint64 processedLength = 0;
69     foreach (const U2Region &region, regions) {
70         QList<U2Region> blocks = U2Region::split(region, REGION_TO_ANALAYZE);
71         foreach (const U2Region &block, blocks) {
72             // Get the selected region and verify that the data has been correctly read
73             QByteArray sequence = sequenceDbi->getSequenceData(seqRef.entityId, block, os);
74             if (os.hasError() || sequence.isEmpty()) {
75                 taskLog.details("Skipping calculation of the characters occurrence.");
76                 break;
77             }
78 
79             // Calculating the values
80             const char *sequenceData = sequence.constData();
81             for (int i = 0, n = sequence.size(); i < n; i++) {
82                 char c = sequenceData[i];
83                 charactersOccurrence[c]++;
84             }
85 
86             // Update the task progress
87             processedLength += block.length;
88             stateInfo.setProgress(processedLength * 100 / totalLength);
89             CHECK_OP(stateInfo, );
90         }
91     }
92 
93     // Calculate the percentage and format the result
94     QList<CharOccurResult> calculatedResults;
95     for (int i = 0; i < charactersOccurrence.length(); i++) {
96         char c = (char)i;
97         qint64 numberOfOccur = charactersOccurrence[i];
98         if (numberOfOccur == 0) {
99             continue;
100         }
101         SAFE_POINT(alphabetChars.contains(c),
102                    QString("Unexpected characters has been detected in the sequence: {%1}").arg(c), );
103         double percentageOfOccur = numberOfOccur * 100.0 / totalLength;
104         CharOccurResult calcResult(c, numberOfOccur, percentageOfOccur);
105         calculatedResults.append(calcResult);
106     }
107 
108     result = calculatedResults;
109 }
110 
111 }  // namespace U2
112