1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "CharOccurTask.h"
23
24 #include <U2Core/DNAAlphabet.h>
25 #include <U2Core/U2DbiUtils.h>
26 #include <U2Core/U2OpStatusUtils.h>
27 #include <U2Core/U2Region.h>
28 #include <U2Core/U2SafePoints.h>
29 #include <U2Core/U2SequenceDbi.h>
30
31 namespace U2 {
32
CharOccurResult(char _charInSequence,qint64 _numberOfOccurrence,double _percentageOfOccur)33 CharOccurResult::CharOccurResult(char _charInSequence, qint64 _numberOfOccurrence, double _percentageOfOccur)
34 : charInSequence(_charInSequence),
35 numberOfOccurr(_numberOfOccurrence),
36 percentageOfOccur(_percentageOfOccur) {
37 }
38
CharOccurTask(const DNAAlphabet * _alphabet,U2EntityRef _seqRef,const QVector<U2Region> & regions)39 CharOccurTask::CharOccurTask(const DNAAlphabet *_alphabet,
40 U2EntityRef _seqRef,
41 const QVector<U2Region> ®ions)
42 : BackgroundTask<QList<CharOccurResult>>(
43 "Calculating characters occurrence",
44 TaskFlag_None),
45 alphabet(_alphabet),
46 seqRef(_seqRef),
47 regions(regions) {
48 tpm = Progress_Manual;
49 stateInfo.setProgress(0);
50 }
51
run()52 void CharOccurTask::run() {
53 // Create the connection
54 U2OpStatus2Log os;
55 DbiConnection dbiConnection(seqRef.dbiRef, os);
56 CHECK_OP(os, );
57
58 U2SequenceDbi *sequenceDbi = dbiConnection.dbi->getSequenceDbi();
59
60 // Verify the alphabet
61 SAFE_POINT(0 != alphabet, "The alphabet is NULL!", )
62
63 QByteArray alphabetChars = alphabet->getAlphabetChars();
64 SAFE_POINT(!alphabetChars.isEmpty(), "There are no characters in the alphabet!", );
65
66 QVector<quint64> charactersOccurrence(256, 0);
67 qint64 totalLength = U2Region::sumLength(regions);
68 qint64 processedLength = 0;
69 foreach (const U2Region ®ion, regions) {
70 QList<U2Region> blocks = U2Region::split(region, REGION_TO_ANALAYZE);
71 foreach (const U2Region &block, blocks) {
72 // Get the selected region and verify that the data has been correctly read
73 QByteArray sequence = sequenceDbi->getSequenceData(seqRef.entityId, block, os);
74 if (os.hasError() || sequence.isEmpty()) {
75 taskLog.details("Skipping calculation of the characters occurrence.");
76 break;
77 }
78
79 // Calculating the values
80 const char *sequenceData = sequence.constData();
81 for (int i = 0, n = sequence.size(); i < n; i++) {
82 char c = sequenceData[i];
83 charactersOccurrence[c]++;
84 }
85
86 // Update the task progress
87 processedLength += block.length;
88 stateInfo.setProgress(processedLength * 100 / totalLength);
89 CHECK_OP(stateInfo, );
90 }
91 }
92
93 // Calculate the percentage and format the result
94 QList<CharOccurResult> calculatedResults;
95 for (int i = 0; i < charactersOccurrence.length(); i++) {
96 char c = (char)i;
97 qint64 numberOfOccur = charactersOccurrence[i];
98 if (numberOfOccur == 0) {
99 continue;
100 }
101 SAFE_POINT(alphabetChars.contains(c),
102 QString("Unexpected characters has been detected in the sequence: {%1}").arg(c), );
103 double percentageOfOccur = numberOfOccur * 100.0 / totalLength;
104 CharOccurResult calcResult(c, numberOfOccur, percentageOfOccur);
105 calculatedResults.append(calcResult);
106 }
107
108 result = calculatedResults;
109 }
110
111 } // namespace U2
112