1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include <math.h>
23 
24 #include <U2Core/DNAAlphabet.h>
25 #include <U2Core/DNASequenceUtils.h>
26 #include <U2Core/U2DbiUtils.h>
27 #include <U2Core/U2OpStatusUtils.h>
28 #include <U2Core/U2SafePoints.h>
29 #include <U2Core/U2SequenceDbi.h>
30 
31 #include "DNAStatisticsTask.h"
32 
33 namespace U2 {
34 
35 #define MAP_SIZE 256
36 
DNAStatistics()37 DNAStatistics::DNAStatistics() {
38     clear();
39 }
40 
clear()41 void DNAStatistics::clear() {
42     length = 0;
43     gcContent = 0;
44     meltingTemp = 0;
45 
46     ssMolecularWeight = 0;
47     ssExtinctionCoefficient = 0;
48     ssOd260AmountOfSubstance = 0;
49     ssOd260Mass = 0;
50 
51     dsMolecularWeight = 0;
52     dsExtinctionCoefficient = 0;
53     dsOd260AmountOfSubstance = 0;
54     dsOd260Mass = 0;
55 
56     isoelectricPoint = 0;
57 }
58 
createDnaMolecularWeightMap()59 static QVector<double> createDnaMolecularWeightMap() {
60     QVector<double> res(MAP_SIZE, 0);
61 
62     res['A'] = 251.24;
63     res['C'] = 227.22;
64     res['G'] = 267.24;
65     res['T'] = 242.23;
66 
67     // Characters from the extended alphabet are calculated as avarage value
68     res['W'] = (res['A'] + res['T']) / 2;
69     res['S'] = (res['C'] + res['G']) / 2;
70     res['M'] = (res['A'] + res['C']) / 2;
71     res['K'] = (res['G'] + res['T']) / 2;
72     res['R'] = (res['A'] + res['G']) / 2;
73     res['Y'] = (res['C'] + res['T']) / 2;
74 
75     res['B'] = (res['C'] + res['G'] + res['T']) / 3;
76     res['D'] = (res['A'] + res['G'] + res['T']) / 3;
77     res['H'] = (res['A'] + res['C'] + res['T']) / 3;
78     res['V'] = (res['A'] + res['C'] + res['G']) / 3;
79 
80     res['N'] = (res['A'] + res['C'] + res['G'] + res['T']) / 4;
81 
82     return res;
83 }
84 
createRnaMolecularWeightMap()85 static QVector<double> createRnaMolecularWeightMap() {
86     QVector<double> res(MAP_SIZE, 0);
87 
88     res['A'] = 267.24;
89     res['C'] = 243.22;
90     res['G'] = 283.24;
91     res['U'] = 244.20;
92 
93     // Characters from the extended alphabet are calculated as avarage value
94     res['W'] = (res['A'] + res['U']) / 2;
95     res['S'] = (res['C'] + res['G']) / 2;
96     res['M'] = (res['A'] + res['C']) / 2;
97     res['K'] = (res['G'] + res['U']) / 2;
98     res['R'] = (res['A'] + res['G']) / 2;
99     res['Y'] = (res['C'] + res['U']) / 2;
100 
101     res['B'] = (res['C'] + res['G'] + res['U']) / 3;
102     res['D'] = (res['A'] + res['G'] + res['U']) / 3;
103     res['H'] = (res['A'] + res['C'] + res['U']) / 3;
104     res['V'] = (res['A'] + res['C'] + res['G']) / 3;
105 
106     res['N'] = (res['A'] + res['C'] + res['G'] + res['U']) / 4;
107 
108     return res;
109 }
110 
createDnaAlphabetResolutionMap()111 static QMap<char, QByteArray> createDnaAlphabetResolutionMap() {
112     QMap<char, QByteArray> res;
113     res['A'] = "A";
114     res['C'] = "C";
115     res['G'] = "G";
116     res['T'] = "T";
117     res['W'] = "AT";
118     res['S'] = "CG";
119     res['M'] = "AC";
120     res['K'] = "GT";
121     res['R'] = "AG";
122     res['Y'] = "CT";
123     res['B'] = "CGT";
124     res['D'] = "AGT";
125     res['H'] = "ACT";
126     res['V'] = "ACG";
127     res['N'] = "ACGT";
128     return res;
129 }
130 
createRnaAlphabetResolutionMap()131 static QMap<char, QByteArray> createRnaAlphabetResolutionMap() {
132     QMap<char, QByteArray> res;
133     res['A'] = "A";
134     res['C'] = "C";
135     res['G'] = "G";
136     res['U'] = "U";
137     res['W'] = "AU";
138     res['S'] = "CG";
139     res['M'] = "AC";
140     res['K'] = "GU";
141     res['R'] = "AG";
142     res['Y'] = "CU";
143     res['B'] = "CGU";
144     res['D'] = "AGU";
145     res['H'] = "ACU";
146     res['V'] = "ACG";
147     res['N'] = "ACGU";
148     return res;
149 }
150 
fillMapWithAvarageValues(QVector<QVector<int>> & map,const QMap<char,QByteArray> & alphabetResolutionMap)151 static void fillMapWithAvarageValues(QVector<QVector<int>> &map, const QMap<char, QByteArray> &alphabetResolutionMap) {
152     foreach (const char i, alphabetResolutionMap.keys()) {
153         foreach (const char j, alphabetResolutionMap.keys()) {
154             if (0 == map[i][j]) {
155                 // Unambiguous nucleotide pairs are already registered
156                 // If at least one nucleotide in pair is ambiguous, then the pair value should be an avarage value of all possible variants.
157                 int value = 0;
158                 for (int k = 0; k < alphabetResolutionMap[i].length(); ++k) {
159                     for (int m = 0; m < alphabetResolutionMap[j].length(); ++m) {
160                         char char1 = alphabetResolutionMap[i][k];
161                         char char2 = alphabetResolutionMap[j][m];
162                         value += map[char1][char2];
163                     }
164                 }
165                 const int count = alphabetResolutionMap[i].length() * alphabetResolutionMap[j].length();
166                 value /= count;
167                 map[i][j] = value;
168             }
169         }
170     }
171 }
172 
createDnaMononucleotidesExtinctionCoefficients()173 static MononucleotidesExtinctionCoefficientsMap createDnaMononucleotidesExtinctionCoefficients() {
174     MononucleotidesExtinctionCoefficientsMap res(MAP_SIZE, 0);
175     res['A'] = 15400;
176     res['C'] = 7400;
177     res['G'] = 11500;
178     res['T'] = 8700;
179     return res;
180 }
181 
createDnaDinucleotidesExtinctionCoefficients()182 static DinucleotidesExtinctionCoefficientsMap createDnaDinucleotidesExtinctionCoefficients() {
183     DinucleotidesExtinctionCoefficientsMap res(MAP_SIZE, QVector<int>(MAP_SIZE, 0));
184 
185     res['A']['A'] = 27400;
186     res['A']['C'] = 21200;
187     res['A']['G'] = 25000;
188     res['A']['T'] = 22800;
189 
190     res['C']['A'] = 21200;
191     res['C']['C'] = 14600;
192     res['C']['G'] = 18000;
193     res['C']['T'] = 15200;
194 
195     res['G']['A'] = 25200;
196     res['G']['C'] = 17600;
197     res['G']['G'] = 21600;
198     res['G']['T'] = 20000;
199 
200     res['T']['A'] = 23400;
201     res['T']['C'] = 16200;
202     res['T']['G'] = 19000;
203     res['T']['T'] = 16800;
204 
205     fillMapWithAvarageValues(res, createDnaAlphabetResolutionMap());
206 
207     return res;
208 }
209 
createRnaMononucleotidesExtinctionCoefficients()210 static MononucleotidesExtinctionCoefficientsMap createRnaMononucleotidesExtinctionCoefficients() {
211     MononucleotidesExtinctionCoefficientsMap res(MAP_SIZE, 0);
212     res['A'] = 15400;
213     res['C'] = 7200;
214     res['G'] = 11500;
215     res['U'] = 9900;
216     return res;
217 }
218 
createRnaDinucleotidesExtinctionCoefficients()219 static DinucleotidesExtinctionCoefficientsMap createRnaDinucleotidesExtinctionCoefficients() {
220     DinucleotidesExtinctionCoefficientsMap res(MAP_SIZE, QVector<int>(MAP_SIZE, 0));
221 
222     res['A']['A'] = 27400;
223     res['A']['C'] = 21000;
224     res['A']['G'] = 25000;
225     res['A']['U'] = 24000;
226 
227     res['C']['A'] = 21000;
228     res['C']['C'] = 14200;
229     res['C']['G'] = 17800;
230     res['C']['U'] = 16200;
231 
232     res['G']['A'] = 25200;
233     res['G']['C'] = 17400;
234     res['G']['G'] = 21600;
235     res['G']['U'] = 21200;
236 
237     res['U']['A'] = 24600;
238     res['U']['C'] = 17200;
239     res['U']['G'] = 20000;
240     res['U']['U'] = 19600;
241 
242     fillMapWithAvarageValues(res, createRnaAlphabetResolutionMap());
243 
244     return res;
245 }
246 
createProteinMWMap()247 static QVector<double> createProteinMWMap() {
248     QVector<double> mwMap(MAP_SIZE, 0);
249     mwMap['A'] = 89.09;  // ALA
250     mwMap['R'] = 174.20;  // ARG
251     mwMap['N'] = 132.12;  // ASN
252     mwMap['D'] = 133.10;  // ASP
253     mwMap['B'] = 132.61;  // ASX
254     mwMap['C'] = 121.15;  // CYS
255     mwMap['Q'] = 146.15;  // GLN
256     mwMap['E'] = 147.13;  // GLU
257     mwMap['Z'] = 146.64;  // GLX
258     mwMap['G'] = 75.07;  // GLY
259     mwMap['H'] = 155.16;  // HIS
260     mwMap['I'] = 131.17;  // ILE
261     mwMap['L'] = 131.17;  // LEU
262     mwMap['K'] = 146.19;  // LYS
263     mwMap['M'] = 149.21;  // MET
264     mwMap['F'] = 165.19;  // PHE
265     mwMap['P'] = 115.13;  // PRO
266     mwMap['S'] = 105.09;  // SER
267     mwMap['T'] = 119.12;  // THR
268     mwMap['W'] = 204.23;  // TRP
269     mwMap['Y'] = 181.19;  // TYR
270     mwMap['V'] = 117.15;  // VAL
271     return mwMap;
272 }
273 
createPKAMap()274 static QVector<double> createPKAMap() {
275     QVector<double> res(MAP_SIZE, 0);
276     res['D'] = 4.0;
277     res['C'] = 8.5;
278     res['E'] = 4.4;
279     res['Y'] = 10.0;
280     res['c'] = 3.1;  // CTERM
281     res['R'] = 12.0;
282     res['H'] = 6.5;
283     res['K'] = 10.4;
284     res['n'] = 8.0;  // NTERM
285     return res;
286 }
287 
createChargeMap()288 static QVector<int> createChargeMap() {
289     QVector<int> res(MAP_SIZE, 0);
290     res['D'] = -1;
291     res['C'] = -1;
292     res['E'] = -1;
293     res['Y'] = -1;
294     res['c'] = -1;  // CTERM
295     res['R'] = 1;
296     res['H'] = 1;
297     res['K'] = 1;
298     res['n'] = 1;  // NTERM
299     return res;
300 }
301 
createGcRatioMap()302 static QVector<double> createGcRatioMap() {
303     QVector<double> res(MAP_SIZE, 0);
304     res['B'] = 2.0 / 3;
305     res['C'] = 1;
306     res['D'] = 1.0 / 3;
307     res['G'] = 1;
308     res['H'] = 1.0 / 3;
309     res['K'] = 0.5;
310     res['M'] = 0.5;
311     res['N'] = 0.5;
312     res['R'] = 0.5;
313     res['S'] = 1;
314     res['V'] = 2.0 / 3;
315     res['X'] = 0.5;
316     res['Y'] = 0.5;
317     return res;
318 }
319 
320 const QVector<double> DNAStatisticsTask::DNA_MOLECULAR_WEIGHT_MAP = createDnaMolecularWeightMap();
321 const QVector<double> DNAStatisticsTask::RNA_MOLECULAR_WEIGHT_MAP = createRnaMolecularWeightMap();
322 
323 const QVector<int> DNAStatisticsTask::DNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS = createDnaMononucleotidesExtinctionCoefficients();
324 const QVector<QVector<int>> DNAStatisticsTask::DNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS = createDnaDinucleotidesExtinctionCoefficients();
325 const QVector<int> DNAStatisticsTask::RNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS = createRnaMononucleotidesExtinctionCoefficients();
326 const QVector<QVector<int>> DNAStatisticsTask::RNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS = createRnaDinucleotidesExtinctionCoefficients();
327 
328 const QVector<double> DNAStatisticsTask::PROTEIN_MOLECULAR_WEIGHT_MAP = createProteinMWMap();
329 const QVector<double> DNAStatisticsTask::pKaMap = createPKAMap();
330 const QVector<int> DNAStatisticsTask::PROTEIN_CHARGES_MAP = createChargeMap();
331 const QVector<double> DNAStatisticsTask::GC_RATIO_MAP = createGcRatioMap();
332 
DNAStatisticsTask(const DNAAlphabet * alphabet,const U2EntityRef seqRef,const QVector<U2Region> & regions)333 DNAStatisticsTask::DNAStatisticsTask(const DNAAlphabet *alphabet,
334                                      const U2EntityRef seqRef,
335                                      const QVector<U2Region> &regions)
336     : BackgroundTask<DNAStatistics>(tr("Calculate sequence statistics"), TaskFlag_None),
337       alphabet(alphabet),
338       seqRef(seqRef),
339       regions(regions),
340       charactersCount(MAP_SIZE, 0),
341       rcCharactersCount(MAP_SIZE, 0),
342       dinucleotidesCount(MAP_SIZE, QVector<qint64>(MAP_SIZE, 0)),
343       rcDinucleotidesCount(MAP_SIZE, QVector<qint64>(MAP_SIZE, 0)) {
344     SAFE_POINT_EXT(alphabet != nullptr, setError(tr("Alphabet is NULL")), );
345 }
346 
run()347 void DNAStatisticsTask::run() {
348     computeStats();
349 }
350 
computeStats()351 void DNAStatisticsTask::computeStats() {
352     result.clear();
353 
354     U2OpStatus2Log os;
355     DbiConnection dbiConnection(seqRef.dbiRef, os);
356     CHECK_OP(os, );
357 
358     U2SequenceDbi *sequenceDbi = dbiConnection.dbi->getSequenceDbi();
359     CHECK(sequenceDbi != nullptr, );
360     SAFE_POINT_EXT(alphabet != nullptr, setError(tr("Alphabet is NULL")), );
361     qint64 totalLength = U2Region::sumLength(regions);
362     qint64 processedLength = 0;
363 
364     result.length = totalLength;
365     if (alphabet->isRaw()) {
366         // Other stats can't be computed for raw alphabet
367         return;
368     }
369 
370     foreach (const U2Region &region, regions) {
371         QList<U2Region> blocks = U2Region::split(region, 1024 * 1024);
372         foreach (const U2Region &block, blocks) {
373             if (isCanceled() || hasError()) {
374                 break;
375             }
376             const QByteArray seqBlock = sequenceDbi->getSequenceData(seqRef.entityId, block, os);
377             CHECK_OP(os, );
378             const char *sequenceData = seqBlock.constData();
379 
380             int previousChar = U2Msa::GAP_CHAR;
381             for (int i = 0, n = seqBlock.size(); i < n; i++) {
382                 const int character = static_cast<int>(sequenceData[i]);
383                 charactersCount[character]++;
384 
385                 if (alphabet->isNucleic()) {
386                     if (previousChar != U2Msa::GAP_CHAR && character != U2Msa::GAP_CHAR) {
387                         dinucleotidesCount[previousChar][character]++;
388                     }
389                 }
390                 if (U2Msa::GAP_CHAR != character) {
391                     previousChar = character;
392                 }
393             }
394 
395             if (alphabet->isNucleic()) {
396                 const QByteArray rcSeqBlock = DNASequenceUtils::reverseComplement(seqBlock, alphabet);
397                 const char *rcSequenceData = rcSeqBlock.constData();
398 
399                 int previousRcChar = U2Msa::GAP_CHAR;
400                 for (int i = 0, n = rcSeqBlock.size(); i < n; i++) {
401                     const int rcCharacter = static_cast<int>(rcSequenceData[i]);
402                     rcCharactersCount[rcCharacter]++;
403                     if (previousRcChar != U2Msa::GAP_CHAR && rcCharacter != U2Msa::GAP_CHAR) {
404                         rcDinucleotidesCount[rcCharacter][previousRcChar]++;  // dinucleotides on the complement strand are calculated in 5'->3' direction
405                     }
406                     if (U2Msa::GAP_CHAR != rcCharacter) {
407                         previousRcChar = rcCharacter;
408                     }
409                 }
410             }
411 
412             processedLength += block.length;
413             stateInfo.setProgress(static_cast<int>(processedLength * 100 / totalLength));
414             CHECK_OP(stateInfo, );
415         }
416     }
417 
418     const qint64 ungappedLength = totalLength - charactersCount.value(U2Msa::GAP_CHAR, 0);
419 
420     if (alphabet->isNucleic()) {
421         //  gcContent = ((nG + nC + nS + 0.5*nM + 0.5*nK + 0.5*nR + 0.5*nY + (2/3)*nB + (1/3)*nD + (1/3)*nH + (2/3)*nV + 0.5*nN) / n ) * 100%
422         for (int i = 0, n = charactersCount.size(); i < n; ++i) {
423             result.gcContent += charactersCount[i] * GC_RATIO_MAP[i];
424         }
425         result.gcContent = 100.0 * result.gcContent / ungappedLength;
426 
427         // Calculating molecular weight
428         // Source: http://www.basic.northwestern.edu/biotools/oligocalc.html
429         const QVector<double> *molecularWeightMap = nullptr;
430         if (alphabet->isRNA()) {
431             molecularWeightMap = &RNA_MOLECULAR_WEIGHT_MAP;
432         } else if (alphabet->isDNA()) {
433             molecularWeightMap = &DNA_MOLECULAR_WEIGHT_MAP;
434         }
435         SAFE_POINT_EXT(nullptr != molecularWeightMap, os.setError("An unknown alphabet"), );
436 
437         for (int i = 0, n = charactersCount.size(); i < n; ++i) {
438             result.ssMolecularWeight += charactersCount[i] * molecularWeightMap->at(i);
439             result.dsMolecularWeight += charactersCount[i] * molecularWeightMap->at(i) + rcCharactersCount[i] * molecularWeightMap->at(i);
440         }
441 
442         static const double PHOSPHATE_WEIGHT = 61.97;
443         result.ssMolecularWeight += (ungappedLength - 1) * PHOSPHATE_WEIGHT;
444         result.dsMolecularWeight += (ungappedLength - 1) * PHOSPHATE_WEIGHT * 2;
445 
446         // Calculating extinction coefficient
447         // Source: http://www.owczarzy.net/extinctionDNA.htm
448         const MononucleotidesExtinctionCoefficientsMap *mononucleotideExtinctionCoefficientsMap = nullptr;
449         const DinucleotidesExtinctionCoefficientsMap *dinucleotideExtinctionCoefficientsMap = nullptr;
450         if (alphabet->isRNA()) {
451             mononucleotideExtinctionCoefficientsMap = &RNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS;
452             dinucleotideExtinctionCoefficientsMap = &RNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS;
453         } else if (alphabet->isDNA()) {
454             mononucleotideExtinctionCoefficientsMap = &DNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS;
455             dinucleotideExtinctionCoefficientsMap = &DNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS;
456         }
457         SAFE_POINT_EXT(nullptr != mononucleotideExtinctionCoefficientsMap, os.setError("An unknown alphabet"), );
458         SAFE_POINT_EXT(nullptr != dinucleotideExtinctionCoefficientsMap, os.setError("An unknown alphabet"), );
459 
460         for (int i = 0, n = dinucleotidesCount.size(); i < n; ++i) {
461             for (int j = 0, m = dinucleotidesCount[i].size(); j < m; ++j) {
462                 result.ssExtinctionCoefficient += dinucleotidesCount[i][j] * dinucleotideExtinctionCoefficientsMap->at(i).at(j);
463                 result.dsExtinctionCoefficient += dinucleotidesCount[i][j] * dinucleotideExtinctionCoefficientsMap->at(i).at(j) +
464                                                   rcDinucleotidesCount[i][j] * dinucleotideExtinctionCoefficientsMap->at(i).at(j);
465             }
466         }
467 
468         for (int i = 0; i < charactersCount.count(); ++i) {
469             result.ssExtinctionCoefficient -= charactersCount[i] * mononucleotideExtinctionCoefficientsMap->at(i);
470             result.dsExtinctionCoefficient -= charactersCount[i] * mononucleotideExtinctionCoefficientsMap->at(i) +
471                                               rcCharactersCount[i] * mononucleotideExtinctionCoefficientsMap->at(i);
472         }
473 
474         // h = 0.287 * SEQ_AT-content + 0.059 * SEQ_GC-content
475         const double hypochromicity = 0.287 * (1 - result.gcContent / 100) + 0.059 * (result.gcContent / 100);
476 
477         result.dsExtinctionCoefficient *= (1 - hypochromicity);
478 
479         // Calculating melting temperature
480         const qint64 nA = charactersCount['A'];
481         const qint64 nC = charactersCount['C'];
482         const qint64 nG = charactersCount['G'];
483         const qint64 nT = charactersCount['T'];
484         if (totalLength < 15) {
485             result.meltingTemp = (nA + nT) * 2 + (nG + nC) * 4;
486         } else if (nA + nT + nG + nC != 0) {
487             result.meltingTemp = 64.9 + 41 * (nG + nC - 16.4) / static_cast<double>(nA + nT + nG + nC);
488         }
489 
490         // Calculating nmole/OD260
491         if (result.ssExtinctionCoefficient != 0) {
492             result.ssOd260AmountOfSubstance = 1000000.0 / result.ssExtinctionCoefficient;
493         }
494 
495         if (result.dsExtinctionCoefficient != 0) {
496             result.dsOd260AmountOfSubstance = 1000000.0 / result.dsExtinctionCoefficient;
497         }
498 
499         // Calculating μg/OD260
500         result.ssOd260Mass = result.ssOd260AmountOfSubstance * result.ssMolecularWeight * 0.001;
501         result.dsOd260Mass = result.dsOd260AmountOfSubstance * result.dsMolecularWeight * 0.001;
502     } else if (alphabet->isAmino()) {
503         // Calculating molecular weight
504         for (int i = 0, n = charactersCount.size(); i < n; ++i) {
505             result.ssMolecularWeight += charactersCount[i] * PROTEIN_MOLECULAR_WEIGHT_MAP.value(i);
506         }
507         static const double MWH2O = 18.0;
508         result.ssMolecularWeight -= (totalLength - 1) * MWH2O;
509 
510         // Calculating isoelectric point
511         result.isoelectricPoint = calcPi(sequenceDbi);
512     }
513 }
514 
calcPi(U2SequenceDbi * sequenceDbi)515 double DNAStatisticsTask::calcPi(U2SequenceDbi *sequenceDbi) {
516     U2OpStatus2Log os;
517     QVector<qint64> countMap(256, 0);
518     foreach (const U2Region &region, regions) {
519         QList<U2Region> blocks = U2Region::split(region, 1024 * 1024);
520         foreach (const U2Region &block, blocks) {
521             if (isCanceled() || hasError()) {
522                 break;
523             }
524             QByteArray seqBlock = sequenceDbi->getSequenceData(seqRef.entityId, block, os);
525             CHECK_OP(os, 0);
526             const char *sequenceData = seqBlock.constData();
527             for (int i = 0, n = seqBlock.size(); i < n; i++) {
528                 char c = sequenceData[i];
529                 if (pKaMap[c] != 0) {
530                     countMap[c]++;
531                 }
532             }
533             CHECK_OP(stateInfo, 0);
534         }
535     }
536 
537     countMap['c'] = 1;
538     countMap['n'] = 1;
539 
540     static const double CUTOFF = 0.001;
541     static const double INITIAL_CUTOFF = 2.0;
542 
543     double step = INITIAL_CUTOFF;
544     double pH = 0;
545     while (step > CUTOFF) {
546         if (calcChargeState(countMap, pH) > 0) {
547             pH += step;
548         } else {
549             step *= 0.5;
550             pH -= step;
551         }
552     }
553     return pH;
554 }
555 
calcChargeState(const QVector<qint64> & countMap,double pH)556 double DNAStatisticsTask::calcChargeState(const QVector<qint64> &countMap, double pH) {
557     double chargeState = 0.;
558     for (int i = 0; i < countMap.length(); i++) {
559         if (isCanceled() || hasError()) {
560             break;
561         }
562         double pKa = pKaMap[i];
563         double charge = PROTEIN_CHARGES_MAP[i];
564         chargeState += countMap[i] * charge / (1 + pow(10.0, charge * (pH - pKa)));
565     }
566     return chargeState;
567 }
568 
569 }  // namespace U2
570