1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include <math.h>
23
24 #include <U2Core/DNAAlphabet.h>
25 #include <U2Core/DNASequenceUtils.h>
26 #include <U2Core/U2DbiUtils.h>
27 #include <U2Core/U2OpStatusUtils.h>
28 #include <U2Core/U2SafePoints.h>
29 #include <U2Core/U2SequenceDbi.h>
30
31 #include "DNAStatisticsTask.h"
32
33 namespace U2 {
34
35 #define MAP_SIZE 256
36
DNAStatistics()37 DNAStatistics::DNAStatistics() {
38 clear();
39 }
40
clear()41 void DNAStatistics::clear() {
42 length = 0;
43 gcContent = 0;
44 meltingTemp = 0;
45
46 ssMolecularWeight = 0;
47 ssExtinctionCoefficient = 0;
48 ssOd260AmountOfSubstance = 0;
49 ssOd260Mass = 0;
50
51 dsMolecularWeight = 0;
52 dsExtinctionCoefficient = 0;
53 dsOd260AmountOfSubstance = 0;
54 dsOd260Mass = 0;
55
56 isoelectricPoint = 0;
57 }
58
createDnaMolecularWeightMap()59 static QVector<double> createDnaMolecularWeightMap() {
60 QVector<double> res(MAP_SIZE, 0);
61
62 res['A'] = 251.24;
63 res['C'] = 227.22;
64 res['G'] = 267.24;
65 res['T'] = 242.23;
66
67 // Characters from the extended alphabet are calculated as avarage value
68 res['W'] = (res['A'] + res['T']) / 2;
69 res['S'] = (res['C'] + res['G']) / 2;
70 res['M'] = (res['A'] + res['C']) / 2;
71 res['K'] = (res['G'] + res['T']) / 2;
72 res['R'] = (res['A'] + res['G']) / 2;
73 res['Y'] = (res['C'] + res['T']) / 2;
74
75 res['B'] = (res['C'] + res['G'] + res['T']) / 3;
76 res['D'] = (res['A'] + res['G'] + res['T']) / 3;
77 res['H'] = (res['A'] + res['C'] + res['T']) / 3;
78 res['V'] = (res['A'] + res['C'] + res['G']) / 3;
79
80 res['N'] = (res['A'] + res['C'] + res['G'] + res['T']) / 4;
81
82 return res;
83 }
84
createRnaMolecularWeightMap()85 static QVector<double> createRnaMolecularWeightMap() {
86 QVector<double> res(MAP_SIZE, 0);
87
88 res['A'] = 267.24;
89 res['C'] = 243.22;
90 res['G'] = 283.24;
91 res['U'] = 244.20;
92
93 // Characters from the extended alphabet are calculated as avarage value
94 res['W'] = (res['A'] + res['U']) / 2;
95 res['S'] = (res['C'] + res['G']) / 2;
96 res['M'] = (res['A'] + res['C']) / 2;
97 res['K'] = (res['G'] + res['U']) / 2;
98 res['R'] = (res['A'] + res['G']) / 2;
99 res['Y'] = (res['C'] + res['U']) / 2;
100
101 res['B'] = (res['C'] + res['G'] + res['U']) / 3;
102 res['D'] = (res['A'] + res['G'] + res['U']) / 3;
103 res['H'] = (res['A'] + res['C'] + res['U']) / 3;
104 res['V'] = (res['A'] + res['C'] + res['G']) / 3;
105
106 res['N'] = (res['A'] + res['C'] + res['G'] + res['U']) / 4;
107
108 return res;
109 }
110
createDnaAlphabetResolutionMap()111 static QMap<char, QByteArray> createDnaAlphabetResolutionMap() {
112 QMap<char, QByteArray> res;
113 res['A'] = "A";
114 res['C'] = "C";
115 res['G'] = "G";
116 res['T'] = "T";
117 res['W'] = "AT";
118 res['S'] = "CG";
119 res['M'] = "AC";
120 res['K'] = "GT";
121 res['R'] = "AG";
122 res['Y'] = "CT";
123 res['B'] = "CGT";
124 res['D'] = "AGT";
125 res['H'] = "ACT";
126 res['V'] = "ACG";
127 res['N'] = "ACGT";
128 return res;
129 }
130
createRnaAlphabetResolutionMap()131 static QMap<char, QByteArray> createRnaAlphabetResolutionMap() {
132 QMap<char, QByteArray> res;
133 res['A'] = "A";
134 res['C'] = "C";
135 res['G'] = "G";
136 res['U'] = "U";
137 res['W'] = "AU";
138 res['S'] = "CG";
139 res['M'] = "AC";
140 res['K'] = "GU";
141 res['R'] = "AG";
142 res['Y'] = "CU";
143 res['B'] = "CGU";
144 res['D'] = "AGU";
145 res['H'] = "ACU";
146 res['V'] = "ACG";
147 res['N'] = "ACGU";
148 return res;
149 }
150
fillMapWithAvarageValues(QVector<QVector<int>> & map,const QMap<char,QByteArray> & alphabetResolutionMap)151 static void fillMapWithAvarageValues(QVector<QVector<int>> &map, const QMap<char, QByteArray> &alphabetResolutionMap) {
152 foreach (const char i, alphabetResolutionMap.keys()) {
153 foreach (const char j, alphabetResolutionMap.keys()) {
154 if (0 == map[i][j]) {
155 // Unambiguous nucleotide pairs are already registered
156 // If at least one nucleotide in pair is ambiguous, then the pair value should be an avarage value of all possible variants.
157 int value = 0;
158 for (int k = 0; k < alphabetResolutionMap[i].length(); ++k) {
159 for (int m = 0; m < alphabetResolutionMap[j].length(); ++m) {
160 char char1 = alphabetResolutionMap[i][k];
161 char char2 = alphabetResolutionMap[j][m];
162 value += map[char1][char2];
163 }
164 }
165 const int count = alphabetResolutionMap[i].length() * alphabetResolutionMap[j].length();
166 value /= count;
167 map[i][j] = value;
168 }
169 }
170 }
171 }
172
createDnaMononucleotidesExtinctionCoefficients()173 static MononucleotidesExtinctionCoefficientsMap createDnaMononucleotidesExtinctionCoefficients() {
174 MononucleotidesExtinctionCoefficientsMap res(MAP_SIZE, 0);
175 res['A'] = 15400;
176 res['C'] = 7400;
177 res['G'] = 11500;
178 res['T'] = 8700;
179 return res;
180 }
181
createDnaDinucleotidesExtinctionCoefficients()182 static DinucleotidesExtinctionCoefficientsMap createDnaDinucleotidesExtinctionCoefficients() {
183 DinucleotidesExtinctionCoefficientsMap res(MAP_SIZE, QVector<int>(MAP_SIZE, 0));
184
185 res['A']['A'] = 27400;
186 res['A']['C'] = 21200;
187 res['A']['G'] = 25000;
188 res['A']['T'] = 22800;
189
190 res['C']['A'] = 21200;
191 res['C']['C'] = 14600;
192 res['C']['G'] = 18000;
193 res['C']['T'] = 15200;
194
195 res['G']['A'] = 25200;
196 res['G']['C'] = 17600;
197 res['G']['G'] = 21600;
198 res['G']['T'] = 20000;
199
200 res['T']['A'] = 23400;
201 res['T']['C'] = 16200;
202 res['T']['G'] = 19000;
203 res['T']['T'] = 16800;
204
205 fillMapWithAvarageValues(res, createDnaAlphabetResolutionMap());
206
207 return res;
208 }
209
createRnaMononucleotidesExtinctionCoefficients()210 static MononucleotidesExtinctionCoefficientsMap createRnaMononucleotidesExtinctionCoefficients() {
211 MononucleotidesExtinctionCoefficientsMap res(MAP_SIZE, 0);
212 res['A'] = 15400;
213 res['C'] = 7200;
214 res['G'] = 11500;
215 res['U'] = 9900;
216 return res;
217 }
218
createRnaDinucleotidesExtinctionCoefficients()219 static DinucleotidesExtinctionCoefficientsMap createRnaDinucleotidesExtinctionCoefficients() {
220 DinucleotidesExtinctionCoefficientsMap res(MAP_SIZE, QVector<int>(MAP_SIZE, 0));
221
222 res['A']['A'] = 27400;
223 res['A']['C'] = 21000;
224 res['A']['G'] = 25000;
225 res['A']['U'] = 24000;
226
227 res['C']['A'] = 21000;
228 res['C']['C'] = 14200;
229 res['C']['G'] = 17800;
230 res['C']['U'] = 16200;
231
232 res['G']['A'] = 25200;
233 res['G']['C'] = 17400;
234 res['G']['G'] = 21600;
235 res['G']['U'] = 21200;
236
237 res['U']['A'] = 24600;
238 res['U']['C'] = 17200;
239 res['U']['G'] = 20000;
240 res['U']['U'] = 19600;
241
242 fillMapWithAvarageValues(res, createRnaAlphabetResolutionMap());
243
244 return res;
245 }
246
createProteinMWMap()247 static QVector<double> createProteinMWMap() {
248 QVector<double> mwMap(MAP_SIZE, 0);
249 mwMap['A'] = 89.09; // ALA
250 mwMap['R'] = 174.20; // ARG
251 mwMap['N'] = 132.12; // ASN
252 mwMap['D'] = 133.10; // ASP
253 mwMap['B'] = 132.61; // ASX
254 mwMap['C'] = 121.15; // CYS
255 mwMap['Q'] = 146.15; // GLN
256 mwMap['E'] = 147.13; // GLU
257 mwMap['Z'] = 146.64; // GLX
258 mwMap['G'] = 75.07; // GLY
259 mwMap['H'] = 155.16; // HIS
260 mwMap['I'] = 131.17; // ILE
261 mwMap['L'] = 131.17; // LEU
262 mwMap['K'] = 146.19; // LYS
263 mwMap['M'] = 149.21; // MET
264 mwMap['F'] = 165.19; // PHE
265 mwMap['P'] = 115.13; // PRO
266 mwMap['S'] = 105.09; // SER
267 mwMap['T'] = 119.12; // THR
268 mwMap['W'] = 204.23; // TRP
269 mwMap['Y'] = 181.19; // TYR
270 mwMap['V'] = 117.15; // VAL
271 return mwMap;
272 }
273
createPKAMap()274 static QVector<double> createPKAMap() {
275 QVector<double> res(MAP_SIZE, 0);
276 res['D'] = 4.0;
277 res['C'] = 8.5;
278 res['E'] = 4.4;
279 res['Y'] = 10.0;
280 res['c'] = 3.1; // CTERM
281 res['R'] = 12.0;
282 res['H'] = 6.5;
283 res['K'] = 10.4;
284 res['n'] = 8.0; // NTERM
285 return res;
286 }
287
createChargeMap()288 static QVector<int> createChargeMap() {
289 QVector<int> res(MAP_SIZE, 0);
290 res['D'] = -1;
291 res['C'] = -1;
292 res['E'] = -1;
293 res['Y'] = -1;
294 res['c'] = -1; // CTERM
295 res['R'] = 1;
296 res['H'] = 1;
297 res['K'] = 1;
298 res['n'] = 1; // NTERM
299 return res;
300 }
301
createGcRatioMap()302 static QVector<double> createGcRatioMap() {
303 QVector<double> res(MAP_SIZE, 0);
304 res['B'] = 2.0 / 3;
305 res['C'] = 1;
306 res['D'] = 1.0 / 3;
307 res['G'] = 1;
308 res['H'] = 1.0 / 3;
309 res['K'] = 0.5;
310 res['M'] = 0.5;
311 res['N'] = 0.5;
312 res['R'] = 0.5;
313 res['S'] = 1;
314 res['V'] = 2.0 / 3;
315 res['X'] = 0.5;
316 res['Y'] = 0.5;
317 return res;
318 }
319
320 const QVector<double> DNAStatisticsTask::DNA_MOLECULAR_WEIGHT_MAP = createDnaMolecularWeightMap();
321 const QVector<double> DNAStatisticsTask::RNA_MOLECULAR_WEIGHT_MAP = createRnaMolecularWeightMap();
322
323 const QVector<int> DNAStatisticsTask::DNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS = createDnaMononucleotidesExtinctionCoefficients();
324 const QVector<QVector<int>> DNAStatisticsTask::DNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS = createDnaDinucleotidesExtinctionCoefficients();
325 const QVector<int> DNAStatisticsTask::RNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS = createRnaMononucleotidesExtinctionCoefficients();
326 const QVector<QVector<int>> DNAStatisticsTask::RNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS = createRnaDinucleotidesExtinctionCoefficients();
327
328 const QVector<double> DNAStatisticsTask::PROTEIN_MOLECULAR_WEIGHT_MAP = createProteinMWMap();
329 const QVector<double> DNAStatisticsTask::pKaMap = createPKAMap();
330 const QVector<int> DNAStatisticsTask::PROTEIN_CHARGES_MAP = createChargeMap();
331 const QVector<double> DNAStatisticsTask::GC_RATIO_MAP = createGcRatioMap();
332
DNAStatisticsTask(const DNAAlphabet * alphabet,const U2EntityRef seqRef,const QVector<U2Region> & regions)333 DNAStatisticsTask::DNAStatisticsTask(const DNAAlphabet *alphabet,
334 const U2EntityRef seqRef,
335 const QVector<U2Region> ®ions)
336 : BackgroundTask<DNAStatistics>(tr("Calculate sequence statistics"), TaskFlag_None),
337 alphabet(alphabet),
338 seqRef(seqRef),
339 regions(regions),
340 charactersCount(MAP_SIZE, 0),
341 rcCharactersCount(MAP_SIZE, 0),
342 dinucleotidesCount(MAP_SIZE, QVector<qint64>(MAP_SIZE, 0)),
343 rcDinucleotidesCount(MAP_SIZE, QVector<qint64>(MAP_SIZE, 0)) {
344 SAFE_POINT_EXT(alphabet != nullptr, setError(tr("Alphabet is NULL")), );
345 }
346
run()347 void DNAStatisticsTask::run() {
348 computeStats();
349 }
350
computeStats()351 void DNAStatisticsTask::computeStats() {
352 result.clear();
353
354 U2OpStatus2Log os;
355 DbiConnection dbiConnection(seqRef.dbiRef, os);
356 CHECK_OP(os, );
357
358 U2SequenceDbi *sequenceDbi = dbiConnection.dbi->getSequenceDbi();
359 CHECK(sequenceDbi != nullptr, );
360 SAFE_POINT_EXT(alphabet != nullptr, setError(tr("Alphabet is NULL")), );
361 qint64 totalLength = U2Region::sumLength(regions);
362 qint64 processedLength = 0;
363
364 result.length = totalLength;
365 if (alphabet->isRaw()) {
366 // Other stats can't be computed for raw alphabet
367 return;
368 }
369
370 foreach (const U2Region ®ion, regions) {
371 QList<U2Region> blocks = U2Region::split(region, 1024 * 1024);
372 foreach (const U2Region &block, blocks) {
373 if (isCanceled() || hasError()) {
374 break;
375 }
376 const QByteArray seqBlock = sequenceDbi->getSequenceData(seqRef.entityId, block, os);
377 CHECK_OP(os, );
378 const char *sequenceData = seqBlock.constData();
379
380 int previousChar = U2Msa::GAP_CHAR;
381 for (int i = 0, n = seqBlock.size(); i < n; i++) {
382 const int character = static_cast<int>(sequenceData[i]);
383 charactersCount[character]++;
384
385 if (alphabet->isNucleic()) {
386 if (previousChar != U2Msa::GAP_CHAR && character != U2Msa::GAP_CHAR) {
387 dinucleotidesCount[previousChar][character]++;
388 }
389 }
390 if (U2Msa::GAP_CHAR != character) {
391 previousChar = character;
392 }
393 }
394
395 if (alphabet->isNucleic()) {
396 const QByteArray rcSeqBlock = DNASequenceUtils::reverseComplement(seqBlock, alphabet);
397 const char *rcSequenceData = rcSeqBlock.constData();
398
399 int previousRcChar = U2Msa::GAP_CHAR;
400 for (int i = 0, n = rcSeqBlock.size(); i < n; i++) {
401 const int rcCharacter = static_cast<int>(rcSequenceData[i]);
402 rcCharactersCount[rcCharacter]++;
403 if (previousRcChar != U2Msa::GAP_CHAR && rcCharacter != U2Msa::GAP_CHAR) {
404 rcDinucleotidesCount[rcCharacter][previousRcChar]++; // dinucleotides on the complement strand are calculated in 5'->3' direction
405 }
406 if (U2Msa::GAP_CHAR != rcCharacter) {
407 previousRcChar = rcCharacter;
408 }
409 }
410 }
411
412 processedLength += block.length;
413 stateInfo.setProgress(static_cast<int>(processedLength * 100 / totalLength));
414 CHECK_OP(stateInfo, );
415 }
416 }
417
418 const qint64 ungappedLength = totalLength - charactersCount.value(U2Msa::GAP_CHAR, 0);
419
420 if (alphabet->isNucleic()) {
421 // gcContent = ((nG + nC + nS + 0.5*nM + 0.5*nK + 0.5*nR + 0.5*nY + (2/3)*nB + (1/3)*nD + (1/3)*nH + (2/3)*nV + 0.5*nN) / n ) * 100%
422 for (int i = 0, n = charactersCount.size(); i < n; ++i) {
423 result.gcContent += charactersCount[i] * GC_RATIO_MAP[i];
424 }
425 result.gcContent = 100.0 * result.gcContent / ungappedLength;
426
427 // Calculating molecular weight
428 // Source: http://www.basic.northwestern.edu/biotools/oligocalc.html
429 const QVector<double> *molecularWeightMap = nullptr;
430 if (alphabet->isRNA()) {
431 molecularWeightMap = &RNA_MOLECULAR_WEIGHT_MAP;
432 } else if (alphabet->isDNA()) {
433 molecularWeightMap = &DNA_MOLECULAR_WEIGHT_MAP;
434 }
435 SAFE_POINT_EXT(nullptr != molecularWeightMap, os.setError("An unknown alphabet"), );
436
437 for (int i = 0, n = charactersCount.size(); i < n; ++i) {
438 result.ssMolecularWeight += charactersCount[i] * molecularWeightMap->at(i);
439 result.dsMolecularWeight += charactersCount[i] * molecularWeightMap->at(i) + rcCharactersCount[i] * molecularWeightMap->at(i);
440 }
441
442 static const double PHOSPHATE_WEIGHT = 61.97;
443 result.ssMolecularWeight += (ungappedLength - 1) * PHOSPHATE_WEIGHT;
444 result.dsMolecularWeight += (ungappedLength - 1) * PHOSPHATE_WEIGHT * 2;
445
446 // Calculating extinction coefficient
447 // Source: http://www.owczarzy.net/extinctionDNA.htm
448 const MononucleotidesExtinctionCoefficientsMap *mononucleotideExtinctionCoefficientsMap = nullptr;
449 const DinucleotidesExtinctionCoefficientsMap *dinucleotideExtinctionCoefficientsMap = nullptr;
450 if (alphabet->isRNA()) {
451 mononucleotideExtinctionCoefficientsMap = &RNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS;
452 dinucleotideExtinctionCoefficientsMap = &RNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS;
453 } else if (alphabet->isDNA()) {
454 mononucleotideExtinctionCoefficientsMap = &DNA_MONONUCLEOTIDES_EXTINCTION_COEFFICIENTS;
455 dinucleotideExtinctionCoefficientsMap = &DNA_DINUCLEOTIDES_EXTINCTION_COEFFICIENTS;
456 }
457 SAFE_POINT_EXT(nullptr != mononucleotideExtinctionCoefficientsMap, os.setError("An unknown alphabet"), );
458 SAFE_POINT_EXT(nullptr != dinucleotideExtinctionCoefficientsMap, os.setError("An unknown alphabet"), );
459
460 for (int i = 0, n = dinucleotidesCount.size(); i < n; ++i) {
461 for (int j = 0, m = dinucleotidesCount[i].size(); j < m; ++j) {
462 result.ssExtinctionCoefficient += dinucleotidesCount[i][j] * dinucleotideExtinctionCoefficientsMap->at(i).at(j);
463 result.dsExtinctionCoefficient += dinucleotidesCount[i][j] * dinucleotideExtinctionCoefficientsMap->at(i).at(j) +
464 rcDinucleotidesCount[i][j] * dinucleotideExtinctionCoefficientsMap->at(i).at(j);
465 }
466 }
467
468 for (int i = 0; i < charactersCount.count(); ++i) {
469 result.ssExtinctionCoefficient -= charactersCount[i] * mononucleotideExtinctionCoefficientsMap->at(i);
470 result.dsExtinctionCoefficient -= charactersCount[i] * mononucleotideExtinctionCoefficientsMap->at(i) +
471 rcCharactersCount[i] * mononucleotideExtinctionCoefficientsMap->at(i);
472 }
473
474 // h = 0.287 * SEQ_AT-content + 0.059 * SEQ_GC-content
475 const double hypochromicity = 0.287 * (1 - result.gcContent / 100) + 0.059 * (result.gcContent / 100);
476
477 result.dsExtinctionCoefficient *= (1 - hypochromicity);
478
479 // Calculating melting temperature
480 const qint64 nA = charactersCount['A'];
481 const qint64 nC = charactersCount['C'];
482 const qint64 nG = charactersCount['G'];
483 const qint64 nT = charactersCount['T'];
484 if (totalLength < 15) {
485 result.meltingTemp = (nA + nT) * 2 + (nG + nC) * 4;
486 } else if (nA + nT + nG + nC != 0) {
487 result.meltingTemp = 64.9 + 41 * (nG + nC - 16.4) / static_cast<double>(nA + nT + nG + nC);
488 }
489
490 // Calculating nmole/OD260
491 if (result.ssExtinctionCoefficient != 0) {
492 result.ssOd260AmountOfSubstance = 1000000.0 / result.ssExtinctionCoefficient;
493 }
494
495 if (result.dsExtinctionCoefficient != 0) {
496 result.dsOd260AmountOfSubstance = 1000000.0 / result.dsExtinctionCoefficient;
497 }
498
499 // Calculating μg/OD260
500 result.ssOd260Mass = result.ssOd260AmountOfSubstance * result.ssMolecularWeight * 0.001;
501 result.dsOd260Mass = result.dsOd260AmountOfSubstance * result.dsMolecularWeight * 0.001;
502 } else if (alphabet->isAmino()) {
503 // Calculating molecular weight
504 for (int i = 0, n = charactersCount.size(); i < n; ++i) {
505 result.ssMolecularWeight += charactersCount[i] * PROTEIN_MOLECULAR_WEIGHT_MAP.value(i);
506 }
507 static const double MWH2O = 18.0;
508 result.ssMolecularWeight -= (totalLength - 1) * MWH2O;
509
510 // Calculating isoelectric point
511 result.isoelectricPoint = calcPi(sequenceDbi);
512 }
513 }
514
calcPi(U2SequenceDbi * sequenceDbi)515 double DNAStatisticsTask::calcPi(U2SequenceDbi *sequenceDbi) {
516 U2OpStatus2Log os;
517 QVector<qint64> countMap(256, 0);
518 foreach (const U2Region ®ion, regions) {
519 QList<U2Region> blocks = U2Region::split(region, 1024 * 1024);
520 foreach (const U2Region &block, blocks) {
521 if (isCanceled() || hasError()) {
522 break;
523 }
524 QByteArray seqBlock = sequenceDbi->getSequenceData(seqRef.entityId, block, os);
525 CHECK_OP(os, 0);
526 const char *sequenceData = seqBlock.constData();
527 for (int i = 0, n = seqBlock.size(); i < n; i++) {
528 char c = sequenceData[i];
529 if (pKaMap[c] != 0) {
530 countMap[c]++;
531 }
532 }
533 CHECK_OP(stateInfo, 0);
534 }
535 }
536
537 countMap['c'] = 1;
538 countMap['n'] = 1;
539
540 static const double CUTOFF = 0.001;
541 static const double INITIAL_CUTOFF = 2.0;
542
543 double step = INITIAL_CUTOFF;
544 double pH = 0;
545 while (step > CUTOFF) {
546 if (calcChargeState(countMap, pH) > 0) {
547 pH += step;
548 } else {
549 step *= 0.5;
550 pH -= step;
551 }
552 }
553 return pH;
554 }
555
calcChargeState(const QVector<qint64> & countMap,double pH)556 double DNAStatisticsTask::calcChargeState(const QVector<qint64> &countMap, double pH) {
557 double chargeState = 0.;
558 for (int i = 0; i < countMap.length(); i++) {
559 if (isCanceled() || hasError()) {
560 break;
561 }
562 double pKa = pKaMap[i];
563 double charge = PROTEIN_CHARGES_MAP[i];
564 chargeState += countMap[i] * charge / (1 + pow(10.0, charge * (pH - pKa)));
565 }
566 return chargeState;
567 }
568
569 } // namespace U2
570