1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "AssemblyConsensusAlgorithmSamtools.h"
23 #include <SamtoolsAdapter.h>
24 #include <cstring>
25 #include <stdio.h>
26
27 #include "BuiltInAssemblyConsensusAlgorithms.h"
28 extern "C" {
29 #include <bam2bcf.h>
30 }
31 #include <U2Core/Log.h>
32 #include <U2Core/U2Assembly.h>
33 #include <U2Core/U2SafePoints.h>
34
35 namespace U2 {
36
37 //////////////////////////////////////////////////////////////////////////
38 // Factory
39
AssemblyConsensusAlgorithmFactorySamtools()40 AssemblyConsensusAlgorithmFactorySamtools::AssemblyConsensusAlgorithmFactorySamtools()
41 : AssemblyConsensusAlgorithmFactory(BuiltInAssemblyConsensusAlgorithms::SAMTOOLS_ALGO) {
42 }
43
getName() const44 QString AssemblyConsensusAlgorithmFactorySamtools::getName() const {
45 return tr("SAMtools");
46 }
47
getDescription() const48 QString AssemblyConsensusAlgorithmFactorySamtools::getDescription() const {
49 return tr("Uses SAMtools to calculate consensus with regard to quality of reads");
50 }
51
createAlgorithm()52 AssemblyConsensusAlgorithm *AssemblyConsensusAlgorithmFactorySamtools::createAlgorithm() {
53 return new AssemblyConsensusAlgorithmSamtools(this);
54 }
55
56 //////////////////////////////////////////////////////////////////////////
57 // Algorithm
58
59 struct AlgorithmInternal {
AlgorithmInternalU2::AlgorithmInternal60 AlgorithmInternal(const U2Region ®ion_, QByteArray referenceFragment_, U2OpStatus &os_)
61 : region(region_), os(os_), referenceFragment(referenceFragment_), result(region_.length, AssemblyConsensusAlgorithm::EMPTY_CHAR) {
62 lplbuf = bam_lplbuf_init(processBaseCallback, this);
63 bam_lplbuf_reset(lplbuf);
64
65 bca = bcf_call_init(0.83, 13);
66 }
67
processReadsU2::AlgorithmInternal68 void processReads(U2DbiIterator<U2AssemblyRead> *reads) {
69 ReadsContainer samtoolsReads;
70 os.setDescription(AssemblyConsensusAlgorithmFactorySamtools::tr("Fetching reads from database and converting to SAMtools format"));
71 SamtoolsAdapter::reads2samtools(reads, os, samtoolsReads);
72 CHECK_OP(os, );
73 os.setDescription(AssemblyConsensusAlgorithmFactorySamtools::tr("Sorting reads"));
74 samtoolsReads.sortByStartPos();
75
76 os.setDescription(AssemblyConsensusAlgorithmFactorySamtools::tr("Calculating consensus"));
77 const int readsCount = samtoolsReads.size();
78 for (int i = 0; i < readsCount; ++i) {
79 bam_lplbuf_push(&samtoolsReads[i], lplbuf);
80 os.setProgress(i * 100 / readsCount);
81 CHECK_OP(os, );
82 }
83 bam_lplbuf_push(0, lplbuf);
84 }
85
processBaseU2::AlgorithmInternal86 void processBase(uint32_t /*tid*/, uint32_t pos, int n, const bam_pileup1_t *pl) {
87 if (pos < region.startPos || pos >= region.endPos() || os.isCoR()) {
88 return;
89 }
90 uint32_t posInArray = pos - region.startPos;
91 // From bam_tview.c, tv_pl_func function
92 int i, j, rb;
93 uint32_t call = 0;
94 bcf_callret1_t bcr;
95
96 rb = referenceFragment.isEmpty() ? 'N' : referenceFragment[posInArray];
97
98 int qsum[4], a1, a2, tmp;
99 double p[3], prior = 30;
100 bcf_call_glfgen(n, pl, bam_nt16_table[rb], bca, &bcr);
101
102 for (i = 0; i < 4; ++i)
103 qsum[i] = bcr.qsum[i] << 2 | i;
104 for (i = 1; i < 4; ++i) // insertion sort
105 for (j = i; j > 0 && qsum[j] > qsum[j - 1]; --j)
106 tmp = qsum[j], qsum[j] = qsum[j - 1], qsum[j - 1] = tmp;
107 a1 = qsum[0] & 3;
108 a2 = qsum[1] & 3;
109 p[0] = bcr.p[a1 * 5 + a1];
110 p[1] = bcr.p[a1 * 5 + a2] + prior;
111 p[2] = bcr.p[a2 * 5 + a2];
112 if ("ACGT"[a1] != toupper(rb))
113 p[0] += prior + 3;
114 if ("ACGT"[a2] != toupper(rb))
115 p[2] += prior + 3;
116 if (p[0] < p[1] && p[0] < p[2])
117 call = (1 << a1) << 16 | (int)((p[1] < p[2] ? p[1] : p[2]) - p[0] + .499);
118 else if (p[2] < p[1] && p[2] < p[0])
119 call = (1 << a2) << 16 | (int)((p[0] < p[1] ? p[0] : p[1]) - p[2] + .499);
120 else
121 call = (1 << a1 | 1 << a2) << 16 | (int)((p[0] < p[2] ? p[0] : p[2]) - p[1] + .499);
122
123 char consensusChar = bam_nt16_rev_table[call >> 16 & 0xf];
124 result[posInArray] = consensusChar;
125 }
126
processBaseCallbackU2::AlgorithmInternal127 static int processBaseCallback(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) {
128 AlgorithmInternal *algorithm = (AlgorithmInternal *)data;
129 algorithm->processBase(tid, pos, n, pl);
130 return 0;
131 }
132
getResultU2::AlgorithmInternal133 QByteArray getResult() {
134 return result;
135 }
136
~AlgorithmInternalU2::AlgorithmInternal137 ~AlgorithmInternal() {
138 bcf_call_destroy(bca);
139 bam_lplbuf_destroy(lplbuf);
140 }
141
142 private:
143 const U2Region ®ion;
144 U2OpStatus &os;
145 QByteArray referenceFragment;
146 bam_lplbuf_t *lplbuf;
147 bcf_callaux_t *bca;
148 QByteArray result;
149 };
150
getConsensusRegion(const U2Region & region,U2DbiIterator<U2AssemblyRead> * reads,QByteArray referenceFragment,U2OpStatus & os)151 QByteArray AssemblyConsensusAlgorithmSamtools::getConsensusRegion(const U2Region ®ion, U2DbiIterator<U2AssemblyRead> *reads, QByteArray referenceFragment, U2OpStatus &os) {
152 AlgorithmInternal algorithm(region, referenceFragment, os);
153
154 algorithm.processReads(reads);
155
156 return algorithm.getResult();
157 }
158
159 } // namespace U2
160