1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "AssemblyConsensusAlgorithmSamtools.h"
23 #include <SamtoolsAdapter.h>
24 #include <cstring>
25 #include <stdio.h>
26 
27 #include "BuiltInAssemblyConsensusAlgorithms.h"
28 extern "C" {
29 #include <bam2bcf.h>
30 }
31 #include <U2Core/Log.h>
32 #include <U2Core/U2Assembly.h>
33 #include <U2Core/U2SafePoints.h>
34 
35 namespace U2 {
36 
37 //////////////////////////////////////////////////////////////////////////
38 // Factory
39 
AssemblyConsensusAlgorithmFactorySamtools()40 AssemblyConsensusAlgorithmFactorySamtools::AssemblyConsensusAlgorithmFactorySamtools()
41     : AssemblyConsensusAlgorithmFactory(BuiltInAssemblyConsensusAlgorithms::SAMTOOLS_ALGO) {
42 }
43 
getName() const44 QString AssemblyConsensusAlgorithmFactorySamtools::getName() const {
45     return tr("SAMtools");
46 }
47 
getDescription() const48 QString AssemblyConsensusAlgorithmFactorySamtools::getDescription() const {
49     return tr("Uses SAMtools to calculate consensus with regard to quality of reads");
50 }
51 
createAlgorithm()52 AssemblyConsensusAlgorithm *AssemblyConsensusAlgorithmFactorySamtools::createAlgorithm() {
53     return new AssemblyConsensusAlgorithmSamtools(this);
54 }
55 
56 //////////////////////////////////////////////////////////////////////////
57 // Algorithm
58 
59 struct AlgorithmInternal {
AlgorithmInternalU2::AlgorithmInternal60     AlgorithmInternal(const U2Region &region_, QByteArray referenceFragment_, U2OpStatus &os_)
61         : region(region_), os(os_), referenceFragment(referenceFragment_), result(region_.length, AssemblyConsensusAlgorithm::EMPTY_CHAR) {
62         lplbuf = bam_lplbuf_init(processBaseCallback, this);
63         bam_lplbuf_reset(lplbuf);
64 
65         bca = bcf_call_init(0.83, 13);
66     }
67 
processReadsU2::AlgorithmInternal68     void processReads(U2DbiIterator<U2AssemblyRead> *reads) {
69         ReadsContainer samtoolsReads;
70         os.setDescription(AssemblyConsensusAlgorithmFactorySamtools::tr("Fetching reads from database and converting to SAMtools format"));
71         SamtoolsAdapter::reads2samtools(reads, os, samtoolsReads);
72         CHECK_OP(os, );
73         os.setDescription(AssemblyConsensusAlgorithmFactorySamtools::tr("Sorting reads"));
74         samtoolsReads.sortByStartPos();
75 
76         os.setDescription(AssemblyConsensusAlgorithmFactorySamtools::tr("Calculating consensus"));
77         const int readsCount = samtoolsReads.size();
78         for (int i = 0; i < readsCount; ++i) {
79             bam_lplbuf_push(&samtoolsReads[i], lplbuf);
80             os.setProgress(i * 100 / readsCount);
81             CHECK_OP(os, );
82         }
83         bam_lplbuf_push(0, lplbuf);
84     }
85 
processBaseU2::AlgorithmInternal86     void processBase(uint32_t /*tid*/, uint32_t pos, int n, const bam_pileup1_t *pl) {
87         if (pos < region.startPos || pos >= region.endPos() || os.isCoR()) {
88             return;
89         }
90         uint32_t posInArray = pos - region.startPos;
91         // From bam_tview.c, tv_pl_func function
92         int i, j, rb;
93         uint32_t call = 0;
94         bcf_callret1_t bcr;
95 
96         rb = referenceFragment.isEmpty() ? 'N' : referenceFragment[posInArray];
97 
98         int qsum[4], a1, a2, tmp;
99         double p[3], prior = 30;
100         bcf_call_glfgen(n, pl, bam_nt16_table[rb], bca, &bcr);
101 
102         for (i = 0; i < 4; ++i)
103             qsum[i] = bcr.qsum[i] << 2 | i;
104         for (i = 1; i < 4; ++i)  // insertion sort
105             for (j = i; j > 0 && qsum[j] > qsum[j - 1]; --j)
106                 tmp = qsum[j], qsum[j] = qsum[j - 1], qsum[j - 1] = tmp;
107         a1 = qsum[0] & 3;
108         a2 = qsum[1] & 3;
109         p[0] = bcr.p[a1 * 5 + a1];
110         p[1] = bcr.p[a1 * 5 + a2] + prior;
111         p[2] = bcr.p[a2 * 5 + a2];
112         if ("ACGT"[a1] != toupper(rb))
113             p[0] += prior + 3;
114         if ("ACGT"[a2] != toupper(rb))
115             p[2] += prior + 3;
116         if (p[0] < p[1] && p[0] < p[2])
117             call = (1 << a1) << 16 | (int)((p[1] < p[2] ? p[1] : p[2]) - p[0] + .499);
118         else if (p[2] < p[1] && p[2] < p[0])
119             call = (1 << a2) << 16 | (int)((p[0] < p[1] ? p[0] : p[1]) - p[2] + .499);
120         else
121             call = (1 << a1 | 1 << a2) << 16 | (int)((p[0] < p[2] ? p[0] : p[2]) - p[1] + .499);
122 
123         char consensusChar = bam_nt16_rev_table[call >> 16 & 0xf];
124         result[posInArray] = consensusChar;
125     }
126 
processBaseCallbackU2::AlgorithmInternal127     static int processBaseCallback(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) {
128         AlgorithmInternal *algorithm = (AlgorithmInternal *)data;
129         algorithm->processBase(tid, pos, n, pl);
130         return 0;
131     }
132 
getResultU2::AlgorithmInternal133     QByteArray getResult() {
134         return result;
135     }
136 
~AlgorithmInternalU2::AlgorithmInternal137     ~AlgorithmInternal() {
138         bcf_call_destroy(bca);
139         bam_lplbuf_destroy(lplbuf);
140     }
141 
142 private:
143     const U2Region &region;
144     U2OpStatus &os;
145     QByteArray referenceFragment;
146     bam_lplbuf_t *lplbuf;
147     bcf_callaux_t *bca;
148     QByteArray result;
149 };
150 
getConsensusRegion(const U2Region & region,U2DbiIterator<U2AssemblyRead> * reads,QByteArray referenceFragment,U2OpStatus & os)151 QByteArray AssemblyConsensusAlgorithmSamtools::getConsensusRegion(const U2Region &region, U2DbiIterator<U2AssemblyRead> *reads, QByteArray referenceFragment, U2OpStatus &os) {
152     AlgorithmInternal algorithm(region, referenceFragment, os);
153 
154     algorithm.processReads(reads);
155 
156     return algorithm.getResult();
157 }
158 
159 }  // namespace U2
160