1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "MysqlSingleTableAssemblyAdapter.h"
23 
24 #include <U2Core/U2AssemblyUtils.h>
25 #include <U2Core/U2SafePoints.h>
26 
27 #include "MysqlAssemblyUtils.h"
28 #include "MysqlHelpers.h"
29 #include "mysql_dbi/MysqlDbi.h"
30 #include "mysql_dbi/MysqlObjectDbi.h"
31 
32 namespace U2 {
33 
34 // :end -> end of the region
35 // :start -> start of the region  (- max read size)
36 // :realStart -> real start of the region
37 const QString MysqlSingleTableAssemblyAdapter::DEFAULT_RANGE_CONDITION_CHECK = " (gstart < :end AND gstart + elen > :start) ";
38 const QString MysqlSingleTableAssemblyAdapter::RTM_RANGE_CONDITION_CHECK = " ((gstart < :end AND gstart > :start) AND gstart + elen > :realStart) ";
39 const QString MysqlSingleTableAssemblyAdapter::RTM_RANGE_CONDITION_CHECK_COUNT = "  (gstart < :end AND gstart > :start) ";
40 const QString MysqlSingleTableAssemblyAdapter::ALL_READ_FIELDS = " id, prow, gstart, elen, flags, mq, data";
41 const QString MysqlSingleTableAssemblyAdapter::SORTED_READS = " ORDER BY gstart ASC ";
42 
MysqlSingleTableAssemblyAdapter(MysqlDbi * dbi,const U2DataId & assemblyId,char tablePrefix,const QString & tableSuffix,const AssemblyCompressor * compressor,MysqlDbRef * db,U2OpStatus &)43 MysqlSingleTableAssemblyAdapter::MysqlSingleTableAssemblyAdapter(MysqlDbi *dbi,
44                                                                  const U2DataId &assemblyId,
45                                                                  char tablePrefix,
46                                                                  const QString &tableSuffix,
47                                                                  const AssemblyCompressor *compressor,
48                                                                  MysqlDbRef *db,
49                                                                  U2OpStatus &)
50     : MysqlAssemblyAdapter(assemblyId, compressor, db),
51       dbi(dbi),
52       readsTable(getReadsTableName(assemblyId, tablePrefix, tableSuffix)),
53       rangeConditionCheck(DEFAULT_RANGE_CONDITION_CHECK),
54       rangeConditionCheckForCount(DEFAULT_RANGE_CONDITION_CHECK),
55       minReadLength(0),
56       maxReadLength(0),
57       rangeMode(false),
58       inited(false) {
59 }
60 
createReadsTables(U2OpStatus & os)61 void MysqlSingleTableAssemblyAdapter::createReadsTables(U2OpStatus &os) {
62     MysqlTransaction t(db, os);
63 
64     // id - id of the read
65     // name - read name hash
66     // prow - packed view row
67     // gstart - start of the read
68     // elen - effective length of the read
69     // flags - read flags
70     // mq - mapping quality
71     // data - packed data: CIGAR, read sequence, quality string
72     static QString q = "CREATE TABLE IF NOT EXISTS %1 (id BIGINT PRIMARY KEY AUTO_INCREMENT, name BIGINT NOT NULL, prow BIGINT NOT NULL, "
73                        "gstart BIGINT NOT NULL, elen BIGINT NOT NULL, flags BIGINT NOT NULL, mq TINYINT UNSIGNED NOT NULL, data LONGBLOB NOT NULL) ENGINE=InnoDB DEFAULT CHARSET=utf8";
74 
75     U2SqlQuery(q.arg(readsTable), db, os).execute();
76     CHECK_OP(os, );
77     inited = true;
78 }
79 
80 //! %1 - database, %2 - table name, %3 - index name, %4 - index column
81 //! Example of working query:
82 // select if (EXISTS(SELECT distinct index_name FROM INFORMATION_SCHEMA.STATISTICS
83 // WHERE table_schema = 'uu' AND table_name = 'AssemblyRead_M2_800_4000_0' and index_name like 'AssemblyRead_M2_800_4000_0_name'),
84 // "SELECT id FROM AssemblyRead_M2_800_4000_0;", "CREATE INDEX AssemblyRead_M2_800_4000_0_name ON uu.AssemblyRead_M2_800_4000_0(name);") into @a;
85 // prepare smt from @a; execute smt; deallocate prepare smt;
86 static const QString CREATE_INDEX_IF_NOT_EXISTS_QUERY =
87     "select if"
88     "("
89     "EXISTS(SELECT distinct index_name FROM INFORMATION_SCHEMA.STATISTICS "
90     "WHERE table_schema = '%1' AND table_name = '%2' and index_name like '%3')"
91     ","
92     "\"SELECT %4 FROM %2;\""
93     ","
94     "\"CREATE INDEX %3 ON %1.%2(%4);\""
95     ") into @a; prepare smt from @a; execute smt; deallocate prepare smt;";
96 
createReadsIndexes(U2OpStatus & os)97 void MysqlSingleTableAssemblyAdapter::createReadsIndexes(U2OpStatus &os) {
98     MysqlTransaction t(db, os);
99 
100     U2SqlQuery(CREATE_INDEX_IF_NOT_EXISTS_QUERY.arg(db->handle.databaseName())
101                    .arg(readsTable)
102                    .arg(readsTable + "_gstart")
103                    .arg("gstart"),
104                db,
105                os)
106         .execute();
107     CHECK_OP(os, );
108 
109     U2SqlQuery(CREATE_INDEX_IF_NOT_EXISTS_QUERY.arg(db->handle.databaseName())
110                    .arg(readsTable)
111                    .arg(readsTable + "_name")
112                    .arg("name"),
113                db,
114                os)
115         .execute();
116 }
117 
countReads(const U2Region & r,U2OpStatus & os)118 qint64 MysqlSingleTableAssemblyAdapter::countReads(const U2Region &r, U2OpStatus &os) {
119     if (r == U2_REGION_MAX) {
120         return U2SqlQuery(QString("SELECT COUNT(*) FROM %1").arg(readsTable), db, os).selectInt64();
121     }
122 
123     static const QString qStr = "SELECT COUNT(*) FROM %1 WHERE %2";
124     U2SqlQuery q(qStr.arg(readsTable).arg(rangeConditionCheckForCount), db, os);
125     bindRegion(q, r, true);
126     return q.selectInt64();
127 }
128 
countReadsPrecise(const U2Region & r,U2OpStatus & os)129 qint64 MysqlSingleTableAssemblyAdapter::countReadsPrecise(const U2Region &r, U2OpStatus &os) {
130     if (!rangeMode) {
131         return countReads(r, os);
132     }
133 
134     // here we use not-optimized rangeConditionCheck but not rangeConditionCheckForCount
135     static const QString qStr = "SELECT COUNT(*) FROM %1 WHERE %2";
136     U2SqlQuery q(qStr.arg(readsTable).arg(rangeConditionCheck), db, os);
137     bindRegion(q, r, false);
138     return q.selectInt64();
139 }
140 
getMaxPackedRow(const U2Region & r,U2OpStatus & os)141 qint64 MysqlSingleTableAssemblyAdapter::getMaxPackedRow(const U2Region &r, U2OpStatus &os) {
142     static const QString queryString = "SELECT MAX(prow) FROM %1 WHERE %2";
143     U2SqlQuery q(queryString.arg(readsTable).arg(rangeConditionCheck), db, os);
144     bindRegion(q, r);
145     return q.selectInt64();
146 }
147 
getMaxEndPos(U2OpStatus & os)148 qint64 MysqlSingleTableAssemblyAdapter::getMaxEndPos(U2OpStatus &os) {
149     static const QString queryString = "SELECT MAX(gstart + elen) FROM %1";
150     return U2SqlQuery(queryString.arg(readsTable), db, os).selectInt64();
151 }
152 
getReads(const U2Region & r,U2OpStatus & os,bool sortedHint)153 U2DbiIterator<U2AssemblyRead> *MysqlSingleTableAssemblyAdapter::getReads(const U2Region &r, U2OpStatus &os, bool sortedHint) {
154     static const QString qStr = "SELECT " + ALL_READ_FIELDS + " FROM %1 WHERE %2";
155 
156     QSharedPointer<U2SqlQuery> q(new U2SqlQuery(qStr.arg(readsTable).arg(rangeConditionCheck) + (sortedHint ? SORTED_READS : ""), db, os));
157     bindRegion(*q, r);
158     return new MysqlRSIterator<U2AssemblyRead>(q, new MysqlSimpleAssemblyReadLoader(), nullptr, U2AssemblyRead(), os);
159 }
160 
getReadsByRow(const U2Region & r,qint64 minRow,qint64 maxRow,U2OpStatus & os)161 U2DbiIterator<U2AssemblyRead> *MysqlSingleTableAssemblyAdapter::getReadsByRow(const U2Region &r, qint64 minRow, qint64 maxRow, U2OpStatus &os) {
162     static const QString qStr = "SELECT " + ALL_READ_FIELDS + " FROM %1 WHERE %2 AND (prow >= :minRow AND prow < :maxRow)";
163     QSharedPointer<U2SqlQuery> q(new U2SqlQuery(qStr.arg(readsTable).arg(rangeConditionCheck), db, os));
164     bindRegion(*q, r);
165     q->bindInt64(":minRow", minRow);
166     q->bindInt64(":maxRow", maxRow);
167     return new MysqlRSIterator<U2AssemblyRead>(q, new MysqlSimpleAssemblyReadLoader(), nullptr, U2AssemblyRead(), os);
168 }
169 
getReadsByName(const QByteArray & name,U2OpStatus & os)170 U2DbiIterator<U2AssemblyRead> *MysqlSingleTableAssemblyAdapter::getReadsByName(const QByteArray &name, U2OpStatus &os) {
171     static const QString qStr = "SELECT " + ALL_READ_FIELDS + " FROM %1 WHERE name = :name";
172     QSharedPointer<U2SqlQuery> q(new U2SqlQuery(qStr.arg(readsTable), db, os));
173     int hash = qHash(name);
174     q->bindInt64(":name", hash);
175     return new MysqlRSIterator<U2AssemblyRead>(q, new MysqlSimpleAssemblyReadLoader(), new MysqlAssemblyNameFilter(name), U2AssemblyRead(), os);
176 }
177 
addReads(U2DbiIterator<U2AssemblyRead> * it,U2AssemblyReadsImportInfo & ii,U2OpStatus & os)178 void MysqlSingleTableAssemblyAdapter::addReads(U2DbiIterator<U2AssemblyRead> *it, U2AssemblyReadsImportInfo &ii, U2OpStatus &os) {
179     MysqlTransaction t(db, os);
180 
181     if (!inited) {
182         createReadsTables(os);
183     }
184 
185     static const QString q = "INSERT INTO %1(name, prow, flags, gstart, elen, mq, data) VALUES (:name, :prow, :flags, :gstart, :elen, :mq, :data)";
186 
187     while (it->hasNext() && !os.isCoR()) {
188         U2SqlQuery insertQ(q.arg(readsTable), db, os);
189 
190         U2AssemblyRead read = it->next();
191         bool dnaExt = false;  // TODO:
192         qint64 flags = read->flags;
193         flags = flags | (dnaExt ? DnaExtAlphabet : 0);
194 
195         if (rangeMode) {
196             SAFE_POINT(read->effectiveLen >= minReadLength && read->effectiveLen < maxReadLength, "Effective read length must be precomputed in the range mode", );
197         } else {
198             int readLen = read->readSequence.length();
199             int effectiveReadLength = readLen + U2AssemblyUtils::getCigarExtraLength(read->cigar);
200             read->effectiveLen = effectiveReadLength;
201         }
202 
203         int hash = qHash(read->name);
204         QByteArray packedData = MysqlAssemblyUtils::packData(MysqlAssemblyDataMethod_NSCQ, read, os);
205 
206         insertQ.bindInt64(":name", hash);
207         insertQ.bindInt64(":prow", read->packedViewRow);
208         insertQ.bindInt64(":flags", flags);
209         insertQ.bindInt64(":gstart", read->leftmostPos);
210         insertQ.bindInt64(":elen", read->effectiveLen);
211         insertQ.bindInt32(":mq", read->mappingQuality);
212         insertQ.bindBlob(":data", packedData);
213         insertQ.insert();
214         CHECK_OP(os, );
215 
216         MysqlAssemblyUtils::addToCoverage(ii.coverageInfo, read);
217 
218         ii.nReads++;
219         ii.onReadImported();
220     }
221 }
222 
removeReads(const QList<U2DataId> & readIds,U2OpStatus & os)223 void MysqlSingleTableAssemblyAdapter::removeReads(const QList<U2DataId> &readIds, U2OpStatus &os) {
224     MysqlTransaction t(db, os);
225 
226     // TODO: add transaction per pack or reads
227     // TODO: remove multiple reads in 1 SQL at once
228     // SQLiteObjectDbi* objDbi = dbi->getSQLiteObjectDbi();
229     foreach (U2DataId readId, readIds) {
230         MysqlUtils::remove(readsTable, "id", readId, 1, db, os);
231         CHECK_OP_BREAK(os);
232     }
233 
234     MysqlObjectDbi::incrementVersion(assemblyId, db, os);
235 }
236 
dropReadsTables(U2OpStatus & os)237 void MysqlSingleTableAssemblyAdapter::dropReadsTables(U2OpStatus &os) {
238     static QString queryString = "DROP TABLE IF EXISTS %1";
239     U2SqlQuery(queryString.arg(readsTable), db, os).execute();
240     CHECK_OP(os, );
241     MysqlObjectDbi::incrementVersion(assemblyId, db, os);
242 }
243 
pack(U2AssemblyPackStat & stat,U2OpStatus & os)244 void MysqlSingleTableAssemblyAdapter::pack(U2AssemblyPackStat &stat, U2OpStatus &os) {
245     MysqlSingleTablePackAlgorithmAdapter packAdapter(db, readsTable);
246     AssemblyPackAlgorithm::pack(packAdapter, stat, os);
247     createReadsIndexes(os);
248 }
249 
calculateCoverage(const U2Region & r,U2AssemblyCoverageStat & coverage,U2OpStatus & os)250 void MysqlSingleTableAssemblyAdapter::calculateCoverage(const U2Region &r, U2AssemblyCoverageStat &coverage, U2OpStatus &os) {
251     QString queryString = "SELECT gstart, elen, data FROM " + readsTable;
252     bool rangeArgs = (r != U2_REGION_MAX);
253 
254     if (rangeArgs) {
255         queryString += " WHERE " + rangeConditionCheck;
256     }
257 
258     U2SqlQuery q(queryString, db, os);
259 
260     if (rangeArgs) {
261         bindRegion(q, r, false);
262     }
263 
264     MysqlAssemblyUtils::calculateCoverage(q, r, coverage, os);
265 }
266 
getReadsTableName() const267 const QString &MysqlSingleTableAssemblyAdapter::getReadsTableName() const {
268     return readsTable;
269 }
270 
enableRangeTableMode(int minLen,int maxLen)271 void MysqlSingleTableAssemblyAdapter::enableRangeTableMode(int minLen, int maxLen) {
272     rangeMode = true;
273     minReadLength = minLen;
274     maxReadLength = maxLen;
275     rangeConditionCheck = RTM_RANGE_CONDITION_CHECK;
276     rangeConditionCheckForCount = RTM_RANGE_CONDITION_CHECK_COUNT;
277 }
278 
getReadsTableName(const U2DataId & assemblyId,char prefix,const QString & suffix)279 QString MysqlSingleTableAssemblyAdapter::getReadsTableName(const U2DataId &assemblyId, char prefix, const QString &suffix) {
280     return QString("AssemblyRead_%1%2_%3").arg(prefix).arg(U2DbiUtils::toDbiId(assemblyId)).arg(suffix);
281     ;
282 }
283 
dropReadsIndexes(U2OpStatus & os)284 void MysqlSingleTableAssemblyAdapter::dropReadsIndexes(U2OpStatus &os) {
285     MysqlTransaction t(db, os);
286 
287     static const QString q1 = "DROP INDEX IF EXISTS %1_gstart";
288     U2SqlQuery(q1.arg(readsTable), db, os).execute();
289 
290     static const QString q2 = "DROP INDEX IF EXISTS %1_name";
291     U2SqlQuery(q2.arg(readsTable), db, os).execute();
292 }
293 
getMinReadLength() const294 qint64 MysqlSingleTableAssemblyAdapter::getMinReadLength() const {
295     return minReadLength;
296 }
297 
getMaxReadLength() const298 qint64 MysqlSingleTableAssemblyAdapter::getMaxReadLength() const {
299     return maxReadLength;
300 }
301 
bindRegion(U2SqlQuery & q,const U2Region & r,bool forCount)302 void MysqlSingleTableAssemblyAdapter::bindRegion(U2SqlQuery &q, const U2Region &r, bool forCount) {
303     if (rangeMode) {
304         q.bindInt64(":end", r.endPos());
305         q.bindInt64(":start", r.startPos - maxReadLength);
306         if (!forCount) {
307             q.bindInt64(":realStart", r.startPos);
308         }
309     } else {
310         q.bindInt64(":end", r.endPos());
311         q.bindInt64(":start", r.startPos);
312     }
313 }
314 
315 //////////////////////////////////////////////////////////////////////////
316 // pack adapter
317 
MysqlSingleTablePackAlgorithmAdapter(MysqlDbRef * db,const QString & readsTable)318 MysqlSingleTablePackAlgorithmAdapter::MysqlSingleTablePackAlgorithmAdapter(MysqlDbRef *db, const QString &readsTable)
319     : db(db),
320       readsTable(readsTable),
321       updateQuery(nullptr) {
322 }
323 
~MysqlSingleTablePackAlgorithmAdapter()324 MysqlSingleTablePackAlgorithmAdapter::~MysqlSingleTablePackAlgorithmAdapter() {
325     releaseDbResources();
326 }
327 
selectAllReads(U2OpStatus & os)328 U2DbiIterator<PackAlgorithmData> *MysqlSingleTablePackAlgorithmAdapter::selectAllReads(U2OpStatus &os) {
329     QSharedPointer<U2SqlQuery> q(new U2SqlQuery("SELECT id, gstart, elen FROM " + readsTable + " ORDER BY gstart", db, os));
330     return new MysqlRSIterator<PackAlgorithmData>(q, new MysqlSimpleAssemblyReadPackedDataLoader(), nullptr, PackAlgorithmData(), os);
331 }
332 
assignProw(const U2DataId & readId,qint64 prow,U2OpStatus & os)333 void MysqlSingleTablePackAlgorithmAdapter::assignProw(const U2DataId &readId, qint64 prow, U2OpStatus &os) {
334     if (updateQuery != nullptr) {
335         delete updateQuery;
336     }
337 
338     updateQuery = new U2SqlQuery("UPDATE " + readsTable + " SET prow = :prow WHERE id = :id", db, os);
339     updateQuery->bindInt64(":prow", prow);
340     updateQuery->bindDataId(":id", readId);
341     updateQuery->execute();
342 }
343 
releaseDbResources()344 void MysqlSingleTablePackAlgorithmAdapter::releaseDbResources() {
345     delete updateQuery;
346     updateQuery = nullptr;
347 }
348 
349 }  // namespace U2
350