1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "MysqlSingleTableAssemblyAdapter.h"
23
24 #include <U2Core/U2AssemblyUtils.h>
25 #include <U2Core/U2SafePoints.h>
26
27 #include "MysqlAssemblyUtils.h"
28 #include "MysqlHelpers.h"
29 #include "mysql_dbi/MysqlDbi.h"
30 #include "mysql_dbi/MysqlObjectDbi.h"
31
32 namespace U2 {
33
34 // :end -> end of the region
35 // :start -> start of the region (- max read size)
36 // :realStart -> real start of the region
37 const QString MysqlSingleTableAssemblyAdapter::DEFAULT_RANGE_CONDITION_CHECK = " (gstart < :end AND gstart + elen > :start) ";
38 const QString MysqlSingleTableAssemblyAdapter::RTM_RANGE_CONDITION_CHECK = " ((gstart < :end AND gstart > :start) AND gstart + elen > :realStart) ";
39 const QString MysqlSingleTableAssemblyAdapter::RTM_RANGE_CONDITION_CHECK_COUNT = " (gstart < :end AND gstart > :start) ";
40 const QString MysqlSingleTableAssemblyAdapter::ALL_READ_FIELDS = " id, prow, gstart, elen, flags, mq, data";
41 const QString MysqlSingleTableAssemblyAdapter::SORTED_READS = " ORDER BY gstart ASC ";
42
MysqlSingleTableAssemblyAdapter(MysqlDbi * dbi,const U2DataId & assemblyId,char tablePrefix,const QString & tableSuffix,const AssemblyCompressor * compressor,MysqlDbRef * db,U2OpStatus &)43 MysqlSingleTableAssemblyAdapter::MysqlSingleTableAssemblyAdapter(MysqlDbi *dbi,
44 const U2DataId &assemblyId,
45 char tablePrefix,
46 const QString &tableSuffix,
47 const AssemblyCompressor *compressor,
48 MysqlDbRef *db,
49 U2OpStatus &)
50 : MysqlAssemblyAdapter(assemblyId, compressor, db),
51 dbi(dbi),
52 readsTable(getReadsTableName(assemblyId, tablePrefix, tableSuffix)),
53 rangeConditionCheck(DEFAULT_RANGE_CONDITION_CHECK),
54 rangeConditionCheckForCount(DEFAULT_RANGE_CONDITION_CHECK),
55 minReadLength(0),
56 maxReadLength(0),
57 rangeMode(false),
58 inited(false) {
59 }
60
createReadsTables(U2OpStatus & os)61 void MysqlSingleTableAssemblyAdapter::createReadsTables(U2OpStatus &os) {
62 MysqlTransaction t(db, os);
63
64 // id - id of the read
65 // name - read name hash
66 // prow - packed view row
67 // gstart - start of the read
68 // elen - effective length of the read
69 // flags - read flags
70 // mq - mapping quality
71 // data - packed data: CIGAR, read sequence, quality string
72 static QString q = "CREATE TABLE IF NOT EXISTS %1 (id BIGINT PRIMARY KEY AUTO_INCREMENT, name BIGINT NOT NULL, prow BIGINT NOT NULL, "
73 "gstart BIGINT NOT NULL, elen BIGINT NOT NULL, flags BIGINT NOT NULL, mq TINYINT UNSIGNED NOT NULL, data LONGBLOB NOT NULL) ENGINE=InnoDB DEFAULT CHARSET=utf8";
74
75 U2SqlQuery(q.arg(readsTable), db, os).execute();
76 CHECK_OP(os, );
77 inited = true;
78 }
79
80 //! %1 - database, %2 - table name, %3 - index name, %4 - index column
81 //! Example of working query:
82 // select if (EXISTS(SELECT distinct index_name FROM INFORMATION_SCHEMA.STATISTICS
83 // WHERE table_schema = 'uu' AND table_name = 'AssemblyRead_M2_800_4000_0' and index_name like 'AssemblyRead_M2_800_4000_0_name'),
84 // "SELECT id FROM AssemblyRead_M2_800_4000_0;", "CREATE INDEX AssemblyRead_M2_800_4000_0_name ON uu.AssemblyRead_M2_800_4000_0(name);") into @a;
85 // prepare smt from @a; execute smt; deallocate prepare smt;
86 static const QString CREATE_INDEX_IF_NOT_EXISTS_QUERY =
87 "select if"
88 "("
89 "EXISTS(SELECT distinct index_name FROM INFORMATION_SCHEMA.STATISTICS "
90 "WHERE table_schema = '%1' AND table_name = '%2' and index_name like '%3')"
91 ","
92 "\"SELECT %4 FROM %2;\""
93 ","
94 "\"CREATE INDEX %3 ON %1.%2(%4);\""
95 ") into @a; prepare smt from @a; execute smt; deallocate prepare smt;";
96
createReadsIndexes(U2OpStatus & os)97 void MysqlSingleTableAssemblyAdapter::createReadsIndexes(U2OpStatus &os) {
98 MysqlTransaction t(db, os);
99
100 U2SqlQuery(CREATE_INDEX_IF_NOT_EXISTS_QUERY.arg(db->handle.databaseName())
101 .arg(readsTable)
102 .arg(readsTable + "_gstart")
103 .arg("gstart"),
104 db,
105 os)
106 .execute();
107 CHECK_OP(os, );
108
109 U2SqlQuery(CREATE_INDEX_IF_NOT_EXISTS_QUERY.arg(db->handle.databaseName())
110 .arg(readsTable)
111 .arg(readsTable + "_name")
112 .arg("name"),
113 db,
114 os)
115 .execute();
116 }
117
countReads(const U2Region & r,U2OpStatus & os)118 qint64 MysqlSingleTableAssemblyAdapter::countReads(const U2Region &r, U2OpStatus &os) {
119 if (r == U2_REGION_MAX) {
120 return U2SqlQuery(QString("SELECT COUNT(*) FROM %1").arg(readsTable), db, os).selectInt64();
121 }
122
123 static const QString qStr = "SELECT COUNT(*) FROM %1 WHERE %2";
124 U2SqlQuery q(qStr.arg(readsTable).arg(rangeConditionCheckForCount), db, os);
125 bindRegion(q, r, true);
126 return q.selectInt64();
127 }
128
countReadsPrecise(const U2Region & r,U2OpStatus & os)129 qint64 MysqlSingleTableAssemblyAdapter::countReadsPrecise(const U2Region &r, U2OpStatus &os) {
130 if (!rangeMode) {
131 return countReads(r, os);
132 }
133
134 // here we use not-optimized rangeConditionCheck but not rangeConditionCheckForCount
135 static const QString qStr = "SELECT COUNT(*) FROM %1 WHERE %2";
136 U2SqlQuery q(qStr.arg(readsTable).arg(rangeConditionCheck), db, os);
137 bindRegion(q, r, false);
138 return q.selectInt64();
139 }
140
getMaxPackedRow(const U2Region & r,U2OpStatus & os)141 qint64 MysqlSingleTableAssemblyAdapter::getMaxPackedRow(const U2Region &r, U2OpStatus &os) {
142 static const QString queryString = "SELECT MAX(prow) FROM %1 WHERE %2";
143 U2SqlQuery q(queryString.arg(readsTable).arg(rangeConditionCheck), db, os);
144 bindRegion(q, r);
145 return q.selectInt64();
146 }
147
getMaxEndPos(U2OpStatus & os)148 qint64 MysqlSingleTableAssemblyAdapter::getMaxEndPos(U2OpStatus &os) {
149 static const QString queryString = "SELECT MAX(gstart + elen) FROM %1";
150 return U2SqlQuery(queryString.arg(readsTable), db, os).selectInt64();
151 }
152
getReads(const U2Region & r,U2OpStatus & os,bool sortedHint)153 U2DbiIterator<U2AssemblyRead> *MysqlSingleTableAssemblyAdapter::getReads(const U2Region &r, U2OpStatus &os, bool sortedHint) {
154 static const QString qStr = "SELECT " + ALL_READ_FIELDS + " FROM %1 WHERE %2";
155
156 QSharedPointer<U2SqlQuery> q(new U2SqlQuery(qStr.arg(readsTable).arg(rangeConditionCheck) + (sortedHint ? SORTED_READS : ""), db, os));
157 bindRegion(*q, r);
158 return new MysqlRSIterator<U2AssemblyRead>(q, new MysqlSimpleAssemblyReadLoader(), nullptr, U2AssemblyRead(), os);
159 }
160
getReadsByRow(const U2Region & r,qint64 minRow,qint64 maxRow,U2OpStatus & os)161 U2DbiIterator<U2AssemblyRead> *MysqlSingleTableAssemblyAdapter::getReadsByRow(const U2Region &r, qint64 minRow, qint64 maxRow, U2OpStatus &os) {
162 static const QString qStr = "SELECT " + ALL_READ_FIELDS + " FROM %1 WHERE %2 AND (prow >= :minRow AND prow < :maxRow)";
163 QSharedPointer<U2SqlQuery> q(new U2SqlQuery(qStr.arg(readsTable).arg(rangeConditionCheck), db, os));
164 bindRegion(*q, r);
165 q->bindInt64(":minRow", minRow);
166 q->bindInt64(":maxRow", maxRow);
167 return new MysqlRSIterator<U2AssemblyRead>(q, new MysqlSimpleAssemblyReadLoader(), nullptr, U2AssemblyRead(), os);
168 }
169
getReadsByName(const QByteArray & name,U2OpStatus & os)170 U2DbiIterator<U2AssemblyRead> *MysqlSingleTableAssemblyAdapter::getReadsByName(const QByteArray &name, U2OpStatus &os) {
171 static const QString qStr = "SELECT " + ALL_READ_FIELDS + " FROM %1 WHERE name = :name";
172 QSharedPointer<U2SqlQuery> q(new U2SqlQuery(qStr.arg(readsTable), db, os));
173 int hash = qHash(name);
174 q->bindInt64(":name", hash);
175 return new MysqlRSIterator<U2AssemblyRead>(q, new MysqlSimpleAssemblyReadLoader(), new MysqlAssemblyNameFilter(name), U2AssemblyRead(), os);
176 }
177
addReads(U2DbiIterator<U2AssemblyRead> * it,U2AssemblyReadsImportInfo & ii,U2OpStatus & os)178 void MysqlSingleTableAssemblyAdapter::addReads(U2DbiIterator<U2AssemblyRead> *it, U2AssemblyReadsImportInfo &ii, U2OpStatus &os) {
179 MysqlTransaction t(db, os);
180
181 if (!inited) {
182 createReadsTables(os);
183 }
184
185 static const QString q = "INSERT INTO %1(name, prow, flags, gstart, elen, mq, data) VALUES (:name, :prow, :flags, :gstart, :elen, :mq, :data)";
186
187 while (it->hasNext() && !os.isCoR()) {
188 U2SqlQuery insertQ(q.arg(readsTable), db, os);
189
190 U2AssemblyRead read = it->next();
191 bool dnaExt = false; // TODO:
192 qint64 flags = read->flags;
193 flags = flags | (dnaExt ? DnaExtAlphabet : 0);
194
195 if (rangeMode) {
196 SAFE_POINT(read->effectiveLen >= minReadLength && read->effectiveLen < maxReadLength, "Effective read length must be precomputed in the range mode", );
197 } else {
198 int readLen = read->readSequence.length();
199 int effectiveReadLength = readLen + U2AssemblyUtils::getCigarExtraLength(read->cigar);
200 read->effectiveLen = effectiveReadLength;
201 }
202
203 int hash = qHash(read->name);
204 QByteArray packedData = MysqlAssemblyUtils::packData(MysqlAssemblyDataMethod_NSCQ, read, os);
205
206 insertQ.bindInt64(":name", hash);
207 insertQ.bindInt64(":prow", read->packedViewRow);
208 insertQ.bindInt64(":flags", flags);
209 insertQ.bindInt64(":gstart", read->leftmostPos);
210 insertQ.bindInt64(":elen", read->effectiveLen);
211 insertQ.bindInt32(":mq", read->mappingQuality);
212 insertQ.bindBlob(":data", packedData);
213 insertQ.insert();
214 CHECK_OP(os, );
215
216 MysqlAssemblyUtils::addToCoverage(ii.coverageInfo, read);
217
218 ii.nReads++;
219 ii.onReadImported();
220 }
221 }
222
removeReads(const QList<U2DataId> & readIds,U2OpStatus & os)223 void MysqlSingleTableAssemblyAdapter::removeReads(const QList<U2DataId> &readIds, U2OpStatus &os) {
224 MysqlTransaction t(db, os);
225
226 // TODO: add transaction per pack or reads
227 // TODO: remove multiple reads in 1 SQL at once
228 // SQLiteObjectDbi* objDbi = dbi->getSQLiteObjectDbi();
229 foreach (U2DataId readId, readIds) {
230 MysqlUtils::remove(readsTable, "id", readId, 1, db, os);
231 CHECK_OP_BREAK(os);
232 }
233
234 MysqlObjectDbi::incrementVersion(assemblyId, db, os);
235 }
236
dropReadsTables(U2OpStatus & os)237 void MysqlSingleTableAssemblyAdapter::dropReadsTables(U2OpStatus &os) {
238 static QString queryString = "DROP TABLE IF EXISTS %1";
239 U2SqlQuery(queryString.arg(readsTable), db, os).execute();
240 CHECK_OP(os, );
241 MysqlObjectDbi::incrementVersion(assemblyId, db, os);
242 }
243
pack(U2AssemblyPackStat & stat,U2OpStatus & os)244 void MysqlSingleTableAssemblyAdapter::pack(U2AssemblyPackStat &stat, U2OpStatus &os) {
245 MysqlSingleTablePackAlgorithmAdapter packAdapter(db, readsTable);
246 AssemblyPackAlgorithm::pack(packAdapter, stat, os);
247 createReadsIndexes(os);
248 }
249
calculateCoverage(const U2Region & r,U2AssemblyCoverageStat & coverage,U2OpStatus & os)250 void MysqlSingleTableAssemblyAdapter::calculateCoverage(const U2Region &r, U2AssemblyCoverageStat &coverage, U2OpStatus &os) {
251 QString queryString = "SELECT gstart, elen, data FROM " + readsTable;
252 bool rangeArgs = (r != U2_REGION_MAX);
253
254 if (rangeArgs) {
255 queryString += " WHERE " + rangeConditionCheck;
256 }
257
258 U2SqlQuery q(queryString, db, os);
259
260 if (rangeArgs) {
261 bindRegion(q, r, false);
262 }
263
264 MysqlAssemblyUtils::calculateCoverage(q, r, coverage, os);
265 }
266
getReadsTableName() const267 const QString &MysqlSingleTableAssemblyAdapter::getReadsTableName() const {
268 return readsTable;
269 }
270
enableRangeTableMode(int minLen,int maxLen)271 void MysqlSingleTableAssemblyAdapter::enableRangeTableMode(int minLen, int maxLen) {
272 rangeMode = true;
273 minReadLength = minLen;
274 maxReadLength = maxLen;
275 rangeConditionCheck = RTM_RANGE_CONDITION_CHECK;
276 rangeConditionCheckForCount = RTM_RANGE_CONDITION_CHECK_COUNT;
277 }
278
getReadsTableName(const U2DataId & assemblyId,char prefix,const QString & suffix)279 QString MysqlSingleTableAssemblyAdapter::getReadsTableName(const U2DataId &assemblyId, char prefix, const QString &suffix) {
280 return QString("AssemblyRead_%1%2_%3").arg(prefix).arg(U2DbiUtils::toDbiId(assemblyId)).arg(suffix);
281 ;
282 }
283
dropReadsIndexes(U2OpStatus & os)284 void MysqlSingleTableAssemblyAdapter::dropReadsIndexes(U2OpStatus &os) {
285 MysqlTransaction t(db, os);
286
287 static const QString q1 = "DROP INDEX IF EXISTS %1_gstart";
288 U2SqlQuery(q1.arg(readsTable), db, os).execute();
289
290 static const QString q2 = "DROP INDEX IF EXISTS %1_name";
291 U2SqlQuery(q2.arg(readsTable), db, os).execute();
292 }
293
getMinReadLength() const294 qint64 MysqlSingleTableAssemblyAdapter::getMinReadLength() const {
295 return minReadLength;
296 }
297
getMaxReadLength() const298 qint64 MysqlSingleTableAssemblyAdapter::getMaxReadLength() const {
299 return maxReadLength;
300 }
301
bindRegion(U2SqlQuery & q,const U2Region & r,bool forCount)302 void MysqlSingleTableAssemblyAdapter::bindRegion(U2SqlQuery &q, const U2Region &r, bool forCount) {
303 if (rangeMode) {
304 q.bindInt64(":end", r.endPos());
305 q.bindInt64(":start", r.startPos - maxReadLength);
306 if (!forCount) {
307 q.bindInt64(":realStart", r.startPos);
308 }
309 } else {
310 q.bindInt64(":end", r.endPos());
311 q.bindInt64(":start", r.startPos);
312 }
313 }
314
315 //////////////////////////////////////////////////////////////////////////
316 // pack adapter
317
MysqlSingleTablePackAlgorithmAdapter(MysqlDbRef * db,const QString & readsTable)318 MysqlSingleTablePackAlgorithmAdapter::MysqlSingleTablePackAlgorithmAdapter(MysqlDbRef *db, const QString &readsTable)
319 : db(db),
320 readsTable(readsTable),
321 updateQuery(nullptr) {
322 }
323
~MysqlSingleTablePackAlgorithmAdapter()324 MysqlSingleTablePackAlgorithmAdapter::~MysqlSingleTablePackAlgorithmAdapter() {
325 releaseDbResources();
326 }
327
selectAllReads(U2OpStatus & os)328 U2DbiIterator<PackAlgorithmData> *MysqlSingleTablePackAlgorithmAdapter::selectAllReads(U2OpStatus &os) {
329 QSharedPointer<U2SqlQuery> q(new U2SqlQuery("SELECT id, gstart, elen FROM " + readsTable + " ORDER BY gstart", db, os));
330 return new MysqlRSIterator<PackAlgorithmData>(q, new MysqlSimpleAssemblyReadPackedDataLoader(), nullptr, PackAlgorithmData(), os);
331 }
332
assignProw(const U2DataId & readId,qint64 prow,U2OpStatus & os)333 void MysqlSingleTablePackAlgorithmAdapter::assignProw(const U2DataId &readId, qint64 prow, U2OpStatus &os) {
334 if (updateQuery != nullptr) {
335 delete updateQuery;
336 }
337
338 updateQuery = new U2SqlQuery("UPDATE " + readsTable + " SET prow = :prow WHERE id = :id", db, os);
339 updateQuery->bindInt64(":prow", prow);
340 updateQuery->bindDataId(":id", readId);
341 updateQuery->execute();
342 }
343
releaseDbResources()344 void MysqlSingleTablePackAlgorithmAdapter::releaseDbResources() {
345 delete updateQuery;
346 updateQuery = nullptr;
347 }
348
349 } // namespace U2
350