1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifndef _U2_SQLITE_ASSEMBLY_DBI_H_
23 #define _U2_SQLITE_ASSEMBLY_DBI_H_
24 
25 #include <U2Core/U2SqlHelpers.h>
26 
27 #include "SQLiteDbi.h"
28 #include "util/AssemblyAdapter.h"
29 #include "util/AssemblyPackAlgorithm.h"
30 
31 namespace U2 {
32 
33 class SQLiteWriteQuery;
34 class AssemblyAdapter;
35 
36 class SQLiteAssemblyDbi : public U2AssemblyDbi, public SQLiteChildDBICommon {
37 public:
38     SQLiteAssemblyDbi(SQLiteDbi *dbi);
39     ~SQLiteAssemblyDbi();
40 
41     /** Reads assembly objects by id */
42     virtual U2Assembly getAssemblyObject(const U2DataId &assemblyId, U2OpStatus &os);
43 
44     /**
45         Return number of reads in assembly that intersect given region
46         'Intersect' here means that region(leftmost pos, rightmost pos) intersects with 'r'
47     */
48     virtual qint64 countReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os);
49 
50     /**
51         Return reads that intersect given region
52         Note: iterator instance must be deallocated by caller method
53     */
54     virtual U2DbiIterator<U2AssemblyRead> *getReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os, bool sortedHint = false);
55 
56     /**
57         Return reads with packed row value >= min, <= max that intersect given region
58         Note: iterator instance must be deallocated by caller method
59     */
60     virtual U2DbiIterator<U2AssemblyRead> *getReadsByRow(const U2DataId &assemblyId, const U2Region &r, qint64 minRow, qint64 maxRow, U2OpStatus &os);
61 
62     /**
63         Return reads with a specified name. Used to find paired reads that must have equal names
64         Note: iterator instance must be deallocated by caller method
65     */
66     virtual U2DbiIterator<U2AssemblyRead> *getReadsByName(const U2DataId &assemblyId, const QByteArray &name, U2OpStatus &os);
67 
68     /**
69         Return max packed row at the given coordinate
70         'Intersect' here means that region(leftmost pos, rightmost pos) intersects with 'r'
71     */
72     virtual qint64 getMaxPackedRow(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os);
73 
74     /** Count 'length of assembly' - position of the rightmost base of all reads */
75     virtual qint64 getMaxEndPos(const U2DataId &assemblyId, U2OpStatus &os);
76 
77     /**
78         Creates new empty assembly object. Reads iterator can be NULL
79         Requires: U2DbiFeature_WriteAssembly feature support
80     */
81     virtual void createAssemblyObject(U2Assembly &assembly, const QString &folder, U2DbiIterator<U2AssemblyRead> *it, U2AssemblyReadsImportInfo &ii, U2OpStatus &os);
82 
83     /**
84         Creates indexes for reads tables.
85         This method mustn't be called inside transaction.
86         Requires: U2DbiFeature_WriteAssembly feature support
87     */
88     virtual void finalizeAssemblyObject(U2Assembly &assembly, U2OpStatus &os);
89 
90     /**
91         Removes all assembly data and tables.
92         Does not remove entry from the 'Object' table.
93         Requires: U2DbiFeature_WriteAssembly feature support
94     */
95     virtual void removeAssemblyData(const U2DataId &assemblyId, U2OpStatus &os);
96 
97     /**
98         Updates assembly object fields
99         Requires: U2DbiFeature_WriteAssembly feature support
100     */
101     virtual void updateAssemblyObject(U2Assembly &, U2OpStatus &os);
102 
103     /**
104         Removes sequences from assembly
105         Automatically removes affected sequences that are not anymore accessible from folders
106     */
107     virtual void removeReads(const U2DataId &assemblyId, const QList<U2DataId> &rowIds, U2OpStatus &os);
108 
109     /**
110         Adds sequences to assembly
111         Reads got their ids assigned.
112     */
113     virtual void addReads(const U2DataId &assemblyId, U2DbiIterator<U2AssemblyRead> *it, U2OpStatus &os);
114 
115     /**  Packs assembly rows: assigns packedViewRow value for every read in assembly */
116     virtual void pack(const U2DataId &assemblyId, U2AssemblyPackStat &stat, U2OpStatus &os);
117 
118     /**
119         Calculates coverage information for the given region. Saves result to 'c.coverage' vector.
120         Note: Coverage window size depends on 'c.coverage' vector size passed to the method call.
121     */
122     virtual void calculateCoverage(const U2DataId &assemblyId, const U2Region &region, U2AssemblyCoverageStat &coverage, U2OpStatus &os);
123 
124     virtual void initSqlSchema(U2OpStatus &os);
125     virtual void shutdown(U2OpStatus &os);
126 
127     static QString getCreateAssemblyTableQuery(const QString &tableAlias = "Assembly");
128 
129 private:
130     virtual void addReads(AssemblyAdapter *a, U2DbiIterator<U2AssemblyRead> *it, U2AssemblyReadsImportInfo &ii, U2OpStatus &os);
131 
132     void removeTables(const U2DataId &assemblyId, U2OpStatus &os);
133     void removeAssemblyEntry(const U2DataId &assemblyId, U2OpStatus &os);
134 
135     /** Return assembly storage adapter for the given assembly */
136     AssemblyAdapter *getAdapter(const U2DataId &assemblyId, U2OpStatus &os);
137 
138     /** Adapters by database assembly id */
139     QHash<qint64, AssemblyAdapter *> adaptersById;
140 };
141 
142 class SQLiteAssemblyAdapter : public AssemblyAdapter {
143 public:
SQLiteAssemblyAdapter(const U2DataId & assemblyId,const AssemblyCompressor * compressor,DbRef * ref)144     SQLiteAssemblyAdapter(const U2DataId &assemblyId, const AssemblyCompressor *compressor, DbRef *ref)
145         : AssemblyAdapter(assemblyId, compressor), db(ref) {
146     }
147 
148 protected:
149     DbRef *db;
150 };
151 
/** Methods available for compressing assembly data. */
enum SQLiteAssemblyDataMethod {
    /**
     * The Name, Sequence, Cigar and Quality values are merged into a single byte array,
     * separated by the '\n' character. The merge prefix is '0'.
     */
    SQLiteAssemblyDataMethod_NSCQ = 1
};
157 
158 class SQLiteAssemblyUtils {
159 public:
160     static QByteArray packData(SQLiteAssemblyDataMethod method, const U2AssemblyRead &read, U2OpStatus &os);
161 
162     static void unpackData(const QByteArray &packed, U2AssemblyRead &read, U2OpStatus &os);
163 
164     static void calculateCoverage(SQLiteReadQuery &q, const U2Region &r, U2AssemblyCoverageStat &coverage, U2OpStatus &os);
165 
166     static void addToCoverage(U2AssemblyCoverageImportInfo &cii, const U2AssemblyRead &read);
167 };
168 
169 class SQLiteAssemblyNameFilter : public SQLiteResultSetFilter<U2AssemblyRead> {
170 public:
SQLiteAssemblyNameFilter(const QByteArray & expectedName)171     SQLiteAssemblyNameFilter(const QByteArray &expectedName)
172         : name(expectedName) {
173     }
filter(const U2AssemblyRead & r)174     virtual bool filter(const U2AssemblyRead &r) {
175         return name == r->name;
176     }
177 
178 protected:
179     QByteArray name;
180 };
181 
182 class SimpleAssemblyReadLoader : public SQLiteResultSetLoader<U2AssemblyRead> {
183 public:
184     U2AssemblyRead load(SQLiteQuery *q);
185 };
186 
187 class SimpleAssemblyReadPackedDataLoader : public SQLiteResultSetLoader<PackAlgorithmData> {
188 public:
189     virtual PackAlgorithmData load(SQLiteQuery *q);
190 };
191 
192 }  // namespace U2
193 
194 #endif
195