/**
 * UGENE - Integrated Bioinformatics Tools.
 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
 * http://ugene.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */
21 
22 #ifndef _U2_ASSEMBLY_DBI_H_
23 #define _U2_ASSEMBLY_DBI_H_
24 
25 #include <U2Core/U2Assembly.h>
26 #include <U2Core/U2Dbi.h>
27 #include <U2Core/U2Type.h>
28 
29 namespace U2 {
30 
31 /** Class used to iterate huge amount of assembly reads and optimize assembly import operation to DBI */
32 class U2AssemblyReadsIterator {
33 public:
34     /** returns true if there are more reads to iterate*/
35     virtual bool hasNext() = 0;
36 
37     /** returns next read or error */
38     virtual U2AssemblyRead next(U2OpStatus &os) = 0;
39 };
40 
41 class U2AssemblyCoverageImportInfo {
42 public:
U2AssemblyCoverageImportInfo()43     U2AssemblyCoverageImportInfo()
44         : computeCoverage(false), coverageBasesPerPoint(1) {
45     }
46 
47     /** Specifies if assembly coverage is needed to be computed at import time*/
48     bool computeCoverage;
49 
50     /** Place to save coverage info */
51     U2AssemblyCoverageStat coverage;
52 
53     /** Shows how many real bases are in 1 coverage point */
54     double coverageBasesPerPoint;
55 };
56 
57 /** Additional reads info used during reads import into assembly */
58 class U2AssemblyReadsImportInfo {
59 public:
60     U2AssemblyReadsImportInfo(U2AssemblyReadsImportInfo *parentInfo = nullptr)
61         : nReads(0), packed(false), parentInfo(parentInfo) {
62     }
~U2AssemblyReadsImportInfo()63     virtual ~U2AssemblyReadsImportInfo() {
64     }
65 
66     /** Number of reads added during import */
67     qint64 nReads;
68 
69     /** Specifies if assembly was packed at import time*/
70     bool packed;
71 
72     /* Place where to save pack statistics */
73     U2AssemblyPackStat packStat;
74 
75     U2AssemblyCoverageImportInfo coverageInfo;
76 
onReadImported()77     virtual void onReadImported() {
78         if (nullptr != parentInfo) {
79             parentInfo->onReadImported();
80         }
81     }
82 
83 private:
84     U2AssemblyReadsImportInfo *parentInfo;
85 };
86 
87 /**
88     An interface to obtain  access to assembly data
89 */
90 class U2AssemblyDbi : public U2ChildDbi {
91 protected:
U2AssemblyDbi(U2Dbi * rootDbi)92     U2AssemblyDbi(U2Dbi *rootDbi)
93         : U2ChildDbi(rootDbi) {
94     }
95 
96 public:
97     /**
98         Reads assembly object by its id.
99         If there is no assembly object with the specified id returns a default constructed assembly object.
100     */
101     virtual U2Assembly getAssemblyObject(const U2DataId &id, U2OpStatus &os) = 0;
102 
103     /**
104         Returns number of reads located near or intersecting the region.
105         The region should be a valid region within alignment bounds, i.e. non-negative and less than alignment length.
106 
107         If there is no assembly object with the specified id returns -1.
108 
109         Note: 'near' here means that DBI is not forced to return precise number of reads that intersects the region
110         and some deviations is allowed in order to apply performance optimizations.
111 
112         Note2: Use U2_ASSEMBLY_REGION_MAX to count all reads in assembly in effective way
113 
114     */
115     virtual qint64 countReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os) = 0;
116 
117     /**
118         Returns reads that intersect given region.
119         If there is no assembly object with the specified id returns NULL.
120 
121         Note: iterator instance must be deallocated by caller method
122     */
123     virtual U2DbiIterator<U2AssemblyRead> *getReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os, bool sortedHint = false) = 0;
124 
125     /**
126         Returns reads with packed row value bounded by 'minRow' and 'maxRow' that intersect given region.
127         If there is no assembly object with the specified id returns NULL.
128 
129         Note: iterator instance must be deallocated by caller method
130     */
131     virtual U2DbiIterator<U2AssemblyRead> *getReadsByRow(const U2DataId &assemblyId, const U2Region &r, qint64 minRow, qint64 maxRow, U2OpStatus &os) = 0;
132 
133     /**
134         Returns reads with a specified name. Used to find paired reads that must have equal names.
135         If there is no assembly object with the specified id returns NULL.
136 
137         Note: iterator instance must be deallocated by caller method.
138     */
139     virtual U2DbiIterator<U2AssemblyRead> *getReadsByName(const U2DataId &assemblyId, const QByteArray &name, U2OpStatus &os) = 0;
140 
141     /**
142         Returns maximum packed row value of reads that intersect 'r'.
143         'Intersect' here means that region(leftmost pos, rightmost pos) intersects with 'r'
144         If there is no assembly object with the specified id returns -1.
145     */
146     virtual qint64 getMaxPackedRow(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os) = 0;
147 
148     /**
149         Count 'length of assembly' - position of the rightmost base of all reads.
150         If there is no assembly object with the specified id returns -1.
151     */
152     virtual qint64 getMaxEndPos(const U2DataId &assemblyId, U2OpStatus &os) = 0;
153 
154     /**
155         Creates new empty assembly object. Reads iterator can be NULL.
156         If iterator is not NULL adapter can automatically try to pack reads. If pack is performed, the corresponding
157         structure is filled with  pack statistics. Assembly object gets its id assigned.
158         Folder 'folder' must exist in database.
159         The created object must be finalized.
160 
161         Requires: U2DbiFeature_WriteAssembly feature support
162     */
163     virtual void createAssemblyObject(U2Assembly &assembly, const QString &folder, U2DbiIterator<U2AssemblyRead> *it, U2AssemblyReadsImportInfo &importInfo, U2OpStatus &os) = 0;
164 
165     /**
166         Does some additional actions that should be done after object creating and reads adding.
167         The set of actions is provider-dependent
168         In common case this method shouldn't be called inside transaction.
169 
170         Requires: U2DbiFeature_WriteAssembly feature support
171     */
172     virtual void finalizeAssemblyObject(U2Assembly &assembly, U2OpStatus &os) = 0;
173 
174     /**
175         Removes all assembly data and tables.
176         Does not remove entry from the 'Object' table.
177         Requires: U2DbiFeature_WriteAssembly feature support
178     */
179     virtual void removeAssemblyData(const U2DataId &assemblyId, U2OpStatus &os) = 0;
180 
181     /**
182         Updates assembly object fields.
183 
184         Requires: U2DbiFeature_WriteAssembly feature support.
185     */
186     virtual void updateAssemblyObject(U2Assembly &assembly, U2OpStatus &os) = 0;
187 
188     /**
189         Removes reads from assembly.
190         Automatically removes affected sequences that are not anymore accessible from folders.
191 
192         Requires: U2DbiFeature_WriteAssembly feature support.
193     */
194     virtual void removeReads(const U2DataId &assemblyId, const QList<U2DataId> &readIds, U2OpStatus &os) = 0;
195 
196     /**
197         Adds sequences to assembly.
198         Reads got their ids assigned.
199 
200         Requires: U2DbiFeature_WriteAssembly feature support.
201     */
202     virtual void addReads(const U2DataId &assemblyId, U2DbiIterator<U2AssemblyRead> *it, U2OpStatus &os) = 0;
203 
204     /**
205         Packs assembly rows: assigns packedViewRow value (i.e. read's vertical position in view)
206         for every read in assembly so that reads do not overlap.
207 
208         Requires: U2DbiFeature_WriteAssembly and U2DbiFeature_AssemblyReadsPacking features support
209     */
210     virtual void pack(const U2DataId &assemblyId, U2AssemblyPackStat &stats, U2OpStatus &os) = 0;
211 
212     /**
213         Calculates coverage information for the given region and stores it in 'coverage' vector.
214 
215         U2Region 'region' passed to the method is split into N sequential windows of equal length,
216         where N is 'coverage' vector size. Number of reads intersecting each window is guaranteed
217         to be in range stored at corresponding index in vector 'coverage'.
218     */
219     virtual void calculateCoverage(const U2DataId &assemblyId, const U2Region &region, U2AssemblyCoverageStat &coverage, U2OpStatus &os) = 0;
220 };
221 
222 }  // namespace U2
223 
224 #endif
225