1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifndef _U2_DOCUMENT_MODEL_H_
23 #define _U2_DOCUMENT_MODEL_H_
24 
25 #include <QDateTime>
26 #include <QMimeData>
27 #include <QPointer>
28 #include <QScriptValue>
29 
30 #include <U2Core/DNASequence.h>
31 #include <U2Core/GUrl.h>
32 #include <U2Core/U2FormatCheckResult.h>
33 #include <U2Core/UnloadedObject.h>
34 #include <U2Core/global.h>
35 
36 #include "StateLockableDataModel.h"
37 
38 namespace U2 {
39 
40 class U2OpStatus;
41 
42 class Document;
43 class GObject;
44 class DocumentFormat;
45 class IOAdapterFactory;
46 class IOAdapter;
47 class DocumentFormatConstraints;
48 class GHints;
49 class TmpDbiHandle;
50 class U2SequenceObject;
51 
/**
 * Capability bits describing a document format.
 * Every enumerator is a distinct single-bit mask so that values can be combined in DocumentFormatFlags.
 */
enum DocumentFormatFlag {
    /** The format supports reading objects from a data stream and detects object boundaries correctly for all object types. */
    DocumentFormatFlag_SupportStreaming = 0x001,

    /** The format supports writing. */
    DocumentFormatFlag_SupportWriting = 0x002,

    /** A document can contain only 1 object: text, raw sequence or some formats that do not support streaming. */
    DocumentFormatFlag_SingleObjectFormat = 0x004,

    /** A document can't be read from a packed stream. Used for database files. */
    DocumentFormatFlag_NoPack = 0x008,

    /** A document is not fully loaded to memory. Used for database files. */
    DocumentFormatFlag_NoFullMemoryLoad = 0x010,

    /** The format is not included into format recognition by default. */
    DocumentFormatFlag_Hidden = 0x020,

    /** A document contains only one object of each supported type. */
    DocumentFormatFlag_OnlyOneObject = 0x040,

    /** UGENE is unable to create a new document of this format, although it can modify existing documents. */
    DocumentFormatFlag_CannotBeCreated = 0x080,

    /** A document can contain objects with duplicate names. */
    DocumentFormatFlag_AllowDuplicateNames = 0x100,

    /** Document changes are written immediately, they shouldn't be saved on closing. Used for databases. */
    DocumentFormatFlag_DirectWriteOperations = 0x200,

    /** A document can be locked if it was not created by UGENE. */
    DocumentFormatFlag_LockedIfNotCreatedByUGENE = 0x400,

    /** NOTE(review): undocumented in the original; presumably documents of this format must not be stored compressed - confirm. */
    DocumentFormatFlag_CannotBeCompressed = 0x800
};
81 
82 typedef QFlags<DocumentFormatFlag> DocumentFormatFlags;
83 #define DocumentFormatFlags_SW (DocumentFormatFlags(DocumentFormatFlag_SupportStreaming) | DocumentFormatFlag_SupportWriting)
84 #define DocumentFormatFlags_W1 (DocumentFormatFlags(DocumentFormatFlag_SupportWriting) | DocumentFormatFlag_SingleObjectFormat)
85 
/*
 * Hints provided by raw data check routines.
 */

/** 'true' if the file contains at least one sequence. */
#define RawDataCheckResult_Sequence "sequence"

/** 'true' if at least one sequence in the file has gaps. */
#define RawDataCheckResult_SequenceWithGaps "sequence-with-gaps"

/** 'true' if multiple sequences were found. */
#define RawDataCheckResult_MultipleSequences "multiple-sequences"

/** Estimation of the minimal size of a sequence from the document. */
#define RawDataCheckResult_MinSequenceSize "sequence-min-size"

/** Estimation of the maximum size of a sequence from the document. */
#define RawDataCheckResult_MaxSequenceSize "sequence-max-size"

/** Length of the sequence as read from the header of the sequence file. Used by the "merge files" option. */
#define RawDataCheckResult_HeaderSequenceLength "header-sequence-length"

/** Text decoded from the binary input data. Present only for text document formats. */
#define RawDataCheckResult_RawTextData "raw-text-data"

/*
 * Hints that can be processed during document loading.
 */
#define DocumentReadingMode_SequenceMergeGapSize "merge-gap"
#define DocumentReadingMode_SequenceMergingFinalSizeHint "merge-size"
#define DocumentReadingMode_SequenceAsAlignmentHint "sequences-are-msa"
#define DocumentReadingMode_SequenceAsShortReadsHint "sequences-are-short-reads"
#define DocumentReadingMode_SequenceAsSeparateHint "sequences-separate-mode"
#define DocumentReadingMode_MaxObjectsInDoc "max-objects-in-doc"
#define DocumentReadingMode_DontMakeUniqueNames "no-unique-names"
#define DocumentReadingMode_LoadAsModified "load-as-modified"

/*
 * Hints that can be processed during document storing.
 */
#define DocumentWritingMode_SimpleNames "simple-names"

/*
 * Hints that can affect a document's removal.
 */
#define DocumentRemovalMode_Synchronous "synchronous-delete"

/*
 * Hint for splitting variations.
 */
#define DocumentReadingMode_SplitVariationAlleles "split-alleles"

/*
 * Hints that can be processed during objects conversion.
 */
#define ObjectConvertion_UseGenbankHeader "use-genbank-header"
130 
131 class U2CORE_EXPORT DocumentFormat : public QObject {
132     Q_OBJECT
133 public:
134     static const QString DBI_REF_HINT;
135     static const QString DBI_FOLDER_HINT;
136     static const QString DEEP_COPY_OBJECT;
137     static const QString STRONG_FORMAT_ACCORDANCE;
138 
139     static const int READ_BUFF_SIZE;
140 
141     enum DocObjectOp {
142         DocObjectOp_Add,
143         DocObjectOp_Remove
144     };
145 
146     DocumentFormat(QObject *p, const DocumentFormatId &id, DocumentFormatFlags _flags, const QStringList &fileExts = QStringList());
147 
148     /* returns unique document format id */
getFormatId()149     const DocumentFormatId &getFormatId() const {
150         return id;
151     }
152 
153     /* Returns localized format name. */
getFormatName()154     const QString &getFormatName() const {
155         return formatName;
156     }
157 
158     /* Returns localized format description. */
getFormatDescription()159     const QString &getFormatDescription() const {
160         return formatDescription;
161     }
162 
163     /* returns list of usual file extensions for the format
164        Example: "fa", "fasta", "gb" ...
165     */
getSupportedDocumentFileExtensions()166     virtual QStringList getSupportedDocumentFileExtensions() const {
167         return fileExtensions;
168     }
169 
170     /** Creates new empty document in loaded form.
171      * Assigns DBI if needed
172      * Hints contain additional information for document loading Common hints:
173      * 1) DBI_ALIAS_HINT is used for keeping special DBI-alias
174      */
175     virtual Document *createNewLoadedDocument(IOAdapterFactory *io, const GUrl &url, U2OpStatus &os, const QVariantMap &hints = QVariantMap());
176 
177     /** Creates new document in unloaded state. Assigns DBI if needed */
178     virtual Document *createNewUnloadedDocument(IOAdapterFactory *iof, const GUrl &url, U2OpStatus &os, const QVariantMap &hints = QVariantMap(), const QList<UnloadedObjectInfo> &info = QList<UnloadedObjectInfo>(), const QString &instanceModLockDesc = QString());
179 
180     /** A method for compatibility with old code : creates IO adapter and loads document in DocumentLoadMode_Whole
181      * Hints contain additional information for document loading. Common hints:
182      * 1) DBI_ALIAS_HINT is used for keeping special DBI-alias
183      */
184     virtual Document *loadDocument(IOAdapterFactory *iof, const GUrl &url, const QVariantMap &hints, U2OpStatus &os);
185 
186     /**
187         Loads single dna sequence in streaming mode.
188         Note! this function is available only if format supports streaming mode and sequences as an stored data type
189     */
190     virtual DNASequence *loadSequence(IOAdapter *io, U2OpStatus &ti);
191 
192     virtual void storeDocument(Document *d, U2OpStatus &os, IOAdapterFactory *io = nullptr, const GUrl &newDocURL = GUrl());
193 
194     /* io - opened IOAdapter
195      * so you can store many documents to this file
196      */
197     virtual void storeDocument(Document *d, IOAdapter *io, U2OpStatus &os);
198 
199     /** Checks if object can be added/removed to the document */
200     virtual bool isObjectOpSupported(const Document *d, DocObjectOp op, GObjectType t) const;
201 
202     /*
203         Returns score rating that indicates that the data supplied is recognized as a valid document format
204         Note: Data can contain only first N (~1024) bytes of the file
205         The URL value is optional and provided as supplementary option. URL value here can be empty in some special cases.
206     */
207     virtual FormatCheckResult checkRawData(const QByteArray &dataPrefix, const GUrl &url = GUrl()) const = 0;
208 
209     /* Checks that document format satisfies given constraints */
210     virtual bool checkConstraints(const DocumentFormatConstraints &c) const;
211 
212     /* Default implementation does nothing */
updateFormatSettings(Document * d)213     virtual void updateFormatSettings(Document *d) const {
214         Q_UNUSED(d);
215     }
216 
217     /*
218         These object types can be produced by reading documents
219         If the format supports write it must support write operation for all the object types it support
220     */
getSupportedObjectTypes()221     const QSet<GObjectType> &getSupportedObjectTypes() const {
222         return supportedObjectTypes;
223     }
224 
getFlags()225     DocumentFormatFlags getFlags() const {
226         return formatFlags;
227     }
228 
checkFlags(DocumentFormatFlags flagsToCheck)229     bool checkFlags(DocumentFormatFlags flagsToCheck) const {
230         return (formatFlags | flagsToCheck) == formatFlags;
231     }
232 
233     /**
234      * Streaming mode formats implement getSequence() and storeEntry() methods
235      */
isStreamingSupport()236     virtual bool isStreamingSupport() {
237         return formatFlags.testFlag(DocumentFormatFlag_SupportStreaming);
238     }
239 
240     virtual void storeEntry(IOAdapter *io, const QMap<GObjectType, QList<GObject *>> &objectsMap, U2OpStatus &os);
241 
242     virtual QString getRadioButtonText() const;
243 
244 protected:
245     /* io - opened IOAdapter.
246      * if document format supports streaming reading it must correctly process DocumentLoadMode
247      * otherwise, it will load all file from starting position ( default )
248      */
249     virtual Document *loadDocument(IOAdapter *io, const U2DbiRef &targetDb, const QVariantMap &hints, U2OpStatus &os) = 0;
250 
251     DocumentFormatId id;
252     DocumentFormatFlags formatFlags;
253     QStringList fileExtensions;
254     QSet<GObjectType> supportedObjectTypes;
255     QString formatName;
256     QString formatDescription;
257 
258 private:
259     U2DbiRef fetchDbiRef(const QVariantMap &hints, U2OpStatus &os) const;
260 };
261 
262 class DocumentFormatConstraints {
263 public:
DocumentFormatConstraints()264     DocumentFormatConstraints()
265         : flagsToSupport(0), flagsToExclude(0), checkRawData(false),
266           minDataCheckResult(FormatDetection_VeryLowSimilarity), allowPartialTypeMapping(false) {
267     }
268 
clear()269     void clear() {
270         flagsToSupport = 0;
271         flagsToExclude = 0;
272         checkRawData = false;
273         rawData.clear();
274         minDataCheckResult = FormatDetection_VeryLowSimilarity;
275         allowPartialTypeMapping = false;
276         formatsToExclude.clear();
277     }
addFlagToSupport(DocumentFormatFlag f)278     void addFlagToSupport(DocumentFormatFlag f) {
279         flagsToSupport |= f;
280     }
addFlagToExclude(DocumentFormatFlag f)281     void addFlagToExclude(DocumentFormatFlag f) {
282         flagsToExclude |= f;
283     }
284 
285     // If 'true' the format supports write operation
286     DocumentFormatFlags flagsToSupport;
287     DocumentFormatFlags flagsToExclude;
288     QSet<GObjectType> supportedObjectTypes;
289 
290     bool checkRawData;
291     QByteArray rawData;
292     FormatDetectionScore minDataCheckResult;
293     bool allowPartialTypeMapping;
294     QSet<DocumentFormatId> formatsToExclude;
295 };
296 
297 class DocumentImportersRegistry;
298 class U2CORE_EXPORT DocumentFormatRegistry : public QObject {
299     Q_OBJECT
300 public:
301     DocumentFormatRegistry(QObject *p = nullptr)
QObject(p)302         : QObject(p) {
303     }
304 
305     virtual bool registerFormat(DocumentFormat *dfs) = 0;
306 
307     virtual bool unregisterFormat(DocumentFormat *dfs) = 0;
308 
309     virtual QList<DocumentFormatId> getRegisteredFormats() const = 0;
310 
311     virtual DocumentFormat *getFormatById(DocumentFormatId id) const = 0;
312 
313     virtual DocumentFormat *selectFormatByFileExtension(const QString &fileExt) const = 0;
314 
315     virtual QList<DocumentFormatId> selectFormats(const DocumentFormatConstraints &c) const = 0;
316 
317     virtual DocumentImportersRegistry *getImportSupport() = 0;
318 
319 signals:
320     void si_documentFormatRegistered(DocumentFormat *);
321     void si_documentFormatUnregistered(DocumentFormat *);
322 };
323 
/**
 * Kinds of modification locks tracked per document.
 * The values index the Document::modLocks array, whose size is DocumentModLock_NUM_LOCKS,
 * so enumerators must stay contiguous and DocumentModLock_NUM_LOCKS must stay last.
 */
enum DocumentModLock {
    DocumentModLock_IO = 0,
    DocumentModLock_USER = 1,
    DocumentModLock_FORMAT_AS_CLASS = 2,
    DocumentModLock_FORMAT_AS_INSTANCE = 3,
    DocumentModLock_UNLOADED_STATE = 4,  // a lock in this slot marks the document as not loaded, see Document::isLoaded()
    DocumentModLock_NUM_LOCKS = 5  // number of lock kinds, not a lock itself
};
332 
/** Selects what Document::removeObject() does with the removed object. */
enum DocumentObjectRemovalMode {
    /** Ordinary removal: both the object and its DB representation are deallocated. */
    DocumentObjectRemovalMode_Deallocate,

    /**
     * Fake removal: neither the object nor its DB representation are deallocated.
     * Only signals about removal are emitted. The object itself remains to belong to the document.
     */
    DocumentObjectRemovalMode_OnlyNotify,

    /**
     * Fake removal: the same as DocumentObjectRemovalMode_OnlyNotify.
     * Additionally, the object is excluded from the document's child objects list.
     * External code has to handle a proper object deallocation.
     */
    DocumentObjectRemovalMode_Release
};
341 
342 class DocumentChildEventsHelper;
343 
344 class U2CORE_EXPORT Document : public StateLockableTreeItem {
345     Q_OBJECT
346     Q_PROPERTY(QString name WRITE setName READ getName)
347     Q_PROPERTY(GUrl url WRITE setURL READ getURL)
348 
349     friend class DocumentChildEventsHelper;
350 
351 public:
352     class Constraints {
353     public:
Constraints()354         Constraints()
355             : stateLocked(TriState_Unknown) {
356         }
357         TriState stateLocked;
358         QList<DocumentModLock> notAllowedStateLocks;  // if document contains one of these locks -> it's not matched
359         QList<DocumentFormatId> formats;  // document format must be in list to match
360         GObjectType objectTypeToAdd;  // document must be ready to add objects of the specified type
361     };
362     static const QString UNLOAD_LOCK_NAME;
363 
364     // Creates document in unloaded state. Populates it with unloaded objects
365     Document(DocumentFormat *_df, IOAdapterFactory *_io, const GUrl &_url, const U2DbiRef &_dbiRef, const QList<UnloadedObjectInfo> &unloadedObjects = QList<UnloadedObjectInfo>(), const QVariantMap &hints = QVariantMap(), const QString &instanceModLockDesc = QString());
366 
367     // Creates document in loaded state.
368     Document(DocumentFormat *_df, IOAdapterFactory *_io, const GUrl &_url, const U2DbiRef &_dbiRef, const QList<GObject *> &objects, const QVariantMap &hints = QVariantMap(), const QString &instanceModLockDesc = QString());
369 
370     virtual ~Document();
371 
getDocumentFormat()372     DocumentFormat *getDocumentFormat() const {
373         return df;
374     }
375 
getDocumentFormatId()376     DocumentFormatId getDocumentFormatId() const {
377         return df->getFormatId();
378     }
379 
getIOAdapterFactory()380     IOAdapterFactory *getIOAdapterFactory() const {
381         return io;
382     }
383 
384     void setIOAdapterFactory(IOAdapterFactory *iof);
385 
getObjects()386     const QList<GObject *> &getObjects() const {
387         return objects;
388     }
389 
390     GObject *getObjectById(const U2DataId &id) const;
391 
392     void addObject(GObject *ref);
393 
394     bool removeObject(GObject *o, DocumentObjectRemovalMode removalMode = DocumentObjectRemovalMode_Deallocate);
395 
396     void setObjectsInUse(const QSet<U2DataId> &objs);
397 
getName()398     const QString &getName() const {
399         return name;
400     }
401 
402     void setName(const QString &newName);
403 
getURL()404     const GUrl &getURL() const {
405         return url;
406     }
407 
getURLString()408     const QString &getURLString() const {
409         return url.getURLString();
410     }
411 
412     void setURL(const GUrl &newUrl);
413 
414     void makeClean();
415 
416     void setModificationTrack(bool track);
417 
418     // avoid using this method against shared databases documents,
419     // since databases allow many objects with the same name. The method returns
420     // first matched GObject. Use `getObjectById` instead.
421     GObject *findGObjectByName(const QString &name) const;
422 
423     QList<GObject *> findGObjectByType(GObjectType t, UnloadedObjectFilter f = UOF_LoadedOnly) const;
424 
isLoaded()425     bool isLoaded() const {
426         return modLocks[DocumentModLock_UNLOADED_STATE] == 0;
427     }
428 
429     void setLoaded(bool v);
430 
431     /** Takes content from sourceDoc. Source doc becomes unloaded after this op is completed! */
432     void loadFrom(Document *sourceDoc);
433 
434     /** If deleteObjects is false -> does not delete objects and leaves them without parent */
435     bool unload(bool deleteObjects = true);
436 
437     bool checkConstraints(const Constraints &c) const;
438 
getGHints()439     GHints *getGHints() const {
440         return ctxState;
441     }
442 
443     void setGHints(GHints *state);
444 
445     QVariantMap getGHintsMap() const;
446 
getDocumentModLock(DocumentModLock type)447     StateLock *getDocumentModLock(DocumentModLock type) const {
448         return modLocks[type];
449     }
450 
451     void propagateModLocks(Document *doc) const;
452 
hasUserModLock()453     bool hasUserModLock() const {
454         return modLocks[DocumentModLock_USER] != nullptr;
455     }
456 
457     void setUserModLock(bool v);
458 
459     virtual void setModified(bool modified, const QString &modType = QString());
460 
461     virtual bool isModificationAllowed(const QString &modType);
462 
isModified()463     bool isModified() const {
464         return isTreeItemModified();
465     }
466 
467     void setLastUpdateTime();
468 
getLastUpdateTime()469     const QDateTime &getLastUpdateTime() const {
470         return lastUpdateTime;
471     }
472 
473     const U2DbiRef &getDbiRef() const;
474 
isDocumentOwnsDbiResources()475     inline bool isDocumentOwnsDbiResources() const {
476         return documentOwnsDbiResources;
477     }
478 
setDocumentOwnsDbiResources(bool value)479     inline void setDocumentOwnsDbiResources(bool value) {
480         documentOwnsDbiResources = value;
481     }
482 
483     virtual bool isDatabaseConnection() const;
484 
485     static void setupToEngine(QScriptEngine *engine);
486 
487     Document *getSimpleCopy(DocumentFormat *df, IOAdapterFactory *io, const GUrl &url) const;
488 
489 private:
490     static QScriptValue toScriptValue(QScriptEngine *engine, Document *const &in);
491     static void fromScriptValue(const QScriptValue &object, Document *&out);
492 
493 protected:
494     void removeObjectsDataFromDbi(QList<GObject *> objects);
495     bool _removeObject(GObject *o, bool deleteObjects = true);
496     void _addObject(GObject *obj);
497     void _addObjectToHierarchy(GObject *obj);
498 
499     void initModLocks(const QString &instanceModLockDesc, bool loaded);
500 
501     void checkUnloadedState() const;
502     void checkLoadedState() const;
503     void checkUniqueObjectNames() const;
504     void addUnloadedObjects(const QList<UnloadedObjectInfo> &info);
505 
506     GObject *findGObjectByNameInDb(const QString &name) const;
507     GObject *findGObjectByNameInMem(const QString &name) const;
508 
509     DocumentFormat *const df;
510     IOAdapterFactory *io;
511     GUrl url;
512     U2DbiRef dbiRef;  // Default dbi ref for the document
513 
514     QString name; /* display name == short pathname, excluding the path */
515     QList<GObject *> objects;
516     QHash<U2DataId, GObject *> id2Object;
517     QSet<U2DataId> objectsInUse;
518     GHints *ctxState;
519     QDateTime lastUpdateTime;
520     bool documentOwnsDbiResources;
521 
522     StateLock *modLocks[DocumentModLock_NUM_LOCKS];
523     bool loadStateChangeMode;
524 
525 signals:
526     void si_urlChanged();
527     void si_nameChanged();
528     void si_objectAdded(GObject *o);
529     void si_objectRemoved(GObject *o);
530     void si_loadedStateChanged();
531 };
532 
533 // TODO: decide if to use filters or constraints. May be it worth to remove Document::Constraints at all..
534 
535 class U2CORE_EXPORT DocumentFilter {
536 public:
~DocumentFilter()537     virtual ~DocumentFilter() {
538     }
539     virtual bool matches(Document *doc) const = 0;
540 };
541 
542 class U2CORE_EXPORT DocumentConstraintsFilter : public DocumentFilter {
543 public:
DocumentConstraintsFilter(const Document::Constraints & _c)544     DocumentConstraintsFilter(const Document::Constraints &_c)
545         : constraints(_c) {
546     }
547 
matches(Document * doc)548     virtual bool matches(Document *doc) const {
549         return doc->checkConstraints(constraints);
550     }
551 
552 protected:
553     Document::Constraints constraints;
554 };
555 
556 class U2CORE_EXPORT DocumentMimeData : public QMimeData {
557     Q_OBJECT
558 public:
559     static const QString MIME_TYPE;
560     DocumentMimeData(Document *obj);
561     QPointer<Document> objPtr;
hasFormat(const QString & mimeType)562     bool hasFormat(const QString &mimeType) const {
563         return (mimeType == MIME_TYPE);
564     }
formats()565     QStringList formats() const {
566         return (QStringList() << MIME_TYPE);
567     }
568 };
569 
570 }  // namespace U2
571 
572 Q_DECLARE_METATYPE(U2::Document *)
573 Q_DECLARE_OPERATORS_FOR_FLAGS(U2::DocumentFormatFlags)
574 
575 #endif
576