1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "GenericReadActor.h"
23 #include <limits.h>
24 
25 #include <QFileInfo>
26 
27 #include <U2Core/AppContext.h>
28 #include <U2Core/DNAInfo.h>
29 #include <U2Core/DNASequenceObject.h>
30 #include <U2Core/DocumentModel.h>
31 #include <U2Core/GObjectTypes.h>
32 
33 #include <U2Designer/DelegateEditors.h>
34 
35 #include <U2Gui/DialogUtils.h>
36 #include <U2Gui/GUIUtils.h>
37 
38 #include <U2Lang/BaseActorCategories.h>
39 #include <U2Lang/BaseAttributes.h>
40 #include <U2Lang/BasePorts.h>
41 #include <U2Lang/BaseSlots.h>
42 #include <U2Lang/BaseTypes.h>
43 #include <U2Lang/CoreLibConstants.h>
44 #include <U2Lang/URLAttribute.h>
45 #include <U2Lang/WorkflowEnv.h>
46 
47 #include "CoreLib.h"
48 #include "DocActors.h"
49 #include "util/DatasetValidator.h"
50 
51 /* TRANSLATOR U2::Workflow::SeqReadPrompter */
52 /* TRANSLATOR U2::Workflow::CoreLib */
53 
54 namespace U2 {
55 namespace Workflow {
56 
57 /************************************************************************/
58 /* GenericReadDocProto */
59 /************************************************************************/
GenericReadDocProto(const Descriptor & desc)60 GenericReadDocProto::GenericReadDocProto(const Descriptor &desc)
61     : ReadDbObjActorPrototype(desc) {
62     {
63         attrs << new URLAttribute(BaseAttributes::URL_IN_ATTRIBUTE(),
64                                   BaseTypes::URL_DATASETS_TYPE(),
65                                   true);
66     }
67 
68     setEditor(new DelegateEditor(QMap<QString, PropertyDelegate *>()));
69 
70     if (AppContext::isGUIMode()) {
71         setIcon(QIcon(":/U2Designer/images/blue_circle.png"));
72     }
73 
74     setValidator(new DatasetValidator());
75 }
76 
77 const QString GenericSeqActorProto::MODE_ATTR("mode");
78 const QString GenericSeqActorProto::GAP_ATTR("merge-gap");
79 const QString GenericSeqActorProto::ACC_ATTR("accept-accession");
80 const QString GenericSeqActorProto::LIMIT_ATTR("sequence-count-limit");
81 
82 const QString GenericSeqActorProto::TYPE("generic.seq");
83 const QString GenericMAActorProto::TYPE("generic.ma");
84 
GenericSeqActorProto()85 GenericSeqActorProto::GenericSeqActorProto()
86     : GenericReadDocProto(CoreLibConstants::GENERIC_READ_SEQ_PROTO_ID) {
87     setCompatibleDbObjectTypes(QSet<GObjectType>() << GObjectTypes::SEQUENCE);
88 
89     setDisplayName(U2::Workflow::CoreLib::tr("Read Sequence"));
90     desc = U2::Workflow::CoreLib::tr("Input one or several files with nucleotide or protein sequences."
91                                      " A file may also contain annotations. Any format, supported by UGENE, is allowed (GenBank, FASTA, etc.)."
92                                      " The element outputs message(s) with the sequence and annotations data.");
93     QMap<Descriptor, DataTypePtr> m;
94     m[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
95     m[BaseSlots::DATASET_SLOT()] = BaseTypes::STRING_TYPE();
96     m[BaseSlots::DNA_SEQUENCE_SLOT()] = BaseTypes::DNA_SEQUENCE_TYPE();
97     m[BaseSlots::ANNOTATION_TABLE_SLOT()] = BaseTypes::ANNOTATION_TABLE_TYPE();
98 
99     DataTypePtr seqTypeset(new MapDataType(Descriptor(TYPE), m));
100     bool treg = WorkflowEnv::getDataTypeRegistry()->registerEntry(seqTypeset);
101     Q_UNUSED(treg);
102     assert(treg);
103 
104     ports << new PortDescriptor(Descriptor(BasePorts::OUT_SEQ_PORT_ID(), U2::Workflow::CoreLib::tr("Sequence"), U2::Workflow::CoreLib::tr("A sequence of any type (nucleotide, protein).")),
105                                 seqTypeset,
106                                 false,
107                                 true);
108     {
109         Descriptor md(GenericSeqActorProto::MODE_ATTR, SeqReadPrompter::tr("Mode"), SeqReadPrompter::tr("If the file contains more than one sequence, <i>Split</i> mode sends them \"as is\" to the output, "
110                                                                                                         "while <i>Merge</i> appends all the sequences and outputs the sole merged sequence."));
111         Descriptor gd(GenericSeqActorProto::GAP_ATTR, SeqReadPrompter::tr("Merging gap"), SeqReadPrompter::tr("In <i>Merge</i> mode, the specified number of gaps are inserted between the original sequences. "
112                                                                                                               "This is helpful, for example, to avoid finding false positives at the merge boundaries."));
113         Descriptor acd(GenericSeqActorProto::ACC_ATTR, SeqReadPrompter::tr("Accession filter"), SeqReadPrompter::tr("Reports only sequences containing the specified regular expression."
114                                                                                                                     "<p><i>Leave it empty to switch off this filter. Use <b>*</b> to mask many symbol and use <b>?</b> to mask one symbol.</i></p>"));
115         Descriptor ld(LIMIT_ATTR, SeqReadPrompter::tr("Sequence count limit"), SeqReadPrompter::tr("<i>Split mode</i> only."
116                                                                                                    "<p>Read only first N sequences from each file."
117                                                                                                    "<br>Set 0 value for reading all sequences.</p>"));
118 
119         attrs << new Attribute(md, BaseTypes::NUM_TYPE(), true, SPLIT);
120         attrs << new Attribute(gd, BaseTypes::NUM_TYPE(), false, 10);
121         attrs << new Attribute(ld, BaseTypes::NUM_TYPE(), false, 0);
122         attrs << new Attribute(acd, BaseTypes::STRING_TYPE(), false, QString());
123     }
124 
125     QMap<QString, PropertyDelegate *> delegates;
126     {
127         QVariantMap modeMap;
128         QString splitStr = SeqReadPrompter::tr("Split");
129         QString mergeStr = SeqReadPrompter::tr("Merge");
130         modeMap[splitStr] = SPLIT;
131         modeMap[mergeStr] = MERGE;
132         getEditor()->addDelegate(new ComboBoxDelegate(modeMap), MODE_ATTR);
133     }
134     {
135         QVariantMap minMaxMap;
136         minMaxMap["minimum"] = 0;
137         minMaxMap["maximum"] = INT_MAX;
138         getEditor()->addDelegate(new SpinBoxDelegate(minMaxMap), GAP_ATTR);
139         getEditor()->addDelegate(new SpinBoxDelegate(minMaxMap), LIMIT_ATTR);
140     }
141     setPrompter(new ReadDocPrompter(U2::Workflow::CoreLib::tr("Reads sequence(s) from <u>%1</u>.")));
142 
143     QString seqSlotId = BasePorts::OUT_SEQ_PORT_ID() + "." + BaseSlots::DNA_SEQUENCE_SLOT().getId();
144     QString annsSlotId = BasePorts::OUT_SEQ_PORT_ID() + "." + BaseSlots::ANNOTATION_TABLE_SLOT().getId();
145 }
146 
GenericMAActorProto()147 GenericMAActorProto::GenericMAActorProto()
148     : GenericReadDocProto(CoreLibConstants::GENERIC_READ_MA_PROTO_ID) {
149     setCompatibleDbObjectTypes(QSet<GObjectType>() << GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT);
150 
151     setDisplayName(U2::Workflow::CoreLib::tr("Read Alignment"));
152     desc = U2::Workflow::CoreLib::tr("Input one or several files in one of the multiple sequence alignment formats, supported by UGENE (ClustalW, FASTA, etc.)."
153                                      " The element outputs message(s) with the alignment data.");
154 
155     QMap<Descriptor, DataTypePtr> m;
156     m[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
157     m[BaseSlots::DATASET_SLOT()] = BaseTypes::STRING_TYPE();
158     m[BaseSlots::MULTIPLE_ALIGNMENT_SLOT()] = BaseTypes::MULTIPLE_ALIGNMENT_TYPE();
159     DataTypePtr blockTypeset(new MapDataType(Descriptor(TYPE), m));
160     bool treg = WorkflowEnv::getDataTypeRegistry()->registerEntry(blockTypeset);
161     Q_UNUSED(treg);
162     assert(treg);
163 
164     ports << new PortDescriptor(Descriptor(BasePorts::OUT_MSA_PORT_ID(), U2::Workflow::CoreLib::tr("Multiple sequence alignment"), ""),
165                                 blockTypeset,
166                                 false,
167                                 true);
168 
169     setPrompter(new ReadDocPrompter(U2::Workflow::CoreLib::tr("Reads MSA(s) from <u>%1</u>.")));
170 
171     if (AppContext::isGUIMode()) {
172         setIcon(QIcon(":/U2Designer/images/blue_circle.png"));
173     }
174 }
175 
isAcceptableDrop(const QMimeData * md,QVariantMap * params) const176 bool GenericMAActorProto::isAcceptableDrop(const QMimeData *md, QVariantMap *params) const {
177     QList<DocumentFormat *> fs;
178     QString url = WorkflowUtils::getDropUrl(fs, md);
179     foreach (DocumentFormat *f, fs) {
180         if (f->getSupportedObjectTypes().contains(GObjectTypes::MULTIPLE_SEQUENCE_ALIGNMENT)) {
181             if (params != nullptr) {
182                 params->insert(BaseAttributes::URL_IN_ATTRIBUTE().getId(), url);
183             }
184             return true;
185         }
186     }
187     /*DocumentFormatConstraints sc;
188     sc.supportedObjectTypes.append(GObjectTypes::SEQUENCE);
189     foreach(DocumentFormat* f, fs) {
190         if (f->checkConstraints(sc)) {
191             if (params) {
192                 params->insert(CoreLib::URL_ATTR_ID, url);
193             }
194             return true;
195         }
196     }*/
197     if (QFileInfo(url).isDir()) {
198         if (params) {
199             params->insert(BaseAttributes::URL_IN_ATTRIBUTE().getId(), url + "/*");
200         }
201         return true;
202     }
203 
204     return false;
205 }
206 
isAcceptableDrop(const QMimeData * md,QVariantMap * params) const207 bool GenericSeqActorProto::isAcceptableDrop(const QMimeData *md, QVariantMap *params) const {
208     QList<DocumentFormat *> fs;
209     const GObjectMimeData *gomd = qobject_cast<const GObjectMimeData *>(md);
210     if (gomd && params) {
211         const U2SequenceObject *obj = qobject_cast<const U2SequenceObject *>(gomd->objPtr.data());
212         if (obj) {
213             params->insert(BaseAttributes::URL_IN_ATTRIBUTE().getId(), obj->getDocument()->getURLString());
214             QString acc = obj->getStringAttribute(DNAInfo::ACCESSION);
215             if (acc.isEmpty()) {
216                 acc = obj->getSequenceName();
217             }
218             params->insert(ACC_ATTR, acc);
219         }
220     }
221 
222     QString url = WorkflowUtils::getDropUrl(fs, md);
223     foreach (DocumentFormat *f, fs) {
224         if (f->getSupportedObjectTypes().contains(GObjectTypes::SEQUENCE)) {
225             if (params) {
226                 params->insert(BaseAttributes::URL_IN_ATTRIBUTE().getId(), url);
227             }
228             return true;
229         }
230     }
231     if (QFileInfo(url).isDir()) {
232         if (params) {
233             params->insert(BaseAttributes::URL_IN_ATTRIBUTE().getId(), url + "/*");
234         }
235         return true;
236     }
237 
238     return false;
239 }
240 
composeRichDoc()241 QString SeqReadPrompter::composeRichDoc() {
242     // TODO finish
243     // BusPort* input = qobject_cast<BusPort*>(target->getPort(CoreLib::DATA_PORT_ID));
244     QString url = getURL(BaseAttributes::URL_IN_ATTRIBUTE().getId());
245     return QString("%1").arg(url);
246 }
247 
248 }  // namespace Workflow
249 }  // namespace U2
250