1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "KrakenClassifyWorkerFactory.h"
23
24 #include <QThread>
25
26 #include <U2Core/AppContext.h>
27 #include <U2Core/AppResources.h>
28 #include <U2Core/AppSettings.h>
29 #include <U2Core/BaseDocumentFormats.h>
30 #include <U2Core/DataPathRegistry.h>
31 #include <U2Core/L10n.h>
32
33 #include <U2Designer/DelegateEditors.h>
34
35 #include <U2Gui/DialogUtils.h>
36
37 #include <U2Lang/ActorPrototypeRegistry.h>
38 #include <U2Lang/BaseSlots.h>
39 #include <U2Lang/BaseTypes.h>
40 #include <U2Lang/PairedReadsPortValidator.h>
41 #include <U2Lang/WorkflowEnv.h>
42
43 #include "../../ngs_reads_classification/src/DatabaseDelegate.h"
44 #include "../../ngs_reads_classification/src/NgsReadsClassificationPlugin.h"
45 #include "DatabaseSizeRelation.h"
46 #include "KrakenClassifyPrompter.h"
47 #include "KrakenClassifyValidator.h"
48 #include "KrakenClassifyWorker.h"
49 #include "KrakenSupport.h"
50
51 namespace U2 {
52 namespace LocalWorkflow {
53
54 const QString KrakenClassifyWorkerFactory::ACTOR_ID = "kraken-classify";
55
56 const QString KrakenClassifyWorkerFactory::INPUT_PORT_ID = "in";
57 const QString KrakenClassifyWorkerFactory::OUTPUT_PORT_ID = "out";
58
59 // Slots should be the same as in GetReadsListWorkerFactory
60 const QString KrakenClassifyWorkerFactory::INPUT_SLOT = "reads-url1";
61 const QString KrakenClassifyWorkerFactory::PAIRED_INPUT_SLOT = "reads-url2";
62
63 const QString KrakenClassifyWorkerFactory::INPUT_DATA_ATTR_ID = "input-data";
64 const QString KrakenClassifyWorkerFactory::DATABASE_ATTR_ID = "database";
65 const QString KrakenClassifyWorkerFactory::OUTPUT_URL_ATTR_ID = "output-url";
66 const QString KrakenClassifyWorkerFactory::QUICK_OPERATION_ATTR_ID = "quick-operation";
67 const QString KrakenClassifyWorkerFactory::MIN_HITS_NUMBER_ATTR_ID = "min-hits";
68 const QString KrakenClassifyWorkerFactory::THREADS_NUMBER_ATTR_ID = "threads";
69 const QString KrakenClassifyWorkerFactory::PRELOAD_DATABASE_ATTR_ID = "preload";
70
71 const QString KrakenClassifyWorkerFactory::SINGLE_END_TEXT = "SE reads or contigs";
72 const QString KrakenClassifyWorkerFactory::PAIRED_END_TEXT = "PE reads";
73
74 const QString KrakenClassifyWorkerFactory::WORKFLOW_CLASSIFY_TOOL_KRAKEN = "Kraken";
75
KrakenClassifyWorkerFactory()76 KrakenClassifyWorkerFactory::KrakenClassifyWorkerFactory()
77 : DomainFactory(ACTOR_ID) {
78 }
79
createWorker(Actor * actor)80 Worker *KrakenClassifyWorkerFactory::createWorker(Actor *actor) {
81 return new KrakenClassifyWorker(actor);
82 }
83
init()84 void KrakenClassifyWorkerFactory::init() {
85 QList<PortDescriptor *> ports;
86 {
87 const Descriptor inSlotDesc(INPUT_SLOT,
88 KrakenClassifyPrompter::tr("Input URL 1"),
89 KrakenClassifyPrompter::tr("Input URL 1."));
90
91 const Descriptor inPairedSlotDesc(PAIRED_INPUT_SLOT,
92 KrakenClassifyPrompter::tr("Input URL 2"),
93 KrakenClassifyPrompter::tr("Input URL 2."));
94
95 QMap<Descriptor, DataTypePtr> inType;
96 inType[inSlotDesc] = BaseTypes::STRING_TYPE();
97 inType[inPairedSlotDesc] = BaseTypes::STRING_TYPE();
98
99 QMap<Descriptor, DataTypePtr> outType;
100 outType[TaxonomySupport::TAXONOMY_CLASSIFICATION_SLOT()] = TaxonomySupport::TAXONOMY_CLASSIFICATION_TYPE();
101
102 const Descriptor inPortDesc(INPUT_PORT_ID,
103 KrakenClassifyPrompter::tr("Input sequences"),
104 KrakenClassifyPrompter::tr("URL(s) to FASTQ or FASTA file(s) should be provided.\n\n"
105 "In case of SE reads or contigs use the \"Input URL 1\" slot only.\n\n"
106 "In case of PE reads input \"left\" reads to \"Input URL 1\", \"right\" reads to \"Input URL 2\".\n\n"
107 "See also the \"Input data\" parameter of the element."));
108 Descriptor outPortDesc(OUTPUT_PORT_ID, KrakenClassifyPrompter::tr("Kraken Classification"), KrakenClassifyPrompter::tr("A map of sequence names with the associated taxonomy IDs, classified by Kraken."));
109
110 ports << new PortDescriptor(inPortDesc, DataTypePtr(new MapDataType(ACTOR_ID + "-in", inType)), true /*input*/);
111 ports << new PortDescriptor(outPortDesc, DataTypePtr(new MapDataType(ACTOR_ID + "-out", outType)), false /*input*/, true /*multi*/);
112 }
113
114 QList<Attribute *> attributes;
115 {
116 Descriptor inputDataDesc(INPUT_DATA_ATTR_ID, KrakenClassifyPrompter::tr("Input data"), KrakenClassifyPrompter::tr("To classify single-end (SE) reads or contigs, received by reads de novo assembly, set this parameter to \"SE reads or contigs\".<br><br>"
117 "To classify paired-end (PE) reads, set the value to \"PE reads\".<br><br>"
118 "One or two slots of the input port are used depending on the value of the parameter. Pass URL(s) to data to these slots.<br><br>"
119 "The input files should be in FASTA or FASTQ formats."));
120
121 Descriptor databaseDesc(DATABASE_ATTR_ID, KrakenClassifyPrompter::tr("Database"), KrakenClassifyPrompter::tr("A path to the folder with the Kraken database files."));
122
123 Descriptor outputUrlDesc(OUTPUT_URL_ATTR_ID, KrakenClassifyPrompter::tr("Output file"), KrakenClassifyPrompter::tr("Specify the output file name."));
124
125 Descriptor quickOperationDesc(QUICK_OPERATION_ATTR_ID, KrakenClassifyPrompter::tr("Quick operation"), KrakenClassifyPrompter::tr("Stop classification of an input read after the certain number of hits.<br><br>"
126 "The value can be specified in the \"Minimum number of hits\" parameter."));
127
128 Descriptor minHitsDesc(MIN_HITS_NUMBER_ATTR_ID, KrakenClassifyPrompter::tr("Minimum number of hits"), KrakenClassifyPrompter::tr("The number of hits that are required to declare an input sequence classified.<br><br>"
129 "This can be especially useful with custom databases when testing to see if sequences either do or do not belong to a particular genome."));
130
131 Descriptor threadsDesc(THREADS_NUMBER_ATTR_ID, KrakenClassifyPrompter::tr("Number of threads"), KrakenClassifyPrompter::tr("Use multiple threads (--threads)."));
132
133 Descriptor preloadDatabaseDesc(PRELOAD_DATABASE_ATTR_ID, KrakenClassifyPrompter::tr("Load database into memory"), KrakenClassifyPrompter::tr("Load the Kraken database into RAM (--preload).<br><br>"
134 "This can be useful to improve the speed. The database size should be less than the RAM size.<br><br>"
135 "The other option to improve the speed is to store the database on ramdisk. Set this parameter to \"False\" in this case."));
136
137 Descriptor classifyToolDesc(NgsReadsClassificationPlugin::WORKFLOW_CLASSIFY_TOOL_ID,
138 WORKFLOW_CLASSIFY_TOOL_KRAKEN,
139 "Classify tool. Hidden attribute");
140
141 Attribute *inputDataAttribute = new Attribute(inputDataDesc, BaseTypes::STRING_TYPE(), false, KrakenClassifyTaskSettings::SINGLE_END);
142 inputDataAttribute->addSlotRelation(new SlotRelationDescriptor(INPUT_PORT_ID, PAIRED_INPUT_SLOT, QVariantList() << KrakenClassifyTaskSettings::PAIRED_END));
143 attributes << inputDataAttribute;
144
145 QString minikrakenPath;
146 U2DataPath *minikrakenDataPath = AppContext::getDataPathRegistry()->getDataPathByName(NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_DATA_ID);
147 if (nullptr != minikrakenDataPath && minikrakenDataPath->isValid()) {
148 minikrakenPath = minikrakenDataPath->getPathByName(NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_ITEM_ID);
149 }
150 Attribute *databaseAttribute = new Attribute(databaseDesc, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding, minikrakenPath);
151 attributes << databaseAttribute;
152
153 attributes << new Attribute(quickOperationDesc, BaseTypes::BOOL_TYPE(), Attribute::None, false);
154
155 Attribute *minHitsAttribute = new Attribute(minHitsDesc, BaseTypes::NUM_TYPE(), Attribute::None, 1);
156 attributes << minHitsAttribute;
157
158 attributes << new Attribute(preloadDatabaseDesc, BaseTypes::BOOL_TYPE(), Attribute::None, true);
159 attributes << new Attribute(threadsDesc, BaseTypes::NUM_TYPE(), Attribute::None, AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount());
160 attributes << new Attribute(outputUrlDesc, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding | Attribute::CanBeEmpty);
161
162 attributes << new Attribute(classifyToolDesc, BaseTypes::STRING_TYPE(), static_cast<Attribute::Flags>(Attribute::Hidden), WORKFLOW_CLASSIFY_TOOL_KRAKEN);
163
164 minHitsAttribute->addRelation(new VisibilityRelation(QUICK_OPERATION_ATTR_ID, "true"));
165 databaseAttribute->addRelation(new DatabaseSizeRelation(PRELOAD_DATABASE_ATTR_ID));
166 }
167
168 QMap<QString, PropertyDelegate *> delegates;
169 {
170 QVariantMap inputDataMap;
171 inputDataMap[SINGLE_END_TEXT] = KrakenClassifyTaskSettings::SINGLE_END;
172 inputDataMap[PAIRED_END_TEXT] = KrakenClassifyTaskSettings::PAIRED_END;
173 delegates[INPUT_DATA_ATTR_ID] = new ComboBoxDelegate(inputDataMap);
174
175 delegates[DATABASE_ATTR_ID] = new DatabaseDelegate(ACTOR_ID,
176 DATABASE_ATTR_ID,
177 NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_DATA_ID,
178 NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_ITEM_ID,
179 "kraken/database",
180 true);
181
182 DelegateTags outputUrlTags;
183 outputUrlTags.set(DelegateTags::PLACEHOLDER_TEXT, "Auto");
184 outputUrlTags.set(DelegateTags::FILTER, DialogUtils::prepareDocumentsFileFilter(BaseDocumentFormats::PLAIN_TEXT, true, QStringList()));
185 outputUrlTags.set(DelegateTags::FORMAT, BaseDocumentFormats::PLAIN_TEXT);
186 delegates[OUTPUT_URL_ATTR_ID] = new URLDelegate(outputUrlTags, "kraken/output");
187
188 delegates[QUICK_OPERATION_ATTR_ID] = new ComboBoxWithBoolsDelegate();
189
190 QVariantMap threadsProperties;
191 threadsProperties["minimum"] = 1;
192 threadsProperties["maximum"] = QThread::idealThreadCount();
193 delegates[THREADS_NUMBER_ATTR_ID] = new SpinBoxDelegate(threadsProperties);
194
195 delegates[PRELOAD_DATABASE_ATTR_ID] = new ComboBoxWithBoolsDelegate();
196 }
197
198 Descriptor desc(ACTOR_ID, KrakenClassifyPrompter::tr("Classify Sequences with Kraken"), KrakenClassifyPrompter::tr("Kraken is a taxonomic sequence classifier that assigns taxonomic labels to short DNA reads. "
199 "It does this by examining the k-mers within a read and querying a database with those."));
200 ActorPrototype *proto = new IntegralBusActorPrototype(desc, ports, attributes);
201 proto->setEditor(new DelegateEditor(delegates));
202 proto->setPrompter(new KrakenClassifyPrompter(nullptr));
203 proto->addExternalTool(KrakenSupport::CLASSIFY_TOOL_ID);
204 proto->setValidator(new KrakenClassifyValidator());
205 proto->setPortValidator(INPUT_PORT_ID, new PairedReadsPortValidator(INPUT_SLOT, PAIRED_INPUT_SLOT));
206 WorkflowEnv::getProtoRegistry()->registerProto(NgsReadsClassificationPlugin::WORKFLOW_ELEMENTS_GROUP, proto);
207
208 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
209 localDomain->registerEntry(new KrakenClassifyWorkerFactory());
210 }
211
cleanup()212 void KrakenClassifyWorkerFactory::cleanup() {
213 delete WorkflowEnv::getProtoRegistry()->unregisterProto(ACTOR_ID);
214
215 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
216 delete localDomain->unregisterEntry(ACTOR_ID);
217 }
218
219 } // namespace LocalWorkflow
220 } // namespace U2
221