1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
#include "KrakenClassifyWorkerFactory.h"

#include <limits>

#include <QThread>

#include <U2Core/AppContext.h>
#include <U2Core/AppResources.h>
#include <U2Core/AppSettings.h>
#include <U2Core/BaseDocumentFormats.h>
#include <U2Core/DataPathRegistry.h>
#include <U2Core/L10n.h>

#include <U2Designer/DelegateEditors.h>

#include <U2Gui/DialogUtils.h>

#include <U2Lang/ActorPrototypeRegistry.h>
#include <U2Lang/BaseSlots.h>
#include <U2Lang/BaseTypes.h>
#include <U2Lang/PairedReadsPortValidator.h>
#include <U2Lang/WorkflowEnv.h>

#include "../../ngs_reads_classification/src/DatabaseDelegate.h"
#include "../../ngs_reads_classification/src/NgsReadsClassificationPlugin.h"
#include "DatabaseSizeRelation.h"
#include "KrakenClassifyPrompter.h"
#include "KrakenClassifyValidator.h"
#include "KrakenClassifyWorker.h"
#include "KrakenSupport.h"
50 
51 namespace U2 {
52 namespace LocalWorkflow {
53 
54 const QString KrakenClassifyWorkerFactory::ACTOR_ID = "kraken-classify";
55 
56 const QString KrakenClassifyWorkerFactory::INPUT_PORT_ID = "in";
57 const QString KrakenClassifyWorkerFactory::OUTPUT_PORT_ID = "out";
58 
59 // Slots should be the same as in GetReadsListWorkerFactory
60 const QString KrakenClassifyWorkerFactory::INPUT_SLOT = "reads-url1";
61 const QString KrakenClassifyWorkerFactory::PAIRED_INPUT_SLOT = "reads-url2";
62 
63 const QString KrakenClassifyWorkerFactory::INPUT_DATA_ATTR_ID = "input-data";
64 const QString KrakenClassifyWorkerFactory::DATABASE_ATTR_ID = "database";
65 const QString KrakenClassifyWorkerFactory::OUTPUT_URL_ATTR_ID = "output-url";
66 const QString KrakenClassifyWorkerFactory::QUICK_OPERATION_ATTR_ID = "quick-operation";
67 const QString KrakenClassifyWorkerFactory::MIN_HITS_NUMBER_ATTR_ID = "min-hits";
68 const QString KrakenClassifyWorkerFactory::THREADS_NUMBER_ATTR_ID = "threads";
69 const QString KrakenClassifyWorkerFactory::PRELOAD_DATABASE_ATTR_ID = "preload";
70 
71 const QString KrakenClassifyWorkerFactory::SINGLE_END_TEXT = "SE reads or contigs";
72 const QString KrakenClassifyWorkerFactory::PAIRED_END_TEXT = "PE reads";
73 
74 const QString KrakenClassifyWorkerFactory::WORKFLOW_CLASSIFY_TOOL_KRAKEN = "Kraken";
75 
KrakenClassifyWorkerFactory()76 KrakenClassifyWorkerFactory::KrakenClassifyWorkerFactory()
77     : DomainFactory(ACTOR_ID) {
78 }
79 
createWorker(Actor * actor)80 Worker *KrakenClassifyWorkerFactory::createWorker(Actor *actor) {
81     return new KrakenClassifyWorker(actor);
82 }
83 
init()84 void KrakenClassifyWorkerFactory::init() {
85     QList<PortDescriptor *> ports;
86     {
87         const Descriptor inSlotDesc(INPUT_SLOT,
88                                     KrakenClassifyPrompter::tr("Input URL 1"),
89                                     KrakenClassifyPrompter::tr("Input URL 1."));
90 
91         const Descriptor inPairedSlotDesc(PAIRED_INPUT_SLOT,
92                                           KrakenClassifyPrompter::tr("Input URL 2"),
93                                           KrakenClassifyPrompter::tr("Input URL 2."));
94 
95         QMap<Descriptor, DataTypePtr> inType;
96         inType[inSlotDesc] = BaseTypes::STRING_TYPE();
97         inType[inPairedSlotDesc] = BaseTypes::STRING_TYPE();
98 
99         QMap<Descriptor, DataTypePtr> outType;
100         outType[TaxonomySupport::TAXONOMY_CLASSIFICATION_SLOT()] = TaxonomySupport::TAXONOMY_CLASSIFICATION_TYPE();
101 
102         const Descriptor inPortDesc(INPUT_PORT_ID,
103                                     KrakenClassifyPrompter::tr("Input sequences"),
104                                     KrakenClassifyPrompter::tr("URL(s) to FASTQ or FASTA file(s) should be provided.\n\n"
105                                                                "In case of SE reads or contigs use the \"Input URL 1\" slot only.\n\n"
106                                                                "In case of PE reads input \"left\" reads to \"Input URL 1\", \"right\" reads to \"Input URL 2\".\n\n"
107                                                                "See also the \"Input data\" parameter of the element."));
108         Descriptor outPortDesc(OUTPUT_PORT_ID, KrakenClassifyPrompter::tr("Kraken Classification"), KrakenClassifyPrompter::tr("A map of sequence names with the associated taxonomy IDs, classified by Kraken."));
109 
110         ports << new PortDescriptor(inPortDesc, DataTypePtr(new MapDataType(ACTOR_ID + "-in", inType)), true /*input*/);
111         ports << new PortDescriptor(outPortDesc, DataTypePtr(new MapDataType(ACTOR_ID + "-out", outType)), false /*input*/, true /*multi*/);
112     }
113 
114     QList<Attribute *> attributes;
115     {
116         Descriptor inputDataDesc(INPUT_DATA_ATTR_ID, KrakenClassifyPrompter::tr("Input data"), KrakenClassifyPrompter::tr("To classify single-end (SE) reads or contigs, received by reads de novo assembly, set this parameter to \"SE reads or contigs\".<br><br>"
117                                                                                                                           "To classify paired-end (PE) reads, set the value to \"PE reads\".<br><br>"
118                                                                                                                           "One or two slots of the input port are used depending on the value of the parameter. Pass URL(s) to data to these slots.<br><br>"
119                                                                                                                           "The input files should be in FASTA or FASTQ formats."));
120 
121         Descriptor databaseDesc(DATABASE_ATTR_ID, KrakenClassifyPrompter::tr("Database"), KrakenClassifyPrompter::tr("A path to the folder with the Kraken database files."));
122 
123         Descriptor outputUrlDesc(OUTPUT_URL_ATTR_ID, KrakenClassifyPrompter::tr("Output file"), KrakenClassifyPrompter::tr("Specify the output file name."));
124 
125         Descriptor quickOperationDesc(QUICK_OPERATION_ATTR_ID, KrakenClassifyPrompter::tr("Quick operation"), KrakenClassifyPrompter::tr("Stop classification of an input read after the certain number of hits.<br><br>"
126                                                                                                                                          "The value can be specified in the \"Minimum number of hits\" parameter."));
127 
128         Descriptor minHitsDesc(MIN_HITS_NUMBER_ATTR_ID, KrakenClassifyPrompter::tr("Minimum number of hits"), KrakenClassifyPrompter::tr("The number of hits that are required to declare an input sequence classified.<br><br>"
129                                                                                                                                          "This can be especially useful with custom databases when testing to see if sequences either do or do not belong to a particular genome."));
130 
131         Descriptor threadsDesc(THREADS_NUMBER_ATTR_ID, KrakenClassifyPrompter::tr("Number of threads"), KrakenClassifyPrompter::tr("Use multiple threads (--threads)."));
132 
133         Descriptor preloadDatabaseDesc(PRELOAD_DATABASE_ATTR_ID, KrakenClassifyPrompter::tr("Load database into memory"), KrakenClassifyPrompter::tr("Load the Kraken database into RAM (--preload).<br><br>"
134                                                                                                                                                      "This can be useful to improve the speed. The database size should be less than the RAM size.<br><br>"
135                                                                                                                                                      "The other option to improve the speed is to store the database on ramdisk. Set this parameter to \"False\" in this case."));
136 
137         Descriptor classifyToolDesc(NgsReadsClassificationPlugin::WORKFLOW_CLASSIFY_TOOL_ID,
138                                     WORKFLOW_CLASSIFY_TOOL_KRAKEN,
139                                     "Classify tool. Hidden attribute");
140 
141         Attribute *inputDataAttribute = new Attribute(inputDataDesc, BaseTypes::STRING_TYPE(), false, KrakenClassifyTaskSettings::SINGLE_END);
142         inputDataAttribute->addSlotRelation(new SlotRelationDescriptor(INPUT_PORT_ID, PAIRED_INPUT_SLOT, QVariantList() << KrakenClassifyTaskSettings::PAIRED_END));
143         attributes << inputDataAttribute;
144 
145         QString minikrakenPath;
146         U2DataPath *minikrakenDataPath = AppContext::getDataPathRegistry()->getDataPathByName(NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_DATA_ID);
147         if (nullptr != minikrakenDataPath && minikrakenDataPath->isValid()) {
148             minikrakenPath = minikrakenDataPath->getPathByName(NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_ITEM_ID);
149         }
150         Attribute *databaseAttribute = new Attribute(databaseDesc, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding, minikrakenPath);
151         attributes << databaseAttribute;
152 
153         attributes << new Attribute(quickOperationDesc, BaseTypes::BOOL_TYPE(), Attribute::None, false);
154 
155         Attribute *minHitsAttribute = new Attribute(minHitsDesc, BaseTypes::NUM_TYPE(), Attribute::None, 1);
156         attributes << minHitsAttribute;
157 
158         attributes << new Attribute(preloadDatabaseDesc, BaseTypes::BOOL_TYPE(), Attribute::None, true);
159         attributes << new Attribute(threadsDesc, BaseTypes::NUM_TYPE(), Attribute::None, AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount());
160         attributes << new Attribute(outputUrlDesc, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding | Attribute::CanBeEmpty);
161 
162         attributes << new Attribute(classifyToolDesc, BaseTypes::STRING_TYPE(), static_cast<Attribute::Flags>(Attribute::Hidden), WORKFLOW_CLASSIFY_TOOL_KRAKEN);
163 
164         minHitsAttribute->addRelation(new VisibilityRelation(QUICK_OPERATION_ATTR_ID, "true"));
165         databaseAttribute->addRelation(new DatabaseSizeRelation(PRELOAD_DATABASE_ATTR_ID));
166     }
167 
168     QMap<QString, PropertyDelegate *> delegates;
169     {
170         QVariantMap inputDataMap;
171         inputDataMap[SINGLE_END_TEXT] = KrakenClassifyTaskSettings::SINGLE_END;
172         inputDataMap[PAIRED_END_TEXT] = KrakenClassifyTaskSettings::PAIRED_END;
173         delegates[INPUT_DATA_ATTR_ID] = new ComboBoxDelegate(inputDataMap);
174 
175         delegates[DATABASE_ATTR_ID] = new DatabaseDelegate(ACTOR_ID,
176                                                            DATABASE_ATTR_ID,
177                                                            NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_DATA_ID,
178                                                            NgsReadsClassificationPlugin::MINIKRAKEN_4_GB_ITEM_ID,
179                                                            "kraken/database",
180                                                            true);
181 
182         DelegateTags outputUrlTags;
183         outputUrlTags.set(DelegateTags::PLACEHOLDER_TEXT, "Auto");
184         outputUrlTags.set(DelegateTags::FILTER, DialogUtils::prepareDocumentsFileFilter(BaseDocumentFormats::PLAIN_TEXT, true, QStringList()));
185         outputUrlTags.set(DelegateTags::FORMAT, BaseDocumentFormats::PLAIN_TEXT);
186         delegates[OUTPUT_URL_ATTR_ID] = new URLDelegate(outputUrlTags, "kraken/output");
187 
188         delegates[QUICK_OPERATION_ATTR_ID] = new ComboBoxWithBoolsDelegate();
189 
190         QVariantMap threadsProperties;
191         threadsProperties["minimum"] = 1;
192         threadsProperties["maximum"] = QThread::idealThreadCount();
193         delegates[THREADS_NUMBER_ATTR_ID] = new SpinBoxDelegate(threadsProperties);
194 
195         delegates[PRELOAD_DATABASE_ATTR_ID] = new ComboBoxWithBoolsDelegate();
196     }
197 
198     Descriptor desc(ACTOR_ID, KrakenClassifyPrompter::tr("Classify Sequences with Kraken"), KrakenClassifyPrompter::tr("Kraken is a taxonomic sequence classifier that assigns taxonomic labels to short DNA reads. "
199                                                                                                                        "It does this by examining the k-mers within a read and querying a database with those."));
200     ActorPrototype *proto = new IntegralBusActorPrototype(desc, ports, attributes);
201     proto->setEditor(new DelegateEditor(delegates));
202     proto->setPrompter(new KrakenClassifyPrompter(nullptr));
203     proto->addExternalTool(KrakenSupport::CLASSIFY_TOOL_ID);
204     proto->setValidator(new KrakenClassifyValidator());
205     proto->setPortValidator(INPUT_PORT_ID, new PairedReadsPortValidator(INPUT_SLOT, PAIRED_INPUT_SLOT));
206     WorkflowEnv::getProtoRegistry()->registerProto(NgsReadsClassificationPlugin::WORKFLOW_ELEMENTS_GROUP, proto);
207 
208     DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
209     localDomain->registerEntry(new KrakenClassifyWorkerFactory());
210 }
211 
cleanup()212 void KrakenClassifyWorkerFactory::cleanup() {
213     delete WorkflowEnv::getProtoRegistry()->unregisterProto(ACTOR_ID);
214 
215     DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
216     delete localDomain->unregisterEntry(ACTOR_ID);
217 }
218 
219 }  // namespace LocalWorkflow
220 }  // namespace U2
221