1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "Text2SequenceWorker.h"
23
24 #include <U2Core/AppContext.h>
25 #include <U2Core/FailTask.h>
26 #include <U2Core/U2AlphabetUtils.h>
27
28 #include <U2Designer/DelegateEditors.h>
29
30 #include <U2Gui/SeqPasterWidgetController.h>
31
32 #include <U2Lang/ActorPrototypeRegistry.h>
33 #include <U2Lang/BaseActorCategories.h>
34 #include <U2Lang/BasePorts.h>
35 #include <U2Lang/BaseSlots.h>
36 #include <U2Lang/BaseTypes.h>
37 #include <U2Lang/CoreLibConstants.h>
38 #include <U2Lang/WorkflowEnv.h>
39
40 namespace U2 {
41 namespace LocalWorkflow {
42
// Identifier of the workflow element prototype registered by Text2SequenceWorkerFactory::init().
const QString Text2SequenceWorkerFactory::ACTOR_ID("convert-text-to-sequence");

// Descriptors of the map data types used for the input (text) and output (sequence) ports.
static const Descriptor TEXT_2_SEQUENCE_IN_TYPE_ID("text-2-sequence-in-type");
static const Descriptor TEXT_2_SEQUENCE_OUT_TYPE_ID("text-2-sequence-out-type");

// Identifiers of the element attributes.
static const QString SEQ_NAME_ATTR_ID("sequence-name");
static const QString ALPHABET_ATTR_ID("alphabet");
static const QString SKIP_SYM_ATTR_ID("skip-unknown");
static const QString REPLACE_SYM_ATTR_ID("replace-unknown-with");

// Default attribute values: the result sequence name and the "auto-detect" alphabet marker.
static const QString SEQ_NAME_ATTR_DEF_VAL("Sequence");
static const QString ALPHABET_ATTR_ID_DEF_VAL("Auto");
55
56 /*******************************
57 * Text2SequenceWorker
58 *******************************/
59 QMap<QString, QString> Text2SequenceWorker::cuteAlIdNames = Text2SequenceWorker::initCuteAlNames();
60
initCuteAlNames()61 QMap<QString, QString> Text2SequenceWorker::initCuteAlNames() {
62 QMap<QString, QString> res;
63 res[BaseDNAAlphabetIds::RAW()] = "All symbols";
64 res[BaseDNAAlphabetIds::NUCL_DNA_DEFAULT()] = "Standard DNA";
65 res[BaseDNAAlphabetIds::NUCL_RNA_DEFAULT()] = "Standard RNA";
66 res[BaseDNAAlphabetIds::NUCL_DNA_EXTENDED()] = "Extended DNA";
67 res[BaseDNAAlphabetIds::NUCL_RNA_EXTENDED()] = "Extended RNA";
68 res[BaseDNAAlphabetIds::AMINO_DEFAULT()] = "Standard amino";
69 return res;
70 }
71
init()72 void Text2SequenceWorker::init() {
73 txtPort = ports.value(BasePorts::IN_TEXT_PORT_ID());
74 outSeqPort = ports.value(BasePorts::OUT_SEQ_PORT_ID());
75 }
76
tick()77 Task *Text2SequenceWorker::tick() {
78 while (txtPort->hasMessage()) {
79 Message inputMessage = getMessageAndSetupScriptValues(txtPort);
80 if (inputMessage.isEmpty()) {
81 outSeqPort->transit();
82 continue;
83 }
84 QString seqName = actor->getParameter(SEQ_NAME_ATTR_ID)->getAttributeValue<QString>(context);
85 if (seqName.isEmpty()) {
86 return new FailTask(tr("Sequence name not set"));
87 }
88 if (tickedNum++ > 0) {
89 seqName += QString::number(tickedNum);
90 }
91 QString alId = actor->getParameter(ALPHABET_ATTR_ID)->getAttributeValue<QString>(context);
92 if (alId.isEmpty()) {
93 alId = ALPHABET_ATTR_ID_DEF_VAL;
94 } else {
95 alId = cuteAlIdNames.key(alId, alId);
96 }
97 bool skipUnknown = actor->getParameter(SKIP_SYM_ATTR_ID)->getAttributeValue<bool>(context);
98 QChar replaceChar;
99 if (!skipUnknown) {
100 QString replaceStr = actor->getParameter(REPLACE_SYM_ATTR_ID)->getAttributeValue<QString>(context);
101 assert(replaceStr.size() <= 1);
102 if (replaceStr.isEmpty()) {
103 return new FailTask(tr("skip flag should be set or replace character defined"));
104 }
105 replaceChar = replaceStr.at(0);
106 }
107 QByteArray txt = inputMessage.getData().toMap().value(BaseSlots::TEXT_SLOT().getId()).value<QString>().toUtf8();
108
109 const DNAAlphabet *alphabet = (alId == ALPHABET_ATTR_ID_DEF_VAL) ? U2AlphabetUtils::findBestAlphabet(txt) : U2AlphabetUtils::getById(alId);
110 if (alphabet == nullptr) {
111 QString msg;
112 if (alId == ALPHABET_ATTR_ID_DEF_VAL) {
113 msg = tr("Alphabet cannot be automatically detected");
114 } else {
115 msg = tr("Alphabet '%1' cannot be found");
116 }
117 return new FailTask(msg);
118 }
119
120 QByteArray normSequence = SeqPasterWidgetController::getNormSequence(alphabet, txt, !skipUnknown, replaceChar);
121 DNASequence result(seqName, normSequence, alphabet);
122 QVariantMap msgData;
123 {
124 SharedDbiDataHandler seqId = context->getDataStorage()->putSequence(result);
125 msgData[BaseSlots::DNA_SEQUENCE_SLOT().getId()] = qVariantFromValue<SharedDbiDataHandler>(seqId);
126 }
127 if (outSeqPort) {
128 outSeqPort->put(Message(BaseTypes::DNA_SEQUENCE_TYPE(), msgData));
129 }
130 }
131 if (txtPort->isEnded()) {
132 setDone();
133 outSeqPort->setEnded();
134 }
135 return nullptr;
136 }
137
/** Cleanup hook of the worker; this worker performs no actions here. */
void Text2SequenceWorker::cleanup() {
}
140
141 /*******************************
142 * Text2SequenceWorkerFactory
143 *******************************/
init()144 void Text2SequenceWorkerFactory::init() {
145 // ports description
146 QList<PortDescriptor *> portDescs;
147 {
148 QMap<Descriptor, DataTypePtr> inM;
149 inM[BaseSlots::TEXT_SLOT()] = BaseTypes::STRING_TYPE();
150 DataTypePtr inSet(new MapDataType(TEXT_2_SEQUENCE_IN_TYPE_ID, inM));
151 Descriptor inPortDesc(BasePorts::IN_TEXT_PORT_ID(), Text2SequenceWorker::tr("Input text"), Text2SequenceWorker::tr("A text which will be converted to sequence"));
152 portDescs << new PortDescriptor(inPortDesc, inSet, true);
153
154 QMap<Descriptor, DataTypePtr> outM;
155 outM[BaseSlots::DNA_SEQUENCE_SLOT()] = BaseTypes::DNA_SEQUENCE_TYPE();
156 DataTypePtr outSet(new MapDataType(TEXT_2_SEQUENCE_OUT_TYPE_ID, outM));
157 Descriptor outPortDesc(BasePorts::OUT_SEQ_PORT_ID(), Text2SequenceWorker::tr("Output sequence"), Text2SequenceWorker::tr("Converted sequence"));
158 portDescs << new PortDescriptor(outPortDesc, outSet, false);
159 }
160 // attributes description
161 QList<Attribute *> attrs;
162 {
163 Descriptor seqNameDesc(SEQ_NAME_ATTR_ID, Text2SequenceWorker::tr("Sequence name"), Text2SequenceWorker::tr("Result sequence name."));
164 Descriptor alphabetDesc(ALPHABET_ATTR_ID, Text2SequenceWorker::tr("Sequence alphabet"), Text2SequenceWorker::tr("Select one of the listed alphabets or choose auto to auto-detect."));
165 Descriptor skipSymbolsDesc(SKIP_SYM_ATTR_ID, Text2SequenceWorker::tr("Skip unknown symbols"), Text2SequenceWorker::tr("Do not include symbols that are not contained in alphabet."));
166 Descriptor replaceSymbolsDesc(REPLACE_SYM_ATTR_ID, Text2SequenceWorker::tr("Replace unknown symbols with"), Text2SequenceWorker::tr("Replace unknown symbols with given character."));
167
168 attrs << new Attribute(seqNameDesc, BaseTypes::STRING_TYPE(), /* required */ true, QVariant(SEQ_NAME_ATTR_DEF_VAL));
169 attrs << new Attribute(alphabetDesc, BaseTypes::STRING_TYPE(), false, QVariant(ALPHABET_ATTR_ID_DEF_VAL));
170 attrs << new Attribute(skipSymbolsDesc, BaseTypes::BOOL_TYPE(), false, QVariant(true));
171 attrs << new Attribute(replaceSymbolsDesc, BaseTypes::STRING_TYPE(), false);
172 }
173
174 Descriptor protoDesc(Text2SequenceWorkerFactory::ACTOR_ID,
175 Text2SequenceWorker::tr("Convert Text to Sequence"),
176 Text2SequenceWorker::tr("Converts input text to sequence."));
177 ActorPrototype *proto = new IntegralBusActorPrototype(protoDesc, portDescs, attrs);
178
179 // proto delegates
180 QMap<QString, PropertyDelegate *> delegates;
181 {
182 QVariantMap alMap;
183 QList<const DNAAlphabet *> alps = AppContext::getDNAAlphabetRegistry()->getRegisteredAlphabets();
184 foreach (const DNAAlphabet *a, alps) {
185 alMap[a->getName()] = Text2SequenceWorker::cuteAlIdNames[a->getId()];
186 }
187 alMap[ALPHABET_ATTR_ID_DEF_VAL] = ALPHABET_ATTR_ID_DEF_VAL;
188 delegates[ALPHABET_ATTR_ID] = new ComboBoxDelegate(alMap);
189
190 delegates[REPLACE_SYM_ATTR_ID] = new CharacterDelegate();
191 }
192 proto->setEditor(new DelegateEditor(delegates));
193 proto->setPrompter(new Text2SequencePrompter());
194
195 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_CONVERTERS(), proto);
196 WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID)->registerEntry(new Text2SequenceWorkerFactory());
197 }
198
createWorker(Actor * a)199 Worker *Text2SequenceWorkerFactory::createWorker(Actor *a) {
200 return new Text2SequenceWorker(a);
201 }
202
203 /*******************************
204 * Text2SequencePrompter
205 *******************************/
composeRichDoc()206 QString Text2SequencePrompter::composeRichDoc() {
207 QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
208 IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(BasePorts::IN_TEXT_PORT_ID()));
209 Actor *txtProducer = input->getProducer(BaseSlots::TEXT_SLOT().getId());
210 QString txtProducetStr = tr(" from <u>%1</u>").arg(txtProducer ? txtProducer->getLabel() : unsetStr);
211
212 QString seqName = getRequiredParam(SEQ_NAME_ATTR_ID);
213 QString seqNameStr = tr("sequence with name <u>%1</u>").arg(getHyperlink(SEQ_NAME_ATTR_ID, seqName));
214
215 QString alId = getParameter(ALPHABET_ATTR_ID).value<QString>();
216 QString seqAlStr;
217 if (alId == ALPHABET_ATTR_ID_DEF_VAL) {
218 seqAlStr = getHyperlink(ALPHABET_ATTR_ID, tr("Automatically detect sequence alphabet"));
219 } else {
220 alId = Text2SequenceWorker::cuteAlIdNames.key(alId, "");
221 const DNAAlphabet *alphabet = AppContext::getDNAAlphabetRegistry()->findById(alId);
222 QString alphStr = getHyperlink(ALPHABET_ATTR_ID, alphabet ? alphabet->getName() : unsetStr);
223 seqAlStr = tr("Set sequence alphabet to %1").arg(alphStr);
224 }
225
226 bool skipUnknown = getParameter(SKIP_SYM_ATTR_ID).value<bool>();
227 QString replaceStr = getRequiredParam(REPLACE_SYM_ATTR_ID);
228 QString unknownSymbolsStr;
229 if (skipUnknown) {
230 unknownSymbolsStr = getHyperlink(SKIP_SYM_ATTR_ID, tr("skipped"));
231 } else {
232 unknownSymbolsStr = QString("%1 %2")
233 .arg(getHyperlink(SKIP_SYM_ATTR_ID, tr("replaced with symbol")))
234 .arg(getHyperlink(REPLACE_SYM_ATTR_ID, replaceStr));
235 }
236
237 QString doc = tr("Convert input text%1 to %2. %3. Unknown symbols are %4.")
238 .arg(txtProducetStr)
239 .arg(seqNameStr)
240 .arg(seqAlStr)
241 .arg(unknownSymbolsStr);
242 return doc;
243 }
244
245 } // namespace LocalWorkflow
246 } // namespace U2
247