1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "ConvertSnpeffVariationsToAnnotationsTask.h"
23
24 #include <U2Core/AppContext.h>
25 #include <U2Core/CreateAnnotationTask.h>
26 #include <U2Core/DeleteObjectsTask.h>
27 #include <U2Core/DocumentModel.h>
28 #include <U2Core/GenbankFeatures.h>
29 #include <U2Core/IOAdapterUtils.h>
30 #include <U2Core/L10n.h>
31 #include <U2Core/LoadDocumentTask.h>
32 #include <U2Core/SaveDocumentTask.h>
33 #include <U2Core/U1AnnotationUtils.h>
34 #include <U2Core/U2OpStatusUtils.h>
35 #include <U2Core/U2SafePoints.h>
36 #include <U2Core/VariantTrackObject.h>
37
38 #include <U2Formats/SnpeffInfoParser.h>
39
40 namespace U2 {
41
42 const QString ConvertSnpeffVariationsToAnnotationsTask::CHROM_QUALIFIER_NAME = "chrom";
43 const QString ConvertSnpeffVariationsToAnnotationsTask::LOCATION_QUALIFIER_NAME = "Location";
44 const QString ConvertSnpeffVariationsToAnnotationsTask::REFERENCE_QUALIFIER_NAME = "Reference_bases";
45 const QString ConvertSnpeffVariationsToAnnotationsTask::ALTERNATE_QUALIFIER_NAME = "Alternate_bases";
46 const QString ConvertSnpeffVariationsToAnnotationsTask::ALLELE_QUALIFIER_NAME = "Allele";
47 const QString ConvertSnpeffVariationsToAnnotationsTask::ID_QUALIFIER_NAME = "ID";
48
ConvertSnpeffVariationsToAnnotationsTask(const QList<VariantTrackObject * > & variantTrackObjects)49 ConvertSnpeffVariationsToAnnotationsTask::ConvertSnpeffVariationsToAnnotationsTask(const QList<VariantTrackObject *> &variantTrackObjects)
50 : Task(tr("Convert SnpEff variations to annotations task"), TaskFlag_None),
51 variantTrackObjects(variantTrackObjects) {
52 }
53
getAnnotationsData() const54 const QMap<QString, QList<SharedAnnotationData>> &ConvertSnpeffVariationsToAnnotationsTask::getAnnotationsData() const {
55 return annotationTablesData;
56 }
57
run()58 void ConvertSnpeffVariationsToAnnotationsTask::run() {
59 for (VariantTrackObject *variantTrackObject : qAsConst(variantTrackObjects)) {
60 QList<SharedAnnotationData> annotationTableData;
61
62 U2VariantTrack variantTrack = variantTrackObject->getVariantTrack(stateInfo);
63 CHECK_OP(stateInfo, );
64
65 QScopedPointer<U2DbiIterator<U2Variant>> variantsIterator(variantTrackObject->getVariants(U2_REGION_MAX, stateInfo));
66 CHECK_OP(stateInfo, );
67
68 SharedAnnotationData tableAnnotationData(new AnnotationData);
69 tableAnnotationData->qualifiers << U2Qualifier(CHROM_QUALIFIER_NAME, variantTrack.sequenceName);
70 tableAnnotationData->type = U2FeatureTypes::Variation;
71
72 SnpeffInfoParser infoParser;
73 while (variantsIterator.data()->hasNext()) {
74 U2Variant variant = variantsIterator.data()->next();
75
76 SharedAnnotationData entryAnnotationData = tableAnnotationData;
77 entryAnnotationData->name = GBFeatureUtils::getKeyInfo(GBFeatureKey_variation).text;
78 entryAnnotationData->location->regions << U2Region(variant.startPos, variant.endPos - variant.startPos + 1);
79 entryAnnotationData->qualifiers << U2Qualifier(REFERENCE_QUALIFIER_NAME, variant.refData);
80 entryAnnotationData->qualifiers << U2Qualifier(ALTERNATE_QUALIFIER_NAME, variant.obsData);
81 entryAnnotationData->qualifiers << U2Qualifier(LOCATION_QUALIFIER_NAME,
82 U2Region(variant.startPos + 1, variant.endPos - variant.startPos + 1).toString(U2Region::FormatDots));
83 if (!variant.publicId.isEmpty()) {
84 entryAnnotationData->qualifiers << U2Qualifier(ID_QUALIFIER_NAME, variant.publicId);
85 }
86
87 U2OpStatusImpl os;
88 QList<QList<U2Qualifier>> qualifiersList = infoParser.parse(os, variant.additionalInfo[U2Variant::VCF4_INFO]);
89 CHECK_OP(os, );
90 CHECK_OP(stateInfo, );
91 stateInfo.addWarnings(os.getWarnings());
92
93 for (const QList<U2Qualifier> &qualifiers : qAsConst(qualifiersList)) {
94 if (qualifiers.isEmpty()) {
95 continue;
96 }
97
98 SharedAnnotationData parsedAnnotationData = entryAnnotationData;
99 parsedAnnotationData->qualifiers << qualifiers.toVector();
100 if (U1AnnotationUtils::containsQualifier(qualifiers, ALLELE_QUALIFIER_NAME)) {
101 U1AnnotationUtils::removeAllQualifier(parsedAnnotationData, ALTERNATE_QUALIFIER_NAME);
102 }
103 annotationTableData << parsedAnnotationData;
104 }
105
106 if (!os.hasWarnings() && qualifiersList.isEmpty()) {
107 annotationTableData << entryAnnotationData;
108 }
109 }
110 annotationTablesData.insert(variantTrack.sequenceName, annotationTableData);
111 }
112 }
113
LoadConvertAndSaveSnpeffVariationsToAnnotationsTask(const QString & variationsUrl,const U2DbiRef & dstDbiRef,const QString & dstUrl,const QString & formatId)114 LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::LoadConvertAndSaveSnpeffVariationsToAnnotationsTask(const QString &variationsUrl,
115 const U2DbiRef &dstDbiRef,
116 const QString &dstUrl,
117 const QString &formatId)
118 : Task(tr("Load file and convert SnpEff variations to annotations task"), TaskFlags_NR_FOSE_COSC | TaskFlag_CollectChildrenWarnings),
119 variationsUrl(variationsUrl),
120 dstDbiRef(dstDbiRef),
121 dstUrl(dstUrl),
122 formatId(formatId),
123 loadTask(nullptr),
124 convertTask(nullptr),
125 saveTask(nullptr),
126 loadedVariationsDocument(nullptr),
127 annotationsDocument(nullptr) {
128 SAFE_POINT_EXT(!variationsUrl.isEmpty(), setError("Source VCF file URL is empty"), );
129 SAFE_POINT_EXT(dstDbiRef.isValid(), setError("Destination DBI reference is invalid"), );
130 SAFE_POINT_EXT(!dstUrl.isEmpty(), setError("Destination file URL is empty"), );
131 SAFE_POINT_EXT(!formatId.isEmpty(), setError("Destination file format is empty"), );
132 }
133
~LoadConvertAndSaveSnpeffVariationsToAnnotationsTask()134 LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::~LoadConvertAndSaveSnpeffVariationsToAnnotationsTask() {
135 qDeleteAll(annotationTableObjects);
136 delete loadedVariationsDocument;
137 delete annotationsDocument;
138 }
139
getResultUrl() const140 const QString &LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::getResultUrl() const {
141 return dstUrl;
142 }
143
prepare()144 void LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepare() {
145 QVariantMap hints;
146 hints[DocumentFormat::DBI_REF_HINT] = QVariant::fromValue<U2DbiRef>(dstDbiRef);
147 loadTask = LoadDocumentTask::getDefaultLoadDocTask(variationsUrl, hints);
148 addSubTask(loadTask);
149 }
150
onSubTaskFinished(Task * subTask)151 QList<Task *> LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::onSubTaskFinished(Task *subTask) {
152 QList<Task *> newSubtasks;
153 CHECK_OP(stateInfo, newSubtasks);
154
155 if (loadTask == subTask) {
156 loadedVariationsDocument = loadTask->takeDocument();
157 CHECK_EXT(nullptr != loadedVariationsDocument, setError(tr("'%1' load failed, the result document is NULL").arg(variationsUrl)), newSubtasks);
158 loadedVariationsDocument->setDocumentOwnsDbiResources(false);
159
160 QList<GObject *> objects = loadedVariationsDocument->findGObjectByType(GObjectTypes::VARIANT_TRACK);
161 CHECK_EXT(!objects.isEmpty(), setError(tr("File '%1' doesn't contain variation tracks").arg(variationsUrl)), newSubtasks);
162
163 QList<VariantTrackObject *> variantTrackObjects;
164 foreach (GObject *object, objects) {
165 VariantTrackObject *variantTrackObject = qobject_cast<VariantTrackObject *>(object);
166 SAFE_POINT_EXT(nullptr != variantTrackObject, setError("Can't cast GObject to VariantTrackObject"), newSubtasks);
167 variantTrackObjects << variantTrackObject;
168 }
169
170 convertTask = new ConvertSnpeffVariationsToAnnotationsTask(variantTrackObjects);
171 newSubtasks << convertTask;
172 }
173
174 if (convertTask == subTask) {
175 QMap<QString, QList<SharedAnnotationData>> annotationsData = convertTask->getAnnotationsData();
176 foreach (const QString &chromosome, annotationsData.keys()) {
177 AnnotationTableObject *annotationTableObject = new AnnotationTableObject(chromosome, dstDbiRef);
178 annotationTableObjects << annotationTableObject;
179
180 createAnnotationsTasks << new CreateAnnotationsTask(annotationTableObject, annotationsData[chromosome], "Variations");
181 }
182 newSubtasks << createAnnotationsTasks;
183 }
184
185 if (createAnnotationsTasks.contains(subTask)) {
186 createAnnotationsTasks.removeAll(subTask);
187 if (createAnnotationsTasks.isEmpty()) {
188 prepareSaveTask();
189 CHECK_OP(stateInfo, newSubtasks);
190 newSubtasks << saveTask;
191 newSubtasks << new DeleteObjectsTask(loadedVariationsDocument->getObjects());
192 delete loadedVariationsDocument;
193 loadedVariationsDocument = nullptr;
194 }
195 }
196
197 if (saveTask == subTask) {
198 newSubtasks << new DeleteObjectsTask(annotationsDocument->getObjects());
199 delete annotationsDocument;
200 annotationsDocument = nullptr;
201 }
202
203 return newSubtasks;
204 }
205
prepareDocument()206 Document *LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepareDocument() {
207 DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(formatId);
208 SAFE_POINT_EXT(nullptr != format, setError(QString("Document format '%1' not found in the registry").arg(formatId)), nullptr);
209 IOAdapterFactory *ioAdapterFactory = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(dstUrl));
210 SAFE_POINT_EXT(nullptr != ioAdapterFactory, setError(L10N::nullPointerError("ioAdapterFactory")), nullptr);
211
212 QVariantMap hints;
213 hints[DocumentFormat::DBI_REF_HINT] = QVariant::fromValue<U2DbiRef>(dstDbiRef);
214
215 Document *document = format->createNewLoadedDocument(ioAdapterFactory, dstUrl, stateInfo, hints);
216 CHECK_OP(stateInfo, nullptr);
217 document->setDocumentOwnsDbiResources(false);
218
219 foreach (AnnotationTableObject *annotationTableObject, annotationTableObjects) {
220 document->addObject(annotationTableObject);
221 }
222 annotationTableObjects.clear();
223
224 return document;
225 }
226
prepareSaveTask()227 void LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepareSaveTask() {
228 annotationsDocument = prepareDocument();
229 CHECK_OP(stateInfo, );
230 saveTask = new SaveDocumentTask(annotationsDocument);
231 }
232
233 } // namespace U2
234