1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "ConvertSnpeffVariationsToAnnotationsTask.h"
23 
24 #include <U2Core/AppContext.h>
25 #include <U2Core/CreateAnnotationTask.h>
26 #include <U2Core/DeleteObjectsTask.h>
27 #include <U2Core/DocumentModel.h>
28 #include <U2Core/GenbankFeatures.h>
29 #include <U2Core/IOAdapterUtils.h>
30 #include <U2Core/L10n.h>
31 #include <U2Core/LoadDocumentTask.h>
32 #include <U2Core/SaveDocumentTask.h>
33 #include <U2Core/U1AnnotationUtils.h>
34 #include <U2Core/U2OpStatusUtils.h>
35 #include <U2Core/U2SafePoints.h>
36 #include <U2Core/VariantTrackObject.h>
37 
38 #include <U2Formats/SnpeffInfoParser.h>
39 
40 namespace U2 {
41 
42 const QString ConvertSnpeffVariationsToAnnotationsTask::CHROM_QUALIFIER_NAME = "chrom";
43 const QString ConvertSnpeffVariationsToAnnotationsTask::LOCATION_QUALIFIER_NAME = "Location";
44 const QString ConvertSnpeffVariationsToAnnotationsTask::REFERENCE_QUALIFIER_NAME = "Reference_bases";
45 const QString ConvertSnpeffVariationsToAnnotationsTask::ALTERNATE_QUALIFIER_NAME = "Alternate_bases";
46 const QString ConvertSnpeffVariationsToAnnotationsTask::ALLELE_QUALIFIER_NAME = "Allele";
47 const QString ConvertSnpeffVariationsToAnnotationsTask::ID_QUALIFIER_NAME = "ID";
48 
/**
 * Creates the conversion task.
 *
 * @param variantTrackObjects variant tracks to convert; the list of pointers is copied
 *                            into the task and processed later by run().
 */
ConvertSnpeffVariationsToAnnotationsTask::ConvertSnpeffVariationsToAnnotationsTask(const QList<VariantTrackObject *> &variantTrackObjects)
    : Task(tr("Convert SnpEff variations to annotations task"), TaskFlag_None),
      variantTrackObjects(variantTrackObjects) {
}
53 
/**
 * Returns the conversion result filled by run(): a map from the variant track
 * sequence name to the list of annotation data built for that track.
 * The reference points to a member of this task.
 */
const QMap<QString, QList<SharedAnnotationData>> &ConvertSnpeffVariationsToAnnotationsTask::getAnnotationsData() const {
    return annotationTablesData;
}
57 
run()58 void ConvertSnpeffVariationsToAnnotationsTask::run() {
59     for (VariantTrackObject *variantTrackObject : qAsConst(variantTrackObjects)) {
60         QList<SharedAnnotationData> annotationTableData;
61 
62         U2VariantTrack variantTrack = variantTrackObject->getVariantTrack(stateInfo);
63         CHECK_OP(stateInfo, );
64 
65         QScopedPointer<U2DbiIterator<U2Variant>> variantsIterator(variantTrackObject->getVariants(U2_REGION_MAX, stateInfo));
66         CHECK_OP(stateInfo, );
67 
68         SharedAnnotationData tableAnnotationData(new AnnotationData);
69         tableAnnotationData->qualifiers << U2Qualifier(CHROM_QUALIFIER_NAME, variantTrack.sequenceName);
70         tableAnnotationData->type = U2FeatureTypes::Variation;
71 
72         SnpeffInfoParser infoParser;
73         while (variantsIterator.data()->hasNext()) {
74             U2Variant variant = variantsIterator.data()->next();
75 
76             SharedAnnotationData entryAnnotationData = tableAnnotationData;
77             entryAnnotationData->name = GBFeatureUtils::getKeyInfo(GBFeatureKey_variation).text;
78             entryAnnotationData->location->regions << U2Region(variant.startPos, variant.endPos - variant.startPos + 1);
79             entryAnnotationData->qualifiers << U2Qualifier(REFERENCE_QUALIFIER_NAME, variant.refData);
80             entryAnnotationData->qualifiers << U2Qualifier(ALTERNATE_QUALIFIER_NAME, variant.obsData);
81             entryAnnotationData->qualifiers << U2Qualifier(LOCATION_QUALIFIER_NAME,
82                                                            U2Region(variant.startPos + 1, variant.endPos - variant.startPos + 1).toString(U2Region::FormatDots));
83             if (!variant.publicId.isEmpty()) {
84                 entryAnnotationData->qualifiers << U2Qualifier(ID_QUALIFIER_NAME, variant.publicId);
85             }
86 
87             U2OpStatusImpl os;
88             QList<QList<U2Qualifier>> qualifiersList = infoParser.parse(os, variant.additionalInfo[U2Variant::VCF4_INFO]);
89             CHECK_OP(os, );
90             CHECK_OP(stateInfo, );
91             stateInfo.addWarnings(os.getWarnings());
92 
93             for (const QList<U2Qualifier> &qualifiers : qAsConst(qualifiersList)) {
94                 if (qualifiers.isEmpty()) {
95                     continue;
96                 }
97 
98                 SharedAnnotationData parsedAnnotationData = entryAnnotationData;
99                 parsedAnnotationData->qualifiers << qualifiers.toVector();
100                 if (U1AnnotationUtils::containsQualifier(qualifiers, ALLELE_QUALIFIER_NAME)) {
101                     U1AnnotationUtils::removeAllQualifier(parsedAnnotationData, ALTERNATE_QUALIFIER_NAME);
102                 }
103                 annotationTableData << parsedAnnotationData;
104             }
105 
106             if (!os.hasWarnings() && qualifiersList.isEmpty()) {
107                 annotationTableData << entryAnnotationData;
108             }
109         }
110         annotationTablesData.insert(variantTrack.sequenceName, annotationTableData);
111     }
112 }
113 
/**
 * Creates the composite task that loads a variations file, converts its variant tracks
 * to annotations and saves them to a new document.
 *
 * @param variationsUrl URL of the source file; loaded in prepare().
 * @param dstDbiRef     DBI reference passed as a hint to the document loading/creation
 *                      and used for the created annotation table objects.
 * @param dstUrl        URL of the document that is created and saved; returned by getResultUrl().
 * @param formatId      id of the destination document format, resolved in prepareDocument().
 *
 * Each argument is validated below; a failed check sets the task error via setError().
 */
LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::LoadConvertAndSaveSnpeffVariationsToAnnotationsTask(const QString &variationsUrl,
                                                                                                         const U2DbiRef &dstDbiRef,
                                                                                                         const QString &dstUrl,
                                                                                                         const QString &formatId)
    : Task(tr("Load file and convert SnpEff variations to annotations task"), TaskFlags_NR_FOSE_COSC | TaskFlag_CollectChildrenWarnings),
      variationsUrl(variationsUrl),
      dstDbiRef(dstDbiRef),
      dstUrl(dstUrl),
      formatId(formatId),
      loadTask(nullptr),
      convertTask(nullptr),
      saveTask(nullptr),
      loadedVariationsDocument(nullptr),
      annotationsDocument(nullptr) {
    // The names below resolve to the constructor parameters, which shadow the members of the same name.
    SAFE_POINT_EXT(!variationsUrl.isEmpty(), setError("Source VCF file URL is empty"), );
    SAFE_POINT_EXT(dstDbiRef.isValid(), setError("Destination DBI reference is invalid"), );
    SAFE_POINT_EXT(!dstUrl.isEmpty(), setError("Destination file URL is empty"), );
    SAFE_POINT_EXT(!formatId.isEmpty(), setError("Destination file format is empty"), );
}
133 
/**
 * Releases whatever the task still holds: annotation table objects that were not moved
 * into a document by prepareDocument() (which clears the list), and the two documents
 * unless onSubTaskFinished() has already deleted them and reset the pointers to nullptr.
 */
LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::~LoadConvertAndSaveSnpeffVariationsToAnnotationsTask() {
    qDeleteAll(annotationTableObjects);
    delete loadedVariationsDocument;
    delete annotationsDocument;
}
139 
/**
 * Returns the destination URL given to the constructor, i.e. the URL the annotations
 * document is created with in prepareDocument().
 */
const QString &LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::getResultUrl() const {
    return dstUrl;
}
143 
prepare()144 void LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepare() {
145     QVariantMap hints;
146     hints[DocumentFormat::DBI_REF_HINT] = QVariant::fromValue<U2DbiRef>(dstDbiRef);
147     loadTask = LoadDocumentTask::getDefaultLoadDocTask(variationsUrl, hints);
148     addSubTask(loadTask);
149 }
150 
onSubTaskFinished(Task * subTask)151 QList<Task *> LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::onSubTaskFinished(Task *subTask) {
152     QList<Task *> newSubtasks;
153     CHECK_OP(stateInfo, newSubtasks);
154 
155     if (loadTask == subTask) {
156         loadedVariationsDocument = loadTask->takeDocument();
157         CHECK_EXT(nullptr != loadedVariationsDocument, setError(tr("'%1' load failed, the result document is NULL").arg(variationsUrl)), newSubtasks);
158         loadedVariationsDocument->setDocumentOwnsDbiResources(false);
159 
160         QList<GObject *> objects = loadedVariationsDocument->findGObjectByType(GObjectTypes::VARIANT_TRACK);
161         CHECK_EXT(!objects.isEmpty(), setError(tr("File '%1' doesn't contain variation tracks").arg(variationsUrl)), newSubtasks);
162 
163         QList<VariantTrackObject *> variantTrackObjects;
164         foreach (GObject *object, objects) {
165             VariantTrackObject *variantTrackObject = qobject_cast<VariantTrackObject *>(object);
166             SAFE_POINT_EXT(nullptr != variantTrackObject, setError("Can't cast GObject to VariantTrackObject"), newSubtasks);
167             variantTrackObjects << variantTrackObject;
168         }
169 
170         convertTask = new ConvertSnpeffVariationsToAnnotationsTask(variantTrackObjects);
171         newSubtasks << convertTask;
172     }
173 
174     if (convertTask == subTask) {
175         QMap<QString, QList<SharedAnnotationData>> annotationsData = convertTask->getAnnotationsData();
176         foreach (const QString &chromosome, annotationsData.keys()) {
177             AnnotationTableObject *annotationTableObject = new AnnotationTableObject(chromosome, dstDbiRef);
178             annotationTableObjects << annotationTableObject;
179 
180             createAnnotationsTasks << new CreateAnnotationsTask(annotationTableObject, annotationsData[chromosome], "Variations");
181         }
182         newSubtasks << createAnnotationsTasks;
183     }
184 
185     if (createAnnotationsTasks.contains(subTask)) {
186         createAnnotationsTasks.removeAll(subTask);
187         if (createAnnotationsTasks.isEmpty()) {
188             prepareSaveTask();
189             CHECK_OP(stateInfo, newSubtasks);
190             newSubtasks << saveTask;
191             newSubtasks << new DeleteObjectsTask(loadedVariationsDocument->getObjects());
192             delete loadedVariationsDocument;
193             loadedVariationsDocument = nullptr;
194         }
195     }
196 
197     if (saveTask == subTask) {
198         newSubtasks << new DeleteObjectsTask(annotationsDocument->getObjects());
199         delete annotationsDocument;
200         annotationsDocument = nullptr;
201     }
202 
203     return newSubtasks;
204 }
205 
prepareDocument()206 Document *LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepareDocument() {
207     DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(formatId);
208     SAFE_POINT_EXT(nullptr != format, setError(QString("Document format '%1' not found in the registry").arg(formatId)), nullptr);
209     IOAdapterFactory *ioAdapterFactory = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(dstUrl));
210     SAFE_POINT_EXT(nullptr != ioAdapterFactory, setError(L10N::nullPointerError("ioAdapterFactory")), nullptr);
211 
212     QVariantMap hints;
213     hints[DocumentFormat::DBI_REF_HINT] = QVariant::fromValue<U2DbiRef>(dstDbiRef);
214 
215     Document *document = format->createNewLoadedDocument(ioAdapterFactory, dstUrl, stateInfo, hints);
216     CHECK_OP(stateInfo, nullptr);
217     document->setDocumentOwnsDbiResources(false);
218 
219     foreach (AnnotationTableObject *annotationTableObject, annotationTableObjects) {
220         document->addObject(annotationTableObject);
221     }
222     annotationTableObjects.clear();
223 
224     return document;
225 }
226 
/**
 * Builds the annotations document via prepareDocument() and creates the task that saves it.
 * If the document preparation left an error in the task state, no save task is created.
 */
void LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepareSaveTask() {
    annotationsDocument = prepareDocument();
    CHECK_OP(stateInfo, );
    saveTask = new SaveDocumentTask(annotationsDocument);
}
232 
233 }  // namespace U2
234