1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "Peak2GeneFormatLoader.h"
23 
24 #include <U2Core/IOAdapter.h>
25 #include <U2Core/L10n.h>
26 #include <U2Core/U2OpStatus.h>
27 #include <U2Core/U2SafePoints.h>
28 
29 #include <U2Formats/TabulatedFormatReader.h>
30 
31 namespace U2 {
32 
Peak2GeneFormatLoader(U2OpStatus & os,IOAdapter * ioAdapter)33 Peak2GeneFormatLoader::Peak2GeneFormatLoader(U2OpStatus &os, IOAdapter *ioAdapter)
34     : os(os),
35       ioAdapter(ioAdapter),
36       skipLine(false),
37       currentLineNumber(0) {
38     CHECK_EXT(nullptr != ioAdapter, os.setError(L10N::nullPointerError("IO adapter")), );
39     CHECK_EXT(ioAdapter->isOpen(), os.setError(L10N::errorReadingFile(ioAdapter->getURL())), );
40 }
41 
loadAnnotations()42 QList<SharedAnnotationData> Peak2GeneFormatLoader::loadAnnotations() {
43     QList<SharedAnnotationData> annotations;
44 
45     TabulatedFormatReader reader(os, ioAdapter);
46     CHECK_OP(os, annotations);
47 
48     while (reader.hasNextLine()) {
49         skipLine = false;
50         currentLineNumber = reader.getCurrentLineNumber();
51         SharedAnnotationData annotation = parseLine(reader.getNextLine());
52         if (!skipLine) {
53             annotations << annotation;
54         }
55     }
56 
57     return annotations;
58 }
59 
parseLine(const QStringList & lineValues)60 SharedAnnotationData Peak2GeneFormatLoader::parseLine(const QStringList &lineValues) {
61     SharedAnnotationData data(new AnnotationData);
62     CHECK_EXT(lineValues.size() == COLUMNS_COUNT, skipLine = true; os.addWarning(QString("Incorrect columns count at line %1: expect %2, got %3")
63                                                                                      .arg(currentLineNumber)
64                                                                                      .arg(COLUMNS_COUNT)
65                                                                                      .arg(lineValues.size())),
66                                                                    data);
67 
68     data->qualifiers << U2Qualifier("chrom", getChromName(lineValues));
69     CHECK(!skipLine, data);
70 
71     data->location->regions << getRegion(lineValues);
72     CHECK(!skipLine, data);
73 
74     data->name = getPeakName(lineValues);
75     CHECK(!skipLine, data);
76 
77     data->qualifiers << U2Qualifier("score", getPeakScore(lineValues));
78     CHECK(!skipLine, data);
79 
80     data->qualifiers << U2Qualifier("NA", getNa(lineValues));
81     CHECK(!skipLine, data);
82 
83     data->qualifiers << U2Qualifier("Genes", getGenes(lineValues));
84     CHECK(!skipLine, data);
85 
86     data->qualifiers << U2Qualifier("Strand", getStrand(lineValues));
87     CHECK(!skipLine, data);
88 
89     data->qualifiers << U2Qualifier("TSS2pCenter", getTss2pCenter(lineValues));
90     CHECK(!skipLine, data);
91 
92     return data;
93 }
94 
getChromName(const QStringList & lineValues)95 QString Peak2GeneFormatLoader::getChromName(const QStringList &lineValues) {
96     CHECK_EXT(!lineValues[ChromName].isEmpty(), skipLine = true; os.addWarning(QString("Chrom name is empty at line %1")
97                                                                                    .arg(currentLineNumber)),
98                                                                  "");
99     return lineValues[ChromName];
100 }
101 
getRegion(const QStringList & lineValues)102 U2Region Peak2GeneFormatLoader::getRegion(const QStringList &lineValues) {
103     bool ok = false;
104     const qint64 startPos = lineValues[StartPos].toLongLong(&ok);
105     CHECK_EXT(ok, skipLine = true; os.addWarning(tr("Incorrect start position at line %1: '%2'")
106                                                      .arg(currentLineNumber)
107                                                      .arg(lineValues[StartPos])),
108                                    U2Region());
109 
110     const qint64 endPos = lineValues[EndPos].toLongLong(&ok);
111     CHECK_EXT(ok, skipLine = true; os.addWarning(tr("Incorrect end position at line %1: '%2'")
112                                                      .arg(currentLineNumber)
113                                                      .arg(lineValues[EndPos])),
114                                    U2Region());
115 
116     return U2Region(startPos, endPos - startPos);
117 }
118 
getPeakName(const QStringList & lineValues)119 QString Peak2GeneFormatLoader::getPeakName(const QStringList &lineValues) {
120     CHECK_EXT(!lineValues[PeakName].isEmpty(), skipLine = true; os.addWarning(QString("Peak name is empty at line %1")
121                                                                                   .arg(currentLineNumber)),
122                                                                 "");
123     return lineValues[PeakName];
124 }
125 
getPeakScore(const QStringList & lineValues)126 QString Peak2GeneFormatLoader::getPeakScore(const QStringList &lineValues) {
127     bool ok = false;
128     lineValues[PeakScore].toDouble(&ok);
129     CHECK_EXT(ok, skipLine = true; os.addWarning(tr("Incorrect peak score at line %1: '%2'")
130                                                      .arg(currentLineNumber)
131                                                      .arg(lineValues[PeakScore])),
132                                    "");
133     return lineValues[PeakScore];
134 }
135 
getNa(const QStringList & lineValues)136 QString Peak2GeneFormatLoader::getNa(const QStringList &lineValues) {
137     bool ok = false;
138     lineValues[NA].toInt(&ok);
139     CHECK_EXT(ok, skipLine = true; os.addWarning(tr("Incorrect NA value at line %1: '%2'")
140                                                      .arg(currentLineNumber)
141                                                      .arg(lineValues[NA])),
142                                    "");
143     return lineValues[NA];
144 }
145 
getGenes(const QStringList & lineValues)146 QString Peak2GeneFormatLoader::getGenes(const QStringList &lineValues) {
147     CHECK_EXT(!lineValues[Genes].isEmpty(), skipLine = true; os.addWarning(QString("Genes list is empty at line %1")
148                                                                                .arg(currentLineNumber)),
149                                                              "");
150     return lineValues[Genes];
151 }
152 
getStrand(const QStringList & lineValues)153 QString Peak2GeneFormatLoader::getStrand(const QStringList &lineValues) {
154     CHECK_EXT(!lineValues[Strand].isEmpty(), skipLine = true; os.addWarning(QString("Strand is empty at line %1")
155                                                                                 .arg(currentLineNumber)),
156                                                               "");
157     return lineValues[Strand];
158 }
159 
getTss2pCenter(const QStringList & lineValues)160 QString Peak2GeneFormatLoader::getTss2pCenter(const QStringList &lineValues) {
161     CHECK_EXT(!lineValues[Tss2peakCenter].isEmpty(), skipLine = true; os.addWarning(QString("Strand is empty at line %1")
162                                                                                         .arg(currentLineNumber)),
163                                                                       "");
164     return lineValues[Tss2peakCenter];
165 }
166 
167 }    // namespace U2
168