1 package org.broadinstitute.hellbender.utils.report;
2 
3 import org.broadinstitute.hellbender.exceptions.GATKException;
4 import org.broadinstitute.hellbender.exceptions.UserException;
5 import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
6 import org.broadinstitute.hellbender.utils.recalibration.RecalUtils;
7 
8 import java.io.BufferedReader;
9 import java.io.File;
10 import java.io.IOException;
11 import java.io.InputStream;
12 import java.io.InputStreamReader;
13 import java.io.PrintStream;
14 import java.io.Reader;
15 import java.util.Collection;
16 import java.util.List;
17 import java.util.Map;
18 import java.util.NavigableMap;
19 import java.util.SortedSet;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22 
23 /**
24  * Container class for GATK report tables
25  */
26 public final class GATKReport {
27     public static final String RECAL_FILE = "input covariates table file for base quality score recalibration";
28     public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport.";
29     public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_1;
30     private static final String SEPARATOR = ":";
31     private GATKReportVersion version = LATEST_REPORT_VERSION;
32 
33     private final NavigableMap<String, GATKReportTable> tables = new TreeMap<>();
34 
35     /**
36      * Create a new, empty GATKReport.
37      */
GATKReport()38     public GATKReport() {
39     }
40 
41     /**
42      * Create a new GATKReport with the contents of a GATKReport on disk.
43      *
44      * @param filename the path to the file to load
45      */
GATKReport(String filename)46     public GATKReport(String filename) {
47         this(BucketUtils.openFile(filename));
48     }
49 
50     /**
51      * Create a new GATKReport with the contents of a GATKReport on disk.
52      *
53      * @param file the file to load
54      */
GATKReport(File file)55     public GATKReport(File file) {
56         this(file.getPath());
57     }
58 
GATKReport(InputStream in)59     public GATKReport(InputStream in){
60         loadReport(new InputStreamReader(in));
61     }
62 
63     /**
64      * Create a new GATK report from GATK report tables
65      * @param tables Any number of tables that you want to add to the report
66      */
GATKReport(GATKReportTable... tables)67     public GATKReport(GATKReportTable... tables) {
68         for( GATKReportTable table: tables)
69             addTable(table);
70     }
71 
72     /**
73      * Gets the unique read groups in the table
74      *
75      * @return the unique read groups
76      */
getReadGroups()77     public SortedSet<String> getReadGroups() {
78         final GATKReportTable reportTable = getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE);
79         final SortedSet<String> readGroups = new TreeSet<>();
80         for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
81             readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString());
82         }
83         return readGroups;
84     }
85 
86     /**
87      * Load a GATKReport from a {@link Reader}
88      *
89      * @param in the reader to load from
90      */
loadReport(Reader in)91     private void loadReport(Reader in) {
92         BufferedReader reader = new BufferedReader(in);
93         String reportHeader;
94         try {
95             reportHeader = reader.readLine();
96         } catch (IOException e) {
97             throw new UserException("Could not read " + RECAL_FILE, e);
98         }
99 
100         if ( reportHeader == null ) {
101             throw new UserException(RECAL_FILE + " is empty.");
102         }
103 
104         // Read the first line for the version and number of tables.
105         version = GATKReportVersion.fromHeader(reportHeader);
106         if (version.equals(GATKReportVersion.V0_1) ||
107                 version.equals(GATKReportVersion.V0_2))
108             throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer.");
109 
110         int nTables = Integer.parseInt(reportHeader.split(":")[2]);
111 
112         // Read each table according ot the number of tables
113         for (int i = 0; i < nTables; i++) {
114             addTable(new GATKReportTable(reader, version));
115         }
116     }
117 
118 
119     /**
120      * Add a new, empty table to the report
121      *
122      * @param tableName        the name of the table
123      * @param tableDescription the description of the table
124      * @param numColumns       the number of columns in this table
125      */
addTable(final String tableName, final String tableDescription, final int numColumns)126     public void addTable(final String tableName, final String tableDescription, final int numColumns) {
127         addTable(tableName, tableDescription, numColumns, GATKReportTable.Sorting.DO_NOT_SORT);
128     }
129 
130     /**
131      * Add a new, empty table to the report
132      *
133      * @param tableName        the name of the table
134      * @param tableDescription the description of the table
135      * @param numColumns       the number of columns in this table
136      * @param sortingWay       way to sort table
137      */
addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.Sorting sortingWay)138     public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.Sorting sortingWay) {
139         GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay);
140         tables.put(tableName, table);
141     }
142 
143     /**
144      * Adds a table, empty or populated, to the report
145      *
146      * @param table the table to add
147      */
addTable(GATKReportTable table)148     public void addTable(GATKReportTable table) {
149         tables.put(table.getTableName(), table);
150     }
151 
addTables(List<GATKReportTable> gatkReportTableV2s)152     public void addTables(List<GATKReportTable> gatkReportTableV2s) {
153         for ( GATKReportTable table : gatkReportTableV2s )
154             addTable(table);
155     }
156 
157     /**
158      * Return a table with a given name
159      *
160      * @param tableName the name of the table
161      * @return the table object
162      */
getTable(String tableName)163     public GATKReportTable getTable(String tableName) {
164         GATKReportTable table = tables.get(tableName);
165         if (table == null)
166             throw new GATKException("Table is not in GATKReport: " + tableName);
167         return table;
168     }
169 
170     /**
171      * Print all tables contained within this container to a PrintStream
172      *
173      * @param out the PrintStream to which the tables should be written
174      */
print(PrintStream out)175     public void print(PrintStream out) {
176         out.println(GATKREPORT_HEADER_PREFIX + getVersion() + SEPARATOR + getTables().size());
177         for (GATKReportTable table : tables.values()) {
178             table.write(out);
179         }
180     }
181 
182     /**
183      * Print all tables contained within this container to a PrintStream
184      *
185      * @param out the PrintStream to which the tables should be written
186      */
print(PrintStream out, GATKReportTable.Sorting sortingWay)187     public void print(PrintStream out, GATKReportTable.Sorting sortingWay) {
188         out.println(GATKREPORT_HEADER_PREFIX + getVersion() + SEPARATOR + getTables().size());
189         for (GATKReportTable table : tables.values()) {
190             table.write(out, sortingWay);
191         }
192     }
193 
194 
195 
getTables()196     public Collection<GATKReportTable> getTables() {
197         return tables.values();
198     }
199 
200     /**
201      * This is the main function is charge of gathering the reports. It checks that the reports are compatible and then
202      * calls the table gathering functions.
203      *
204      * @param input another GATKReport of the same format
205      */
concat(GATKReport input)206     public void concat(GATKReport input) {
207 
208         if ( !isSameFormat(input) ) {
209             throw new GATKException("Failed to combine GATKReport, format doesn't match!");
210         }
211 
212         for ( Map.Entry<String, GATKReportTable> table : tables.entrySet() ) {
213             table.getValue().concat(input.getTable(table.getKey()));
214         }
215     }
216 
getVersion()217     public GATKReportVersion getVersion() {
218         return version;
219     }
220 
221     /**
222      * Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything
223      * in between. This does not check if the data inside is the same. This is the check to see if the two reports are
224      * gatherable or reduceable.
225      *
226      * @param report another GATK report
227      * @return true if the the reports are gatherable
228      */
isSameFormat(GATKReport report)229     public boolean isSameFormat(GATKReport report) {
230         if (!version.equals(report.version)) {
231             return false;
232         }
233         if (!tables.keySet().equals(report.tables.keySet())) {
234             return false;
235         }
236         for (String tableName : tables.keySet()) {
237             if (!getTable(tableName).isSameFormat(report.getTable(tableName)))
238                 return false;
239         }
240         return true;
241     }
242 
243     /**
244      * Checks that the reports are exactly the same.
245      *
246      * @param report another GATK report
247      * @return true if all field in the reports, tables, and columns are equal.
248      */
equals(GATKReport report)249     public boolean equals(GATKReport report) {
250         if (!version.equals(report.version)) {
251             return false;
252         }
253         if (!tables.keySet().equals(report.tables.keySet())) {
254             return false;
255         }
256         for (String tableName : tables.keySet()) {
257             if (!getTable(tableName).equals(report.getTable(tableName)))
258                 return false;
259         }
260         return true;
261     }
262 
263     /**
264      * The constructor for a simplified GATK Report. Simplified GATK report are designed for reports that do not need
265      * the advanced functionality of a full GATK Report.
266      * <p/>
267      * A simple GATK Report consists of:
268      * <p/>
269      * - A single table
270      * - No primary key ( it is hidden )
271      * <p/>
272      * Optional:
273      * - Only untyped columns. As long as the data is an Object, it will be accepted.
274      * - Default column values being empty strings.
275      * <p/>
276      * Limitations:
277      * <p/>
278      * - A simple GATK report cannot contain multiple tables.
279      * - It cannot contain typed columns, which prevents arithmetic gathering.
280      *
281      * @param tableName The name of your simple GATK report table
282      * @param columns   The names of the columns in your table
283      * @return a simplified GATK report
284      */
newSimpleReport(final String tableName, GATKReportTable.Sorting sorting, final String... columns)285     public static GATKReport newSimpleReport(final String tableName, GATKReportTable.Sorting sorting, final String... columns) {
286         return newSimpleReportWithDescription(tableName, "A simplified GATK table report", sorting, columns);
287     }
288 
289     /**
290      * @see #newSimpleReport(String, GATKReportTable.Sorting, String...) but with a customized description
291      */
newSimpleReportWithDescription(final String tableName, final String desc, GATKReportTable.Sorting sorting, final String... columns)292     public static GATKReport newSimpleReportWithDescription(final String tableName, final String desc, GATKReportTable.Sorting sorting, final String... columns) {
293         GATKReportTable table = new GATKReportTable(tableName, desc, columns.length, sorting);
294 
295         for (String column : columns) {
296             table.addColumn(column, "");
297         }
298 
299         GATKReport output = new GATKReport();
300         output.addTable(table);
301 
302         return output;
303     }
304 
305     /**
306      * This method provides an efficient way to populate a simplified GATK report. This method will only work on reports
307      * that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information.
308      *
309      * @param values     the row of data to be added to the table.
310      *               Note: the number of arguments must match the columns in the table.
311      */
addRow(final Object... values)312     public void addRow(final Object... values) {
313         // Must be a simple report
314         if ( tables.size() != 1 )
315             throw new GATKException("Cannot write a row to a complex GATK Report");
316 
317         GATKReportTable table = tables.firstEntry().getValue();
318         if ( table.getNumColumns() != values.length )
319             throw new GATKException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" );
320 
321         final int rowIndex = table.getNumRows();
322         for ( int i = 0; i < values.length; i++ )
323             table.set(rowIndex, i, values[i]);
324     }
325 
326 }
327