1 package org.broadinstitute.hellbender.utils.report; 2 3 import org.broadinstitute.hellbender.exceptions.GATKException; 4 import org.broadinstitute.hellbender.exceptions.UserException; 5 import org.broadinstitute.hellbender.utils.gcs.BucketUtils; 6 import org.broadinstitute.hellbender.utils.recalibration.RecalUtils; 7 8 import java.io.BufferedReader; 9 import java.io.File; 10 import java.io.IOException; 11 import java.io.InputStream; 12 import java.io.InputStreamReader; 13 import java.io.PrintStream; 14 import java.io.Reader; 15 import java.util.Collection; 16 import java.util.List; 17 import java.util.Map; 18 import java.util.NavigableMap; 19 import java.util.SortedSet; 20 import java.util.TreeMap; 21 import java.util.TreeSet; 22 23 /** 24 * Container class for GATK report tables 25 */ 26 public final class GATKReport { 27 public static final String RECAL_FILE = "input covariates table file for base quality score recalibration"; 28 public static final String GATKREPORT_HEADER_PREFIX = "#:GATKReport."; 29 public static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V1_1; 30 private static final String SEPARATOR = ":"; 31 private GATKReportVersion version = LATEST_REPORT_VERSION; 32 33 private final NavigableMap<String, GATKReportTable> tables = new TreeMap<>(); 34 35 /** 36 * Create a new, empty GATKReport. 37 */ GATKReport()38 public GATKReport() { 39 } 40 41 /** 42 * Create a new GATKReport with the contents of a GATKReport on disk. 43 * 44 * @param filename the path to the file to load 45 */ GATKReport(String filename)46 public GATKReport(String filename) { 47 this(BucketUtils.openFile(filename)); 48 } 49 50 /** 51 * Create a new GATKReport with the contents of a GATKReport on disk. 52 * 53 * @param file the file to load 54 */ GATKReport(File file)55 public GATKReport(File file) { 56 this(file.getPath()); 57 } 58 GATKReport(InputStream in)59 public GATKReport(InputStream in){ 60 loadReport(new InputStreamReader(in)); 61 } 62 63 /** 64 * Create a new GATK report from GATK report tables 65 * @param tables Any number of tables that you want to add to the report 66 */ GATKReport(GATKReportTable... tables)67 public GATKReport(GATKReportTable... tables) { 68 for( GATKReportTable table: tables) 69 addTable(table); 70 } 71 72 /** 73 * Gets the unique read groups in the table 74 * 75 * @return the unique read groups 76 */ getReadGroups()77 public SortedSet<String> getReadGroups() { 78 final GATKReportTable reportTable = getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE); 79 final SortedSet<String> readGroups = new TreeSet<>(); 80 for ( int i = 0; i < reportTable.getNumRows(); i++ ) { 81 readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString()); 82 } 83 return readGroups; 84 } 85 86 /** 87 * Load a GATKReport from a {@link Reader} 88 * 89 * @param in the reader to load from 90 */ loadReport(Reader in)91 private void loadReport(Reader in) { 92 BufferedReader reader = new BufferedReader(in); 93 String reportHeader; 94 try { 95 reportHeader = reader.readLine(); 96 } catch (IOException e) { 97 throw new UserException("Could not read " + RECAL_FILE, e); 98 } 99 100 if ( reportHeader == null ) { 101 throw new UserException(RECAL_FILE + " is empty."); 102 } 103 104 // Read the first line for the version and number of tables. 105 version = GATKReportVersion.fromHeader(reportHeader); 106 if (version.equals(GATKReportVersion.V0_1) || 107 version.equals(GATKReportVersion.V0_2)) 108 throw new UserException("The GATK no longer supports reading legacy GATK Reports. Please use v1.0 or newer."); 109 110 int nTables = Integer.parseInt(reportHeader.split(":")[2]); 111 112 // Read each table according ot the number of tables 113 for (int i = 0; i < nTables; i++) { 114 addTable(new GATKReportTable(reader, version)); 115 } 116 } 117 118 119 /** 120 * Add a new, empty table to the report 121 * 122 * @param tableName the name of the table 123 * @param tableDescription the description of the table 124 * @param numColumns the number of columns in this table 125 */ addTable(final String tableName, final String tableDescription, final int numColumns)126 public void addTable(final String tableName, final String tableDescription, final int numColumns) { 127 addTable(tableName, tableDescription, numColumns, GATKReportTable.Sorting.DO_NOT_SORT); 128 } 129 130 /** 131 * Add a new, empty table to the report 132 * 133 * @param tableName the name of the table 134 * @param tableDescription the description of the table 135 * @param numColumns the number of columns in this table 136 * @param sortingWay way to sort table 137 */ addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.Sorting sortingWay)138 public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.Sorting sortingWay) { 139 GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay); 140 tables.put(tableName, table); 141 } 142 143 /** 144 * Adds a table, empty or populated, to the report 145 * 146 * @param table the table to add 147 */ addTable(GATKReportTable table)148 public void addTable(GATKReportTable table) { 149 tables.put(table.getTableName(), table); 150 } 151 addTables(List<GATKReportTable> gatkReportTableV2s)152 public void addTables(List<GATKReportTable> gatkReportTableV2s) { 153 for ( GATKReportTable table : gatkReportTableV2s ) 154 addTable(table); 155 } 156 157 /** 158 * Return a table with a given name 159 * 160 * @param tableName the name of the table 161 * @return the table object 162 */ getTable(String tableName)163 public GATKReportTable getTable(String tableName) { 164 GATKReportTable table = tables.get(tableName); 165 if (table == null) 166 throw new GATKException("Table is not in GATKReport: " + tableName); 167 return table; 168 } 169 170 /** 171 * Print all tables contained within this container to a PrintStream 172 * 173 * @param out the PrintStream to which the tables should be written 174 */ print(PrintStream out)175 public void print(PrintStream out) { 176 out.println(GATKREPORT_HEADER_PREFIX + getVersion() + SEPARATOR + getTables().size()); 177 for (GATKReportTable table : tables.values()) { 178 table.write(out); 179 } 180 } 181 182 /** 183 * Print all tables contained within this container to a PrintStream 184 * 185 * @param out the PrintStream to which the tables should be written 186 */ print(PrintStream out, GATKReportTable.Sorting sortingWay)187 public void print(PrintStream out, GATKReportTable.Sorting sortingWay) { 188 out.println(GATKREPORT_HEADER_PREFIX + getVersion() + SEPARATOR + getTables().size()); 189 for (GATKReportTable table : tables.values()) { 190 table.write(out, sortingWay); 191 } 192 } 193 194 195 getTables()196 public Collection<GATKReportTable> getTables() { 197 return tables.values(); 198 } 199 200 /** 201 * This is the main function is charge of gathering the reports. It checks that the reports are compatible and then 202 * calls the table gathering functions. 203 * 204 * @param input another GATKReport of the same format 205 */ concat(GATKReport input)206 public void concat(GATKReport input) { 207 208 if ( !isSameFormat(input) ) { 209 throw new GATKException("Failed to combine GATKReport, format doesn't match!"); 210 } 211 212 for ( Map.Entry<String, GATKReportTable> table : tables.entrySet() ) { 213 table.getValue().concat(input.getTable(table.getKey())); 214 } 215 } 216 getVersion()217 public GATKReportVersion getVersion() { 218 return version; 219 } 220 221 /** 222 * Returns whether or not the two reports have the same format, from columns, to tables, to reports, and everything 223 * in between. This does not check if the data inside is the same. This is the check to see if the two reports are 224 * gatherable or reduceable. 225 * 226 * @param report another GATK report 227 * @return true if the the reports are gatherable 228 */ isSameFormat(GATKReport report)229 public boolean isSameFormat(GATKReport report) { 230 if (!version.equals(report.version)) { 231 return false; 232 } 233 if (!tables.keySet().equals(report.tables.keySet())) { 234 return false; 235 } 236 for (String tableName : tables.keySet()) { 237 if (!getTable(tableName).isSameFormat(report.getTable(tableName))) 238 return false; 239 } 240 return true; 241 } 242 243 /** 244 * Checks that the reports are exactly the same. 245 * 246 * @param report another GATK report 247 * @return true if all field in the reports, tables, and columns are equal. 248 */ equals(GATKReport report)249 public boolean equals(GATKReport report) { 250 if (!version.equals(report.version)) { 251 return false; 252 } 253 if (!tables.keySet().equals(report.tables.keySet())) { 254 return false; 255 } 256 for (String tableName : tables.keySet()) { 257 if (!getTable(tableName).equals(report.getTable(tableName))) 258 return false; 259 } 260 return true; 261 } 262 263 /** 264 * The constructor for a simplified GATK Report. Simplified GATK report are designed for reports that do not need 265 * the advanced functionality of a full GATK Report. 266 * <p/> 267 * A simple GATK Report consists of: 268 * <p/> 269 * - A single table 270 * - No primary key ( it is hidden ) 271 * <p/> 272 * Optional: 273 * - Only untyped columns. As long as the data is an Object, it will be accepted. 274 * - Default column values being empty strings. 275 * <p/> 276 * Limitations: 277 * <p/> 278 * - A simple GATK report cannot contain multiple tables. 279 * - It cannot contain typed columns, which prevents arithmetic gathering. 280 * 281 * @param tableName The name of your simple GATK report table 282 * @param columns The names of the columns in your table 283 * @return a simplified GATK report 284 */ newSimpleReport(final String tableName, GATKReportTable.Sorting sorting, final String... columns)285 public static GATKReport newSimpleReport(final String tableName, GATKReportTable.Sorting sorting, final String... columns) { 286 return newSimpleReportWithDescription(tableName, "A simplified GATK table report", sorting, columns); 287 } 288 289 /** 290 * @see #newSimpleReport(String, GATKReportTable.Sorting, String...) but with a customized description 291 */ newSimpleReportWithDescription(final String tableName, final String desc, GATKReportTable.Sorting sorting, final String... columns)292 public static GATKReport newSimpleReportWithDescription(final String tableName, final String desc, GATKReportTable.Sorting sorting, final String... columns) { 293 GATKReportTable table = new GATKReportTable(tableName, desc, columns.length, sorting); 294 295 for (String column : columns) { 296 table.addColumn(column, ""); 297 } 298 299 GATKReport output = new GATKReport(); 300 output.addTable(table); 301 302 return output; 303 } 304 305 /** 306 * This method provides an efficient way to populate a simplified GATK report. This method will only work on reports 307 * that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information. 308 * 309 * @param values the row of data to be added to the table. 310 * Note: the number of arguments must match the columns in the table. 311 */ addRow(final Object... values)312 public void addRow(final Object... values) { 313 // Must be a simple report 314 if ( tables.size() != 1 ) 315 throw new GATKException("Cannot write a row to a complex GATK Report"); 316 317 GATKReportTable table = tables.firstEntry().getValue(); 318 if ( table.getNumColumns() != values.length ) 319 throw new GATKException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" ); 320 321 final int rowIndex = table.getNumRows(); 322 for ( int i = 0; i < values.length; i++ ) 323 table.set(rowIndex, i, values[i]); 324 } 325 326 } 327