1 // ==========================================================================
2 // NGS: Regions of Interest Analysis
3 // ==========================================================================
4 // Copyright (c) 2012-2018, Bernd Jagla, Institut Pasteur
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // Create ROI coverage plot thumbnail grid.
35 // ==========================================================================
36
37 #include <fstream>
38 #include <iostream>
39 #include <sstream>
40
41 #include <seqan/arg_parse.h>
42 #include <seqan/basic.h>
43 #include <seqan/sequence.h>
44 #include <seqan/file.h>
45 #include <seqan/stream.h>
46 #include <seqan/roi_io.h>
47
48 #include "png_canvas.h"
49
50 // ==========================================================================
51 // Classes
52 // ==========================================================================
53
54 // ----------------------------------------------------------------------------
55 // Class Options
56 // ----------------------------------------------------------------------------
57
58 // Options for the bam2roi Application.
59
60 struct Options
61 {
62 // Verbosity of output: 0 -- quiet, 1 -- normal, 2 -- verbose, 3 -- very verbose.
63 int verbosity;
64
65 // -----------------------------------------------------------------------
66 // Input / Output Options
67 // -----------------------------------------------------------------------
68
69 // Paths to input ROI file.
70 seqan::CharString inputFileName;
71
72 // Output prefix, will append "%d.png".
73 seqan::CharString outputFileName;
74
75 // -----------------------------------------------------------------------
76 // Plot Options
77 // -----------------------------------------------------------------------
78
79 // Number of columns and rows in grid.
80 int numCols;
81 int numRows;
82
83 // Width and height of one plot in px.
84 int plotWidth;
85 int plotHeight;
86
87 // Border and space between plots.
88 int borderWidth;
89 int space;
90
91 // Maximal number of plots to create, 0 for infinity.
92 int maxRois;
93
94 // Largest value to print in plots. In case of going beyond, we'll make a little red dot at the top. A value of 0
95 // indicates no limit.
96 int maxValue;
97
OptionsOptions98 Options() :
99 verbosity(1), numCols(10), numRows(10), plotWidth(10), plotHeight(10), borderWidth(0), space(0),
100 maxRois(0), maxValue(0)
101 {}
102 };
103
104 // --------------------------------------------------------------------------
105 // Class Plotter
106 // --------------------------------------------------------------------------
107
108 class Plotter
109 {
110 public:
111 // The options to use.
112 Options options;
113
114 // The index of the current grid and plot on grid.
115 int gridNo;
116 int plotNo;
117
118 // The current PNG canvas.
119 PngCanvas canvas;
120
121 // Colors.
122 PngColor color;
123 PngColor bgColor;
124
Plotter(Options const & options)125 Plotter(Options const & options) :
126 options(options), gridNo(0), plotNo(0),
127 canvas(options.numCols * options.plotWidth + 2 * options.borderWidth + (options.numCols - 1) * options.space,
128 options.numRows * options.plotHeight + 2 * options.borderWidth + (options.numRows - 1) * options.space),
129 color(0, 0, 0, 0xff), bgColor(0, 0, 0, 10)
130 {}
131
~Plotter()132 ~Plotter()
133 {
134 // Write PngCanvas if there is any plot on it.
135 if (plotNo)
136 writeGrid();
137 }
138
139 // Plot grid to canvas, advance gridNo, plotNo, and write out grid if necessary.
plotGrid(seqan::String<unsigned> const & points)140 void plotGrid(seqan::String<unsigned> const & points)
141 {
142 _doPlot(points);
143
144 // Advance plotNo and gridNo if necessary, write out plot to file.
145 plotNo += 1;
146 if (plotNo == options.numCols * options.numRows)
147 {
148 writeGrid();
149 clearGrid();
150 plotNo = 0;
151 gridNo += 1;
152 }
153 }
154
plotStart(int idx)155 std::pair<int, int> plotStart(int idx)
156 {
157 int row = idx / options.numCols;
158 int col = idx % options.numCols;
159 int x = options.borderWidth + col * (options.plotWidth + options.space);
160 int y = options.borderWidth + row * (options.plotHeight + options.space);
161 return std::make_pair(x, y);
162 }
163
_doPlot(seqan::String<unsigned> const & counts)164 void _doPlot(seqan::String<unsigned> const & counts)
165 {
166 std::pair<int, int> startPos = plotStart(plotNo);
167 // Compute polyline coordinates.
168 int numPoints = length(counts);
169 double h = 0.0;
170 double l = 0.0;
171 for (unsigned i = 0; i < length(counts); ++i)
172 h = std::max(h, (double)counts[i]);
173 if (h == 0.0)
174 h = 1.0;
175 if (h == l) // Center line if highest == lowest.
176 h *= 2;
177
178 if (options.maxValue != 0)
179 h = options.maxValue;
180
181 canvas.color = bgColor;
182 canvas.filledRectangle(startPos.first, startPos.second,
183 startPos.first + options.plotWidth,
184 startPos.second + options.plotHeight);
185
186 canvas.color = color;
187 std::vector<std::pair<int, int> > points;
188 for (unsigned i = 0; i < length(counts); ++i)
189 {
190 int x = static_cast<int>(startPos.first + (double)i / numPoints * options.plotWidth);
191 unsigned val = counts[i];
192 if (options.maxValue != 0 && (int)val >= options.maxValue)
193 val = options.maxValue;
194 int y = static_cast<int>(startPos.second + options.plotHeight - val / h * options.plotHeight);
195 points.push_back(std::make_pair(x, y));
196 }
197 canvas.polyline(points);
198
199 // Print polyline instructions if --very-verbose.
200 if (options.verbosity >= 3)
201 {
202 std::cerr << "polyline\n";
203 for (unsigned i = 0; i < points.size(); ++i)
204 std::cerr << " " << points[i].first << ", " << points[i].second << "\n";
205 }
206
207 PngColor red(0xff, 0, 0, 0xff);
208 canvas.color = red;
209 for (unsigned i = 0; i < points.size(); ++i)
210 if (options.maxValue != 0 && (int)counts[i] >= options.maxValue)
211 canvas.point(points[i].first, points[i].second);
212 }
213
214 // Write grid on PngCanvas to output.
writeGrid()215 void writeGrid()
216 {
217 std::stringstream ss;
218 ss << options.outputFileName << gridNo << ".png";
219 canvas.write(ss.str().c_str());
220 }
221
222 // Clear grid.
clearGrid()223 void clearGrid()
224 {
225 canvas.color = PngColor::WHITE();
226 canvas.filledRectangle(0, 0, canvas.width - 1, canvas.height - 1);
227 }
228 };
229
230 // ==========================================================================
231 // Metafunctions
232 // ==========================================================================
233
234 // ==========================================================================
235 // Functions
236 // ==========================================================================
237
238 // --------------------------------------------------------------------------
239 // Function yesNo() [Options]
240 // --------------------------------------------------------------------------
241
yesNo(bool b)242 char const * yesNo(bool b)
243 {
244 return b ? "YES" : "NO";
245 }
246
247 // --------------------------------------------------------------------------
248 // Function print() [Options]
249 // --------------------------------------------------------------------------
250
print(std::ostream & out,Options const & options)251 void print(std::ostream & out, Options const & options)
252 {
253 out << "__OPTIONS_____________________________________________________________________\n"
254 << "\n"
255 << "INPUT FILE \t" << options.inputFileName << "\n"
256 << "OUTPUT FILE \t" << options.outputFileName << "\n"
257 << "\n"
258 << "NUM COLS \t" << options.numCols << "\n"
259 << "NUM ROWS \t" << options.numRows << "\n"
260 << "\n"
261 << "PLOT WIDTH \t" << options.plotWidth << "\n"
262 << "PLOT HEIGHT \t" << options.plotHeight << "\n"
263 << "\n"
264 << "BORDER WIDTH \t" << options.borderWidth << "\n"
265 << "SPACE \t" << options.space << "\n"
266 << "\n"
267 << "MAX ROIS \t" << options.maxRois << "\n"
268 << "MAX VALUE \t" << options.maxValue << "\n"
269 << "\n";
270 }
271
272 // --------------------------------------------------------------------------
273 // Function parseCommandLine()
274 // --------------------------------------------------------------------------
275
276 // Parse command line with ArgumentParser class.
277
278 seqan::ArgumentParser::ParseResult
parseCommandLine(Options & options,int argc,char const ** argv)279 parseCommandLine(Options & options, int argc, char const ** argv)
280 {
281 // Setup ArgumentParser.
282 seqan::ArgumentParser parser("roi_plot_thumbnails");
283 setCategory(parser, "NGS ROI Analysis");
284
285 // Set short description, version, and date.
286 setShortDescription(parser, "Create plot grids for ROI file.");
287 setVersion(parser, SEQAN_APP_VERSION " [" SEQAN_REVISION "]");
288 setDate(parser, SEQAN_DATE);
289
290 // Define usage line and long description.
291 addUsageLine(parser, "\\fB-if\\fP \\fIIN.roi\\fP \\fB-o\\fP \\fIOUT\\fP");
292
293 addDescription(parser,
294 "Create PNG images with plot grids to \\fIOUT${i}.png\\fP from ROI records "
295 "in \\fIIN.roi\\fP.");
296
297 // -----------------------------------------------------------------------
298 // General Options
299 // -----------------------------------------------------------------------
300
301 addSection(parser, "General Options");
302
303 addOption(parser, seqan::ArgParseOption("v", "verbose", "Verbose mode."));
304 addOption(parser, seqan::ArgParseOption("vv", "vverbose", "Very verbose mode."));
305
306 // -----------------------------------------------------------------------
307 // Input / Output Options
308 // -----------------------------------------------------------------------
309
310 addSection(parser, "Input / Output Parameters");
311
312 addOption(parser, seqan::ArgParseOption("if", "input-file", "ROI to plot.",
313 seqan::ArgParseOption::INPUT_FILE));
314 setValidValues(parser, "input-file", seqan::RoiFileIn::getFileExtensions());
315 setRequired(parser, "input-file");
316
317 addOption(parser, seqan::ArgParseOption("o", "output-prefix", "Prefix of output file.",
318 seqan::ArgParseOption::OUTPUT_FILE));
319 setRequired(parser, "output-prefix");
320
321 // -----------------------------------------------------------------------
322 // Plot Configuration
323 // -----------------------------------------------------------------------
324
325 addSection(parser, "PlotConfiguration");
326
327 addOption(parser, seqan::ArgParseOption("", "max-rois", "Maximal number of plots to create (0 for all).",
328 seqan::ArgParseOption::INTEGER, "NUM"));
329 setMinValue(parser, "max-rois", "0");
330 setDefaultValue(parser, "max-rois", "20000");
331
332 addOption(parser, seqan::ArgParseOption("", "max-value", "Fix maximal y value. 0 for no limit..",
333 seqan::ArgParseOption::INTEGER, "NUM"));
334 setMinValue(parser, "max-value", "0");
335 setDefaultValue(parser, "max-value", "0");
336
337 addOption(parser, seqan::ArgParseOption("", "num-cols", "Number of plots in one row.",
338 seqan::ArgParseOption::INTEGER, "COLS"));
339 setMinValue(parser, "num-cols", "1");
340 setDefaultValue(parser, "num-cols", "40");
341
342 addOption(parser, seqan::ArgParseOption("", "num-rows", "Number of plots in one column.",
343 seqan::ArgParseOption::INTEGER, "ROWS"));
344 setMinValue(parser, "num-rows", "1");
345 setDefaultValue(parser, "num-rows", "50");
346
347 addOption(parser, seqan::ArgParseOption("", "plot-height", "Height of one plot in px.",
348 seqan::ArgParseOption::INTEGER, "HEIGHT"));
349 setMinValue(parser, "plot-height", "1");
350 setDefaultValue(parser, "plot-height", "30");
351
352 addOption(parser, seqan::ArgParseOption("", "plot-width", "Width of one plot in px.",
353 seqan::ArgParseOption::INTEGER, "WIDTH"));
354 setMinValue(parser, "plot-width", "1");
355 setDefaultValue(parser, "plot-width", "30");
356
357 addOption(parser, seqan::ArgParseOption("", "border-width", "Border width.",
358 seqan::ArgParseOption::INTEGER, "WIDTH"));
359 setMinValue(parser, "border-width", "0");
360 setDefaultValue(parser, "border-width", "0");
361
362 addOption(parser, seqan::ArgParseOption("", "spacing", "Space between plots.",
363 seqan::ArgParseOption::INTEGER, "WIDTH"));
364 setDefaultValue(parser, "spacing", "2");
365
366 // -----------------------------------------------------------------------
367 // Parsing and Value Extraction
368 // -----------------------------------------------------------------------
369
370 // Parse command line.
371 seqan::ArgumentParser::ParseResult res = seqan::parse(parser, argc, argv);
372
373 // Only extract options if the program will continue after parseCommandLine()
374 if (res != seqan::ArgumentParser::PARSE_OK)
375 return res;
376
377 // Extract option values.
378
379 options.verbosity = 1;
380 if (isSet(parser, "verbose"))
381 options.verbosity = 2;
382 if (isSet(parser, "vverbose"))
383 options.verbosity = 3;
384
385 getOptionValue(options.inputFileName, parser, "input-file");
386 getOptionValue(options.outputFileName, parser, "output-prefix");
387
388 getOptionValue(options.numCols, parser, "num-cols");
389 getOptionValue(options.numRows, parser, "num-rows");
390 getOptionValue(options.plotWidth, parser, "plot-width");
391 getOptionValue(options.plotHeight, parser, "plot-height");
392 getOptionValue(options.borderWidth, parser, "border-width");
393 getOptionValue(options.space, parser, "spacing");
394 getOptionValue(options.maxRois, parser, "max-rois");
395 getOptionValue(options.maxValue, parser, "max-value");
396
397 return seqan::ArgumentParser::PARSE_OK;
398 }
399
main(int argc,char const ** argv)400 int main(int argc, char const ** argv)
401 {
402 // Parse the command line.
403 Options options;
404 seqan::ArgumentParser::ParseResult res = parseCommandLine(options, argc, argv);
405
406 // If parsing was not successful then exit with code 1 if there were errors. Otherwise, exit with code 0 (e.g. help
407 // was printed).
408 if (res != seqan::ArgumentParser::PARSE_OK)
409 return res == seqan::ArgumentParser::PARSE_ERROR;
410
411 // Print program name and options.
412
413 if (options.verbosity >= 1)
414 {
415 std::cerr << "ROI PLOT GRIDS\n"
416 << "==============\n"
417 << "\n";
418 print(std::cerr, options);
419 }
420
421 // -----------------------------------------------------------------------
422 // Open input and output file
423 // -----------------------------------------------------------------------
424
425 if (options.verbosity >= 1)
426 std::cerr << "__OPENING FILES_______________________________________________________________\n"
427 << "\n";
428
429 if (options.verbosity >= 1)
430 std::cerr << "Opening " << options.inputFileName << " ...";
431 seqan::RoiFileIn roiFileIn;
432 if (!open(roiFileIn, toCString(options.inputFileName)))
433 {
434 std::cerr << "\nERROR: Could not open " << options.inputFileName << "\n";
435 return 1;
436 }
437 if (options.verbosity >= 1)
438 std::cerr << " OK\n";
439
440 // Read header (actually, skip it).
441 seqan::RoiHeader roiHeader;
442 readHeader(roiHeader, roiFileIn);
443
444 // -----------------------------------------------------------------------
445 // Create plot grids.
446 // -----------------------------------------------------------------------
447
448 if (options.verbosity >= 1)
449 std::cerr << "\n__PLOTTING____________________________________________________________________\n"
450 << "\n"
451 << "WORKING...";
452
453 Plotter plotter(options);
454
455 seqan::RoiRecord record;
456 for (int i = 0; (options.maxRois == 0 || i < options.maxRois) && !atEnd(roiFileIn); ++i)
457 {
458 if (options.verbosity >= 2)
459 std::cerr << " " << i << "(" << length(record.count) << ", " << plotter.gridNo << ")";
460 readRecord(record, roiFileIn);
461
462 plotter.plotGrid(record.count);
463 }
464
465 if (options.verbosity >= 1)
466 std::cerr << " OK\n\nDone.\n";
467
468 return 0;
469 }
470