1 // ==========================================================================
2 //                   NGS: Regions of Interest Analysis
3 // ==========================================================================
4 // Copyright (c) 2012-2018, Bernd Jagla, Institut Pasteur
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // Create ROI coverage plot thumbnail grid.
35 // ==========================================================================
36 
37 #include <fstream>
38 #include <iostream>
39 #include <sstream>
40 
41 #include <seqan/arg_parse.h>
42 #include <seqan/basic.h>
43 #include <seqan/sequence.h>
44 #include <seqan/file.h>
45 #include <seqan/stream.h>
46 #include <seqan/roi_io.h>
47 
48 #include "png_canvas.h"
49 
50 // ==========================================================================
51 // Classes
52 // ==========================================================================
53 
54 // ----------------------------------------------------------------------------
55 // Class Options
56 // ----------------------------------------------------------------------------
57 
58 // Options for the bam2roi Application.
59 
60 struct Options
61 {
62     // Verbosity of output: 0 -- quiet, 1 -- normal, 2 -- verbose, 3 -- very verbose.
63     int verbosity;
64 
65     // -----------------------------------------------------------------------
66     // Input / Output Options
67     // -----------------------------------------------------------------------
68 
69     // Paths to input ROI file.
70     seqan::CharString inputFileName;
71 
72     // Output prefix, will append "%d.png".
73     seqan::CharString outputFileName;
74 
75     // -----------------------------------------------------------------------
76     // Plot Options
77     // -----------------------------------------------------------------------
78 
79     // Number of columns and rows in grid.
80     int numCols;
81     int numRows;
82 
83     // Width and height of one plot in px.
84     int plotWidth;
85     int plotHeight;
86 
87     // Border and space between plots.
88     int borderWidth;
89     int space;
90 
91     // Maximal number of plots to create, 0 for infinity.
92     int maxRois;
93 
94     // Largest value to print in plots.  In case of going beyond, we'll make a little red dot at the top.  A value of 0
95     // indicates no limit.
96     int maxValue;
97 
OptionsOptions98     Options() :
99             verbosity(1), numCols(10), numRows(10), plotWidth(10), plotHeight(10), borderWidth(0), space(0),
100             maxRois(0), maxValue(0)
101     {}
102 };
103 
104 // --------------------------------------------------------------------------
105 // Class Plotter
106 // --------------------------------------------------------------------------
107 
108 class Plotter
109 {
110 public:
111     // The options to use.
112     Options options;
113 
114     // The index of the current grid and plot on grid.
115     int gridNo;
116     int plotNo;
117 
118     // The current PNG canvas.
119     PngCanvas canvas;
120 
121     // Colors.
122     PngColor color;
123     PngColor bgColor;
124 
Plotter(Options const & options)125     Plotter(Options const & options) :
126             options(options), gridNo(0), plotNo(0),
127             canvas(options.numCols * options.plotWidth + 2 * options.borderWidth + (options.numCols - 1) * options.space,
128                    options.numRows * options.plotHeight + 2 * options.borderWidth + (options.numRows - 1) * options.space),
129             color(0, 0, 0, 0xff), bgColor(0, 0, 0, 10)
130     {}
131 
~Plotter()132     ~Plotter()
133     {
134         // Write PngCanvas if there is any plot on it.
135         if (plotNo)
136             writeGrid();
137     }
138 
139     // Plot grid to canvas, advance gridNo, plotNo, and write out grid if necessary.
plotGrid(seqan::String<unsigned> const & points)140     void plotGrid(seqan::String<unsigned> const & points)
141     {
142         _doPlot(points);
143 
144         // Advance plotNo and gridNo if necessary, write out plot to file.
145         plotNo += 1;
146         if (plotNo == options.numCols * options.numRows)
147         {
148             writeGrid();
149             clearGrid();
150             plotNo = 0;
151             gridNo += 1;
152         }
153     }
154 
plotStart(int idx)155     std::pair<int, int> plotStart(int idx)
156     {
157         int row = idx / options.numCols;
158         int col = idx % options.numCols;
159         int x = options.borderWidth + col * (options.plotWidth + options.space);
160         int y = options.borderWidth + row * (options.plotHeight + options.space);
161         return std::make_pair(x, y);
162     }
163 
_doPlot(seqan::String<unsigned> const & counts)164     void _doPlot(seqan::String<unsigned> const & counts)
165     {
166         std::pair<int, int> startPos = plotStart(plotNo);
167         // Compute polyline coordinates.
168         int numPoints = length(counts);
169         double h = 0.0;
170         double l = 0.0;
171         for (unsigned i = 0; i < length(counts); ++i)
172             h = std::max(h, (double)counts[i]);
173         if (h == 0.0)
174             h = 1.0;
175         if (h == l)  // Center line if highest == lowest.
176             h *= 2;
177 
178         if (options.maxValue != 0)
179             h = options.maxValue;
180 
181         canvas.color = bgColor;
182         canvas.filledRectangle(startPos.first, startPos.second,
183                                     startPos.first + options.plotWidth,
184                                     startPos.second + options.plotHeight);
185 
186         canvas.color = color;
187         std::vector<std::pair<int, int> > points;
188         for (unsigned i = 0; i < length(counts); ++i)
189         {
190             int x = static_cast<int>(startPos.first + (double)i / numPoints * options.plotWidth);
191             unsigned val = counts[i];
192             if (options.maxValue != 0 && (int)val >= options.maxValue)
193                 val = options.maxValue;
194             int y = static_cast<int>(startPos.second + options.plotHeight - val / h * options.plotHeight);
195             points.push_back(std::make_pair(x, y));
196         }
197         canvas.polyline(points);
198 
199         // Print polyline instructions if --very-verbose.
200         if (options.verbosity >= 3)
201         {
202             std::cerr << "polyline\n";
203             for (unsigned i = 0; i < points.size(); ++i)
204                 std::cerr << "  " << points[i].first << ", " << points[i].second << "\n";
205         }
206 
207         PngColor red(0xff, 0, 0, 0xff);
208         canvas.color = red;
209         for (unsigned i = 0; i < points.size(); ++i)
210             if (options.maxValue != 0 && (int)counts[i] >= options.maxValue)
211                 canvas.point(points[i].first, points[i].second);
212     }
213 
214     // Write grid on PngCanvas to output.
writeGrid()215     void writeGrid()
216     {
217         std::stringstream ss;
218         ss << options.outputFileName << gridNo << ".png";
219         canvas.write(ss.str().c_str());
220     }
221 
222     // Clear grid.
clearGrid()223     void clearGrid()
224     {
225         canvas.color = PngColor::WHITE();
226         canvas.filledRectangle(0, 0, canvas.width - 1, canvas.height - 1);
227     }
228 };
229 
230 // ==========================================================================
231 // Metafunctions
232 // ==========================================================================
233 
234 // ==========================================================================
235 // Functions
236 // ==========================================================================
237 
238 // --------------------------------------------------------------------------
239 // Function yesNo()                                                 [Options]
240 // --------------------------------------------------------------------------
241 
yesNo(bool b)242 char const * yesNo(bool b)
243 {
244     return b ? "YES" : "NO";
245 }
246 
247 // --------------------------------------------------------------------------
248 // Function print()                                                 [Options]
249 // --------------------------------------------------------------------------
250 
print(std::ostream & out,Options const & options)251 void print(std::ostream & out, Options const & options)
252 {
253     out << "__OPTIONS_____________________________________________________________________\n"
254         << "\n"
255         << "INPUT FILE       \t" << options.inputFileName << "\n"
256         << "OUTPUT FILE      \t" << options.outputFileName << "\n"
257         << "\n"
258         << "NUM COLS         \t" << options.numCols << "\n"
259         << "NUM ROWS         \t" << options.numRows << "\n"
260         << "\n"
261         << "PLOT WIDTH       \t" << options.plotWidth << "\n"
262         << "PLOT HEIGHT      \t" << options.plotHeight << "\n"
263         << "\n"
264         << "BORDER WIDTH     \t" << options.borderWidth << "\n"
265         << "SPACE            \t" << options.space << "\n"
266         << "\n"
267         << "MAX ROIS         \t" << options.maxRois << "\n"
268         << "MAX VALUE        \t" << options.maxValue << "\n"
269         << "\n";
270 }
271 
272 // --------------------------------------------------------------------------
273 // Function parseCommandLine()
274 // --------------------------------------------------------------------------
275 
276 // Parse command line with ArgumentParser class.
277 
278 seqan::ArgumentParser::ParseResult
parseCommandLine(Options & options,int argc,char const ** argv)279 parseCommandLine(Options & options, int argc, char const ** argv)
280 {
281     // Setup ArgumentParser.
282     seqan::ArgumentParser parser("roi_plot_thumbnails");
283     setCategory(parser, "NGS ROI Analysis");
284 
285     // Set short description, version, and date.
286     setShortDescription(parser, "Create plot grids for ROI file.");
287     setVersion(parser, SEQAN_APP_VERSION " [" SEQAN_REVISION "]");
288     setDate(parser, SEQAN_DATE);
289 
290     // Define usage line and long description.
291     addUsageLine(parser, "\\fB-if\\fP \\fIIN.roi\\fP \\fB-o\\fP \\fIOUT\\fP");
292 
293     addDescription(parser,
294                    "Create PNG images with plot grids to \\fIOUT${i}.png\\fP from ROI records "
295                    "in \\fIIN.roi\\fP.");
296 
297     // -----------------------------------------------------------------------
298     // General Options
299     // -----------------------------------------------------------------------
300 
301     addSection(parser, "General Options");
302 
303     addOption(parser, seqan::ArgParseOption("v", "verbose", "Verbose mode."));
304     addOption(parser, seqan::ArgParseOption("vv", "vverbose", "Very verbose mode."));
305 
306     // -----------------------------------------------------------------------
307     // Input / Output Options
308     // -----------------------------------------------------------------------
309 
310     addSection(parser, "Input / Output Parameters");
311 
312     addOption(parser, seqan::ArgParseOption("if", "input-file", "ROI to plot.",
313                                             seqan::ArgParseOption::INPUT_FILE));
314     setValidValues(parser, "input-file", seqan::RoiFileIn::getFileExtensions());
315     setRequired(parser, "input-file");
316 
317     addOption(parser, seqan::ArgParseOption("o", "output-prefix", "Prefix of output file.",
318                                             seqan::ArgParseOption::OUTPUT_FILE));
319     setRequired(parser, "output-prefix");
320 
321     // -----------------------------------------------------------------------
322     // Plot Configuration
323     // -----------------------------------------------------------------------
324 
325     addSection(parser, "PlotConfiguration");
326 
327 	addOption(parser, seqan::ArgParseOption("", "max-rois", "Maximal number of plots to create (0 for all).",
328                                             seqan::ArgParseOption::INTEGER, "NUM"));
329     setMinValue(parser, "max-rois", "0");
330     setDefaultValue(parser, "max-rois", "20000");
331 
332 	addOption(parser, seqan::ArgParseOption("", "max-value", "Fix maximal y value.  0 for no limit..",
333                                             seqan::ArgParseOption::INTEGER, "NUM"));
334     setMinValue(parser, "max-value", "0");
335     setDefaultValue(parser, "max-value", "0");
336 
337 	addOption(parser, seqan::ArgParseOption("", "num-cols", "Number of plots in one row.",
338                                             seqan::ArgParseOption::INTEGER, "COLS"));
339     setMinValue(parser, "num-cols", "1");
340     setDefaultValue(parser, "num-cols", "40");
341 
342 	addOption(parser, seqan::ArgParseOption("", "num-rows", "Number of plots in one column.",
343                                             seqan::ArgParseOption::INTEGER, "ROWS"));
344     setMinValue(parser, "num-rows", "1");
345     setDefaultValue(parser, "num-rows", "50");
346 
347 	addOption(parser, seqan::ArgParseOption("", "plot-height", "Height of one plot in px.",
348                                             seqan::ArgParseOption::INTEGER, "HEIGHT"));
349     setMinValue(parser, "plot-height", "1");
350     setDefaultValue(parser, "plot-height", "30");
351 
352 	addOption(parser, seqan::ArgParseOption("", "plot-width", "Width of one plot in px.",
353                                             seqan::ArgParseOption::INTEGER, "WIDTH"));
354     setMinValue(parser, "plot-width", "1");
355     setDefaultValue(parser, "plot-width", "30");
356 
357 	addOption(parser, seqan::ArgParseOption("", "border-width", "Border width.",
358                                             seqan::ArgParseOption::INTEGER, "WIDTH"));
359     setMinValue(parser, "border-width", "0");
360     setDefaultValue(parser, "border-width", "0");
361 
362 	addOption(parser, seqan::ArgParseOption("", "spacing", "Space between plots.",
363                                             seqan::ArgParseOption::INTEGER, "WIDTH"));
364     setDefaultValue(parser, "spacing", "2");
365 
366     // -----------------------------------------------------------------------
367     // Parsing and Value Extraction
368     // -----------------------------------------------------------------------
369 
370     // Parse command line.
371     seqan::ArgumentParser::ParseResult res = seqan::parse(parser, argc, argv);
372 
373     // Only extract  options if the program will continue after parseCommandLine()
374     if (res != seqan::ArgumentParser::PARSE_OK)
375         return res;
376 
377     // Extract option values.
378 
379     options.verbosity = 1;
380     if (isSet(parser, "verbose"))
381         options.verbosity = 2;
382     if (isSet(parser, "vverbose"))
383         options.verbosity = 3;
384 
385     getOptionValue(options.inputFileName, parser, "input-file");
386     getOptionValue(options.outputFileName, parser, "output-prefix");
387 
388     getOptionValue(options.numCols, parser, "num-cols");
389     getOptionValue(options.numRows, parser, "num-rows");
390     getOptionValue(options.plotWidth, parser, "plot-width");
391     getOptionValue(options.plotHeight, parser, "plot-height");
392     getOptionValue(options.borderWidth, parser, "border-width");
393     getOptionValue(options.space, parser, "spacing");
394     getOptionValue(options.maxRois, parser, "max-rois");
395     getOptionValue(options.maxValue, parser, "max-value");
396 
397 	return seqan::ArgumentParser::PARSE_OK;
398 }
399 
main(int argc,char const ** argv)400 int main(int argc, char const ** argv)
401 {
402 	// Parse the command line.
403     Options options;
404     seqan::ArgumentParser::ParseResult res = parseCommandLine(options, argc, argv);
405 
406     // If parsing was not successful then exit with code 1 if there were errors.  Otherwise, exit with code 0 (e.g. help
407     // was printed).
408     if (res != seqan::ArgumentParser::PARSE_OK)
409         return res == seqan::ArgumentParser::PARSE_ERROR;
410 
411     // Print program name and options.
412 
413     if (options.verbosity >= 1)
414     {
415         std::cerr << "ROI PLOT GRIDS\n"
416                   << "==============\n"
417                   << "\n";
418         print(std::cerr, options);
419     }
420 
421     // -----------------------------------------------------------------------
422     // Open input and output file
423     // -----------------------------------------------------------------------
424 
425     if (options.verbosity >= 1)
426         std::cerr << "__OPENING FILES_______________________________________________________________\n"
427                   << "\n";
428 
429     if (options.verbosity >= 1)
430          std::cerr << "Opening " << options.inputFileName << " ...";
431     seqan::RoiFileIn roiFileIn;
432     if (!open(roiFileIn, toCString(options.inputFileName)))
433 	{
434 		std::cerr << "\nERROR: Could not open " << options.inputFileName << "\n";
435         return 1;
436 	}
437     if (options.verbosity >= 1)
438         std::cerr << " OK\n";
439 
440     // Read header (actually, skip it).
441     seqan::RoiHeader roiHeader;
442     readHeader(roiHeader, roiFileIn);
443 
444     // -----------------------------------------------------------------------
445     // Create plot grids.
446     // -----------------------------------------------------------------------
447 
448     if (options.verbosity >= 1)
449         std::cerr << "\n__PLOTTING____________________________________________________________________\n"
450                   << "\n"
451                   << "WORKING...";
452 
453     Plotter plotter(options);
454 
455     seqan::RoiRecord record;
456     for (int i = 0; (options.maxRois == 0 || i < options.maxRois) && !atEnd(roiFileIn); ++i)
457     {
458         if (options.verbosity >= 2)
459             std::cerr << " " << i << "(" << length(record.count) << ", " << plotter.gridNo << ")";
460         readRecord(record, roiFileIn);
461 
462         plotter.plotGrid(record.count);
463     }
464 
465     if (options.verbosity >= 1)
466         std::cerr << " OK\n\nDone.\n";
467 
468     return 0;
469 }
470