1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #ifndef TRIM_STAT_H
19 #define TRIM_STAT_H
20 
21 #include "runtime.H"
22 
23 #include <vector>
24 
25 
26 class trimStat {
27 public:
trimStat()28   trimStat() {
29     nReads = 0;
30     nBases = 0;
31   };
32 
33   trimStat &operator+=(uint32 bases) {
34     nReads += 1;
35     nBases += bases;
36 
37     histo.push_back(bases);
38 
39     return(*this);
40   };
41 
generatePlots(char const * outputPrefix,char const * outputName,uint32 binwidth)42   void       generatePlots(char const *outputPrefix, char const *outputName, uint32 binwidth) {
43     char  N[FILENAME_MAX];
44     FILE *F;
45 
46     snprintf(N, FILENAME_MAX, "%s.%s.dat", outputPrefix, outputName);
47     F = AS_UTL_openOutputFile(N);
48     for (uint64 ii=0; ii<histo.size(); ii++)
49       fprintf(F, F_U32"\n", histo[ii]);
50     AS_UTL_closeFile(F, N);
51 
52     snprintf(N, FILENAME_MAX, "%s.%s.gp", outputPrefix, outputName);
53     F = AS_UTL_openOutputFile(N);
54     fprintf(F, "set title '%s'\n", outputName);
55     fprintf(F, "set xlabel 'length, bin width = %u'\n", binwidth);
56     fprintf(F, "set ylabel 'number'\n");
57     fprintf(F, "\n");
58     fprintf(F, "binwidth=%u\n", binwidth);
59     fprintf(F, "set boxwidth binwidth\n");
60     fprintf(F, "bin(x,width) = width*floor(x/width) + binwidth/2.0\n");
61     fprintf(F, "\n");
62     fprintf(F, "set terminal png size 1024,1024\n");
63     fprintf(F, "set output '%s.%s.png'\n", outputPrefix, outputName);
64     fprintf(F, "plot [] [0:] '%s.%s.dat' using (bin($1,binwidth)):(1.0) smooth freq with boxes title ''\n", outputPrefix, outputName);
65     AS_UTL_closeFile(F, N);
66 
67     snprintf(N, FILENAME_MAX, "gnuplot %s.%s.gp > /dev/null 2>&1", outputPrefix, outputName);
68 
69     system(N);
70   };
71 
72   uint32               nReads;
73   uint64               nBases;
74 
75   std::vector<uint32>  histo;
76 };
77 
78 #endif  //  TRIM_STAT_H
79