1 /*  This file is part of Jellyfish.
2 
3     Jellyfish is free software: you can redistribute it and/or modify
4     it under the terms of the GNU General Public License as published by
5     the Free Software Foundation, either version 3 of the License, or
6     (at your option) any later version.
7 
8     Jellyfish is distributed in the hope that it will be useful,
9     but WITHOUT ANY WARRANTY; without even the implied warranty of
10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11     GNU General Public License for more details.
12 
13     You should have received a copy of the GNU General Public License
14     along with Jellyfish.  If not, see <http://www.gnu.org/licenses/>.
15 */
16 
17 #include <config.h>
18 #include <stdlib.h>
19 #include <stdint.h>
20 #include <string.h>
21 #include <iostream>
22 #include <fstream>
23 #include <vector>
24 
25 #include <jellyfish/err.hpp>
26 #include <jellyfish/misc.hpp>
27 #include <jellyfish/fstream_default.hpp>
28 #include <jellyfish/jellyfish.hpp>
29 #include <sub_commands/histo_main_cmdline.hpp>
30 
31 namespace err = jellyfish::err;
32 
33 template<typename reader_type>
compute_histo(reader_type & reader,const uint64_t base,const uint64_t ceil,uint64_t * histo,const uint64_t nb_buckets,const uint64_t inc)34 void compute_histo(reader_type& reader, const uint64_t base, const uint64_t ceil,
35                    uint64_t* histo, const uint64_t nb_buckets, const uint64_t inc) {
36   while(reader.next()) {
37     if(reader.val() < base)
38       ++histo[0];
39     else if(reader.val() > ceil)
40       ++histo[nb_buckets - 1];
41     else
42       ++histo[(reader.val() - base) / inc];
43   }
44 }
45 
46 
histo_main(int argc,char * argv[])47 int histo_main(int argc, char *argv[])
48 {
49   histo_main_cmdline args(argc, argv);
50 
51   std::ifstream is(args.db_arg);
52   if(!is.good())
53     err::die(err::msg() << "Failed to open input file '" << args.db_arg << "'");
54   jellyfish::file_header header;
55   header.read(is);
56   jellyfish::mer_dna::k(header.key_len() / 2);
57 
58   if(args.high_arg < args.low_arg)
59     histo_main_cmdline::error("High count value must be >= to low count value");
60   ofstream_default out(args.output_given ? args.output_arg : 0, std::cout);
61   if(!out.good())
62     err::die(err::msg() << "Error opening output file '" << args.output_arg << "'");
63 
64   const uint64_t base = args.increment_arg >= args.low_arg ? 0 : args.low_arg - args.increment_arg;
65   const uint64_t ceil = args.high_arg + args.increment_arg;
66   const uint64_t inc  = args.increment_arg;
67 
68   const uint64_t nb_buckets  = (ceil + inc - base) / inc;
69   uint64_t*      histo       = new uint64_t[nb_buckets];
70   memset(histo, '\0', sizeof(uint64_t) * nb_buckets);
71 
72   if(!header.format().compare(binary_dumper::format)) {
73     binary_reader reader(is, &header);
74     compute_histo(reader, base, ceil, histo, nb_buckets, inc);
75   } else if(!header.format().compare(text_dumper::format)) {
76     text_reader reader(is, &header);
77     compute_histo(reader, base, ceil, histo, nb_buckets, inc);
78   } else {
79     err::die(err::msg() << "Unknown format '" << header.format() << "'");
80   }
81 
82   for(uint64_t i = 0, col = base; i < nb_buckets; ++i, col += inc)
83     if(histo[i] > 0 || args.full_flag)
84       out << col << " " << histo[i] << "\n";
85 
86   delete [] histo;
87   out.close();
88 
89   return 0;
90 }
91