1 /* This file is part of Jellyfish.
2
3 Jellyfish is free software: you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation, either version 3 of the License, or
6 (at your option) any later version.
7
8 Jellyfish is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with Jellyfish. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17 #include <config.h>
18 #include <stdlib.h>
19 #include <stdint.h>
20 #include <string.h>
21 #include <iostream>
22 #include <fstream>
23 #include <vector>
24
25 #include <jellyfish/err.hpp>
26 #include <jellyfish/misc.hpp>
27 #include <jellyfish/fstream_default.hpp>
28 #include <jellyfish/jellyfish.hpp>
29 #include <sub_commands/histo_main_cmdline.hpp>
30
31 namespace err = jellyfish::err;
32
33 template<typename reader_type>
compute_histo(reader_type & reader,const uint64_t base,const uint64_t ceil,uint64_t * histo,const uint64_t nb_buckets,const uint64_t inc)34 void compute_histo(reader_type& reader, const uint64_t base, const uint64_t ceil,
35 uint64_t* histo, const uint64_t nb_buckets, const uint64_t inc) {
36 while(reader.next()) {
37 if(reader.val() < base)
38 ++histo[0];
39 else if(reader.val() > ceil)
40 ++histo[nb_buckets - 1];
41 else
42 ++histo[(reader.val() - base) / inc];
43 }
44 }
45
46
histo_main(int argc,char * argv[])47 int histo_main(int argc, char *argv[])
48 {
49 histo_main_cmdline args(argc, argv);
50
51 std::ifstream is(args.db_arg);
52 if(!is.good())
53 err::die(err::msg() << "Failed to open input file '" << args.db_arg << "'");
54 jellyfish::file_header header;
55 header.read(is);
56 jellyfish::mer_dna::k(header.key_len() / 2);
57
58 if(args.high_arg < args.low_arg)
59 histo_main_cmdline::error("High count value must be >= to low count value");
60 ofstream_default out(args.output_given ? args.output_arg : 0, std::cout);
61 if(!out.good())
62 err::die(err::msg() << "Error opening output file '" << args.output_arg << "'");
63
64 const uint64_t base = args.increment_arg >= args.low_arg ? 0 : args.low_arg - args.increment_arg;
65 const uint64_t ceil = args.high_arg + args.increment_arg;
66 const uint64_t inc = args.increment_arg;
67
68 const uint64_t nb_buckets = (ceil + inc - base) / inc;
69 uint64_t* histo = new uint64_t[nb_buckets];
70 memset(histo, '\0', sizeof(uint64_t) * nb_buckets);
71
72 if(!header.format().compare(binary_dumper::format)) {
73 binary_reader reader(is, &header);
74 compute_histo(reader, base, ceil, histo, nb_buckets, inc);
75 } else if(!header.format().compare(text_dumper::format)) {
76 text_reader reader(is, &header);
77 compute_histo(reader, base, ceil, histo, nb_buckets, inc);
78 } else {
79 err::die(err::msg() << "Unknown format '" << header.format() << "'");
80 }
81
82 for(uint64_t i = 0, col = base; i < nb_buckets; ++i, col += inc)
83 if(histo[i] > 0 || args.full_flag)
84 out << col << " " << histo[i] << "\n";
85
86 delete [] histo;
87 out.close();
88
89 return 0;
90 }
91