1 /* Quorum
2 * Copyright (C) 2012 Genome group at University of Maryland.
3 *
4 * This program is free software: you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation, either version 3 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <iostream>
19 #include <string>
20 #include <vector>
21 #include <sstream>
22
23 #include <jellyfish/mer_dna.hpp>
24 #include <jellyfish/file_header.hpp>
25 #include <jellyfish/err.hpp>
26 #include <jellyfish/thread_exec.hpp>
27 #include <jellyfish/stream_manager.hpp>
28 #include <jellyfish/whole_sequence_parser.hpp>
29 #include <jellyfish/large_hash_array.hpp>
30
31 #include <src/mer_database.hpp>
32 #include <src/create_database_cmdline.hpp>
33
34 namespace err = jellyfish::err;
35
36 static create_database_cmdline args;
37 typedef create_database_cmdline::error error;
38
39 using jellyfish::mer_dna;
40 typedef std::vector<const char*> file_vector;
41 typedef jellyfish::stream_manager<file_vector::const_iterator> stream_manager;
42 typedef jellyfish::whole_sequence_parser<stream_manager> read_parser;
43
44 class quality_mer_counter : public jellyfish::thread_exec {
45 hash_with_quality& ary_;
46 read_parser parser_;
47 const char qual_thresh_;
48
49 public:
quality_mer_counter(int nb_threads,hash_with_quality & ary,stream_manager & streams,char qual_thresh)50 quality_mer_counter(int nb_threads, hash_with_quality& ary, stream_manager& streams, char qual_thresh) :
51 ary_(ary),
52 parser_(4 * nb_threads, 100, 1, streams),
53 qual_thresh_(qual_thresh)
54 { }
55
start(int thid)56 virtual void start(int thid) {
57 mer_dna m, rm;
58 size_t counted_high = 0, counted_low = 0;
59
60 while(true) {
61 read_parser::job job(parser_);
62 if(job.is_empty()) break;
63
64 for(size_t i = 0; i < job->nb_filled; ++i) { // Process each read
65 std::string& seq = job->data[i].seq;
66 std::string& quals = job->data[i].qual;
67
68 auto base = seq.begin();
69 auto qual = quals.begin();
70 unsigned int low_len = 0; // Length of low quality stretch
71 unsigned int high_len = 0; // Length of high quality stretch
72 for( ; base != seq.end(); ++base, ++qual) {
73 int code = mer_dna::code(*base);
74 if(mer_dna::not_dna(code)) {
75 high_len = low_len = 0;
76 continue;
77 }
78 m.shift_left(code);
79 rm.shift_right(mer_dna::complement(code));
80 ++low_len;
81 if(*qual >= qual_thresh_)
82 ++high_len;
83 else
84 high_len = 0;
85 if(low_len >= mer_dna::k()) {
86 if(!ary_.add(m < rm ? m : rm, high_len >= mer_dna::k()))
87 throw std::runtime_error(err::msg() << "Hash is full");
88 counted_high += high_len >= mer_dna::k();
89 ++counted_low;
90 }
91 }
92 }
93 }
94 ary_.done();
95 }
96 };
97
main(int argc,char * argv[])98 int main(int argc, char *argv[])
99 {
100 database_header header;
101 header.fill_standard();
102 header.set_cmdline(argc, argv);
103
104 args.parse(argc, argv);
105 mer_dna::k(args.mer_arg);
106 if(!args.min_qual_value_given && !args.min_qual_char_given)
107 error("Either a min-qual-value or min-qual-char must be provided.");
108 if(args.min_qual_char_given && args.min_qual_char_arg.size() != 1)
109 error("The min-qual-char should be one ASCII character.");
110 char qual_thresh = args.min_qual_char_given ? args.min_qual_char_arg[0] : (char)args.min_qual_value_arg;
111 if(args.bits_arg < 1 || args.bits_arg > 63)
112 error("The number of bits should be between 1 and 63");
113 std::ofstream output(args.output_arg);
114 if(!output.good())
115 error() << "Failed to open output file '" << args.output_arg << "'.";
116
117 hash_with_quality ary(args.size_arg, 2 * mer_dna::k(), args.bits_arg,
118 args.threads_arg, args.reprobe_arg);
119 {
120 stream_manager streams(args.reads_arg.cbegin(), args.reads_arg.cend(), 1);
121 quality_mer_counter counter(args.threads_arg, ary, streams, qual_thresh);
122 counter.exec_join(args.threads_arg);
123 }
124
125 ary.write(output, &header);
126 output.close();
127
128 return 0;
129 }
130