/* Quorum
* Copyright (C) 2012 Genome group at University of Maryland.
*
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace err = jellyfish::err;
static create_database_cmdline args;
typedef create_database_cmdline::error error;
using jellyfish::mer_dna;
typedef std::vector file_vector;
typedef jellyfish::stream_manager stream_manager;
typedef jellyfish::whole_sequence_parser read_parser;
class quality_mer_counter : public jellyfish::thread_exec {
hash_with_quality& ary_;
read_parser parser_;
const char qual_thresh_;
public:
quality_mer_counter(int nb_threads, hash_with_quality& ary, stream_manager& streams, char qual_thresh) :
ary_(ary),
parser_(4 * nb_threads, 100, 1, streams),
qual_thresh_(qual_thresh)
{ }
virtual void start(int thid) {
mer_dna m, rm;
size_t counted_high = 0, counted_low = 0;
while(true) {
read_parser::job job(parser_);
if(job.is_empty()) break;
for(size_t i = 0; i < job->nb_filled; ++i) { // Process each read
std::string& seq = job->data[i].seq;
std::string& quals = job->data[i].qual;
auto base = seq.begin();
auto qual = quals.begin();
unsigned int low_len = 0; // Length of low quality stretch
unsigned int high_len = 0; // Length of high quality stretch
for( ; base != seq.end(); ++base, ++qual) {
int code = mer_dna::code(*base);
if(mer_dna::not_dna(code)) {
high_len = low_len = 0;
continue;
}
m.shift_left(code);
rm.shift_right(mer_dna::complement(code));
++low_len;
if(*qual >= qual_thresh_)
++high_len;
else
high_len = 0;
if(low_len >= mer_dna::k()) {
if(!ary_.add(m < rm ? m : rm, high_len >= mer_dna::k()))
throw std::runtime_error(err::msg() << "Hash is full");
counted_high += high_len >= mer_dna::k();
++counted_low;
}
}
}
}
ary_.done();
}
};
int main(int argc, char *argv[])
{
database_header header;
header.fill_standard();
header.set_cmdline(argc, argv);
args.parse(argc, argv);
mer_dna::k(args.mer_arg);
if(!args.min_qual_value_given && !args.min_qual_char_given)
error("Either a min-qual-value or min-qual-char must be provided.");
if(args.min_qual_char_given && args.min_qual_char_arg.size() != 1)
error("The min-qual-char should be one ASCII character.");
char qual_thresh = args.min_qual_char_given ? args.min_qual_char_arg[0] : (char)args.min_qual_value_arg;
if(args.bits_arg < 1 || args.bits_arg > 63)
error("The number of bits should be between 1 and 63");
std::ofstream output(args.output_arg);
if(!output.good())
error() << "Failed to open output file '" << args.output_arg << "'.";
hash_with_quality ary(args.size_arg, 2 * mer_dna::k(), args.bits_arg,
args.threads_arg, args.reprobe_arg);
{
stream_manager streams(args.reads_arg.cbegin(), args.reads_arg.cend(), 1);
quality_mer_counter counter(args.threads_arg, ary, streams, qual_thresh);
counter.exec_join(args.threads_arg);
}
ary.write(output, &header);
output.close();
return 0;
}