1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- 2 // 3 // Copyright 2011-2014, Julian Catchen <jcatchen@uoregon.edu> 4 // 5 // This file is part of Stacks. 6 // 7 // Stacks is free software: you can redistribute it and/or modify 8 // it under the terms of the GNU General Public License as published by 9 // the Free Software Foundation, either version 3 of the License, or 10 // (at your option) any later version. 11 // 12 // Stacks is distributed in the hope that it will be useful, 13 // but WITHOUT ANY WARRANTY; without even the implied warranty of 14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 // GNU General Public License for more details. 16 // 17 // You should have received a copy of the GNU General Public License 18 // along with Stacks. If not, see <http://www.gnu.org/licenses/>. 19 // 20 21 #ifndef __KMER_FILTER_H__ 22 #define __KMER_FILTER_H__ 23 24 #include "constants.h" 25 26 #include <cstdlib> 27 #include <getopt.h> // Process command-line options 28 #include <dirent.h> // Open/Read contents of a directory 29 #include <sys/types.h> 30 #include <sys/stat.h> 31 #include <cstring> 32 #include <cmath> 33 #include <algorithm> 34 #include <iostream> 35 #include <fstream> 36 #include <sstream> 37 38 #include <vector> 39 #include <map> 40 #include <set> 41 #include <utility> 42 #include <unordered_map> 43 44 #ifdef HAVE_SPARSEHASH 45 #include <sparsehash/sparse_hash_map> 46 using google::sparse_hash_map; 47 #endif 48 49 #include "clean.h" 50 #include "utils.h" 51 #include "kmers.h" 52 #include "write.h" 53 #include "BustardI.h" // Reading input files in Tab-separated Bustard format 54 #include "FastaI.h" // Reading input files in FASTA format 55 #include "FastqI.h" // Reading input files in FASTQ format 56 #include "gzFasta.h" // Reading gzipped input files in FASTA format 57 #include "gzFastq.h" // Reading gzipped input files in FASTQ format 58 59 #ifdef HAVE_SPARSEHASH 60 typedef sparse_hash_map<char *, long, hash_charptr, eqstr> SeqKmerHash; 61 #else 62 typedef unordered_map<char *, long, hash_charptr, eqstr> SeqKmerHash; 63 #endif 64 65 void help( void ); 66 void version( void ); 67 int parse_command_line(int, char**); 68 int build_file_list(vector<string> &, vector<pair<string, string> > &); 69 int process_reads(string, string, SeqKmerHash &, map<string, long> &); 70 int process_paired_reads(string, string, string, string, SeqKmerHash &, map<string, long> &); 71 int print_results(map<string, map<string, long> > &); 72 73 // 74 // Functions to normalize read depth 75 // 76 int normalize_reads(string, string, SeqKmerHash &, vector<char *> &, map<string, long> &); 77 int normalize_paired_reads(string, string, string, string, SeqKmerHash &, vector<char *> &, map<string, long> &); 78 bool normalize_kmer_lookup(SeqKmerHash &, char *, char *, int, vector<char *> &); 79 80 // 81 // Functions for finding and removing reads with rare kmers 82 // 83 int populate_kmers(vector<pair<string, string> > &, vector<pair<string, string> > &, SeqKmerHash &, vector<char *> &); 84 int process_file_kmers(string, SeqKmerHash &, vector<char *> &); 85 int generate_kmer_dist(SeqKmerHash &); 86 int calc_kmer_median(SeqKmerHash &, double &, double &); 87 int kmer_map_cmp(pair<char *, long>, pair<char *, long>); 88 int kmer_lookup(SeqKmerHash &, char *, char *, int, int &, int &); 89 int free_kmer_hash(SeqKmerHash &, vector<char *> &); 90 91 int read_kmer_freq(string, SeqKmerHash &, vector<char *> &); 92 int write_kmer_freq(string, SeqKmerHash &); 93 94 #endif // __KMER_FILTER_H__ 95