1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2011-2014, Julian Catchen <jcatchen@uoregon.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks.  If not, see <http://www.gnu.org/licenses/>.
19 //
20 
21 #ifndef __KMER_FILTER_H__
22 #define __KMER_FILTER_H__
23 
24 #include "constants.h"
25 
26 #include <cstdlib>
27 #include <getopt.h> // Process command-line options
28 #include <dirent.h> // Open/Read contents of a directory
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <cstring>
32 #include <cmath>
33 #include <algorithm>
34 #include <iostream>
35 #include <fstream>
36 #include <sstream>
37 
38 #include <vector>
39 #include <map>
40 #include <set>
41 #include <utility>
42 #include <unordered_map>
43 
44 #ifdef HAVE_SPARSEHASH
45 #include <sparsehash/sparse_hash_map>
46 using google::sparse_hash_map;
47 #endif
48 
49 #include "clean.h"
50 #include "utils.h"
51 #include "kmers.h"
52 #include "write.h"
53 #include "BustardI.h"   // Reading input files in Tab-separated Bustard format
54 #include "FastaI.h"     // Reading input files in FASTA format
55 #include "FastqI.h"     // Reading input files in FASTQ format
56 #include "gzFasta.h"    // Reading gzipped input files in FASTA format
57 #include "gzFastq.h"    // Reading gzipped input files in FASTQ format
58 
//
// SeqKmerHash: hash table with char * keys and long values. The hash_charptr
// and eqstr functors (not declared in this header) are supplied as the hashing
// and key-equality policies. The table is a google::sparse_hash_map when
// HAVE_SPARSEHASH is defined and an unordered_map otherwise.
// NOTE(review): keys are raw char * pointers, so the table presumably does not
// own the key storage -- confirm how it relates to the vector<char *> that is
// passed alongside the hash in the functions declared below.
//
59 #ifdef HAVE_SPARSEHASH
60 typedef sparse_hash_map<char *, long, hash_charptr, eqstr> SeqKmerHash;
61 #else
62 typedef unordered_map<char *, long, hash_charptr, eqstr> SeqKmerHash;
63 #endif
64 
//
// Program-level function prototypes; the definitions are not in this header.
// NOTE(review): the descriptions below are inferred from names and parameter
// types only -- confirm against the implementation.
//
// help, version: take no arguments and return nothing; presumably print the
//     usage and version text.
// parse_command_line: (int, char**), presumably argc/argv.
// build_file_list: receives a vector of strings and a vector of string pairs,
//     both by non-const reference, so either may be modified by the callee.
// process_reads, process_paired_reads: two (respectively four) strings by
//     value, plus a SeqKmerHash and a map<string, long> by non-const reference.
//     The map presumably holds named counters -- TODO confirm.
// print_results: receives a map of string -> map<string, long> by non-const
//     reference; presumably the per-file counters -- TODO confirm.
//
65 void help( void );
66 void version( void );
67 int  parse_command_line(int, char**);
68 int  build_file_list(vector<string> &, vector<pair<string, string> > &);
69 int  process_reads(string, string, SeqKmerHash &, map<string, long> &);
70 int  process_paired_reads(string, string, string, string, SeqKmerHash &, map<string, long> &);
71 int  print_results(map<string, map<string, long> > &);
72 
73 //
74 // Functions to normalize read depth
75 //
// normalize_reads and normalize_paired_reads take the same leading parameters
// as process_reads and process_paired_reads (two or four strings and a
// SeqKmerHash &), then a vector<char *> & before the final map<string, long> &.
// normalize_kmer_lookup is the only function declared in this header that
// returns bool.
// NOTE(review): the meaning of the (char *, char *, int) arguments and of the
// returned bool is not visible from this header -- confirm in the definition.
//
76 int  normalize_reads(string, string, SeqKmerHash &, vector<char *> &, map<string, long> &);
77 int  normalize_paired_reads(string, string, string, string, SeqKmerHash &, vector<char *> &, map<string, long> &);
78 bool normalize_kmer_lookup(SeqKmerHash &, char *, char *, int, vector<char *> &);
79 
80 //
81 // Functions for finding and removing reads with rare kmers
82 //
// Most of these functions take the SeqKmerHash by non-const reference; the
// ones that also take a vector<char *> & presumably use it to track the key
// strings referenced by the hash -- TODO confirm.
//
// populate_kmers: receives two vectors of string pairs plus the hash and the
//     vector<char *>.
// process_file_kmers: receives one string (presumably a file path) plus the
//     hash and the vector<char *>.
// calc_kmer_median: has two double & parameters, presumably outputs.
// kmer_map_cmp: takes two pair<char *, long> by value and returns int.
//     NOTE(review): if this is passed to std::sort as a comparator, the int
//     result is converted to bool, so it must behave as a strict "less than"
//     rather than a three-way -1/0/1 compare -- confirm.
// kmer_lookup: has two int & parameters, presumably outputs.
// free_kmer_hash: receives the hash and the vector<char *>; presumably
//     releases the storage they refer to -- TODO confirm.
//
83 int  populate_kmers(vector<pair<string, string> > &, vector<pair<string, string> > &, SeqKmerHash &, vector<char *> &);
84 int  process_file_kmers(string, SeqKmerHash &, vector<char *> &);
85 int  generate_kmer_dist(SeqKmerHash &);
86 int  calc_kmer_median(SeqKmerHash &, double &, double &);
87 int  kmer_map_cmp(pair<char *, long>, pair<char *, long>);
88 int  kmer_lookup(SeqKmerHash &, char *, char *, int, int &, int &);
89 int  free_kmer_hash(SeqKmerHash &, vector<char *> &);
90 
//
// Reading and writing kmer frequencies. Both functions take a string by value
// (presumably a file path -- TODO confirm) and the SeqKmerHash by non-const
// reference; read_kmer_freq additionally takes a vector<char *> &.
//
91 int  read_kmer_freq(string, SeqKmerHash &, vector<char *> &);
92 int  write_kmer_freq(string, SeqKmerHash &);
93 
94 #endif // __KMER_FILTER_H__
95