1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*- 2 // 3 // Copyright 2010-2018, Julian Catchen <jcatchen@illinois.edu> 4 // 5 // This file is part of Stacks. 6 // 7 // Stacks is free software: you can redistribute it and/or modify 8 // it under the terms of the GNU General Public License as published by 9 // the Free Software Foundation, either version 3 of the License, or 10 // (at your option) any later version. 11 // 12 // Stacks is distributed in the hope that it will be useful, 13 // but WITHOUT ANY WARRANTY; without even the implied warranty of 14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 // GNU General Public License for more details. 16 // 17 // You should have received a copy of the GNU General Public License 18 // along with Stacks. If not, see <http://www.gnu.org/licenses/>. 19 // 20 21 #ifndef __USTACKS_H__ 22 #define __USTACKS_H__ 23 24 #include "constants.h" 25 26 #ifdef _OPENMP 27 #include <omp.h> // OpenMP library 28 #endif 29 30 #include <getopt.h> // Process command-line options 31 #include <cmath> 32 #include <cstdlib> 33 #include <cstring> 34 #include <algorithm> 35 #include <utility> 36 37 #include <string> 38 39 #include <iostream> 40 #include <fstream> 41 #include <sstream> 42 #include <iomanip> // std::setprecision 43 44 #include <vector> 45 #include <map> 46 #include <unordered_map> 47 #include <queue> 48 using std::queue; 49 #include <set> 50 #include <unistd.h> 51 52 #include "config.h" 53 #include "kmers.h" 54 #include "utils.h" 55 #include "DNASeq.h" // Class for storing two-bit compressed DNA sequences 56 #include "stacks.h" // Major data structures for holding stacks 57 #include "mstack.h" 58 #include "mst.h" // Minimum spanning tree implementation 59 #include "models.h" // Contains maximum likelihood statistical models. 60 #include "FastaI.h" // Reading input files in FASTA format 61 #include "FastqI.h" // Reading input files in FASTQ format 62 #include "gzFasta.h" // Reading gzipped input files in FASTA format 63 #include "gzFastq.h" // Reading gzipped input files in FASTQ format 64 #include "aln_utils.h" 65 #include "GappedAln.h" 66 67 class HVal { 68 public: 69 vector<int> ids; 70 count()71 int count() { 72 return this->ids.size(); 73 } add_id(int id)74 int add_id(int id) { 75 this->ids.push_back(id); 76 return 0; 77 } 78 }; 79 80 typedef unordered_map<DNANSeq, HVal> DNASeqHashMap; 81 82 void help( void ); 83 void version( void ); 84 int parse_command_line(int, char**); 85 void load_radtags(string, DNASeqHashMap &, size_t &); 86 int load_seq_ids(vector<char *> &); 87 void reduce_radtags(DNASeqHashMap &, map<int, Stack *> &, map<int, Rem *> &, size_t &, size_t &); 88 int free_radtags_hash(DNASeqHashMap &, vector<DNANSeq *> &); 89 int populate_merged_tags(map<int, Stack *> &, map<int, MergedStack *> &); 90 void merge_stacks(map<int, MergedStack *> &, size_t &); 91 int call_consensus(map<int, MergedStack *> &, map<int, Stack *> &, map<int, Rem *> &, bool); 92 int call_alleles(MergedStack *, vector<DNANSeq *> &, vector<read_type> &); 93 int update_consensus(MergedStack *, map<int, Stack *> &, map<int, Rem *> &); 94 size_t merge_remainders(map<int, MergedStack *> &, map<int, Stack *> &, map<int, Rem *> &); 95 int search_for_gapped_remainders(map<int, MergedStack *> &, map<int, Stack *> &, map<int, Rem *> &); 96 size_t merge_gapped_remainders(map<int, MergedStack *> &, map<int, Stack *> &, map<int, Rem *> &); 97 int write_results(map<int, MergedStack *> &, map<int, Stack *> &, map<int, Rem *> &); 98 99 // 100 // Match MergedStacks using a k-mer hashing algorithm 101 // 102 int calc_kmer_distance(map<int, MergedStack *> &, int); 103 int search_for_gaps(map<int, MergedStack *> &); 104 int merge_gapped_alns(map<int, Stack *> &, map<int, Rem *> &, map<int, MergedStack *> &); 105 int edit_gapped_seqs(map<int, Stack *> &, map<int, Rem *> &, MergedStack *, vector<pair<char, uint> > &); 106 int edit_gaps(vector<pair<char, uint> > &, char *); 107 int dist(MergedStack *, MergedStack *, vector<pair<char, uint> > &); 108 bool rank_alignments(Aln, Aln); 109 // 110 // Calculate depth of coverage statistics for stacks 111 // 112 void calc_coverage_distribution(map<int, MergedStack *> &, double &, double &, double &, double &); 113 114 // 115 // Dealing with lumberjack (huge) stacks 116 // 117 size_t remove_repetitive_stacks(map<int, MergedStack *> &); 118 int deleverage(map<int, MergedStack *> &, set<int> &, int, vector<MergedStack *> &); 119 120 // 121 // Debugging 122 // 123 int dump_unique_tags(map<int, Stack *> &); 124 int dump_merged_tags(map<int, MergedStack *> &); 125 int dump_stack_graph(string, map<int, Stack *> &, map<int, MergedStack *> &, vector<int> &, map<int, map<int, double> > &, map<int, set<int> > &); 126 127 // 128 // Utilities 129 // 130 MergedStack *merge_tags(MergedStack *, MergedStack *, int); 131 MergedStack *merge_tags(map<int, MergedStack *> &, set<int> &, int); 132 MergedStack *merge_tags(map<int, MergedStack *> &, int *, int, int); 133 long double factorial(int); 134 135 #endif // __USTACKS_H__ 136