1 #ifndef CCODE_H 2 #define CCODE_H 3 4 /* 5 * ccode.h 6 * Mothur 7 * 8 * Created by westcott on 8/24/09. 9 * Copyright 2009 Schloss LAB. All rights reserved. 10 * 11 */ 12 13 #include "mothurchimera.h" 14 #include "calculator.h" 15 #include "decalc.h" 16 17 /***********************************************************/ 18 //This class was created using the algorithms described in the 19 // "Evaluating putative chimeric sequences from PCR-amplified products" paper 20 //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez. 21 22 /***********************************************************/ 23 24 class Ccode : public MothurChimera { 25 26 public: 27 Ccode(string, string, bool, string, int, int, string); //fasta, template, filter, mask, window, numWanted, outputDir 28 ~Ccode(); 29 30 int getChimeras(Sequence* query); 31 Sequence print(ostream&, ostream&); 32 33 private: 34 35 DistCalc* distCalc; 36 DeCalculator* decalc; 37 int iters, window, numWanted; 38 string fastafile, mapInfo; 39 40 Sequence* querySeq; 41 42 map<int, int> spotMap; 43 map<int, int>::iterator it; 44 45 vector<int> windows; //windows is the vector of window breaks for query 46 int windowSizes; //windowSizes is the size of the windows for query 47 map<int, int> trim; //trim is the map containing the starting and ending positions for query 48 vector<SeqDist> closest; //closest is a vector of sequence at are closest to query 49 vector<float> averageRef; //averageRef is the average distance at each window for the references for query 50 vector<float> averageQuery; //averageQuery is the average distance at each winow for the query for query 51 vector<float> sumRef; //sumRef is the sum of distances at each window for the references for query 52 vector<float> sumSquaredRef; //sumSquaredRef is the sum of squared distances at each window for the references for query 53 vector<float> sumQuery; //sumQuery is the sum of distances at each window for the comparison of query to references for query 54 vector<float> sumSquaredQuery; //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query 55 vector<float> varRef; //varRef is the variance among references seqs at each window for query 56 vector<float> varQuery; //varQuery is the variance among references and query at each window 57 vector<float> sdRef; //sdRef is the standard deviation of references seqs at each window for query 58 vector<float> sdQuery; //sdQuery is the standard deviation of references and query at each window 59 vector<float> anova; //anova is the vector of anova scores for each window for query 60 int refCombo; //refCombo is the number of reference sequences combinations for query 61 vector<bool> isChimericConfidence; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits 62 vector<bool> isChimericTStudent; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits 63 vector<bool> isChimericANOVA; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits 64 65 vector<SeqDist> findClosest(Sequence*, int); 66 void removeBadReferenceSeqs(vector<SeqDist>&); //removes sequences from closest that are to different of too similar to eachother. 67 void trimSequences(Sequence*); 68 vector<int> findWindows(); 69 void getAverageRef(vector<SeqDist>); //fills sumRef, averageRef, sumSquaredRef and refCombo. 70 void getAverageQuery (vector<SeqDist>, Sequence*); //fills sumQuery, averageQuery, sumSquaredQuery. 71 void findVarianceRef (); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0. 72 void findVarianceQuery (); //fills varQuery and sdQuery 73 void determineChimeras (); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. 74 75 int getDiff(string, string); //return number of mismatched bases, a gap to base is not counted as a mismatch 76 float getT(int); 77 float getF(int); 78 }; 79 80 /***********************************************************/ 81 82 #endif 83 84 85