1 #ifndef CCODE_H
2 #define CCODE_H
3 
4 /*
5  *  ccode.h
6  *  Mothur
7  *
8  *  Created by westcott on 8/24/09.
9  *  Copyright 2009 Schloss LAB. All rights reserved.
10  *
11  */
12 
13 #include "mothurchimera.h"
14 #include "calculator.h"
15 #include "decalc.h"
16 
17 /***********************************************************/
18 //This class was created using the algorithms described in the
19 // "Evaluating putative chimeric sequences from PCR-amplified products" paper
20 //by Juan M. Gonzalez, Johannes Zimmermann and Cesareo Saiz-Jimenez (Bioinformatics, 2005).
21 
22 /***********************************************************/
23 
24 class Ccode : public MothurChimera {
25 	// Chimera checker derived from MothurChimera; the private members below hold the per-window statistics kept for one query sequence.
26 	public:
27 		Ccode(string, string, bool, string, int, int, string);	//arguments, in order: fasta, template, filter, mask, window, numWanted, outputDir
28 		~Ccode();	//NOTE(review): distCalc and decalc are raw pointers - confirm in ccode.cpp that they are released here
29 
30 		int getChimeras(Sequence* query);	//presumably runs the chimera check on query; meaning of the int return is not visible here - TODO confirm in ccode.cpp
31 		Sequence print(ostream&, ostream&);	//NOTE(review): presumably reports the results for the current query to the two streams; which stream is which and what Sequence is returned are not visible here - confirm
32 
33 	private:
34 
35 		DistCalc* distCalc;	//NOTE(review): raw pointer to a distance calculator; allocation and ownership are not visible in this header
36 		DeCalculator* decalc;	//NOTE(review): raw pointer to a DeCalculator helper; allocation and ownership are not visible in this header
37 		int iters, window, numWanted;	//window and numWanted presumably mirror the constructor arguments of the same name - TODO confirm; iters is not documented here
38 		string fastafile, mapInfo;	//NOTE(review): fastafile presumably holds the constructor's fasta argument; the content of mapInfo is not visible here
39 
40 		Sequence* querySeq;	//NOTE(review): presumably the query currently being processed - confirm lifetime and ownership in ccode.cpp
41 
42 		map<int, int> spotMap;	//NOTE(review): purpose not visible in this header - confirm what the int -> int mapping represents
43 		map<int, int>::iterator it;	//iterator member over a map<int, int> (the type shared by spotMap and trim)
44 
45 		vector<int>  windows; //windows is the vector of window breaks for query
46 		int windowSizes;  //windowSizes is the size of the windows for query
47 		map<int, int> trim;  //trim is the map containing the starting and ending positions for query
48 		vector<SeqDist>  closest;  //closest is a vector of the sequences that are closest to query
49 		vector<float>  averageRef;  //averageRef is the average distance at each window for the references for query
50 		vector<float>  averageQuery;  //averageQuery is the average distance at each window for the query for query
51 		vector<float>   sumRef;  //sumRef is the sum of distances at each window for the references for query
52 		vector<float>   sumSquaredRef;  //sumSquaredRef is the sum of squared distances at each window for the references for query
53 		vector<float> sumQuery;  //sumQuery is the sum of distances at each window for the comparison of query to references for query
54 		vector<float>  sumSquaredQuery;  //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query
55 		vector<float> varRef;  //varRef is the variance among reference seqs at each window for query
56 		vector<float> varQuery;  //varQuery is the variance among references and query at each window
57 		vector<float> sdRef;  //sdRef is the standard deviation of reference seqs at each window for query
58 		vector<float> sdQuery;  //sdQuery is the standard deviation of references and query at each window
59 		vector<float> anova;  //anova is the vector of anova scores for each window for query
60 		int refCombo;  //refCombo is the number of reference sequence combinations for query
61 		vector<bool>  isChimericConfidence;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
62 		vector<bool>  isChimericTStudent;  //isChimericTStudent indicates whether query is chimeric at a given window, presumably according to the Student's t test (see getT) - TODO confirm
63 		vector<bool>  isChimericANOVA;  //isChimericANOVA indicates whether query is chimeric at a given window, presumably according to the ANOVA scores (see anova and getF) - TODO confirm
64 
65 		vector<SeqDist>  findClosest(Sequence*, int);	//NOTE(review): presumably returns the reference sequences closest to the given sequence, with the int as how many to return - confirm
66 		void removeBadReferenceSeqs(vector<SeqDist>&);  //removes sequences from closest that are too different or too similar to each other; edits the vector in place through the reference.
67 		void trimSequences(Sequence*);	//NOTE(review): presumably fills trim for the given sequence - confirm in ccode.cpp
68 		vector<int> findWindows();	//NOTE(review): presumably computes the window breaks that are stored in windows - confirm in ccode.cpp
69 		void getAverageRef(vector<SeqDist>);		//fills sumRef, averageRef, sumSquaredRef and refCombo; the vector is taken by value (copied).
70 		void getAverageQuery (vector<SeqDist>, Sequence*);	//fills sumQuery, averageQuery, sumSquaredQuery; the vector is taken by value (copied).
71 		void findVarianceRef ();						//fills varRef and sdRef; also sets minimum error rate to 0.001 to avoid divide by 0.
72 		void findVarianceQuery ();					//fills varQuery and sdQuery
73 		void determineChimeras ();					//fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
74 
75 		int getDiff(string, string);  //returns the number of mismatched bases; a gap to base is not counted as a mismatch
76 		float getT(int);	//NOTE(review): presumably looks up a Student's t critical value for the int argument - confirm in ccode.cpp
77 		float getF(int);	//NOTE(review): presumably looks up an F critical value (used with the anova scores) for the int argument - confirm in ccode.cpp
78 };
79 
80 /***********************************************************/
81 
82 #endif
83 
84 
85