git.donarmstrong.com Git - mothur.git/blobdiff - ccode.h
added modify names parameter to set.dir
[mothur.git] / ccode.h
diff --git a/ccode.h b/ccode.h
index cced0b7d404b4145a437b677f37fa869bb8abb3f..456b735df524ff3eed9c2688be95f8537c0d0c86 100644 (file)
--- a/ccode.h
+++ b/ccode.h
@@ -14,6 +14,7 @@
 #include "dist.h"
 #include "decalc.h"
 
+/***********************************************************/
 //This class was created using the algorithms described in the 
 // "Evaluating putative chimeric sequences from PCR-amplified products" paper 
 //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.
 class Ccode : public Chimera {
        
        public:
-               Ccode(string, string);  
+               Ccode(string, string, bool, string, int, int, string);  //fasta, template, filter, mask, window, numWanted, outputDir
                ~Ccode();
                
-               void getChimeras();
-               void print(ostream&);
-               
-               void setCons(string c)          {}
-               void setQuantiles(string q) {}
-               
+               int getChimeras(Sequence* query);
+               Sequence print(ostream&, ostream&);
                
+               #ifdef USE_MPI
+               Sequence print(MPI_File&, MPI_File&);
+               #endif
        private:
        
                Dist* distCalc;
                DeCalculator* decalc;
-               int iters;
-               string fastafile, templateFile;
+               int iters, window, numWanted;
+               string fastafile, mapInfo;
                
+               Sequence* querySeq;
                
-               vector<linePair*> lines;
-               vector<linePair*> templateLines;
-               vector<Sequence*> querySeqs;
-               vector<Sequence*> templateSeqs;
+               map<int, int> spotMap;
+               map<int, int>::iterator it;
                
-               vector<int> windows;
-               vector< vector<Sequence*> > closest;  //closest[0] is a vector of sequence at are closest to queryseqs[0]...
-               vector< vector<float> > averageRef;  //averageRef[0] is the average distance at each window for the references for querySeqs[0]
-               vector< vector<float> > averageQuery;  //averageQuery[0] is the average distance at each winow for the query for querySeqs[0]
+               vector<int>  windows; //windows is the vector of window breaks for query
+               int windowSizes;  //windowSizes is the size of the windows for query
+               map<int, int> trim;  //trim is the map containing the starting and ending positions for query
+               vector<SeqDist>  closest;  //closest is a vector of the sequences that are closest to query
+               vector<float>  averageRef;  //averageRef is the average distance at each window for the references for query
+               vector<float>  averageQuery;  //averageQuery is the average distance at each window for the query for query
+               vector<float>   sumRef;  //sumRef is the sum of distances at each window for the references for query
+               vector<float>   sumSquaredRef;  //sumSquaredRef is the sum of squared distances at each window for the references for query
+               vector<float> sumQuery;  //sumQuery is the sum of distances at each window for the comparison of query to references for query
+               vector<float>  sumSquaredQuery;  //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query
+               vector<float> varRef;  //varRef is the variance among references seqs at each window for query
+               vector<float> varQuery;  //varQuery is the variance among references and query at each window
+               vector<float> sdRef;  //sdRef is the standard deviation of references seqs at each window for query
+               vector<float> sdQuery;  //sdQuery is the standard deviation of references and query at each window
+               vector<float> anova;  //anova is the vector of anova scores for each window for query
+               int refCombo;  //refCombo is the number of reference sequences combinations for query
+               vector<bool>  isChimericConfidence;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
+               vector<bool>  isChimericTStudent;  //isChimericTStudent indicates whether query is chimeric at a given window according to the Student's t test
+               vector<bool>  isChimericANOVA;  //isChimericANOVA indicates whether query is chimeric at a given window according to the ANOVA test
                
-               vector< vector<Sequence*> > findClosest(int, int, int); 
-               void removeBadReferenceSeqs(vector<Sequence*>&, int);  //removes sequences from closest that are to different of too similar to eachother. 
-               void trimSequences();
+               vector<SeqDist>  findClosest(Sequence*, int); 
+               void removeBadReferenceSeqs(vector<SeqDist>&);  //removes sequences from closest that are too different or too similar to each other. 
+               void trimSequences(Sequence*);
                vector<int> findWindows();
-               vector<float> getAverageRef(vector<Sequence*>);
-               vector<float> getAverageQuery (vector<Sequence*>, int);
-               
+               void getAverageRef(vector<SeqDist>);            //fills sumRef, averageRef, sumSquaredRef and refCombo.
+               void getAverageQuery (vector<SeqDist>, Sequence*);      //fills sumQuery, averageQuery, sumSquaredQuery.
+               void findVarianceRef ();                                                //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
+               void findVarianceQuery ();                                      //fills varQuery and sdQuery
+               void determineChimeras ();                                      //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
                
                int getDiff(string, string);  //return number of mismatched bases, a gap to base is not counted as a mismatch
+               float getT(int); 
+               float getF(int); 
                
-               void createProcessesClosest();
-               
+               #ifdef USE_MPI
+               int printMapping(string&);
+               MPI_File outMap;
+               #endif
+
 };
 
 /***********************************************************/