X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=ccode.h;h=456b735df524ff3eed9c2688be95f8537c0d0c86;hp=6322292c12534fc44838255ed6490da4292bbf46;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=40873e9a7e12d248ebb86e75ca96238c7e7b9701 diff --git a/ccode.h b/ccode.h index 6322292..456b735 100644 --- a/ccode.h +++ b/ccode.h @@ -14,6 +14,7 @@ #include "dist.h" #include "decalc.h" +/***********************************************************/ //This class was created using the algorythms described in the // "Evaluating putative chimeric sequences from PCR-amplified products" paper //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez. @@ -23,67 +24,66 @@ class Ccode : public Chimera { public: - Ccode(string, string); + Ccode(string, string, bool, string, int, int, string); //fasta, template, filter, mask, window, numWanted, outputDir ~Ccode(); - void getChimeras(); - void print(ostream&); - - void setCons(string c) {} - void setQuantiles(string q) {} - + int getChimeras(Sequence* query); + Sequence print(ostream&, ostream&); + #ifdef USE_MPI + Sequence print(MPI_File&, MPI_File&); + #endif private: Dist* distCalc; DeCalculator* decalc; - int iters; - string fastafile, templateFile; + int iters, window, numWanted; + string fastafile, mapInfo; + Sequence* querySeq; - vector lines; - vector templateLines; - vector querySeqs; - vector templateSeqs; - vector< map > spotMap; + map spotMap; map::iterator it; - vector< vector > windows; //windows[0] is the vector of window breaks for querySeqs[0] - vector windowSizes; //windowSizes[0] is the size of the windows for querySeqs[0] - vector< map > trim; //trim[0] is the map containing the starting and ending positions for querySeqs[0] set of seqs - vector< vector > closest; //closest[0] is a vector of sequence at are closest to queryseqs[0]... - vector< vector > averageRef; //averageRef[0] is the average distance at each window for the references for querySeqs[0] - vector< vector > averageQuery; //averageQuery[0] is the average distance at each winow for the query for querySeqs[0] - vector< vector > sumRef; //sumRef[0] is the sum of distances at each window for the references for querySeqs[0] - vector< vector > sumSquaredRef; //sumSquaredRef[0] is the sum of squared distances at each window for the references for querySeqs[0] - vector< vector > sumQuery; //sumQuery[0] is the sum of distances at each window for the comparison of query to references for querySeqs[0] - vector< vector > sumSquaredQuery; //sumSquaredQuery[0] is the sum of squared distances at each window for the comparison of query to references for querySeqs[0] - vector< vector > varRef; //varRef[0] is the variance among references seqs at each window for querySeqs[0] - vector< vector > varQuery; //varQuery[0] is the variance among references and querySeqs[0] at each window - vector< vector > sdRef; //sdRef[0] is the standard deviation of references seqs at each window for querySeqs[0] - vector< vector > sdQuery; //sdQuery[0] is the standard deviation of references and querySeqs[0] at each window - vector< vector > anova; //anova[0] is the vector of anova scores for each window for querySeqs[0] - vector refCombo; //refCombo[0] is the number of reference sequences combinations for querySeqs[0] - vector< vector > isChimericConfidence; //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits - vector< vector > isChimericTStudent; //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits - vector< vector > isChimericANOVA; //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits + vector windows; //windows is the vector of window breaks for query + int windowSizes; //windowSizes is the size of the windows for query + map trim; //trim is the map containing the starting and ending positions for query + vector closest; //closest is a vector of sequence at are closest to query + vector averageRef; //averageRef is the average distance at each window for the references for query + vector averageQuery; //averageQuery is the average distance at each winow for the query for query + vector sumRef; //sumRef is the sum of distances at each window for the references for query + vector sumSquaredRef; //sumSquaredRef is the sum of squared distances at each window for the references for query + vector sumQuery; //sumQuery is the sum of distances at each window for the comparison of query to references for query + vector sumSquaredQuery; //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query + vector varRef; //varRef is the variance among references seqs at each window for query + vector varQuery; //varQuery is the variance among references and query at each window + vector sdRef; //sdRef is the standard deviation of references seqs at each window for query + vector sdQuery; //sdQuery is the standard deviation of references and query at each window + vector anova; //anova is the vector of anova scores for each window for query + int refCombo; //refCombo is the number of reference sequences combinations for query + vector isChimericConfidence; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits + vector isChimericTStudent; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits + vector isChimericANOVA; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits - vector< vector > findClosest(int, int, int); - void removeBadReferenceSeqs(vector&, int); //removes sequences from closest that are to different of too similar to eachother. - void trimSequences(int); - vector findWindows(int); - void getAverageRef(vector, int); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i]. - void getAverageQuery (vector, int); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i]. - void findVarianceRef (int); //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0. - void findVarianceQuery (int); //fills varQuery[i] and sdQuery[i] - void determineChimeras (int); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i]. + vector findClosest(Sequence*, int); + void removeBadReferenceSeqs(vector&); //removes sequences from closest that are to different of too similar to eachother. + void trimSequences(Sequence*); + vector findWindows(); + void getAverageRef(vector); //fills sumRef, averageRef, sumSquaredRef and refCombo. + void getAverageQuery (vector, Sequence*); //fills sumQuery, averageQuery, sumSquaredQuery. + void findVarianceRef (); //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0. + void findVarianceQuery (); //fills varQuery and sdQuery + void determineChimeras (); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. int getDiff(string, string); //return number of mismatched bases, a gap to base is not counted as a mismatch float getT(int); float getF(int); - void createProcessesClosest(); - + #ifdef USE_MPI + int printMapping(string&); + MPI_File outMap; + #endif + }; /***********************************************************/