X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=ccode.h;fp=ccode.h;h=456b735df524ff3eed9c2688be95f8537c0d0c86;hb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;hp=0000000000000000000000000000000000000000;hpb=1b73ff67c83892a025e597dabd9df6fe7b58206a;p=mothur.git

diff --git a/ccode.h b/ccode.h
new file mode 100644
index 0000000..456b735
--- /dev/null
+++ b/ccode.h
@@ -0,0 +1,93 @@
+#ifndef CCODE_H
+#define CCODE_H
+
+/*
+ *  ccode.h
+ *  Mothur
+ *
+ *  Created by westcott on 8/24/09.
+ *  Copyright 2009 Schloss LAB. All rights reserved.
+ *
+ */
+
+#include "chimera.h"
+#include "dist.h"
+#include "decalc.h"
+
+/***********************************************************/
+//This class was created using the algorithms described in the
+// "Evaluating putative chimeric sequences from PCR-amplified products" paper
+//by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.
+
+/***********************************************************/
+
+class Ccode : public Chimera {
+
+	public:
+		Ccode(string, string, bool, string, int, int, string);	//fasta, template, filter, mask, window, numWanted, outputDir
+		~Ccode();
+
+		int getChimeras(Sequence* query);
+		Sequence print(ostream&, ostream&);
+
+		#ifdef USE_MPI
+		Sequence print(MPI_File&, MPI_File&);
+		#endif
+	private:
+
+		Dist* distCalc;
+		DeCalculator* decalc;
+		int iters, window, numWanted;
+		string fastafile, mapInfo;
+
+		Sequence* querySeq;
+
+		map spotMap;  //NOTE(review): the template arguments of every map/vector in this patch are missing (apparently stripped together with the angle brackets) - restore them from the original ccode.h before applying
+		map::iterator it;
+
+		vector windows;  //windows is the vector of window breaks for query
+		int windowSizes;  //windowSizes is the size of the windows for query
+		map trim;  //trim is the map containing the starting and ending positions for query
+		vector closest;  //closest is a vector of sequences that are closest to query
+		vector averageRef;  //averageRef is the average distance at each window for the references for query
+		vector averageQuery;  //averageQuery is the average distance at each window for the query for query
+		vector sumRef;  //sumRef is the sum of distances at each window for the references for query
+		vector sumSquaredRef;  //sumSquaredRef is the sum of squared distances at each window for the references for query
+		vector sumQuery;  //sumQuery is the sum of distances at each window for the comparison of query to references for query
+		vector sumSquaredQuery;  //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query
+		vector varRef;  //varRef is the variance among reference seqs at each window for query
+		vector varQuery;  //varQuery is the variance among references and query at each window
+		vector sdRef;  //sdRef is the standard deviation of reference seqs at each window for query
+		vector sdQuery;  //sdQuery is the standard deviation of references and query at each window
+		vector anova;  //anova is the vector of anova scores for each window for query
+		int refCombo;  //refCombo is the number of reference sequences combinations for query
+		vector isChimericConfidence;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
+		vector isChimericTStudent;  //isChimericTStudent indicates whether query is chimeric at a given window, presumably according to the Student's t test (see getT) - TODO confirm
+		vector isChimericANOVA;  //isChimericANOVA indicates whether query is chimeric at a given window, presumably according to the anova scores (see anova, getF) - TODO confirm
+
+		vector findClosest(Sequence*, int);  //presumably produces the sequences stored in closest - TODO confirm
+		void removeBadReferenceSeqs(vector&);  //removes sequences from closest that are too different or too similar to each other.
+		void trimSequences(Sequence*);
+		vector findWindows();  //presumably produces the window breaks stored in windows - TODO confirm
+		void getAverageRef(vector);  //fills sumRef, averageRef, sumSquaredRef and refCombo.
+		void getAverageQuery (vector, Sequence*);  //fills sumQuery, averageQuery, sumSquaredQuery.
+		void findVarianceRef ();  //fills varRef and sdRef, also sets minimum error rate to 0.001 to avoid divide by 0.
+		void findVarianceQuery ();  //fills varQuery and sdQuery
+		void determineChimeras ();  //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
+
+		int getDiff(string, string);  //return number of mismatched bases, a gap to base is not counted as a mismatch
+		float getT(int);
+		float getF(int);
+
+		#ifdef USE_MPI
+		int printMapping(string&);
+		MPI_File outMap;
+		#endif
+
+};
+
+/***********************************************************/
+
+#endif
+
+