X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=pintail.h;h=59d4feeec7fa3743a059b331db524b0df3fe48cf;hb=aa9238c0a9e6e7aa0ed8b8b606b08ad4fd7dcfe3;hp=bae39d7e3d7cb5654ca5e4fd1bfb09119a994758;hpb=348a7bac1d3c5d17ae0e4a93b78f69f4e200a190;p=mothur.git diff --git a/pintail.h b/pintail.h index bae39d7..59d4fee 100644 --- a/pintail.h +++ b/pintail.h @@ -12,7 +12,9 @@ #include "chimera.h" #include "dist.h" +#include "decalc.h" +/***********************************************************/ //This class was created using the algorythms described in the // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1. @@ -22,56 +24,55 @@ class Pintail : public Chimera { public: - Pintail(string); + Pintail(string, string); ~Pintail(); - void getChimeras(); - void print(ostream&); + int getChimeras(Sequence*); + int print(ostream&, ostream&); + + void setCons(string c) { consfile = c; } + void setQuantiles(string q) { quanfile = q; } private: - struct linePair { - int start; - int end; - linePair(int i, int j) : start(i), end(j) {} - }; - - - Dist* distCalculator; - string fastafile; + Dist* distcalculator; + DeCalculator* decalc; int iters; - vector lines; - vector querySeqs; - vector templateSeqs; + string fastafile, consfile; - map bestfit; //maps a query sequence to its most similiar sequence in the template - map::iterator itBest; + vector templateLines; + Sequence* querySeq; + + Sequence* bestfit; //closest match to query in template - map > obsDistance; //maps a query sequence to its observed distance at each window - map > expectedDistance; //maps a query sequence to its expected distance at each window - map >::iterator itObsDist; - map >::iterator itExpDist; + vector obsDistance; //obsDistance is the vector of observed distances for query + vector expectedDistance; //expectedDistance is the vector of expected distances for query + float deviation; //deviation is the percentage of mismatched pairs over the whole seq between query and its best match. + vector windowsForeachQuery; // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window. + //this is needed so you can move by bases and not just spots in the alignment + + int windowSizes; //windowSizes = window size of query + vector windowSizesTemplate; //windowSizesTemplate[0] = window size of templateSeqs[0] - vector averageProbability; //Qav - map seqCoef; //maps a sequence to its coefficient - map DE; //maps a sequence to its deviation - map::iterator itCoef; + map trimmed; //trimmed = start and stop of trimmed sequences for query + map::iterator it; - vector readSeqs(string); - vector findQav(vector); - vector calcFreq(vector); - map getCoef(vector); + vector Qav; //Qav is the vector of average variablility for query + float seqCoef; //seqCoef is the coeff for query + float DE; //DE is the deviaation for query + vector probabilityProfile; + vector< vector > quantiles; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2... + vector< vector > quantilesMembers; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2... + set h; + string mergedFilterString; - void findPairs(int, int); - void calcObserved(int, int); - void calcExpected(int, int); - void calcDE(int, int); - - void createProcessesPairs(); - void createProcessesObserved(); - void createProcessesExpected(); - void createProcessesDE(); + + vector readFreq(); + Sequence* findPairs(Sequence*); + + void createProcessesQuan(); + int doPrep(); };