#include "chimera.h"
#include "dist.h"
+#include "decalc.h"
+/***********************************************************/
//This class was created using the algorithms described in the
// "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
//by Kevin E. Ashelford, Nadia A. Chuzhanova, John C. Fry, Antonia J. Jones and Andrew J. Weightman.
class Pintail : public Chimera {
public:
- Pintail(string);
+ Pintail(string, string, bool, int, string, string, string, int, int, string); //fastafile, templatefile, filter, processors, mask, conservation, quantile, window, increment, outputDir
~Pintail();
- void getChimeras();
- void print(ostream&);
+ int getChimeras(Sequence*); //presumably takes the query sequence to check; the meaning of the int result is defined in the implementation - confirm
+ int print(ostream&, ostream&); //takes two output streams; which stream receives what is defined in the implementation - confirm
+
+ void setCons(string c) { consfile = c; } //stores c in consfile
+ void setQuantiles(string q) { quanfile = q; } //stores q in quanfile
+ #ifdef USE_MPI
+ int print(MPI_File&, MPI_File&); //overload of print taking MPI file handles; only declared when USE_MPI is defined
+ #endif
private:
- struct linePair {
- int start;
- int end;
- linePair(int i, int j) : start(i), end(j) {}
- };
-
-
- Dist* distCalculator;
- string fastafile;
- int iters;
- vector<linePair*> lines;
- vector<Sequence*> querySeqs;
- vector<Sequence*> templateSeqs;
+ Dist* distcalculator;
+ DeCalculator* decalc;
+ int iters, window, increment, processors;
+ string fastafile, quanfile, consfile;
- map<Sequence*, Sequence*> bestfit; //maps a query sequence to its most similiar sequence in the template
- map<Sequence*, Sequence*>::iterator itBest;
+ vector<linePair*> templateLines; //NOTE(review): linePair is no longer declared inside this class; presumably it now comes from an included header - confirm
+ Sequence* querySeq;
+
+ Sequence* bestfit; //closest match to query in template
- map<Sequence*, vector<float> > obsDistance; //maps a query sequence to its observed distance at each window
- map<Sequence*, vector<float> > expectedDistance; //maps a query sequence to its expected distance at each window
- map<Sequence*, vector<float> >::iterator itObsDist;
- map<Sequence*, vector<float> >::iterator itExpDist;
+ vector<float> obsDistance; //obsDistance is the vector of observed distances for query
+ vector<float> expectedDistance; //expectedDistance is the vector of expected distances for query
+ float deviation; //deviation is the percentage of mismatched pairs over the whole seq between query and its best match.
+ vector<int> windowsForeachQuery; // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window.
+ //this is needed so you can move by bases and not just spots in the alignment
+
+ int windowSizes; //windowSizes = window size of query
+ vector<int> windowSizesTemplate; //windowSizesTemplate[0] = window size of templateSeqs[0]
- vector<float> averageProbability; //Qav
- map<Sequence*, float> seqCoef; //maps a sequence to its coefficient
- map<Sequence*, float> DE; //maps a sequence to its deviation
- map<Sequence*, float>::iterator itCoef;
+ map<int, int> trimmed; //trimmed = start and stop of trimmed sequences for query
+ map<int, int>::iterator it;
- vector<Sequence*> readSeqs(string);
- vector<float> findQav(vector<float>);
- vector<float> calcFreq(vector<Sequence*>);
- map<Sequence*, float> getCoef(vector<float>);
+ vector<float> Qav; //Qav is the vector of average variability for query
+ float seqCoef; //seqCoef is the coeff for query
+ float DE; //DE is the deviation for query
+ vector<float> probabilityProfile;
+ vector< vector<float> > quantiles; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
+ vector< vector<float> > quantilesMembers; //NOTE(review): this comment was a verbatim copy of the one on quantiles; confirm what quantilesMembers[i] holds and document it here
+ set<int> h;
+ string mergedFilterString;
- void findPairs(int, int);
- void calcObserved(int, int);
- void calcExpected(int, int);
- void calcDE(int, int);
-
- void createProcessesPairs();
- void createProcessesObserved();
- void createProcessesExpected();
- void createProcessesDE();
+ vector< vector<float> > readQuantiles();
+ vector<float> readFreq();
+ Sequence* findPairs(Sequence*);
+
+ void createProcessesQuan();
+ int doPrep();
+ void printQuanFile(string, string);
};