#include "chimera.h"
#include "dist.h"
+#include "decalc.h"
//This class was created using the algorithms described in the
// "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
void getChimeras();
void print(ostream&);
- void setCons(string c) { consfile = c; }
+ void setCons(string c) { consfile = c; } //stores the given string in consfile
+ void setQuantiles(string q) { quanfile = q; } //stores the given string in quanfile
private:
int start;
int end;
linePair(int i, int j) : start(i), end(j) {} //builds a pair with start = i and end = j
+ linePair() : start(0), end(0) {} //default pair is zero-initialized so start/end are never read uninitialized
};
Dist* distcalculator;
+ DeCalculator* decalc;
int iters;
- string fastafile, templateFile, consfile;
+ string fastafile, templateFile, consfile, quanfile;
+
+
vector<linePair*> lines;
+ vector<linePair*> templateLines;
vector<Sequence*> querySeqs;
vector<Sequence*> templateSeqs;
- vector<Sequence> bestfit; //bestfit[0] matches queryseqs[0]...
+ vector<Sequence*> bestfit; //bestfit[0] matches queryseqs[0]...
vector< vector<float> > obsDistance; //obsDistance[0] is the vector of observed distances for queryseqs[0]...
vector< vector<float> > expectedDistance; //expectedDistance[0] is the vector of expected distances for queryseqs[0]...
vector<float> deviation; //deviation[0] is the percentage of mismatched pairs over the whole seq between querySeqs[0] and its best match.
- vector< vector<int> > windows; // windows[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
+ vector< vector<int> > windowsForeachQuery; // windowsForeachQuery[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
//this is needed so you can move by bases and not just spots in the alignment
- vector< map<int, int> > trim; //trim[0] is the start and end position of trimmed querySeqs[0]. Used to find the variability over each sequence window.
- vector<int> windowSizes; //windowSizes[0] = window size of querySeqs[0]
+ vector<int> windowSizes; //windowSizes[0] = window size of querySeqs[0]
+ vector<int> windowSizesTemplate; //windowSizesTemplate[0] = window size of templateSeqs[0]
+
+ vector< map<int, int> > trimmed; //trimmed[0] = start and stop of trimmed sequences for querySeqs[0]
+ map<int, int>::iterator it;
vector< vector<float> > Qav; //Qav[0] is the vector of average variability for queryseqs[0]...
vector<float> seqCoef; //seqCoef[0] is the coeff for queryseqs[0]...
vector<float> DE; //DE[0] is the deviation for queryseqs[0]...
vector<float> probabilityProfile;
+ vector< vector<float> > quantiles; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
+ vector< set<int> > h;
- vector<Sequence*> readSeqs(string);
- void trimSeqs(Sequence*, Sequence&, int);
- vector<float> readFreq();
- vector< vector<float> > findQav(int, int);
- vector<float> calcFreq(vector<Sequence*>);
- vector<float> getCoef(int, int);
- vector<Sequence> findPairs(int, int);
- vector< vector<int> > findWindows(int, int);
- vector< vector<float> > calcObserved(int, int);
- vector< vector<float> > calcExpected(int, int);
- vector<float> calcDE(int, int);
- vector<float> calcDist(int, int);
-
+ vector<float> readFreq();
+ vector< vector<float> > readQuantiles();
+ vector<Sequence*> findPairs(int, int);
+
void createProcessesSpots();
+ void createProcessesPairs();
void createProcesses();
+ void createProcessesQuan();