]> git.donarmstrong.com Git - mothur.git/blob - pintail.h
worked on chimeras
[mothur.git] / pintail.h
1 #ifndef PINTAIL_H
2 #define PINTAIL_H
3
4 /*
5  *  pintail.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 7/9/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12
13 #include "chimera.h"
14 #include "dist.h"
15
16 //This class was created using the algorithms described in the paper
17 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies"
18 //by Kevin E. Ashelford, Nadia A. Chuzhanova, John C. Fry, Antonia J. Jones and Andrew J. Weightman.
19
20 /***********************************************************/
21
22 class Pintail : public Chimera {
23         
24         public:
25                 Pintail(string, string);        
26                 ~Pintail();
27                 
28                 void getChimeras();
29                 void print(ostream&);
30                 
31                 void setCons(string c)          { consfile = c;  }
32                 void setQuantiles(string q) { quanfile = q;  }
33                 
34                 
35         private:
36         
37                 struct linePair {
38                         int start;
39                         int end;
40                         linePair(int i, int j) : start(i), end(j) {}
41                         linePair(){}
42                 };
43
44                 Dist* distcalculator;
45                 int iters;
46                 string fastafile, templateFile, consfile, quanfile;
47                 
48                 vector<linePair*> lines;
49                 vector<linePair*> templateLines;
50                 vector<Sequence*> querySeqs;
51                 vector<Sequence*> templateSeqs;
52                 
53                 vector<Sequence> bestfit;  //bestfit[0] matches queryseqs[0]...
54                 
55                 vector< vector<float> > obsDistance;  //obsDistance[0] is the vector of observed distances for queryseqs[0]... 
56                 vector< vector<float> > expectedDistance;  //expectedDistance[0] is the vector of expected distances for queryseqs[0]... 
57                 vector<float> deviation;  //deviation[0] is the percentage of mismatched pairs over the whole seq between querySeqs[0] and its best match.
58                 vector< vector<int> > windowsForeachQuery;  // windowsForeachQuery[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
59                                                                                 //this is needed so you can move by bases and not just spots in the alignment
60                                                                                 
61                 vector<int> windowSizes;                        //windowSizes[0] = window size of querySeqs[0]
62                 vector<int> windowSizesTemplate;    //windowSizesTemplate[0] = window size of templateSeqs[0]
63                 
64                 vector< map<int, int> > trimmed;    //trimmed[0] = start and stop of trimmed sequences for querySeqs[0]
65                 map<int, int>::iterator it;
66                 
67                 vector< vector<float> > Qav;    //Qav[0] is the vector of average variablility for queryseqs[0]... 
68                 vector<float>  seqCoef;                         //seqCoef[0] is the coeff for queryseqs[0]...
69                 vector<float> DE;                                       //DE[0] is the deviaation for queryseqs[0]...
70                 vector<float> probabilityProfile;
71                 vector< vector<float> > quantiles;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
72                 
73                 void trimSeqs(Sequence*, Sequence, map<int, int>&);
74                 vector<float> readFreq();
75                 vector< vector<float> > readQuantiles();
76                 vector< vector<float> > getQuantiles(int, int);
77                 vector<float> calcFreq(vector<Sequence*>);
78                 
79                 vector<Sequence> findPairs(int, int);
80                 vector<int> findWindows(Sequence*, int, int, int&);
81                 vector<float> calcObserved(Sequence*, Sequence, vector<int>, int);
82                 vector<float>  calcExpected(vector<float>, float);
83                 vector<float>  findQav(vector<int>, int);  
84                 float calcDE(vector<float>, vector<float>);
85                 float calcDist(Sequence*, Sequence, int, int);
86                 float getCoef(vector<float>, vector<float>);
87         
88                 void createProcessesSpots();
89                 void createProcesses();
90                 void createProcessesQuan();
91                 
92                 
93                 
94 };
95
96 /***********************************************************/
97
98 #endif
99