]> git.donarmstrong.com Git - mothur.git/blob - pintail.h
changed sequence class so that when constructor is called aligned and unaligned value...
[mothur.git] / pintail.h
1 #ifndef PINTAIL_H
2 #define PINTAIL_H
3
4 /*
5  *  pintail.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 7/9/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12
13 #include "chimera.h"
14 #include "dist.h"
15
16 //This class was created using the algorithms described in the 
17 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper 
18 //by Kevin E. Ashelford, Nadia A. Chuzhanova, John C. Fry, Antonia J. Jones and Andrew J. Weightman.
19
20 /***********************************************************/
21
// Pintail: a Chimera subclass built on the algorithms of the Ashelford et al.
// 16S rRNA anomalies paper. This header only declares the interface and the
// per-query state; every member function except the two inline setters is
// defined outside this header.
// Convention used by the containers below: index i of a per-query vector
// corresponds to querySeqs[i].
22 class Pintail : public Chimera {
23         
24         public:
                   // NOTE(review): the two strings are presumably the query fasta file and the
                   // template file (cf. fastafile / templateFile below) -- confirm in the definition.
25                 Pintail(string, string);        
26                 ~Pintail();
27                 
                   // NOTE(review): presumably the Chimera entry points (run the analysis, then
                   // write the results to the given stream) -- confirm against chimera.h.
28                 void getChimeras();
29                 void print(ostream&);
30                 
                   // Inline setters: store the given string in consfile / quanfile respectively.
                   // NOTE(review): these look like optional input file names read by
                   // readFreq() / readQuantiles() -- confirm.
31                 void setCons(string c)          { consfile = c;  }
32                 void setQuantiles(string q) { quanfile = q;  }
33                 
34                 
35         private:
36         
                   // Plain pair of ints (start, end); the constructor stores i in start and j in end.
                   // NOTE(review): presumably a range of work handed to one process by the
                   // createProcesses* helpers -- confirm.
37                 struct linePair {
38                         int start;
39                         int end;
40                         linePair(int i, int j) : start(i), end(j) {}
41                 };
42
                   // NOTE(review): raw pointer; ownership is not visible in this header --
                   // confirm whether ~Pintail() is responsible for deleting it.
43                 Dist* distcalculator;
44                 int iters;
45                 string fastafile, templateFile, consfile, quanfile;
46                 
                   // Raw-pointer containers. NOTE(review): confirm who frees the pointed-to
                   // linePair / Sequence objects.
47                 vector<linePair*> lines;
48                 vector<Sequence*> querySeqs;
49                 vector<Sequence*> templateSeqs;
50                 
51                 vector<Sequence> bestfit;  //bestfit[0] is the best match for querySeqs[0]...
52                 
53                 vector< vector<float> > obsDistance;  //obsDistance[0] is the vector of observed distances for querySeqs[0]... 
54                 vector< vector<float> > expectedDistance;  //expectedDistance[0] is the vector of expected distances for querySeqs[0]... 
55                 vector<float> deviation;  //deviation[0] is the percentage of mismatched pairs over the whole sequence between querySeqs[0] and its best match.
56                 vector< vector<int> > windows;  // windows[0] holds, for each window, its starting position in the aligned sequence of querySeqs[0].
57                                                                                 //This is needed so that windows can advance by bases and not just by alignment positions.
58                                                                                 
59                 vector<int> windowSizes;    //windowSizes[0] = window size of querySeqs[0]
60                 
61                 vector< map<int, int> > trimmed;    //trimmed[0] = start and stop of the trimmed sequences for querySeqs[0]
62                 map<int, int>::iterator it;         //iterator over a map<int, int>, kept as a member; NOTE(review): presumably scratch for walking trimmed[i] -- confirm
63                 
64                 vector< vector<float> > Qav;    //Qav[0] is the vector of average variability for querySeqs[0]... 
65                 vector<float>  seqCoef;                         //seqCoef[0] is the coefficient for querySeqs[0]...
66                 vector<float> DE;                                       //DE[0] is the deviation for querySeqs[0]...
67                 vector<float> probabilityProfile;
68                 vector< vector<float> > quantiles;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
69                 
                   // Private helpers, declared here and defined elsewhere.
                   // NOTE(review): judging by names only, this first group reads or prepares the
                   // input data (sequences, frequencies, quantiles) -- confirm in the definitions.
70                 vector<Sequence*> readSeqs(string);
71                 map<int, int> trimSeqs(Sequence*, Sequence, int);
72                 vector<float> readFreq();
73                 vector< vector<float> > readQuantiles();
74                 vector< vector<float> > getQuantiles(int, int);
75                 vector<float> calcFreq(vector<Sequence*>);
76                 
77                 
                   // NOTE(review): judging by names and return types, these fill the per-query
                   // containers above (bestfit, windows, obsDistance, expectedDistance, Qav, DE,
                   // seqCoef) -- confirm. All vector arguments are taken by value.
78                 vector<Sequence> findPairs(int, int);
79                 vector<int> findWindows(Sequence*, int, int, int&);
80                 vector<float> calcObserved(Sequence*, Sequence, vector<int>, int);
81                 vector<float>  calcExpected(vector<float>, float);
82                 vector<float>  findQav(vector<int>, int);  
83                 float calcDE(vector<float>, vector<float>);
84                 float calcDist(Sequence*, Sequence, int, int);
85                 float getCoef(vector<float>, vector<float>);
86         
                   // NOTE(review): presumably split the work across processes using `lines` -- confirm.
87                 void createProcessesSpots();
88                 void createProcesses();
89                 void createProcessesQuan();
90                 
91                 
92                 
93 };
94
95 /***********************************************************/
96
97 #endif
98