]> git.donarmstrong.com Git - mothur.git/blob - pintail.h
chimera.seqs pintail is working.
[mothur.git] / pintail.h
1 #ifndef PINTAIL_H
2 #define PINTAIL_H
3
4 /*
5  *  pintail.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 7/9/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12
13 #include "chimera.h"
14 #include "dist.h"
15 #include "decalc.h"
16
17 //This class was created using the algorithms described in the 
18 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper 
19 //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
20
21 /***********************************************************/
22
23 class Pintail : public Chimera {
24         
25         public:
26                 Pintail(string, string);        
27                 ~Pintail();
28                 
29                 void getChimeras();
30                 void print(ostream&);
31                 
32                 void setCons(string c)          { consfile = c;  }
33                 void setQuantiles(string q) { quanfile = q;  }
34                 
35                 
36         private:
37         
38                 struct linePair {
39                         int start;
40                         int end;
41                         linePair(int i, int j) : start(i), end(j) {}
42                         linePair(){}
43                 };
44
45                 Dist* distcalculator;
46                 DeCalculator* decalc;
47                 int iters;
48                 string fastafile, templateFile, consfile, quanfile;
49                 
50                 
51                 vector<linePair*> lines;
52                 vector<linePair*> templateLines;
53                 vector<Sequence*> querySeqs;
54                 vector<Sequence*> templateSeqs;
55                 
56                 vector<Sequence*> bestfit;  //bestfit[0] matches queryseqs[0]...
57                 
58                 vector< vector<float> > obsDistance;  //obsDistance[0] is the vector of observed distances for queryseqs[0]... 
59                 vector< vector<float> > expectedDistance;  //expectedDistance[0] is the vector of expected distances for queryseqs[0]... 
60                 vector<float> deviation;  //deviation[0] is the percentage of mismatched pairs over the whole seq between querySeqs[0] and its best match.
61                 vector< vector<int> > windowsForeachQuery;  // windowsForeachQuery[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
62                                                                                 //this is needed so you can move by bases and not just spots in the alignment
63                                                                                 
64                 vector<int> windowSizes;                        //windowSizes[0] = window size of querySeqs[0]
65                 vector<int> windowSizesTemplate;    //windowSizesTemplate[0] = window size of templateSeqs[0]
66                 
67                 vector< map<int, int> > trimmed;    //trimmed[0] = start and stop of trimmed sequences for querySeqs[0]
68                 map<int, int>::iterator it;
69                 
70                 vector< vector<float> > Qav;    //Qav[0] is the vector of average variablility for queryseqs[0]... 
71                 vector<float>  seqCoef;                         //seqCoef[0] is the coeff for queryseqs[0]...
72                 vector<float> DE;                                       //DE[0] is the deviaation for queryseqs[0]...
73                 vector<float> probabilityProfile;
74                 vector< vector<float> > quantiles;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
75                 vector< vector<quanMember> > quantilesMembers;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
76                 vector< set<int> > h;
77                 
78                 
79                 vector<float> readFreq();
80                 vector< vector<float> > readQuantiles();
81                 vector<Sequence*> findPairs(int, int);
82                         
83                 void createProcessesSpots();
84                 void createProcessesPairs();
85                 void createProcesses();
86                 void createProcessesQuan();
87                 
88                 vector<float> makeCompliant;  //used by decalc->getQuantiles so pintail and mallard can use same function, it contains the highest de value for each seq in the template
89                 
90 };
91
92 /***********************************************************/
93
94 #endif
95