]> git.donarmstrong.com Git - mothur.git/blob - pintail.h
added modify names parameter to set.dir
[mothur.git] / pintail.h
1 #ifndef PINTAIL_H
2 #define PINTAIL_H
3
4 /*
5  *  pintail.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 7/9/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12
13 #include "chimera.h"
14 #include "dist.h"
15 #include "decalc.h"
16
17 /***********************************************************/
18 //This class was created using the algorithms described in the 
19 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper 
20 //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
21
22 /***********************************************************/
23
24 class Pintail : public Chimera {   // Chimera subclass declaring the Pintail check from the paper cited above; member definitions live outside this header
25         
26         public:
27                 Pintail(string, string, bool, int, string, string, string, int, int, string); //arguments in order: fastafile, templatefile, filter, processors, mask, conservation, quantile, window, increment, outputDir
28                 ~Pintail();
29                 
30                 int getChimeras(Sequence*);   //takes one Sequence pointer (presumably the query); meaning of the int result is not visible in this header - confirm in the implementation
31                 Sequence print(ostream&, ostream&);   //takes two output streams and returns a Sequence; which stream receives what is decided in the implementation - TODO confirm
32                 
33                 void setCons(string c)          { consfile = c;  }   //stores c as the conservation file name (consfile)
34                 void setQuantiles(string q) { quanfile = q;  }   //stores q as the quantile file name (quanfile)
35                 
36                 #ifdef USE_MPI
37                 Sequence print(MPI_File&, MPI_File&);   //overload of print taking two MPI file handles; only declared when USE_MPI is defined
38                 #endif
39                 
40         private:
41         
42                 Dist* distcalculator;   //NOTE(review): ownership/lifetime of this pointer is not visible in the header
43                 DeCalculator* decalc;   //NOTE(review): ownership/lifetime of this pointer is not visible in the header
44                 int iters, window, increment, processors;   //window, increment and processors match constructor arguments by name; use of iters is not visible here
45                 string fastafile, quanfile, consfile;   //quanfile and consfile are also assigned by setQuantiles() and setCons()
46                 
47                 vector<linePair*> templateLines;
48                 Sequence* querySeq;
49                                 
50                 Sequence* bestfit;  //closest match to query in template
51                 
52                 vector<float>  obsDistance;  //obsDistance is the vector of observed distances for query 
53                 vector<float>  expectedDistance;  //expectedDistance is the vector of expected distances for query
54                 float deviation;  //deviation is the percentage of mismatched pairs over the whole seq between query and its best match.
55                 vector<int>  windowsForeachQuery;  // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window.
56                                                                                 //this is needed so you can move by bases and not just spots in the alignment
57                                                                                 
58                 int  windowSizes;                       //windowSizes = window size of query
59                 vector<int> windowSizesTemplate;    //windowSizesTemplate[0] = window size of templateSeqs[0]
60                 
61                 map<int, int> trimmed;    //trimmed = start and stop of trimmed sequences for query
62                 map<int, int>::iterator it;   //iterator of the same map type as trimmed, kept as a member
63                 
64                 vector<float>  Qav;     //Qav is the vector of average variability for query
65                 float  seqCoef;         //seqCoef is the coeff for query
66                 float DE;                       //DE is the deviation for query
67                 vector<float> probabilityProfile;
68                 vector< vector<float> > quantiles;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
69                 vector< vector<float> > quantilesMembers;  //NOTE(review): the earlier comment here was a copy of the one on quantiles; presumably holds the member values behind each quantile bin - confirm in the implementation
70                 set<int>  h;   //NOTE(review): purpose not evident from this header
71                 string mergedFilterString;
72                 
73                 vector< vector<float> > readQuantiles();   //returns a vector of float vectors (same shape as quantiles); presumably read from quanfile - TODO confirm
74                 vector<float> readFreq();   //returns a vector of floats; presumably read from consfile - TODO confirm
75                 Sequence* findPairs(Sequence*);   //takes a Sequence pointer and returns one; presumably the best template match for a query - TODO confirm
76                         
77                 void createProcessesQuan();
78                 int doPrep();   //meaning of the int result is not visible in this header
79                 void printQuanFile(string, string);   //takes two strings; their roles are not visible in this header
80                 
81 };
82
83 /***********************************************************/
84
85 #endif
86