8 * Created by Sarah Westcott on 7/9/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
17 /***********************************************************/
18 //This class was created using the algorithms described in the
19 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
20 //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
22 /***********************************************************/
24 class Pintail : public Chimera {
27 Pintail(string, string); //constructor taking two strings; NOTE(review): the parameters are unnamed in this declaration - take their meaning from the definition
30 int getChimeras(Sequence*); //takes a Sequence pointer and returns an int; NOTE(review): the meaning of the returned value is not stated here - confirm against the definition
33 void setCons(string c) {
    //keep the caller-supplied string in consfile; nothing else is touched
    consfile = c;
}
34 void setQuantiles(string q) {
    //keep the caller-supplied string in quanfile; nothing else is touched
    quanfile = q;
}
42 string fastafile, consfile; //consfile is assigned by setCons(); NOTE(review): both presumably hold file names - confirm
44 vector<linePair*> templateLines; //NOTE(review): holds raw linePair pointers; who owns and frees them is not visible from this declaration - confirm
47 Sequence* bestfit; //closest match to query in template
49 vector<float> obsDistance; //obsDistance is the vector of observed distances for query
50 vector<float> expectedDistance; //expectedDistance is the vector of expected distances for query
51 float deviation; //deviation is the percentage of mismatched pairs over the whole seq between query and its best match.
52 vector<int> windowsForeachQuery; //windowsForeachQuery is a vector containing the starting spot in the aligned query sequence for each window.
53 //this is needed so you can move by bases and not just by spots in the alignment
55 int windowSizes; //windowSizes = window size of query
56 vector<int> windowSizesTemplate; //windowSizesTemplate[0] = window size of templateSeqs[0]
58 map<int, int> trimmed; //trimmed = start and stop of trimmed sequences for query
59 map<int, int>::iterator it; //NOTE(review): iterator stored as a member; presumably scratch state for walking trimmed - confirm it is never used after trimmed changes
61 vector<float> Qav; //Qav is the vector of average variability for query
62 float seqCoef; //seqCoef is the coeff for query
63 float DE; //DE is the deviation for query
64 vector<float> probabilityProfile; //NOTE(review): undocumented in the original; presumably one probability value per alignment position - confirm
65 vector< vector<float> > quantiles; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
66 vector< vector<quanMember> > quantilesMembers; //NOTE(review): the original comment here was copied from quantiles; presumably quantilesMembers[0] holds the quanMember entries with ceiling score of 1, quantilesMembers[1] those with ceiling score of 2... - confirm
70 vector<float> readFreq(); //returns a vector of floats; NOTE(review): what is read, and from where, is defined in the implementation - confirm
71 Sequence* findPairs(Sequence*); //takes a Sequence pointer and returns a Sequence pointer; NOTE(review): presumably returns the closest template match for the given query (see bestfit) - confirm
73 void createProcessesQuan(); //NOTE(review): name suggests the quantile computation is split across processes - confirm against the definition
78 /***********************************************************/