8 * Created by Sarah Westcott on 7/9/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
17 /***********************************************************/
18 //This class was created using the algorithms described in the
19 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
20 //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
22 /***********************************************************/
24 class Pintail : public Chimera { //Chimera subclass; per the comment banner preceding this class, it is built on the algorithms of the Ashelford et al. paper
27 Pintail(string, string, bool, int, string, string, string, int, int, string); //arguments in order: fastafile, templatefile, filter, processors, mask, conservation, quantile, window, increment, outputDir
30 int getChimeras(Sequence*); //takes a Sequence pointer - presumably the query to check; meaning of the int return is not visible here - TODO confirm in the implementation
31 int print(ostream&, ostream&); //writes to two output streams - NOTE(review): what each stream receives is not visible here; confirm in the implementation
33 void setCons(string c) { consfile = c; } //stores c in consfile (presumably the conservation file named in the constructor's argument list)
34 void setQuantiles(string q) { quanfile = q; } //stores q in quanfile (presumably the quantile file named in the constructor's argument list)
37 int print(MPI_File&, MPI_File&); //overload of print taking two MPI_File handles instead of ostreams
44 int iters, window, increment, processors; //window, increment and processors share names with constructor arguments - presumably set from them; role of iters is not visible here
45 string fastafile, quanfile, consfile; //quanfile is assigned by setQuantiles and consfile by setCons
47 vector<linePair*> templateLines; //linePair pointers - NOTE(review): presumably line ranges of the template file split across processes; confirm
50 Sequence* bestfit; //closest match to query in template
52 vector<float> obsDistance; //obsDistance is the vector of observed distances for query
53 vector<float> expectedDistance; //expectedDistance is the vector of expected distances for query
54 float deviation; //deviation is the percentage of mismatched pairs over the whole seq between query and its best match.
55 vector<int> windowsForeachQuery; // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window.
56 //this is needed so you can move by bases and not just spots in the alignment
58 int windowSizes; //windowSizes = window size of query
59 vector<int> windowSizesTemplate; //windowSizesTemplate[0] = window size of templateSeqs[0]
61 map<int, int> trimmed; //trimmed = start and stop of trimmed sequences for query
62 map<int, int>::iterator it; //iterator over a map<int, int>, the same type as trimmed
64 vector<float> Qav; //Qav is the vector of average variability for query
65 float seqCoef; //seqCoef is the coefficient for query
66 float DE; //DE is the deviation for query
67 vector<float> probabilityProfile; //NOTE(review): undocumented in the original - presumably per-position probabilities derived from the conservation data; confirm
68 vector< vector<float> > quantiles; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
69 vector< vector<float> > quantilesMembers; //NOTE(review): the original comment here was a verbatim copy of the one on quantiles; the actual layout of quantilesMembers is not visible in this header - confirm against the implementation
71 string mergedFilterString; //NOTE(review): presumably the combined filter/mask string applied to sequences - confirm
73 vector< vector<float> > readQuantiles(); //returns a vector of float vectors - presumably parsed from quanfile into the layout described for quantiles; TODO confirm
74 vector<float> readFreq(); //returns a vector of floats - presumably read from consfile; TODO confirm
75 Sequence* findPairs(Sequence*); //takes and returns a Sequence pointer - presumably finds the template match for a query (see bestfit); TODO confirm
77 void createProcessesQuan(); //NOTE(review): presumably runs the quantile computation across the configured number of processors; confirm
79 void printQuanFile(string, string); //takes two strings - NOTE(review): their meaning (file name / contents?) is not visible here; confirm in the implementation
83 /***********************************************************/