8 * Created by Sarah Westcott on 7/9/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
17 //This class was created using the algorithms described in the
18 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
19 //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
21 /***********************************************************/
23 class Pintail : public Chimera {
26 Pintail(string, string); //constructor taking two strings. NOTE(review): presumably the fasta file name and the template file name (cf. fastafile, templateFile) - confirm against the definition
32 void setCons(string c) { consfile = c; } //stores c in consfile
33 void setQuantiles(string q) { quanfile = q; } //stores q in quanfile
41 linePair(int i, int j) : start(i), end(j) {} //initializes start with i and end with j
48 string fastafile, templateFile, consfile, quanfile; //consfile is assigned by setCons() and quanfile by setQuantiles(). NOTE(review): presumably all four hold file names - confirm
51 vector<linePair*> lines; //NOTE(review): presumably start/end ranges (linePair) used to split the query input - confirm
52 vector<linePair*> templateLines; //NOTE(review): presumably start/end ranges (linePair) used to split the template input - confirm
53 vector<Sequence*> querySeqs; //the query sequences; the per-query vectors in this class are indexed in parallel with this one
54 vector<Sequence*> templateSeqs; //the template sequences
56 vector<Sequence*> bestfit; //bestfit[0] matches querySeqs[0]...
58 vector< vector<float> > obsDistance; //obsDistance[0] is the vector of observed distances for querySeqs[0]...
59 vector< vector<float> > expectedDistance; //expectedDistance[0] is the vector of expected distances for querySeqs[0]...
60 vector<float> deviation; //deviation[0] is the percentage of mismatched pairs over the whole sequence between querySeqs[0] and its best match.
61 vector< vector<int> > windowsForeachQuery; //windowsForeachQuery[0] is a vector containing, for each window, the starting spot in the aligned sequence of querySeqs[0].
62 //this is needed so you can move by bases and not just by spots in the alignment
64 vector<int> windowSizes; //windowSizes[0] = window size of querySeqs[0]
65 vector<int> windowSizesTemplate; //windowSizesTemplate[0] = window size of templateSeqs[0]
67 vector< map<int, int> > trimmed; //trimmed[0] = start and stop of the trimmed sequences for querySeqs[0]
68 map<int, int>::iterator it; //iterator over a map<int, int>. NOTE(review): presumably used to walk the maps stored in trimmed - confirm
70 vector< vector<float> > Qav; //Qav[0] is the vector of average variability for querySeqs[0]...
71 vector<float> seqCoef; //seqCoef[0] is the coefficient for querySeqs[0]...
72 vector<float> DE; //DE[0] is the deviation for querySeqs[0]...
73 vector<float> probabilityProfile; //NOTE(review): presumably the values returned by readFreq() - confirm
74 vector< vector<float> > quantiles; //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
75 vector< vector<quanMember> > quantilesMembers; //NOTE(review): presumably quantilesMembers[0] is the vector of quanMember entries with ceiling score of 1, quantilesMembers[1] those with ceiling score of 2... - confirm
79 vector<float> readFreq(); //returns a vector<float>. NOTE(review): presumably parsed from the file named by consfile - confirm
80 vector< vector<float> > readQuantiles(); //returns a vector of float vectors. NOTE(review): presumably parsed from the file named by quanfile, in the layout described for quantiles - confirm
81 vector<Sequence*> findPairs(int, int); //returns a vector of Sequence pointers. NOTE(review): presumably the best matches (cf. bestfit) for a range of querySeqs given by the two ints - confirm
83 void createProcessesSpots(); //NOTE(review): presumably the multi-process step that fills windowsForeachQuery / windowSizes - confirm against the definition
84 void createProcessesPairs(); //NOTE(review): presumably the multi-process step that runs findPairs() - confirm against the definition
85 void createProcesses(); //NOTE(review): presumably the multi-process step for the main per-query calculations - confirm against the definition
86 void createProcessesQuan(); //NOTE(review): presumably the multi-process step that computes the quantiles - confirm against the definition
88 vector<float> makeCompliant; //used by decalc->getQuantiles so that pintail and mallard can use the same function; it contains the highest de value for each seq in the template
92 /***********************************************************/