8 * Created by Sarah Westcott on 7/9/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
16 //This class was created using the algorithms described in the
17 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
18 //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
20 /***********************************************************/
// Chimera subclass built on the algorithms of the Ashelford et al. paper cited in the file header.
22 class Pintail : public Chimera {
// NOTE(review): the two string arguments are presumably the query fasta file name and the template file name - confirm in the implementation.
25 Pintail(string, string);
// Stores c in consfile.
31 void setCons(string c) { consfile = c; }
// linePair constructor: initializes start with i and end with j.
39 linePair(int i, int j) : start(i), end(j) {}
// consfile is assigned through setCons(). NOTE(review): fastafile and templateFile are presumably set by the constructor - confirm.
44 string fastafile, templateFile, consfile;
45 vector<linePair*> lines; // NOTE(review): presumably the start/end ranges of work given to each process - confirm.
46 vector<Sequence*> querySeqs;
47 vector<Sequence*> templateSeqs;
// The per-query containers below are indexed in parallel with querySeqs (index i refers to querySeqs[i]).
49 vector<Sequence> bestfit; //bestfit[0] matches querySeqs[0]...
51 vector< vector<float> > obsDistance; //obsDistance[0] is the vector of observed distances for querySeqs[0]...
52 vector< vector<float> > expectedDistance; //expectedDistance[0] is the vector of expected distances for querySeqs[0]...
53 vector<float> deviation; //deviation[0] is the percentage of mismatched pairs over the whole sequence between querySeqs[0] and its best match.
54 vector< vector<int> > windows; // windows[0] is a vector containing the starting spot in the aligned sequence of querySeqs[0] for each window.
55 //This is needed so you can move by bases and not just by spots in the alignment.
56 vector< map<int, int> > trim; //trim[0] is the start and end position of trimmed querySeqs[0]. Used to find the variability over each sequence window.
58 vector<int> windowSizes; //windowSizes[0] = window size of querySeqs[0]
60 vector< vector<float> > Qav; //Qav[0] is the vector of average variability for querySeqs[0]...
61 vector<float> seqCoef; //seqCoef[0] is the coefficient for querySeqs[0]...
62 vector<float> DE; //DE[0] is the deviation for querySeqs[0]...
63 vector<float> probabilityProfile; // NOTE(review): presumably produced by readFreq() or calcFreq() - confirm.
// Helper declarations. NOTE(review): the (int, int) parameters are presumably a start/end range of querySeqs indices
// (compare linePair's start/end) - confirm against the implementation.
65 vector<Sequence*> readSeqs(string);
66 void trimSeqs(Sequence*, Sequence&, int);
67 vector<float> readFreq();
68 vector< vector<float> > findQav(int, int);
69 vector<float> calcFreq(vector<Sequence*>);
70 vector<float> getCoef(int, int);
72 vector<Sequence> findPairs(int, int);
73 vector< vector<int> > findWindows(int, int);
74 vector< vector<float> > calcObserved(int, int);
75 vector< vector<float> > calcExpected(int, int);
76 vector<float> calcDE(int, int);
77 vector<float> calcDist(int, int);
// NOTE(review): the names suggest these distribute the work across processes - confirm in the implementation.
79 void createProcessesSpots();
80 void createProcesses();
86 /***********************************************************/