8 * Created by westcott on 8/24/09.
9 * Copyright 2009 Schloss LAB. All rights reserved.
17 /***********************************************************/
18 //This class was created using the algorithms described in the
19 // "Evaluating putative chimeric sequences from PCR-amplified products" paper
20 //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.
22 /***********************************************************/
24 class Ccode : public Chimera {
// Constructor taking two strings.
// NOTE(review): presumably the query fasta file name and the template file name
// (cf. the fastafile / templateFile members) -- confirm against the definition.
27 Ccode(string, string);
// No-op: the body is empty, so the argument c is ignored.
// NOTE(review): presumably present only to satisfy the Chimera base interface -- confirm.
33 void setCons(string c) {}
// No-op: the body is empty, so the argument q is ignored.
// NOTE(review): presumably present only to satisfy the Chimera base interface -- confirm.
34 void setQuantiles(string q) {}
42 string fastafile, templateFile; //presumably the query and template file names given to the constructor -- TODO confirm
45 vector<linePair*> lines; //NOTE(review): holds raw pointers; who allocates and frees them is not visible in this declaration
46 vector<Sequence*> querySeqs; //the query sequences; the per-query members of this class are documented as indexed in step with it (e.g. windows[0] is for querySeqs[0])
47 vector<Sequence*> templateSeqs; //presumably the reference (template) sequences -- TODO confirm
48 vector< map<int, int> > spotMap; //one int->int map per entry; NOTE(review): the meaning of the mapping is not visible here -- confirm
49 map<int, int>::iterator it; //iterator over a map<int, int>; presumably scratch for walking spotMap/trim entries -- TODO confirm
51 vector< vector<int> > windows; //windows[0] is the vector of window breaks for querySeqs[0]
52 vector<int> windowSizes; //windowSizes[0] is the size of the windows for querySeqs[0]
53 vector< map<int, int> > trim; //trim[0] is the map containing the starting and ending positions for querySeqs[0]'s set of seqs
54 vector< vector<SeqDist> > closest; //closest[0] is a vector of the sequences that are closest to querySeqs[0]
55 vector< vector<float> > averageRef; //averageRef[0] is the average distance at each window for the references for querySeqs[0]
56 vector< vector<float> > averageQuery; //averageQuery[0] is the average distance at each window for the query for querySeqs[0]
57 vector< vector<float> > sumRef; //sumRef[0] is the sum of distances at each window for the references for querySeqs[0]
58 vector< vector<float> > sumSquaredRef; //sumSquaredRef[0] is the sum of squared distances at each window for the references for querySeqs[0]
59 vector< vector<float> > sumQuery; //sumQuery[0] is the sum of distances at each window for the comparison of query to references for querySeqs[0]
60 vector< vector<float> > sumSquaredQuery; //sumSquaredQuery[0] is the sum of squared distances at each window for the comparison of query to references for querySeqs[0]
61 vector< vector<float> > varRef; //varRef[0] is the variance among reference seqs at each window for querySeqs[0]
62 vector< vector<float> > varQuery; //varQuery[0] is the variance among references and querySeqs[0] at each window
63 vector< vector<float> > sdRef; //sdRef[0] is the standard deviation of reference seqs at each window for querySeqs[0]
64 vector< vector<float> > sdQuery; //sdQuery[0] is the standard deviation of references and querySeqs[0] at each window
65 vector< vector<float> > anova; //anova[0] is the vector of anova scores for each window for querySeqs[0]
66 vector<int> refCombo; //refCombo[0] is the number of reference sequence combinations for querySeqs[0]
67 vector< vector<bool> > isChimericConfidence; //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
68 vector< vector<bool> > isChimericTStudent; //isChimericTStudent[0] indicates whether querySeqs[0] is chimeric at a given window; presumably according to a Student's t-test (the previous comment was a copy of isChimericConfidence's) -- TODO confirm
69 vector< vector<bool> > isChimericANOVA; //isChimericANOVA[0] indicates whether querySeqs[0] is chimeric at a given window; presumably according to the anova scores (the previous comment was a copy of isChimericConfidence's) -- TODO confirm
71 vector< vector<SeqDist> > findClosest(int, int, int); //NOTE(review): parameter meanings are not visible here; presumably produces the per-query lists stored in closest -- confirm
72 void removeBadReferenceSeqs(vector<SeqDist>&, int); //removes sequences from closest that are too different or too similar to each other.
73 void trimSequences(int); //presumably fills trim[i] for querySeqs[i] -- TODO confirm
74 vector<int> findWindows(int); //presumably returns the window breaks kept in windows[i] -- TODO confirm
75 void getAverageRef(vector<SeqDist>, int); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
76 void getAverageQuery (vector<SeqDist>, int); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
77 void findVarianceRef (int); //fills varRef[i] and sdRef[i]; also sets the minimum error rate to 0.001 to avoid dividing by 0.
78 void findVarianceQuery (int); //fills varQuery[i] and sdQuery[i]
79 void determineChimeras (int); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
81 int getDiff(string, string); //returns the number of mismatched bases; a gap paired with a base is not counted as a mismatch
// NOTE(review): the bodies of these five helpers are not visible here. Judging by their names they
// presumably run the matching analysis stage over the queries, possibly split across processes -- confirm.
85 void createProcessesClosest(); //presumably drives findClosest -- TODO confirm
86 void createProcessesRemoveBad(); //presumably drives removeBadReferenceSeqs -- TODO confirm
87 void createProcessesAverages(); //presumably drives getAverageRef / getAverageQuery -- TODO confirm
88 void createProcessesVariances(); //presumably drives findVarianceRef / findVarianceQuery -- TODO confirm
89 void createProcessesDetermine(); //presumably drives determineChimeras -- TODO confirm
93 /***********************************************************/