8 * Created by westcott on 8/24/09.
9 * Copyright 2009 Schloss LAB. All rights reserved.
17 /***********************************************************/
18 //This class was created using the algorithms described in the
19 // "Evaluating putative chimeric sequences from PCR-amplified products" paper
20 //by Juan M. Gonzalez, Johannes Zimmermann and Cesareo Saiz-Jimenez.
22 /***********************************************************/
24 class Ccode : public Chimera { //Ccode derives from Chimera and declares the per-window statistics and helper methods used to evaluate a query sequence
27 Ccode(string, string); //NOTE(review): the meaning of the two string arguments is not visible in this declaration - presumably they relate to fastafile and mapInfo; confirm in the implementation
30 int getChimeras(Sequence* query); //NOTE(review): presumably runs the chimera evaluation for query; the meaning of the int return value is not visible here - confirm in the implementation
32 void printHeader(ostream&); //NOTE(review): presumably writes a header to the given output stream; confirm in the implementation
38 string fastafile, mapInfo; //NOTE(review): presumably the fasta file name and the mapping information supplied to the constructor; confirm in the implementation
42 map<int, int> spotMap; //NOTE(review): purpose is not visible in this declaration; see the implementation
43 map<int, int>::iterator it; //iterator over a map<int, int> (the type shared by spotMap and trim)
45 vector<int> windows; //windows is the vector of window breaks for query
46 int windowSizes; //windowSizes is the size of the windows for query
47 map<int, int> trim; //trim is the map containing the starting and ending positions for query
48 vector<SeqDist> closest; //closest is a vector of the sequences that are closest to query
49 vector<float> averageRef; //averageRef is the average distance at each window for the references for query
50 vector<float> averageQuery; //averageQuery is the average distance at each window for the query
51 vector<float> sumRef; //sumRef is the sum of distances at each window for the references for query
52 vector<float> sumSquaredRef; //sumSquaredRef is the sum of squared distances at each window for the references for query
53 vector<float> sumQuery; //sumQuery is the sum of distances at each window for the comparison of query to its references
54 vector<float> sumSquaredQuery; //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to its references
55 vector<float> varRef; //varRef is the variance among reference seqs at each window for query
56 vector<float> varQuery; //varQuery is the variance among references and query at each window
57 vector<float> sdRef; //sdRef is the standard deviation of reference seqs at each window for query
58 vector<float> sdQuery; //sdQuery is the standard deviation of references and query at each window
59 vector<float> anova; //anova is the vector of anova scores for each window for query
60 int refCombo; //refCombo is the number of reference sequence combinations for query
61 vector<bool> isChimericConfidence; //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
62 vector<bool> isChimericTStudent; //isChimericTStudent indicates whether query is chimeric at a given window - NOTE(review): presumably according to the Student's t test (the previous comment was a copy of the isChimericConfidence one); confirm in determineChimeras
63 vector<bool> isChimericANOVA; //isChimericANOVA indicates whether query is chimeric at a given window - NOTE(review): presumably according to the anova scores (the previous comment was a copy of the isChimericConfidence one); confirm in determineChimeras
65 vector<SeqDist> findClosest(Sequence*, int); //returns a vector of SeqDist - NOTE(review): presumably the sequences closest to the given query, with the int limiting how many are returned; confirm in the implementation
66 void removeBadReferenceSeqs(vector<SeqDist>&); //removes sequences from closest that are too different from or too similar to each other.
67 void trimSequences(Sequence*); //NOTE(review): presumably fills trim with the start and end positions for the given query; confirm in the implementation
68 vector<int> findWindows(); //returns a vector of ints - NOTE(review): presumably the window breaks stored in windows; confirm in the implementation
69 void getAverageRef(vector<SeqDist>); //fills sumRef, averageRef, sumSquaredRef and refCombo.
70 void getAverageQuery (vector<SeqDist>, Sequence*); //fills sumQuery, averageQuery, sumSquaredQuery.
71 void findVarianceRef (); //fills varRef and sdRef; also sets the minimum error rate to 0.001 to avoid dividing by 0.
72 void findVarianceQuery (); //fills varQuery and sdQuery
73 void determineChimeras (); //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
75 int getDiff(string, string); //returns the number of mismatched bases; a gap against a base is not counted as a mismatch
80 /***********************************************************/