]> git.donarmstrong.com Git - mothur.git/blob - ccode.h
finished with ccode, returned bellerophon to last save before move, cleaned up pintai...
[mothur.git] / ccode.h
1 #ifndef CCODE_H
2 #define CCODE_H
3
4 /*
5  *  ccode.h
6  *  Mothur
7  *
8  *  Created by westcott on 8/24/09.
9  *  Copyright 2009 Schloss LAB. All rights reserved.
10  *
11  */
12
13 #include "chimera.h"
14 #include "dist.h"
15 #include "decalc.h"
16
17 //This class was created using the algorithms described in the 
18 // "Evaluating putative chimeric sequences from PCR-amplified products" paper 
19 //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.
20
21 /***********************************************************/
22
23 class Ccode : public Chimera {
24         //Chimera subclass; apart from the two empty inline setters below, only declarations appear in this header.
25         public:
26                 Ccode(string, string);  
27                 ~Ccode();
28                 
29                 void getChimeras();
30                 void print(ostream&);
31                 
32                 void setCons(string c)          {}  //empty body: the argument is ignored
33                 void setQuantiles(string q) {}  //empty body: the argument is ignored
34                 
35                 
36         private:
37         
38                 Dist* distCalc;
39                 DeCalculator* decalc;
40                 int iters;
41                 string fastafile, templateFile;
42                 
43                 
44                 vector<linePair*> lines;
45                 vector<linePair*> templateLines;
46                 vector<Sequence*> querySeqs;
47                 vector<Sequence*> templateSeqs;
48                 vector< map<int, int> > spotMap;
49                 map<int, int>::iterator it;
50                 
51                 vector< vector<int> > windows; //windows[0] is the vector of window breaks for querySeqs[0]
52                 vector<int> windowSizes;  //windowSizes[0] is the size of the windows for querySeqs[0]
53                 vector< map<int, int> > trim;  //trim[0] is the map containing the starting and ending positions for querySeqs[0] set of seqs
54                 vector< vector<SeqDist> > closest;  //closest[0] is a vector of the sequences that are closest to querySeqs[0]...
55                 vector< vector<float> > averageRef;  //averageRef[0] is the average distance at each window for the references for querySeqs[0]
56                 vector< vector<float> > averageQuery;  //averageQuery[0] is the average distance at each window for the query for querySeqs[0]
57                 vector< vector<float> >  sumRef;  //sumRef[0] is the sum of distances at each window for the references for querySeqs[0]
58                 vector< vector<float> >  sumSquaredRef;  //sumSquaredRef[0] is the sum of squared distances at each window for the references for querySeqs[0]
59                 vector< vector<float> > sumQuery;  //sumQuery[0] is the sum of distances at each window for the comparison of query to references for querySeqs[0]
60                 vector< vector<float> >  sumSquaredQuery;  //sumSquaredQuery[0] is the sum of squared distances at each window for the comparison of query to references for querySeqs[0]
61                 vector< vector<float> > varRef;  //varRef[0] is the variance among reference seqs at each window for querySeqs[0]
62                 vector< vector<float> > varQuery;  //varQuery[0] is the variance among references and querySeqs[0] at each window
63                 vector< vector<float> > sdRef;  //sdRef[0] is the standard deviation of reference seqs at each window for querySeqs[0]
64                 vector< vector<float> > sdQuery;  //sdQuery[0] is the standard deviation of references and querySeqs[0] at each window
65                 vector< vector<float> > anova;  //anova[0] is the vector of anova scores for each window for querySeqs[0]
66                 vector<int> refCombo;  //refCombo[0] is the number of reference sequences combinations for querySeqs[0]
67                 vector< vector<bool> > isChimericConfidence;  //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
68                 vector< vector<bool> > isChimericTStudent;  //isChimericTStudent[0] indicates whether querySeqs[0] is chimeric at a given window, presumably according to the Student's t test - TODO confirm against determineChimeras
69                 vector< vector<bool> > isChimericANOVA;  //isChimericANOVA[0] indicates whether querySeqs[0] is chimeric at a given window, presumably according to the ANOVA test - TODO confirm against determineChimeras
70                 
71                 vector< vector<SeqDist> > findClosest(int, int, int); 
72                 void removeBadReferenceSeqs(vector<SeqDist>&, int);  //removes sequences from closest that are too different or too similar to each other.
73                 void trimSequences(int);
74                 vector<int> findWindows(int);
75                 void getAverageRef(vector<SeqDist>, int);               //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
76                 void getAverageQuery (vector<SeqDist>, int);    //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
77                 void findVarianceRef (int);                                             //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
78                 void findVarianceQuery (int);                                   //fills varQuery[i] and sdQuery[i]
79                 void determineChimeras (int);                                   //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
80                 
81                 int getDiff(string, string);  //returns the number of mismatched bases; a gap to base is not counted as a mismatch
82                 float getT(int); 
83                 float getF(int); 
84                 
85                 void createProcessesClosest();
86                 void createProcessesRemoveBad();
87                 void createProcessesAverages();
88                 void createProcessesVariances();
89                 void createProcessesDetermine();
90                                 
91 };
92
93 /***********************************************************/
94
95 #endif
96
97