]> git.donarmstrong.com Git - mothur.git/blob - chimeracheckrdp.h
continued work on chimeras and fixed bug in trim.seqs and reverse.seqs that was due...
[mothur.git] / chimeracheckrdp.h
1 #ifndef CHIMERACHECK_H
2 #define CHIMERACHECK_H
3
4 /*
5  *  chimeracheckrdp.h
6  *  Mothur
7  *
8  *  Created by westcott on 9/8/09.
9  *  Copyright 2009 Schloss Lab. All rights reserved.
10  *
11  */
12
13
14 #include "chimera.h"
15 #include "kmer.hpp"
16 #include "kmerdb.hpp"
17 #include "database.hpp"
18
19 //This class was created using the algorithms described in 
20 //CHIMERA_CHECK version 2.7 written by Niels Larsen. 
21
22 /***********************************************************/
23
24 class ChimeraCheckRDP : public Chimera {
25         // Chimera subclass that keeps, per query sequence, a vector of per-window IS values (see IS below).
26         public:
27                 ChimeraCheckRDP(string, string);        // NOTE(review): presumably the fasta file and template file names (cf. fastafile, templateFile) - confirm in the .cpp
28                 ~ChimeraCheckRDP();
29                 // getChimeras() and print() are only declared here, like the constructor and destructor; their definitions are not in this header.
30                 void getChimeras();
31                 void print(ostream&);
32                 // The two setters below have empty bodies, so the string passed in is ignored by this class.
33                 void setCons(string){};
34                 void setQuantiles(string q) {};
35                 
36                 
37         private:
38                 // NOTE(review): linePair is declared elsewhere; its meaning is not visible from this header - confirm against chimera.h.
39                 vector<linePair*> lines;
40                 vector<Sequence*> querySeqs;
41                 Database* templateDB;
42                 Kmer* kmer;
43                 
44                 vector< vector<sim> > IS;  //IS[0] is the vector of IS values for each window for querySeqs[0]
45                 
46                 //map of vector of maps - I know it's a little convoluted but I am trying to save time 
47                 //I think that since the window is only sliding 10 bases there is a good probability that the closest seq to each fragment
48                 //will be the same for several windows so I want to save the vector of maps containing its kmer info rather than regenerating it.
49                 //So...
50                 map<string, vector< map<int, int> > > seqKmerInfo;  // outer map - sequence name -> kmer info 
51                                                                                                                         // kmer info: inner vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
52                                                                                                                         //example:  seqKmerInfo["ecoli"][50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
53                                                                                                                         //I chose to store the kmer numbers in a map so you wouldn't have to check for duplicate entries and could easily find the 
54                                                                                                                         //kmers 2 seqs had in common.  There may be a better way to do this, that's why I am leaving so many comments...
55                 map<string, vector< map<int, int> > >:: iterator it;  // member iterator whose type matches seqKmerInfo
56                 map<int, int>::iterator it2;  // member iterator whose type matches the inner map<int, int>
57                 
58                 vector<Sequence> closest;               //closest[0] is the closest overall seq to querySeqs[0].
59                 
60                 string fastafile, templateFile;  // NOTE(review): presumably filled from the two constructor strings - confirm in the .cpp
61                 
62                 
63                 vector<sim> findIS(int);  // NOTE(review): the int is presumably an index into querySeqs, the result one row of IS - confirm
64                 int calcKmers(map<int, int>, map<int, int>);            // both maps are taken by value (copied on each call); NOTE(review): presumably counts kmers the two maps share - confirm
65                 vector< vector<sim> > createProcessesIS(vector<Sequence*>, vector<linePair*>);  // both vectors are taken by value; the return type is the same as IS
66                 
67 };
68
69 /***********************************************************/
70
71 #endif
72