#include "chimera.h"
#include "kmer.hpp"
#include "kmerdb.hpp"
-#include "database.hpp"
+#include "alignmentdb.h"
+/***********************************************************/
//This class was created using the algorythms described in
//CHIMERA_CHECK version 2.7 written by Niels Larsen.
class ChimeraCheckRDP : public Chimera {
public:
- ChimeraCheckRDP(string, string);
+ ChimeraCheckRDP(string, string, string, bool, int, int, string); //parameters, in order: fasta, template, name, svg, increment, ksize, outputDir
~ChimeraCheckRDP();
- void getChimeras();
- void print(ostream&);
-
- void setCons(string){};
- void setQuantiles(string q) {};
+ int getChimeras(Sequence*); //takes one Sequence* (presumably the query to check - TODO confirm); meaning of the int return is not visible in this header
+ int print(ostream&, ostream&); //takes two output streams; which stream receives what, and the meaning of the int return, are not visible here - TODO document
+ #ifdef USE_MPI
+ int print(MPI_File&, MPI_File&); //MPI_File overload of print, declared only when USE_MPI is defined
+ #endif
private:
- vector<linePair*> lines;
- vector<Sequence*> querySeqs;
- Database* templateDB;
+ Sequence* querySeq; //presumably the sequence handed to getChimeras(); ownership is not visible here - TODO confirm
+ AlignmentDB* templateDB; //presumably built from the template argument of the constructor; ownership is not visible here - TODO confirm
Kmer* kmer;
-
- vector< vector<sim> > IS; //IS[0] is the vector of IS values for each window for querySeqs[0]
-
- //map of vector of maps- I know its a little convaluted but I am trying to save time
- //I think that since the window is only sliding 10 bases there is a good probability that the closest seq to each fragment
- //will be the same for several windows so I want to save the vector of maps containing its kmer info rather than regenerating it.
- //So...
- map<string, vector< map<int, int> > > seqKmerInfo; // outer map - sequence name -> kmer info
- // kmer info: inner vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
- //example: seqKmerInfo["ecoli"][50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
- //i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the
- //kmers 2 seqs had in common. There may be a better way to do this thats why I am leaving so many comments...
- map<string, vector< map<int, int> > >:: iterator it;
- map<int, int>::iterator it2;
-
- vector<Sequence> closest; //closest[0] is the closest overall seq to querySeqs[0].
-
- string fastafile, templateFile;
-
-
- vector<sim> findIS(int);
- int calcKmers(map<int, int>, map<int, int>);
- vector< vector<sim> > createProcessesIS(vector<Sequence*>, vector<linePair*>);
-
+ Sequence closest; //closest is the closest overall seq to query
+
+ vector<sim> IS; //IS is the vector of IS values for each window for query
+ string fastafile; //presumably the fasta argument of the constructor - TODO confirm
+ map<string, string> names; //presumably filled by readName(); what the key and value hold is not visible here - TODO confirm
+ string name; //presumably the name argument of the constructor - TODO confirm
+ bool svg; //presumably enables makeSVGpic() output - TODO confirm
+ int kmerSize, increment; //presumably the ksize and increment arguments of the constructor - TODO confirm
+
+ vector<sim> findIS(); //returns a vector<sim>; presumably the per-window values kept in IS - TODO confirm
+ int calcKmers(map<int, int>, map<int, int>); //NOTE(review): both maps are taken by value, so every call copies them; consider const references if the definition does not modify them
+ void makeSVGpic(vector<sim>); //NOTE(review): the vector is taken by value, so it is copied on each call
+ void readName(string); //presumably reads the name file into names - TODO confirm
};
-
/***********************************************************/
#endif