From 05a0195e07c42c996592ee1c8abb63adedb7f493 Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 28 Sep 2009 14:31:48 +0000 Subject: [PATCH] started adding chimeraslayer method and fixed minor bug in treegroupscommand deconstructor --- Mothur.xcodeproj/project.pbxproj | 26 +- ccode.cpp | 2 +- chimera.cpp | 4 +- chimera.h | 9 +- chimeraseqscommand.cpp | 37 ++- chimeraseqscommand.h | 3 +- chimeraslayer.cpp | 188 +++++++++++++ chimeraslayer.h | 55 ++++ decalc.cpp | 143 ++++++++++ decalc.h | 2 + maligner.cpp | 459 +++++++++++++++++++++++++++++++ maligner.h | 77 ++++++ mothur.cpp | 2 +- pintail.cpp | 23 +- sharedcommand.cpp | 3 + slayer.cpp | 133 +++++++++ slayer.h | 58 ++++ treegroupscommand.cpp | 4 +- 18 files changed, 1191 insertions(+), 37 deletions(-) create mode 100644 chimeraslayer.cpp create mode 100644 chimeraslayer.h create mode 100644 maligner.cpp create mode 100644 maligner.h create mode 100644 slayer.cpp create mode 100644 slayer.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 59291a3..6dbf319 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -165,7 +165,10 @@ A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; }; A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; }; A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; }; + A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; }; + A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; }; A7E4A783106913F900688F62 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A782106913F900688F62 /* getsharedotucommand.cpp */; }; + A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A823106A9AD700688F62 /* maligner.cpp */; }; EB1216880F619B83004A865F /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216870F619B83004A865F /* bergerparker.cpp */; }; EB1216E50F61ACFB004A865F /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216E40F61ACFB004A865F /* bstick.cpp */; }; EB1217230F61C9AC004A865F /* sharedkstest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1217220F61C9AC004A865F /* sharedkstest.cpp */; }; @@ -517,14 +520,20 @@ A70B53A70F4CD7AD0064797E /* getlabelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlabelcommand.h; sourceTree = SOURCE_ROOT; }; A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlinecommand.cpp; sourceTree = SOURCE_ROOT; }; A70B53A90F4CD7AD0064797E /* getlinecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlinecommand.h; sourceTree = SOURCE_ROOT; }; - A70DECD71063D8B40057C03C /* secondarystructurecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = secondarystructurecommand.h; sourceTree = ""; }; - A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = ""; }; + A70DECD71063D8B40057C03C /* secondarystructurecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = secondarystructurecommand.h; sourceTree = SOURCE_ROOT; }; + A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; }; A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; }; A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; }; A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; }; A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; }; - A7E4A781106913F900688F62 /* getsharedotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getsharedotucommand.h; sourceTree = ""; }; - A7E4A782106913F900688F62 /* getsharedotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getsharedotucommand.cpp; sourceTree = ""; }; + A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; }; + A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayer.cpp; sourceTree = SOURCE_ROOT; }; + A7B0450C106CEEC90046FC83 /* slayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slayer.h; sourceTree = SOURCE_ROOT; }; + A7B0450D106CEEC90046FC83 /* slayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slayer.cpp; sourceTree = SOURCE_ROOT; }; + A7E4A781106913F900688F62 /* getsharedotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getsharedotucommand.h; sourceTree = SOURCE_ROOT; }; + A7E4A782106913F900688F62 /* getsharedotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getsharedotucommand.cpp; sourceTree = SOURCE_ROOT; }; + A7E4A822106A9AD700688F62 /* maligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = maligner.h; sourceTree = SOURCE_ROOT; }; + A7E4A823106A9AD700688F62 /* maligner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = maligner.cpp; sourceTree = SOURCE_ROOT; }; C6859E8B029090EE04C91782 /* Mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Mothur.1; sourceTree = ""; }; EB1216860F619B83004A865F /* bergerparker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bergerparker.h; sourceTree = SOURCE_ROOT; }; EB1216870F619B83004A865F /* bergerparker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bergerparker.cpp; sourceTree = SOURCE_ROOT; }; @@ -661,10 +670,16 @@ A75B887B104C16860083C454 /* ccode.cpp */, A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */, A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */, + A7B04491106CC3E60046FC83 /* chimeraslayer.h */, + A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */, 372A55531017922B00C5194B /* decalc.h */, 372A55541017922B00C5194B /* decalc.cpp */, + A7E4A822106A9AD700688F62 /* maligner.h */, + A7E4A823106A9AD700688F62 /* maligner.cpp */, 374F301110063B6F00E97C66 /* pintail.h */, 374F301210063B6F00E97C66 /* pintail.cpp */, + A7B0450C106CEEC90046FC83 /* slayer.h */, + A7B0450D106CEEC90046FC83 /* slayer.cpp */, ); name = chimera; sourceTree = ""; @@ -1191,6 +1206,9 @@ A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */, A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */, A7E4A783106913F900688F62 /* getsharedotucommand.cpp in Sources */, + A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */, + A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */, + A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/ccode.cpp b/ccode.cpp index 0f1dd50..a43a5a1 100644 --- a/ccode.cpp +++ b/ccode.cpp @@ -1480,7 +1480,7 @@ void Ccode::createProcessesVariances() { //find the averages for the query for (int i = 0; i < querySeqs.size(); i++) { - findVarianceQuery(i); //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0. + findVarianceQuery(i); //fills v arQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0. } #endif } diff --git a/chimera.cpp b/chimera.cpp index 6ec24d1..8851b75 100644 --- a/chimera.cpp +++ b/chimera.cpp @@ -10,7 +10,7 @@ #include "chimera.h" //*************************************************************************************************************** -//this is a vertical filter +//this is a vertical soft filter void Chimera::createFilter(vector seqs) { try { @@ -40,6 +40,7 @@ void Chimera::createFilter(vector seqs) { } } + //zero out spot where all sequences have blanks //zero out spot where all sequences have blanks int numColRemoved = 0; for(int i = 0;i < seqs[0]->getAligned().length(); i++){ @@ -48,6 +49,7 @@ void Chimera::createFilter(vector seqs) { else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) { filterString[i] = '0'; numColRemoved++; } //cout << "a = " << a[i] << " t = " << t[i] << " g = " << g[i] << " c = " << c[i] << endl; } + //cout << "filter = " << filterString << endl; mothurOut("Filter removed " + toString(numColRemoved) + " columns."); mothurOutEndLine(); diff --git a/chimera.h b/chimera.h index 5df383d..3efe0e4 100644 --- a/chimera.h +++ b/chimera.h @@ -71,6 +71,12 @@ class Chimera { virtual void setKmerSize(int k) { kmerSize = k; } virtual void setSVG(int s) { svg = s; } virtual void setName(string n) { name = n; } + virtual void setMatch(int m) { match = m; } + virtual void setMisMatch(int m) { misMatch = m; } + virtual void setDivR(float d) { divR = d; } + virtual void setParents(int p) { parents = p; } + virtual void setMinSim(int s) { minSim = s; } + virtual void setCons(string){}; virtual void setQuantiles(string){}; @@ -88,7 +94,8 @@ class Chimera { protected: bool filter, correction, svg; - int processors, window, increment, numWanted, kmerSize; + int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents; + float divR; string seqMask, quanfile, filterString, name; diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp index bd0ad95..925de63 100644 --- a/chimeraseqscommand.cpp +++ b/chimeraseqscommand.cpp @@ -12,6 +12,7 @@ #include "pintail.h" #include "ccode.h" #include "chimeracheckrdp.h" +#include "chimeraslayer.h" //*************************************************************************************************************** @@ -25,7 +26,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ else { //valid paramters for this command - string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name" }; + string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents" }; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -86,15 +87,34 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; } svg = isTrue(temp); - temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; } + temp = validParameter.validFile(parameters, "window", false); + if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; } + else if (temp == "not found") { temp = "0"; } convert(temp, window); - + + temp = validParameter.validFile(parameters, "match", false); if (temp == "not found") { temp = "5"; } + convert(temp, match); + + temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found") { temp = "-4"; } + convert(temp, mismatch); + + temp = validParameter.validFile(parameters, "divergence", false); if (temp == "not found") { temp = "1.0"; } + convert(temp, divR); + + temp = validParameter.validFile(parameters, "minsim", false); if (temp == "not found") { temp = "90"; } + convert(temp, minSimilarity); + + temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "5"; } + convert(temp, parents); + temp = validParameter.validFile(parameters, "increment", false); - if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; } + if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; } else if (temp == "not found") { temp = "25"; } convert(temp, increment); - temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "20"; } + temp = validParameter.validFile(parameters, "numwanted", false); + if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; } + else if (temp == "not found") { temp = "20"; } convert(temp, numwanted); @@ -170,6 +190,7 @@ int ChimeraSeqsCommand::execute(){ else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile); } else if (method == "ccode") { chimera = new Ccode(fastafile, templatefile); } else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, templatefile); } + else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(fastafile, templatefile); } else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; } //set user options @@ -191,6 +212,12 @@ int ChimeraSeqsCommand::execute(){ chimera->setKmerSize(ksize); chimera->setSVG(svg); chimera->setName(namefile); + chimera->setMatch(match); + chimera->setMisMatch(mismatch); + chimera->setDivR(divR); + chimera->setParents(parents); + chimera->setMinSim(minSimilarity); + //find chimeras chimera->getChimeras(); diff --git a/chimeraseqscommand.h b/chimeraseqscommand.h index 98504b0..865a84d 100644 --- a/chimeraseqscommand.h +++ b/chimeraseqscommand.h @@ -30,7 +30,8 @@ private: bool abort; string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile; bool filter, correction, svg; - int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize; + int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity; + float divR; Chimera* chimera; diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp new file mode 100644 index 0000000..293ee80 --- /dev/null +++ b/chimeraslayer.cpp @@ -0,0 +1,188 @@ +/* + * chimeraslayer.cpp + * Mothur + * + * Created by westcott on 9/25/09. + * Copyright 2009 Pschloss Lab. All rights reserved. + * + */ + +#include "chimeraslayer.h" + +//*************************************************************************************************************** +ChimeraSlayer::ChimeraSlayer(string filename, string temp) { fastafile = filename; templateFile = temp; } +//*************************************************************************************************************** + +ChimeraSlayer::~ChimeraSlayer() { + try { + for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; } + for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } + } + catch(exception& e) { + errorOut(e, "ChimeraSlayer", "~ChimeraSlayer"); + exit(1); + } +} +//*************************************************************************************************************** +void ChimeraSlayer::print(ostream& out) { + try { + + mothurOutEndLine(); + + + } + catch(exception& e) { + errorOut(e, "ChimeraSlayer", "print"); + exit(1); + } +} + +//*************************************************************************************************************** +void ChimeraSlayer::getChimeras() { + try { + + //read in query sequences and subject sequences + mothurOut("Reading sequences and template file... "); cout.flush(); + querySeqs = readSeqs(fastafile); + templateSeqs = readSeqs(templateFile); + mothurOut("Done."); mothurOutEndLine(); + + int numSeqs = querySeqs.size(); + + //break up file if needed + int linesPerProcess = numSeqs / processors ; + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + //find breakup of sequences for all times we will Parallelize + if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); } + else { + //fill line pairs + for (int i = 0; i < (processors-1); i++) { + lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess))); + } + //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end + int i = processors - 1; + lines.push_back(new linePair((i*linesPerProcess), numSeqs)); + } + #else + lines.push_back(new linePair(0, numSeqs)); + #endif + + if (seqMask != "") { decalc = new DeCalculator(); } //to use below + + //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity + maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim); + slayer = new Slayer(window, increment, minSim, divR); + + for (int i = 0; i < querySeqs.size(); i++) { + + string chimeraFlag = maligner->getResults(querySeqs[i]); + float percentIdentical = maligner->getPercentID(); + vector Results = maligner->getOutput(); + + cout << querySeqs[4]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl; + + for (int j = 0; j < Results.size(); j++) { + cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl; + + } + + if (chimeraFlag == "yes") { + + //get sequence that were given from maligner results + vector seqs; + for (int j = 0; j < Results.size(); j++) { + Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template + + //seq = NULL if error occurred in getSequence + if (seq == NULL) { break; } + else { + SeqDist member; + member.seq = seq; + member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal; + seqs.push_back(member); + } + } + + //limit number of parents to explore - default 5 + if (Results.size() > parents) { + //sort by distance + sort(seqs.begin(), seqs.end(), compareSeqDist); + //prioritize larger more similiar sequence fragments + reverse(seqs.begin(), seqs.end()); + + for (int k = seqs.size()-1; k > (parents-1); k--) { + delete seqs[k].seq; + seqs.pop_back(); + } + } + + //put seqs into vector to send to slayer + vector seqsForSlayer; + for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); } + + //mask then send to slayer... + if (seqMask != "") { + decalc->setMask(seqMask); + + //mask querys + decalc->runMask(querySeqs[i]); + + //mask parents + for (int k = 0; k < seqsForSlayer.size(); k++) { + decalc->runMask(seqsForSlayer[k]); + } + + } + + //send to slayer + slayer->getResults(querySeqs[i], seqsForSlayer); + + //free memory + for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } + } + + } + //free memory + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } + + if (seqMask != "") { + delete decalc; + } + + + } + catch(exception& e) { + errorOut(e, "ChimeraSlayer", "getChimeras"); + exit(1); + } +} +//*************************************************************************************************************** +Sequence* ChimeraSlayer::getSequence(string name) { + try{ + Sequence* temp; + + //look through templateSeqs til you find it + int spot = -1; + for (int i = 0; i < templateSeqs.size(); i++) { + if (name == templateSeqs[i]->getName()) { + spot = i; + break; + } + } + + if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; } + + temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned()); + + return temp; + } + catch(exception& e) { + errorOut(e, "ChimeraSlayer", "getSequence"); + exit(1); + } +} +//*************************************************************************************************************** + + + diff --git a/chimeraslayer.h b/chimeraslayer.h new file mode 100644 index 0000000..617ba7e --- /dev/null +++ b/chimeraslayer.h @@ -0,0 +1,55 @@ +#ifndef CHIMERASLAYER_H +#define CHIMERASLAYER_H + +/* + * chimeraslayer.h + * Mothur + * + * Created by westcott on 9/25/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + + +#include "chimera.h" +#include "maligner.h" +#include "slayer.h" + +/***********************************************************************/ +//This class was modeled after the chimeraSlayer written by the Broad Institute +/***********************************************************************/ + + +class ChimeraSlayer : public Chimera { + + public: + ChimeraSlayer(string, string); + ~ChimeraSlayer(); + + void getChimeras(); + void print(ostream&); + + void setCons(string){}; + void setQuantiles(string q) {}; + + + private: + DeCalculator* decalc; + Maligner* maligner; + Slayer* slayer; + vector lines; + vector querySeqs; + vector templateSeqs; + + string fastafile, templateFile; + + Sequence* getSequence(string); //find sequence from name + + +}; + +/************************************************************************/ + +#endif + + diff --git a/decalc.cpp b/decalc.cpp index bfaae00..177636c 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -8,6 +8,10 @@ */ #include "decalc.h" +#include "chimera.h" +#include "dist.h" +#include "eachgapdist.h" + //*************************************************************************************************************** void DeCalculator::setMask(string m) { @@ -675,5 +679,144 @@ float DeCalculator::getCoef(vector obs, vector qav) { } } //*************************************************************************************************************** +vector DeCalculator::findClosest(Sequence* querySeq, vector db, int numWanted) { + try { + + vector seqsMatches; + vector dists; + + Dist* distcalculator = new eachGapDist(); + + Sequence query = *(querySeq); + + for(int j = 0; j < db.size(); j++){ + + Sequence temp = *(db[j]); + + distcalculator->calcDist(query, temp); + float dist = distcalculator->getDist(); + + SeqDist subject; + subject.seq = db[j]; + subject.dist = dist; + + dists.push_back(subject); + } + + delete distcalculator; + + sort(dists.begin(), dists.end(), compareSeqDist); + + for (int i = 0; i < numWanted; i++) { + Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. + seqsMatches.push_back(temp); + } + + return seqsMatches; + } + catch(exception& e) { + errorOut(e, "DeCalculator", "findClosest"); + exit(1); + } +} +/***************************************************************************************************************/ +void DeCalculator::trimSeqs(Sequence* query, vector topMatches) { + try { + + int frontPos = 0; //should contain first position in all seqs that is not a gap character + int rearPos = query->getAligned().length(); + + //********find first position in topMatches that is a non gap character***********// + //find first position all query seqs that is a non gap character + for (int i = 0; i < topMatches.size(); i++) { + + string aligned = topMatches[i]->getAligned(); + int pos = 0; + + //find first spot in this seq + for (int j = 0; j < aligned.length(); j++) { + if (isalpha(aligned[j])) { + pos = j; + break; + } + } + + //save this spot if it is the farthest + if (pos > frontPos) { frontPos = pos; } + } + + + string aligned = query->getAligned(); + int pos = 0; + + //find first position in query that is a non gap character + for (int j = 0; j < aligned.length(); j++) { + if (isalpha(aligned[j])) { + pos = j; + break; + } + } + + //save this spot if it is the farthest + if (pos > frontPos) { frontPos = pos; } + + + //********find last position in topMatches that is a non gap character***********// + for (int i = 0; i < topMatches.size(); i++) { + + string aligned = topMatches[i]->getAligned(); + int pos = aligned.length(); + + //find first spot in this seq + for (int j = aligned.length()-1; j >= 0; j--) { + if (isalpha(aligned[j])) { + pos = j; + break; + } + } + + //save this spot if it is the farthest + if (pos < rearPos) { rearPos = pos; } + } + + + aligned = query->getAligned(); + pos = aligned.length(); + + //find last position in query that is a non gap character + for (int j = aligned.length()-1; j >= 0; j--) { + if (isalpha(aligned[j])) { + pos = j; + break; + } + } + + //save this spot if it is the farthest + if (pos < rearPos) { rearPos = pos; } + + //check to make sure that is not whole seq + if ((rearPos - frontPos - 1) <= 0) { mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1); } +//cout << "front = " << frontPos << " rear = " << rearPos << endl; + //trim query + string newAligned = query->getAligned(); + newAligned = newAligned.substr(frontPos, (rearPos-frontPos)); + query->setAligned(newAligned); + + //trim topMatches + for (int i = 0; i < topMatches.size(); i++) { + newAligned = topMatches[i]->getAligned(); + newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1)); + topMatches[i]->setAligned(newAligned); + } + + } + catch(exception& e) { + errorOut(e, "DeCalculator", "trimSequences"); + exit(1); + } + +} +//*************************************************************************************************************** + diff --git a/decalc.h b/decalc.h index dca562a..5f0a820 100644 --- a/decalc.h +++ b/decalc.h @@ -39,11 +39,13 @@ class DeCalculator { DeCalculator() {}; ~DeCalculator() {}; + vector findClosest(Sequence*, vector, int); //takes querySeq, a reference db and numWanted - returns indexes to closest seqs in db set getPos() { return h; } void setMask(string); void setAlignmentLength(int l) { alignLength = l; } void runMask(Sequence*); void trimSeqs(Sequence*, Sequence*, map&); + void trimSeqs(Sequence*, vector); void removeObviousOutliers(vector< vector >&, int); vector calcFreq(vector, string); vector findWindows(Sequence*, int, int, int&, int); diff --git a/maligner.cpp b/maligner.cpp new file mode 100644 index 0000000..824a2cc --- /dev/null +++ b/maligner.cpp @@ -0,0 +1,459 @@ +/* + * maligner.cpp + * Mothur + * + * Created by westcott on 9/23/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "maligner.h" + +/***********************************************************************/ +Maligner::Maligner(vector temp, int num, int match, int misMatch, float div, int minCov) : + db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minCoverage(minCov) {} +/***********************************************************************/ +string Maligner::getResults(Sequence* q) { + try { + + //make copy so trimming doesn't destroy query from calling class - remember to deallocate + query = new Sequence(q->getName(), q->getAligned()); + + string chimera; + + decalc = new DeCalculator(); + + //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate + refSeqs = decalc->findClosest(query, db, numWanted); + + ofstream out; + string outFile = "parentsOf" + query->getName(); + openOutputFile(outFile, out); + for (int i = 0; i < refSeqs.size(); i++) { refSeqs[i]->printSequence(out); } + out.close(); + + refSeqs = minCoverageFilter(refSeqs); + + if (refSeqs.size() < 2) { + for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + percentIdenticalQueryChimera = 0.0; + return "unknown"; + } + + int chimeraPenalty = computeChimeraPenalty(); + + //trims seqs to first non gap char in all seqs and last non gap char in all seqs + decalc->trimSeqs(query, refSeqs); + + vector temp = refSeqs; + temp.push_back(query); + + verticalFilter(temp); + + vector< vector > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes + + fillScoreMatrix(matrix, refSeqs, chimeraPenalty); + + vector path = extractHighestPath(matrix); + + vector trace = mapTraceRegionsToAlignment(path, refSeqs); + + if (trace.size() > 1) { chimera = "yes"; } + else { chimera = "no"; } + + int traceStart = path[0].col; + int traceEnd = path[path.size()-1].col; + + string queryInRange = query->getAligned(); + queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); + + string chimeraSeq = constructChimericSeq(trace, refSeqs); + + percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); + + delete decalc; + + //save output results + for (int i = 0; i < trace.size(); i++) { + int regionStart = trace[i].col; + int regionEnd = trace[i].oldCol; + int seqIndex = trace[i].row; + + results temp; + + temp.parent = refSeqs[seqIndex]->getName(); + temp.regionStart = regionStart; + temp.regionEnd = regionEnd; + + string parentInRange = refSeqs[seqIndex]->getAligned(); + parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1)); + + temp.queryToParent = computePercentID(queryInRange, parentInRange); + temp.divR = (percentIdenticalQueryChimera / temp.queryToParent); + + string queryInRegion = query->getAligned(); + queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1)); + + string parentInRegion = refSeqs[seqIndex]->getAligned(); + parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1)); + + temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion); + + outputResults.push_back(temp); + } + + //free memory + delete query; + for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + + return chimera; + } + catch(exception& e) { + errorOut(e, "Maligner", "getResults"); + exit(1); + } +} +/***********************************************************************/ +//removes top matches that do not have minimum coverage with query. +vector Maligner::minCoverageFilter(vector ref){ + try { + vector newRefs; + + string queryAligned = query->getAligned(); + + for (int i = 0; i < ref.size(); i++) { + + string refAligned = ref[i]->getAligned(); + + int numBases = 0; + int numCovered = 0; + + //calculate coverage + for (int j = 0; j < queryAligned.length(); j++) { + + if (isalpha(queryAligned[j])) { + numBases++; + + if (isalpha(refAligned[j])) { + numCovered++; + } + } + } + + int coverage = ((numCovered/(float)numBases)*100); + + //if coverage above minimum + if (coverage > minCoverage) { + newRefs.push_back(ref[i]); + } + } + + return newRefs; + } + catch(exception& e) { + errorOut(e, "Maligner", "minCoverageFilter"); + exit(1); + } +} +/***********************************************************************/ +// a breakpoint should yield fewer mismatches than this number with respect to the best parent sequence. +int Maligner::computeChimeraPenalty() { + try { + + int numAllowable = ((1.0 - (1.0/minDivR)) * query->getNumBases()); + + int penalty = int(numAllowable + 1) * misMatchPenalty; + + return penalty; + + } + catch(exception& e) { + errorOut(e, "Maligner", "computeChimeraPenalty"); + exit(1); + } +} +/***********************************************************************/ +//this is a vertical filter +void Maligner::verticalFilter(vector seqs) { + try { + vector gaps; gaps.resize(query->getAligned().length(), 0); + + string filterString = (string(query->getAligned().length(), '1')); + + //for each sequence + for (int i = 0; i < seqs.size(); i++) { + + string seqAligned = seqs[i]->getAligned(); + + for (int j = 0; j < seqAligned.length(); j++) { + //if this spot is a gap + if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; } + } + } + + //zero out spot where all sequences have blanks + int numColRemoved = 0; + for(int i = 0; i < seqs[0]->getAligned().length(); i++){ + if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; } + } + + //for each sequence + for (int i = 0; i < seqs.size(); i++) { + + string seqAligned = seqs[i]->getAligned(); + string newAligned = ""; + + for (int j = 0; j < seqAligned.length(); j++) { + //if this spot is not a gap + if (filterString[j] == '1') { newAligned += seqAligned[j]; } + } + + seqs[i]->setAligned(newAligned); + } + + + } + catch(exception& e) { + errorOut(e, "Maligner", "verticalFilter"); + exit(1); + } +} +//*************************************************************************************************************** +vector< vector > Maligner::buildScoreMatrix(int cols, int rows) { + try{ + + vector< vector > m; m.resize(rows); + + for (int i = 0; i < m.size(); i++) { + for (int j = 0; j < cols; j++) { + + //initialize each cell + score_struct temp; + temp.prev = -1; + temp.score = -9999999; + temp.col = j; + temp.row = i; + + m[i].push_back(temp); + } + } + + return m; + } + catch(exception& e) { + errorOut(e, "Maligner", "buildScoreMatrix"); + exit(1); + } +} +//*************************************************************************************************************** +void Maligner::fillScoreMatrix(vector >& m, vector seqs, int penalty) { + try{ + + //get matrix dimensions + int numCols = query->getAligned().length(); + int numRows = seqs.size(); + + //initialize first col + string queryAligned = query->getAligned(); + for (int i = 0; i < numRows; i++) { + string subjectAligned = seqs[i]->getAligned(); + + //are you both gaps? + if ((!isalpha(queryAligned[0])) && (!isalpha(subjectAligned[0]))) { + m[i][0].score = 0; + }else if (queryAligned[0] == subjectAligned[0]) { + m[i][0].score = matchScore; + }else{ + m[i][0].score = 0; + } + } + + //fill rest of matrix + for (int j = 1; j < numCols; j++) { //iterate through matrix columns + + for (int i = 0; i < numRows; i++) { //iterate through matrix rows + + string subjectAligned = seqs[i]->getAligned(); + + int matchMisMatchScore = 0; + //are you both gaps? + if ((!isalpha(queryAligned[j])) && (!isalpha(subjectAligned[j]))) { + //leave the same + }else if ((toupper(queryAligned[j]) == 'N') || (toupper(subjectAligned[j]) == 'N')) { + //leave the same + }else if (queryAligned[j] == subjectAligned[j]) { + matchMisMatchScore = matchScore; + }else if (queryAligned[j] != subjectAligned[j]) { + matchMisMatchScore = misMatchPenalty; + } + + //compute score based on previous columns scores + for (int prevIndex = 0; prevIndex < numRows; prevIndex++) { //iterate through rows + + int sumScore = matchMisMatchScore + m[prevIndex][j-1].score; + + //you are not at yourself + if (prevIndex != i) { sumScore += penalty; } + if (sumScore < 0) { sumScore = 0; } + + if (sumScore > m[i][j].score) { + m[i][j].score = sumScore; + m[i][j].prev = prevIndex; + } + } + } + } + + } + catch(exception& e) { + errorOut(e, "Maligner", "fillScoreMatrix"); + exit(1); + } +} +//*************************************************************************************************************** +vector Maligner::extractHighestPath(vector > m) { + try { + + //get matrix dimensions + int numCols = query->getAligned().length(); + int numRows = m.size(); + + + //find highest score scoring matrix + score_struct highestStruct; + int highestScore = 0; + + for (int i = 0; i < numRows; i++) { + for (int j = 0; j < numCols; j++) { + if (m[i][j].score > highestScore) { + highestScore = m[i][j].score; + highestStruct = m[i][j]; + } + } + } + + vector path; + + int rowIndex = highestStruct.row; + int pos = highestStruct.col; + int score = highestStruct.score; + + while (pos >= 0 && score > 0) { + score_struct temp = m[rowIndex][pos]; + score = temp.score; + + if (score > 0) { path.push_back(temp); } + + rowIndex = temp.prev; + pos--; + } + + reverse(path.begin(), path.end()); + + return path; + + } + catch(exception& e) { + errorOut(e, "Maligner", "extractHighestPath"); + exit(1); + } +} +//*************************************************************************************************************** +vector Maligner::mapTraceRegionsToAlignment(vector path, vector seqs) { + try { + vector trace; + + int region_index = path[0].row; + int region_start = path[0].col; + + for (int i = 1; i < path.size(); i++) { + + int next_region_index = path[i].row; + + if (next_region_index != region_index) { + + // add trace region + int col_index = path[i].col; + trace_struct temp; + temp.col = region_start; + temp.oldCol = col_index-1; + temp.row = region_index; + + trace.push_back(temp); + + region_index = path[i].row; + region_start = col_index; + } + } + + // get last one + trace_struct temp; + temp.col = region_start; + temp.oldCol = path[path.size()-1].col; + temp.row = region_index; + trace.push_back(temp); + + return trace; + + } + catch(exception& e) { + errorOut(e, "Maligner", "mapTraceRegionsToAlignment"); + exit(1); + } +} +//*************************************************************************************************************** +string Maligner::constructChimericSeq(vector trace, vector seqs) { + try { + string chimera = ""; + + for (int i = 0; i < trace.size(); i++) { + string seqAlign = seqs[trace[i].row]->getAligned(); + seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); + chimera += seqAlign; + } + + return chimera; + } + catch(exception& e) { + errorOut(e, "Maligner", "constructChimericSeq"); + exit(1); + } +} +//*************************************************************************************************************** +float Maligner::computePercentID(string queryAlign, string chimera) { + try { + + if (queryAlign.length() != chimera.length()) { + mothurOut("Error, alignment strings are of different lengths: "); mothurOutEndLine(); + mothurOut(toString(queryAlign.length())); mothurOutEndLine(); mothurOutEndLine(); mothurOutEndLine(); mothurOutEndLine(); + mothurOut(toString(chimera.length())); mothurOutEndLine(); + return -1.0; + } + + + int numBases = 0; + int numIdentical = 0; + + for (int i = 0; i < queryAlign.length(); i++) { + if ((isalpha(queryAlign[i])) || (isalpha(chimera[i]))) { + numBases++; + if (queryAlign[i] == chimera[i]) { + numIdentical++; + } + } + } + + if (numBases == 0) { return 0; } + + float percentIdentical = (numIdentical/(float)numBases) * 100; + + return percentIdentical; + + } + catch(exception& e) { + errorOut(e, "Maligner", "computePercentID"); + exit(1); + } +} +//*************************************************************************************************************** + diff --git a/maligner.h b/maligner.h new file mode 100644 index 0000000..6277e73 --- /dev/null +++ b/maligner.h @@ -0,0 +1,77 @@ +#ifndef MALIGNER_H +#define MALIGNER_H +/* + * maligner.h + * Mothur + * + * Created by westcott on 9/23/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "decalc.h" + +/***********************************************************************/ +//This class was modeled after the chimeraMaligner written by the Broad Institute +/***********************************************************************/ +struct score_struct { + int prev; + int score; + int row; + int col; +}; +/***********************************************************************/ +struct trace_struct { + int col; + int oldCol; + int row; +}; +/***********************************************************************/ +struct results { + int regionStart; + int regionEnd; + string parent; + float queryToParent; + float queryToParentLocal; + float divR; +}; + +/**********************************************************************/ +class Maligner { + + public: + + Maligner(vector, int, int, int, float, int); + ~Maligner() {}; + + string getResults(Sequence*); + float getPercentID() { return percentIdenticalQueryChimera; } + vector getOutput() { return outputResults; } + + + private: + DeCalculator* decalc; + Sequence* query; + vector refSeqs; + vector db; + int numWanted, matchScore, misMatchPenalty, minCoverage; + float minDivR, percentIdenticalQueryChimera; + vector outputResults; + + vector minCoverageFilter(vector); //removes top matches that do not have minimum coverage with query. + int computeChimeraPenalty(); + void verticalFilter(vector); + + vector< vector > buildScoreMatrix(int, int); + void fillScoreMatrix(vector >&, vector, int); + vector extractHighestPath(vector >); + vector mapTraceRegionsToAlignment(vector, vector); + string constructChimericSeq(vector, vector); + float computePercentID(string, string); + +}; + +/***********************************************************************/ + +#endif + diff --git a/mothur.cpp b/mothur.cpp index 99ff5c6..4618927 100644 --- a/mothur.cpp +++ b/mothur.cpp @@ -43,7 +43,7 @@ int main(int argc, char *argv[]){ //header mothurOut("mothur v.1.6.0"); mothurOutEndLine(); - mothurOut("Last updated: 9/15/2009"); + mothurOut("Last updated: 10/6/2009"); mothurOutEndLine(); mothurOutEndLine(); mothurOut("by"); diff --git a/pintail.cpp b/pintail.cpp index 7ae3ff6..e92e19f 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -439,28 +439,9 @@ vector Pintail::findPairs(int start, int end) { vector seqsMatches; for(int i = start; i < end; i++){ - - float smallest = 10000.0; - Sequence query = *(querySeqs[i]); - Sequence* match; - - for(int j = 0; j < templateSeqs.size(); j++){ - - Sequence temp = *(templateSeqs[j]); - - distcalculator->calcDist(query, temp); - float dist = distcalculator->getDist(); - - if (dist < smallest) { - match = templateSeqs[j]; - smallest = dist; - } - } - - //make copy so trimSeqs doesn't overtrim - Sequence* copy = new Sequence(match->getName(), match->getAligned()); - seqsMatches.push_back(copy); + vector copy = decalc->findClosest(querySeqs[i], templateSeqs, 1); + seqsMatches.push_back(copy[0]); } return seqsMatches; diff --git a/sharedcommand.cpp b/sharedcommand.cpp index c3f99d2..2ce9bbc 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -66,6 +66,9 @@ int SharedCommand::execute(){ if (SharedList->getNumSeqs() != groupMap->getNumSeqs()) { mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine(); + out.close(); + remove(filename.c_str()); //remove blank shared file you made + createMisMatchFile(); //delete memory diff --git a/slayer.cpp b/slayer.cpp new file mode 100644 index 0000000..d838a2e --- /dev/null +++ b/slayer.cpp @@ -0,0 +1,133 @@ +/* + * slayer.cpp + * Mothur + * + * Created by westcott on 9/25/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "slayer.h" + +/***********************************************************************/ +Slayer::Slayer(int win, int increment, int parentThreshold, float div) : + windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div) {} +/***********************************************************************/ +void Slayer::getResults(Sequence* query, vector refSeqs) { + try { + + for (int i = 0; i < refSeqs.size(); i++) { + + for (int j = i+1; j < refSeqs.size(); j++) { + + //make copies of query and each parent because runBellerophon removes gaps and messes them up + Sequence* q = new Sequence(query->getName(), query->getAligned()); + Sequence* leftParent = new Sequence(refSeqs[i]->getName(), refSeqs[i]->getAligned()); + Sequence* rightParent = new Sequence(refSeqs[j]->getName(), refSeqs[j]->getAligned()); + + vector divs = runBellerophon(q, leftParent, rightParent); + + delete q; + delete leftParent; + delete rightParent; + } + + } + + + } + catch(exception& e) { + errorOut(e, "Slayer", "getResults"); + exit(1); + } +} +/***********************************************************************/ +vector Slayer::runBellerophon(Sequence* query, Sequence* parentA, Sequence* parentB) { + try{ + + vector data; + + //vertical filter + vector temp; + verticalFilter(temp); + + int alignLength = query->getAligned().length(); + + + + + return data; + + } + catch(exception& e) { + errorOut(e, "Slayer", "runBellerophon"); + exit(1); + } +} +/***********************************************************************/ +float Slayer::computePercentID(string queryFrag, string parent, int left, int right) { + try { + int total = 0; + int matches = 0; + + for (int i = left; i <= right; i++) { + total++; + if (queryFrag[i] == parent[i]) { + matches++; + } + } + + float percentID =( matches/(float)total) * 100; + + return percentID; + } + catch(exception& e) { + errorOut(e, "Slayer", "computePercentID"); + exit(1); + } +} +/***********************************************************************/ +//this is a vertical filter +void Slayer::verticalFilter(vector seqs) { + try { + vector gaps; gaps.resize(seqs[0]->getAligned().length(), 0); + + string filterString = (string(seqs[0]->getAligned().length(), '1')); + + //for each sequence + for (int i = 0; i < seqs.size(); i++) { + + string seqAligned = seqs[i]->getAligned(); + + for (int j = 0; j < seqAligned.length(); j++) { + //if this spot is a gap + if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; } + } + } + + //zero out spot where all sequences have blanks + int numColRemoved = 0; + for(int i = 0; i < seqs[0]->getAligned().length(); i++){ + if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; } + } + + //for each sequence + for (int i = 0; i < seqs.size(); i++) { + + string seqAligned = seqs[i]->getAligned(); + string newAligned = ""; + + for (int j = 0; j < seqAligned.length(); j++) { + //if this spot is not a gap + if (filterString[j] == '1') { newAligned += seqAligned[j]; } + } + + seqs[i]->setAligned(newAligned); + } + } + catch(exception& e) { + errorOut(e, "Slayer", "verticalFilter"); + exit(1); + } +} +/***********************************************************************/ diff --git a/slayer.h b/slayer.h new file mode 100644 index 0000000..e898989 --- /dev/null +++ b/slayer.h @@ -0,0 +1,58 @@ +#ifndef SLAYER_H +#define SLAYER_H +/* + * slayer.h + * Mothur + * + * Created by westcott on 9/25/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + + +#include "sequence.hpp" + +/***********************************************************************/ +//This class was modeled after the chimeraSlayer written by the Broad Institute +/***********************************************************************/ +struct data_struct { //not right needs work... + int regionStart; + int regionEnd; + string parent; + float queryToParent; + float queryToParentLocal; + float divR; +}; +/***********************************************************************/ + + +class Slayer { + + public: + + Slayer(int, int, int, float); + ~Slayer() {}; + + void getResults(Sequence*, vector); + //float getPercentID() { return percentIdenticalQueryChimera; } + //vector getOutput() { return outputResults; } + + + private: + + int windowSize, windowStep, parentFragmentThreshold; + float divRThreshold; + + void verticalFilter(vector); + float computePercentID(string, string, int, int); + + vector runBellerophon(Sequence*, Sequence*, Sequence*); + + +}; + +/***********************************************************************/ + +#endif + + diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index d25c46a..e15d56b 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -188,9 +188,9 @@ void TreeGroupCommand::help(){ TreeGroupCommand::~TreeGroupCommand(){ if (abort == false) { - if (format == "sharedfile") { delete read; delete input; globaldata->ginput = NULL;} + if (format == "sharedfile") { delete read; delete input; globaldata->ginput = NULL; } else { delete readMatrix; delete matrix; delete list; } - delete tmap; + delete tmap; globaldata->gTreemap = NULL; delete validCalculator; } -- 2.39.2