]> git.donarmstrong.com Git - mothur.git/commitdiff
started adding chimeraslayer method and fixed minor bug in treegroupscommand deconstr...
authorwestcott <westcott>
Mon, 28 Sep 2009 14:31:48 +0000 (14:31 +0000)
committerwestcott <westcott>
Mon, 28 Sep 2009 14:31:48 +0000 (14:31 +0000)
18 files changed:
Mothur.xcodeproj/project.pbxproj
ccode.cpp
chimera.cpp
chimera.h
chimeraseqscommand.cpp
chimeraseqscommand.h
chimeraslayer.cpp [new file with mode: 0644]
chimeraslayer.h [new file with mode: 0644]
decalc.cpp
decalc.h
maligner.cpp [new file with mode: 0644]
maligner.h [new file with mode: 0644]
mothur.cpp
pintail.cpp
sharedcommand.cpp
slayer.cpp [new file with mode: 0644]
slayer.h [new file with mode: 0644]
treegroupscommand.cpp

index 59291a3d80849c433545b63e555c8f0af52084c9..6dbf3194aaaeb4d257f81822fc114dfcffa279d0 100644 (file)
                A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; };
                A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; };
                A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
+               A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; };
+               A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; };
                A7E4A783106913F900688F62 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A782106913F900688F62 /* getsharedotucommand.cpp */; };
+               A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A823106A9AD700688F62 /* maligner.cpp */; };
                EB1216880F619B83004A865F /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216870F619B83004A865F /* bergerparker.cpp */; };
                EB1216E50F61ACFB004A865F /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216E40F61ACFB004A865F /* bstick.cpp */; };
                EB1217230F61C9AC004A865F /* sharedkstest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1217220F61C9AC004A865F /* sharedkstest.cpp */; };
                A70B53A70F4CD7AD0064797E /* getlabelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlabelcommand.h; sourceTree = SOURCE_ROOT; };
                A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlinecommand.cpp; sourceTree = SOURCE_ROOT; };
                A70B53A90F4CD7AD0064797E /* getlinecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlinecommand.h; sourceTree = SOURCE_ROOT; };
-               A70DECD71063D8B40057C03C /* secondarystructurecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = secondarystructurecommand.h; sourceTree = "<group>"; };
-               A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = "<group>"; };
+               A70DECD71063D8B40057C03C /* secondarystructurecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = secondarystructurecommand.h; sourceTree = SOURCE_ROOT; };
+               A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; };
                A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; };
                A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; };
                A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
                A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
-               A7E4A781106913F900688F62 /* getsharedotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getsharedotucommand.h; sourceTree = "<group>"; };
-               A7E4A782106913F900688F62 /* getsharedotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getsharedotucommand.cpp; sourceTree = "<group>"; };
+               A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
+               A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayer.cpp; sourceTree = SOURCE_ROOT; };
+               A7B0450C106CEEC90046FC83 /* slayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slayer.h; sourceTree = SOURCE_ROOT; };
+               A7B0450D106CEEC90046FC83 /* slayer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slayer.cpp; sourceTree = SOURCE_ROOT; };
+               A7E4A781106913F900688F62 /* getsharedotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getsharedotucommand.h; sourceTree = SOURCE_ROOT; };
+               A7E4A782106913F900688F62 /* getsharedotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getsharedotucommand.cpp; sourceTree = SOURCE_ROOT; };
+               A7E4A822106A9AD700688F62 /* maligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = maligner.h; sourceTree = SOURCE_ROOT; };
+               A7E4A823106A9AD700688F62 /* maligner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = maligner.cpp; sourceTree = SOURCE_ROOT; };
                C6859E8B029090EE04C91782 /* Mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Mothur.1; sourceTree = "<group>"; };
                EB1216860F619B83004A865F /* bergerparker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bergerparker.h; sourceTree = SOURCE_ROOT; };
                EB1216870F619B83004A865F /* bergerparker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bergerparker.cpp; sourceTree = SOURCE_ROOT; };
                                A75B887B104C16860083C454 /* ccode.cpp */,
                                A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */,
                                A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */,
+                               A7B04491106CC3E60046FC83 /* chimeraslayer.h */,
+                               A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */,
                                372A55531017922B00C5194B /* decalc.h */,
                                372A55541017922B00C5194B /* decalc.cpp */,
+                               A7E4A822106A9AD700688F62 /* maligner.h */,
+                               A7E4A823106A9AD700688F62 /* maligner.cpp */,
                                374F301110063B6F00E97C66 /* pintail.h */,
                                374F301210063B6F00E97C66 /* pintail.cpp */,
+                               A7B0450C106CEEC90046FC83 /* slayer.h */,
+                               A7B0450D106CEEC90046FC83 /* slayer.cpp */,
                        );
                        name = chimera;
                        sourceTree = "<group>";
                                A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */,
                                A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */,
                                A7E4A783106913F900688F62 /* getsharedotucommand.cpp in Sources */,
+                               A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */,
+                               A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */,
+                               A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 0f1dd50aba5fec78ddd3aff3667533fcdef500b4..a43a5a10e9e1aa72eb0e26f858de55e5b507afc0 100644 (file)
--- a/ccode.cpp
+++ b/ccode.cpp
@@ -1480,7 +1480,7 @@ void Ccode::createProcessesVariances() {
                        
                        //find the averages for the query 
                        for (int i = 0; i < querySeqs.size(); i++) {
-                               findVarianceQuery(i);  //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
+                               findVarianceQuery(i);  //fills v arQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
                        }
 #endif         
        }
index 6ec24d155263e86951ed600ac59cbbe05c932581..8851b755a09a0b030418e7881d09caedb4753584 100644 (file)
@@ -10,7 +10,7 @@
 #include "chimera.h"
 
 //***************************************************************************************************************
-//this is a vertical filter
+//this is a vertical soft filter
 void Chimera::createFilter(vector<Sequence*> seqs) {
        try {
                
@@ -40,6 +40,7 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
                        }
                }
                
+               //zero out spot where all sequences have blanks
                //zero out spot where all sequences have blanks
                int numColRemoved = 0;
                for(int i = 0;i < seqs[0]->getAligned().length(); i++){
@@ -48,6 +49,7 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
                        else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) {      filterString[i] = '0';  numColRemoved++;  }
                        //cout << "a = " << a[i] <<  " t = " << t[i] <<  " g = " << g[i] <<  " c = " << c[i] << endl;
                }
+       
 //cout << "filter = " << filterString << endl; 
 
                mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  mothurOutEndLine();
index 5df383ddcc56e62e853babb4f23e0978ce76d9c1..3efe0e4b63095042de78f3938118c7ab1d642656 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -71,6 +71,12 @@ class Chimera {
                virtual void setKmerSize(int k)                 {       kmerSize = k;           }
                virtual void setSVG(int s)                              {       svg = s;                        }
                virtual void setName(string n)                  {       name = n;                       }
+               virtual void setMatch(int m)                    {       match = m;                      }
+               virtual void setMisMatch(int m)                 {       misMatch = m;           }
+               virtual void setDivR(float d)                   {       divR = d;                       }
+               virtual void setParents(int p)                  {       parents = p;            }
+               virtual void setMinSim(int s)                   {       minSim = s;                     }
+
                
                virtual void setCons(string){};
                virtual void setQuantiles(string){};
@@ -88,7 +94,8 @@ class Chimera {
        protected:
                
                bool filter, correction, svg;
-               int processors, window, increment, numWanted, kmerSize;
+               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents;
+               float divR;
                string seqMask, quanfile, filterString, name;
                        
 
index bd0ad956aababd99e7e926cfd7a305c20890f428..925de63c17bf44011e9bb29e1e4b9a7a980fd3ff 100644 (file)
@@ -12,6 +12,7 @@
 #include "pintail.h"
 #include "ccode.h"
 #include "chimeracheckrdp.h"
+#include "chimeraslayer.h"
 
 
 //***************************************************************************************************************
@@ -25,7 +26,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -86,15 +87,34 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        temp = validParameter.validFile(parameters, "svg", false);                              if (temp == "not found") { temp = "F"; }
                        svg = isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       temp = validParameter.validFile(parameters, "window", false);   
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }                     
+                       else if (temp == "not found") { temp = "0"; }
                        convert(temp, window);
-                                       
+                       
+                       temp = validParameter.validFile(parameters, "match", false);                    if (temp == "not found") { temp = "5"; }
+                       convert(temp, match);
+                       
+                       temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
+                       convert(temp, mismatch);
+                       
+                       temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.0"; }
+                       convert(temp, divR);
+                       
+                       temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
+                       convert(temp, minSimilarity);
+                       
+                       temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "5"; }
+                       convert(temp, parents); 
+                        
                        temp = validParameter.validFile(parameters, "increment", false);                
-                       if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
+                       if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
                        else if (temp == "not found") { temp = "25"; }
                        convert(temp, increment);
                        
-                       temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "20"; }
+                       temp = validParameter.validFile(parameters, "numwanted", false);
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; }              
+                       else if (temp == "not found") { temp = "20"; }
                        convert(temp, numwanted);
 
                        
@@ -170,6 +190,7 @@ int ChimeraSeqsCommand::execute(){
                else if (method == "pintail")                   {               chimera = new Pintail(fastafile, templatefile);                         }
                else if (method == "ccode")                             {               chimera = new Ccode(fastafile, templatefile);                           }
                else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, templatefile);         }
+               else if (method == "chimeraslayer")             {               chimera = new ChimeraSlayer(fastafile, templatefile);           }
                else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
                
                //set user options
@@ -191,6 +212,12 @@ int ChimeraSeqsCommand::execute(){
                chimera->setKmerSize(ksize);
                chimera->setSVG(svg);
                chimera->setName(namefile);
+               chimera->setMatch(match);
+               chimera->setMisMatch(mismatch);
+               chimera->setDivR(divR);
+               chimera->setParents(parents);
+               chimera->setMinSim(minSimilarity);
+               
                                
                //find chimeras
                chimera->getChimeras();
index 98504b0e1155f6d2254c4e89f1602381d7a2fd8e..865a84d378d0429a77e833aa5b1db7eca097a83f 100644 (file)
@@ -30,7 +30,8 @@ private:
        bool abort;
        string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile;
        bool filter, correction, svg;
-       int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize;
+       int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity;
+       float divR;
        Chimera* chimera;
        
        
diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp
new file mode 100644 (file)
index 0000000..293ee80
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ *  chimeraslayer.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 9/25/09.
+ *  Copyright 2009 Pschloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimeraslayer.h"
+
+//***************************************************************************************************************
+ChimeraSlayer::ChimeraSlayer(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
+//***************************************************************************************************************
+
+ChimeraSlayer::~ChimeraSlayer() {
+       try {
+               for (int i = 0; i < querySeqs.size(); i++)                      {  delete querySeqs[i];                 }
+               for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
+               exit(1);
+       }
+}      
+//***************************************************************************************************************
+void ChimeraSlayer::print(ostream& out) {
+       try {
+               
+               mothurOutEndLine();
+               
+                               
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSlayer", "print");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+void ChimeraSlayer::getChimeras() {
+       try {
+               
+               //read in query sequences and subject sequences
+               mothurOut("Reading sequences and template file... "); cout.flush();
+               querySeqs = readSeqs(fastafile);
+               templateSeqs = readSeqs(templateFile);
+               mothurOut("Done."); mothurOutEndLine();
+               
+               int numSeqs = querySeqs.size();
+               
+               //break up file if needed
+               int linesPerProcess = numSeqs / processors ;
+               
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       //find breakup of sequences for all times we will Parallelize
+                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
+                       else {
+                               //fill line pairs
+                               for (int i = 0; i < (processors-1); i++) {                      
+                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
+                               }
+                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
+                               int i = processors - 1;
+                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
+                       }
+               #else
+                       lines.push_back(new linePair(0, numSeqs));
+               #endif
+               
+               if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
+               
+               //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
+               maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
+               slayer = new Slayer(window, increment, minSim, divR);
+               
+               for (int i = 0; i < querySeqs.size(); i++) {
+               
+                       string chimeraFlag = maligner->getResults(querySeqs[i]);
+                       float percentIdentical = maligner->getPercentID();
+                       vector<results> Results = maligner->getOutput();
+                       
+                       cout << querySeqs[4]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
+                       
+                       for (int j = 0; j < Results.size(); j++) {
+                               cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
+                               
+                       }
+                       
+                       if (chimeraFlag == "yes") {
+                       
+                               //get sequence that were given from maligner results
+                               vector<SeqDist> seqs;
+                               for (int j = 0; j < Results.size(); j++) {
+                                       Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
+                                       
+                                       //seq = NULL if error occurred in getSequence
+                                       if (seq == NULL) {  break;      }
+                                       else {  
+                                               SeqDist member;
+                                               member.seq = seq;
+                                               member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
+                                               seqs.push_back(member); 
+                                       }
+                               }
+                       
+                               //limit number of parents to explore - default 5
+                               if (Results.size() > parents) {
+                                       //sort by distance
+                                       sort(seqs.begin(), seqs.end(), compareSeqDist);
+                                       //prioritize larger more similiar sequence fragments
+                                       reverse(seqs.begin(), seqs.end());
+                                       
+                                       for (int k = seqs.size()-1; k > (parents-1); k--)  {  
+                                               delete seqs[k].seq;
+                                               seqs.pop_back();        
+                                       }
+                               }
+                               
+                               //put seqs into vector to send to slayer
+                               vector<Sequence*> seqsForSlayer;
+                               for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+                       
+                               //mask then send to slayer...
+                               if (seqMask != "") {
+                                       decalc->setMask(seqMask);
+
+                                       //mask querys
+                                       decalc->runMask(querySeqs[i]);
+                                       
+                                       //mask parents
+                                       for (int k = 0; k < seqsForSlayer.size(); k++) {
+                                               decalc->runMask(seqsForSlayer[k]);
+                                       }
+                                       
+                               }
+                               
+                               //send to slayer
+                               slayer->getResults(querySeqs[i], seqsForSlayer);
+                               
+                               //free memory
+                               for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
+                       }
+                       
+               }       
+               //free memory
+               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
+               
+               if (seqMask != "") {
+                       delete decalc; 
+               }
+
+                       
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSlayer", "getChimeras");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+Sequence* ChimeraSlayer::getSequence(string name) {
+       try{
+               Sequence* temp;
+               
+               //look through templateSeqs til you find it
+               int spot = -1;
+               for (int i = 0; i < templateSeqs.size(); i++) {
+                       if (name == templateSeqs[i]->getName()) {  
+                               spot = i;
+                               break;
+                       }
+               }
+               
+               if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
+               
+               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               
+               return temp;
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSlayer", "getSequence");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+
+
diff --git a/chimeraslayer.h b/chimeraslayer.h
new file mode 100644 (file)
index 0000000..617ba7e
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef CHIMERASLAYER_H
+#define CHIMERASLAYER_H
+
+/*
+ *  chimeraslayer.h
+ *  Mothur
+ *
+ *  Created by westcott on 9/25/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "chimera.h"
+#include "maligner.h"
+#include "slayer.h"
+
+/***********************************************************************/
+//This class was modeled after the chimeraSlayer written by the Broad Institute
+/***********************************************************************/
+
+
+class ChimeraSlayer : public Chimera {
+       
+       public:
+               ChimeraSlayer(string, string);  
+               ~ChimeraSlayer();
+               
+               void getChimeras();
+               void print(ostream&);
+               
+               void setCons(string){};
+               void setQuantiles(string q) {};
+               
+               
+       private:
+               DeCalculator* decalc;
+               Maligner* maligner;
+               Slayer* slayer;
+               vector<linePair*> lines;
+               vector<Sequence*> querySeqs;
+               vector<Sequence*> templateSeqs;
+                               
+               string fastafile, templateFile;
+               
+               Sequence* getSequence(string);  //find sequence from name
+                       
+                               
+};
+
+/************************************************************************/
+
+#endif
+
+
index bfaae000386c872584952b27b5dcc64f52a785ea..177636ccd1214e5ea713fcaeeb388ceb0c715b33 100644 (file)
@@ -8,6 +8,10 @@
  */
 
 #include "decalc.h"
+#include "chimera.h"
+#include "dist.h"
+#include "eachgapdist.h"
+
 
 //***************************************************************************************************************
 void DeCalculator::setMask(string m) { 
@@ -675,5 +679,144 @@ float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
        }
 }
 //***************************************************************************************************************
+vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int numWanted) {
+       try {
+               
+               vector<Sequence*> seqsMatches;  
+               vector<SeqDist> dists;
+               
+               Dist* distcalculator = new eachGapDist();
+               
+               Sequence query = *(querySeq);
+               
+               for(int j = 0; j < db.size(); j++){
+                       
+                       Sequence temp = *(db[j]);
+                       
+                       distcalculator->calcDist(query, temp);
+                       float dist = distcalculator->getDist();
+                       
+                       SeqDist subject;
+                       subject.seq = db[j];
+                       subject.dist = dist;
+                       
+                       dists.push_back(subject);
+               }
+               
+               delete distcalculator;
+               
+               sort(dists.begin(), dists.end(), compareSeqDist);
+               
+               for (int i = 0; i < numWanted; i++) {
+                       Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
+                       seqsMatches.push_back(temp);
+               }
+               
+               return seqsMatches;
+       }
+       catch(exception& e) {
+               errorOut(e, "DeCalculator", "findClosest");
+               exit(1);
+       }
+}
+/***************************************************************************************************************/
+void DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
+       try {
+               
+               int frontPos = 0;  //should contain first position in all seqs that is not a gap character
+               int rearPos = query->getAligned().length();
+               
+               //********find first position in topMatches that is a non gap character***********//
+               //find first position all query seqs that is a non gap character
+               for (int i = 0; i < topMatches.size(); i++) {
+                       
+                       string aligned = topMatches[i]->getAligned();
+                       int pos = 0;
+                       
+                       //find first spot in this seq
+                       for (int j = 0; j < aligned.length(); j++) {
+                               if (isalpha(aligned[j])) {
+                                       pos = j;
+                                       break;
+                               }
+                       }
+                       
+                       //save this spot if it is the farthest
+                       if (pos > frontPos) { frontPos = pos; }
+               }
+               
+               
+               string aligned = query->getAligned();
+               int pos = 0;
+                       
+               //find first position in query that is a non gap character
+               for (int j = 0; j < aligned.length(); j++) {
+                       if (isalpha(aligned[j])) {
+                               pos = j;
+                               break;
+                       }
+               }
+               
+               //save this spot if it is the farthest
+               if (pos > frontPos) { frontPos = pos; }
+               
+               
+               //********find last position in topMatches that is a non gap character***********//
+               for (int i = 0; i < topMatches.size(); i++) {
+                       
+                       string aligned = topMatches[i]->getAligned();
+                       int pos = aligned.length();
+                       
+                       //find first spot in this seq
+                       for (int j = aligned.length()-1; j >= 0; j--) {
+                               if (isalpha(aligned[j])) {
+                                       pos = j;
+                                       break;
+                               }
+                       }
+                       
+                       //save this spot if it is the farthest
+                       if (pos < rearPos) { rearPos = pos; }
+               }
+               
+               
+               aligned = query->getAligned();
+               pos = aligned.length();
+               
+               //find last position in query that is a non gap character
+               for (int j = aligned.length()-1; j >= 0; j--) {
+                       if (isalpha(aligned[j])) {
+                               pos = j;
+                               break;
+                       }
+               }
+               
+               //save this spot if it is the farthest
+               if (pos < rearPos) { rearPos = pos; }
+
+               //check to make sure that is not whole seq
+               if ((rearPos - frontPos - 1) <= 0) {  mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1);  }
+//cout << "front = " << frontPos << " rear = " << rearPos << endl;             
+               //trim query
+               string newAligned = query->getAligned();
+               newAligned = newAligned.substr(frontPos, (rearPos-frontPos));
+               query->setAligned(newAligned);
+               
+               //trim topMatches
+               for (int i = 0; i < topMatches.size(); i++) {
+                       newAligned = topMatches[i]->getAligned();
+                       newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
+                       topMatches[i]->setAligned(newAligned);
+               }
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "DeCalculator", "trimSequences");
+               exit(1);
+       }
+
+}
+//***************************************************************************************************************
+
 
 
index dca562a24e549a0a640006b97415e27b804c688a..5f0a82025c557a75c3adc4715526fa17bd5fe5a5 100644 (file)
--- a/decalc.h
+++ b/decalc.h
@@ -39,11 +39,13 @@ class DeCalculator {
                DeCalculator() {};
                ~DeCalculator() {};
                
+               vector<Sequence*> findClosest(Sequence*, vector<Sequence*>, int);  //takes querySeq, a reference db and numWanted - returns indexes to closest seqs in db
                set<int> getPos() {  return h;  }
                void setMask(string); 
                void setAlignmentLength(int l) {  alignLength = l;  }
                void runMask(Sequence*);
                void trimSeqs(Sequence*, Sequence*, map<int, int>&);
+               void trimSeqs(Sequence*, vector<Sequence*>);
                void removeObviousOutliers(vector< vector<quanMember> >&, int);
                vector<float> calcFreq(vector<Sequence*>, string);
                vector<int> findWindows(Sequence*, int, int, int&, int);
diff --git a/maligner.cpp b/maligner.cpp
new file mode 100644 (file)
index 0000000..824a2cc
--- /dev/null
@@ -0,0 +1,459 @@
+/*
+ *  maligner.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 9/23/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "maligner.h"
+
+/***********************************************************************/
+Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, float div, int minCov) :
+               db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minCoverage(minCov) {}
+/***********************************************************************/
+string Maligner::getResults(Sequence* q) {
+       try {
+               
+               //make copy so trimming doesn't destroy query from calling class - remember to deallocate
+               query = new Sequence(q->getName(), q->getAligned());
+               
+               string chimera;
+       
+               decalc = new DeCalculator();
+               
+               //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
+               refSeqs = decalc->findClosest(query, db, numWanted);
+               
+               ofstream out;
+               string outFile = "parentsOf" + query->getName();
+               openOutputFile(outFile, out);
+               for (int i = 0; i < refSeqs.size(); i++) {   refSeqs[i]->printSequence(out);    }
+               out.close();
+               
+               refSeqs = minCoverageFilter(refSeqs);
+               
+               if (refSeqs.size() < 2)  { 
+                       for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
+                       percentIdenticalQueryChimera = 0.0;
+                       return "unknown"; 
+               }
+               
+               int chimeraPenalty = computeChimeraPenalty();
+       
+               //trims seqs to first non gap char in all seqs and last non gap char in all seqs
+               decalc->trimSeqs(query, refSeqs);
+               
+               vector<Sequence*> temp = refSeqs;
+               temp.push_back(query);
+               
+               verticalFilter(temp);
+
+               vector< vector<score_struct> > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes
+               
+               fillScoreMatrix(matrix, refSeqs, chimeraPenalty);
+               
+               vector<score_struct> path = extractHighestPath(matrix);
+               
+               vector<trace_struct> trace = mapTraceRegionsToAlignment(path, refSeqs);
+               
+               if (trace.size() > 1) {         chimera = "yes";        }
+               else { chimera = "no";  }
+               
+               int traceStart = path[0].col;
+               int traceEnd = path[path.size()-1].col;
+               
+               string queryInRange = query->getAligned();
+               queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1));
+       
+               string chimeraSeq = constructChimericSeq(trace, refSeqs);
+               
+               percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);
+               
+               delete decalc;
+               
+               //save output results
+               for (int i = 0; i < trace.size(); i++) {
+                       int regionStart = trace[i].col;
+                       int regionEnd = trace[i].oldCol;
+                       int seqIndex = trace[i].row;
+                       
+                       results temp;
+                       
+                       temp.parent = refSeqs[seqIndex]->getName();
+                       temp.regionStart = regionStart;
+                       temp.regionEnd = regionEnd;
+                       
+                       string parentInRange = refSeqs[seqIndex]->getAligned();
+                       parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1));
+                       
+                       temp.queryToParent = computePercentID(queryInRange, parentInRange);
+                       temp.divR = (percentIdenticalQueryChimera / temp.queryToParent);
+                       
+                       string queryInRegion = query->getAligned();
+                       queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1));
+                       
+                       string parentInRegion = refSeqs[seqIndex]->getAligned();
+                       parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1));
+                       
+                       temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion);
+               
+                       outputResults.push_back(temp);
+               }
+               
+               //free memory
+               delete query;
+               for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
+               
+               return chimera;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "getResults");
+               exit(1);
+       }
+}
+/***********************************************************************/
+//removes top matches that do not have minimum coverage with query.
+vector<Sequence*> Maligner::minCoverageFilter(vector<Sequence*> ref){  
+       try {
+               vector<Sequence*> newRefs;
+               
+               string queryAligned = query->getAligned();
+               
+               for (int i = 0; i < ref.size(); i++) {
+                       
+                       string refAligned = ref[i]->getAligned();
+                       
+                       int numBases = 0;
+                       int numCovered = 0;
+                       
+                       //calculate coverage
+                       for (int j = 0; j < queryAligned.length(); j++) {
+                               
+                               if (isalpha(queryAligned[j])) {
+                                       numBases++;
+                                       
+                                       if (isalpha(refAligned[j])) {
+                                               numCovered++;
+                                       }
+                               }
+                       }
+                       
+                       int coverage = ((numCovered/(float)numBases)*100);
+                       
+                       //if coverage above minimum
+                       if (coverage > minCoverage) {
+                               newRefs.push_back(ref[i]);
+                       }
+               }
+               
+               return newRefs;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "minCoverageFilter");
+               exit(1);
+       }
+}
+/***********************************************************************/
+// a breakpoint should yield fewer mismatches than this number with respect to the best parent sequence.
+int Maligner::computeChimeraPenalty() {
+       try {
+               
+               int numAllowable = ((1.0 - (1.0/minDivR)) * query->getNumBases());
+       
+               int penalty = int(numAllowable + 1) * misMatchPenalty;
+                                                                                        
+               return penalty;
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "computeChimeraPenalty");
+               exit(1);
+       }
+}
+/***********************************************************************/
+//this is a vertical filter
+void Maligner::verticalFilter(vector<Sequence*> seqs) {
+       try {
+               vector<int> gaps;       gaps.resize(query->getAligned().length(), 0);
+               
+               string filterString = (string(query->getAligned().length(), '1'));
+               
+               //for each sequence
+               for (int i = 0; i < seqs.size(); i++) {
+               
+                       string seqAligned = seqs[i]->getAligned();
+                       
+                       for (int j = 0; j < seqAligned.length(); j++) {
+                               //if this spot is a gap
+                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.'))   {       gaps[j]++;      }
+                       }
+               }
+               
+               //zero out spot where all sequences have blanks
+               int numColRemoved = 0;
+               for(int i = 0; i < seqs[0]->getAligned().length(); i++){
+                       if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
+               }
+               
+               //for each sequence
+               for (int i = 0; i < seqs.size(); i++) {
+               
+                       string seqAligned = seqs[i]->getAligned();
+                       string newAligned = "";
+                       
+                       for (int j = 0; j < seqAligned.length(); j++) {
+                               //if this spot is not a gap
+                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+                       }
+                       
+                       seqs[i]->setAligned(newAligned);
+               }
+
+       
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "verticalFilter");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+vector< vector<score_struct> > Maligner::buildScoreMatrix(int cols, int rows) {
+       try{
+               
+               vector< vector<score_struct> > m; m.resize(rows);
+               
+               for (int i = 0; i < m.size(); i++) {
+                       for (int j = 0; j < cols; j++) {
+                               
+                               //initialize each cell
+                               score_struct temp;
+                               temp.prev = -1;
+                               temp.score = -9999999;
+                               temp.col = j;
+                               temp.row = i;
+                               
+                               m[i].push_back(temp);
+                       }
+               }
+               
+               return m;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "buildScoreMatrix");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+void Maligner::fillScoreMatrix(vector<vector<score_struct> >& m, vector<Sequence*> seqs, int penalty) {
+       try{
+               
+               //get matrix dimensions
+               int numCols = query->getAligned().length();
+               int numRows = seqs.size();
+               
+               //initialize first col
+               string queryAligned = query->getAligned();
+               for (int i = 0; i < numRows; i++) {
+                       string subjectAligned = seqs[i]->getAligned();
+                       
+                       //are you both gaps?
+                       if ((!isalpha(queryAligned[0])) && (!isalpha(subjectAligned[0]))) {
+                               m[i][0].score = 0;
+                       }else if (queryAligned[0] == subjectAligned[0]) {
+                               m[i][0].score = matchScore;
+                       }else{
+                               m[i][0].score = 0;
+                       }
+               }
+               
+               //fill rest of matrix
+               for (int j = 1; j < numCols; j++) {  //iterate through matrix columns
+               
+                       for (int i = 0; i < numRows; i++) {  //iterate through matrix rows
+                               
+                               string subjectAligned = seqs[i]->getAligned();
+                               
+                               int matchMisMatchScore = 0;
+                               //are you both gaps?
+                               if ((!isalpha(queryAligned[j])) && (!isalpha(subjectAligned[j]))) {
+                                       //leave the same
+                               }else if ((toupper(queryAligned[j]) == 'N') || (toupper(subjectAligned[j]) == 'N')) {
+                                       //leave the same
+                               }else if (queryAligned[j] == subjectAligned[j]) {
+                                       matchMisMatchScore = matchScore;
+                               }else if (queryAligned[j] != subjectAligned[j]) {
+                                       matchMisMatchScore = misMatchPenalty;
+                               }
+                               
+                               //compute score based on previous columns scores
+                               for (int prevIndex = 0; prevIndex < numRows; prevIndex++) { //iterate through rows
+                                       
+                                       int sumScore = matchMisMatchScore + m[prevIndex][j-1].score;
+                                       
+                                       //you are not at yourself
+                                       if (prevIndex != i) {   sumScore += penalty;    }
+                                       if (sumScore < 0)       {       sumScore = 0;                   }
+                                       
+                                       if (sumScore > m[i][j].score) {
+                                               m[i][j].score = sumScore;
+                                               m[i][j].prev = prevIndex;
+                                       }
+                               }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "fillScoreMatrix");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+vector<score_struct> Maligner::extractHighestPath(vector<vector<score_struct> > m) {
+       try {
+       
+               //get matrix dimensions
+               int numCols = query->getAligned().length();
+               int numRows = m.size();
+       
+       
+               //find highest score scoring matrix
+               score_struct highestStruct;
+               int highestScore = 0;
+               
+               for (int i = 0; i < numRows; i++) {
+                       for (int j = 0; j < numCols; j++) {
+                               if (m[i][j].score > highestScore) {
+                                       highestScore = m[i][j].score;
+                                       highestStruct = m[i][j];
+                               }
+                       }
+               }
+                               
+               vector<score_struct> path;
+               
+               int rowIndex = highestStruct.row;
+               int pos = highestStruct.col;
+               int score = highestStruct.score;
+               
+               while (pos >= 0 && score > 0) {
+                       score_struct temp = m[rowIndex][pos];
+                       score = temp.score;
+                       
+                       if (score > 0) {        path.push_back(temp);   }
+                       
+                       rowIndex = temp.prev;
+                       pos--;
+               }
+
+               reverse(path.begin(), path.end());
+       
+               return path;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "extractHighestPath");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> path, vector<Sequence*> seqs) {
+       try {
+               vector<trace_struct> trace;
+               
+               int region_index = path[0].row;
+               int region_start = path[0].col;
+       
+               for (int i = 1; i < path.size(); i++) {
+               
+                       int next_region_index = path[i].row;
+                       
+                       if (next_region_index != region_index) {
+                               
+                               // add trace region
+                               int col_index = path[i].col;
+                               trace_struct temp;
+                               temp.col = region_start;
+                               temp.oldCol = col_index-1;
+                               temp.row = region_index;
+                               
+                               trace.push_back(temp);
+                                                       
+                               region_index = path[i].row;
+                               region_start = col_index;
+                       }
+               }
+       
+               // get last one
+               trace_struct temp;
+               temp.col = region_start;
+               temp.oldCol = path[path.size()-1].col;
+               temp.row = region_index;
+               trace.push_back(temp);
+
+               return trace;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "mapTraceRegionsToAlignment");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+string Maligner::constructChimericSeq(vector<trace_struct> trace, vector<Sequence*> seqs) {
+       try {
+               string chimera = "";
+               
+               for (int i = 0; i < trace.size(); i++) {
+                       string seqAlign = seqs[trace[i].row]->getAligned();
+                       seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1));
+                       chimera += seqAlign;
+               }
+                       
+               return chimera;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "constructChimericSeq");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+float Maligner::computePercentID(string queryAlign, string chimera) {
+       try {
+       
+               if (queryAlign.length() != chimera.length()) {
+                       mothurOut("Error, alignment strings are of different lengths: "); mothurOutEndLine();
+                       mothurOut(toString(queryAlign.length())); mothurOutEndLine(); mothurOutEndLine(); mothurOutEndLine(); mothurOutEndLine();
+                       mothurOut(toString(chimera.length())); mothurOutEndLine();
+                       return -1.0;
+               }
+
+       
+               int numBases = 0;
+               int numIdentical = 0;
+       
+               for (int i = 0; i < queryAlign.length(); i++) {
+                       if ((isalpha(queryAlign[i])) || (isalpha(chimera[i])))  {
+                               numBases++;             
+                               if (queryAlign[i] == chimera[i]) {
+                                       numIdentical++;
+                               }
+                       }
+               }
+       
+               if (numBases == 0) { return 0; }
+       
+               float percentIdentical = (numIdentical/(float)numBases) * 100;
+
+               return percentIdentical;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "computePercentID");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
diff --git a/maligner.h b/maligner.h
new file mode 100644 (file)
index 0000000..6277e73
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef MALIGNER_H
+#define MALIGNER_H
+/*
+ *  maligner.h
+ *  Mothur
+ *
+ *  Created by westcott on 9/23/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+#include "decalc.h"
+
+/***********************************************************************/
+//This class was modeled after the chimeraMaligner written by the Broad Institute
+/***********************************************************************/
+struct score_struct {
+       int prev;
+       int score;
+       int row;
+       int col;
+};
+/***********************************************************************/
+struct trace_struct {
+       int col;
+       int oldCol;
+       int row;
+};
+/***********************************************************************/
+struct results {
+       int regionStart;
+       int regionEnd;
+       string parent;
+       float queryToParent;
+       float queryToParentLocal;
+       float divR;
+};
+
+/**********************************************************************/
+class Maligner {
+
+       public:
+               
+               Maligner(vector<Sequence*>, int, int, int, float, int);
+               ~Maligner() {};
+               
+               string getResults(Sequence*);
+               float getPercentID() {  return percentIdenticalQueryChimera;    }
+               vector<results> getOutput()  {  return outputResults;                   }
+               
+                               
+       private:
+               DeCalculator* decalc;
+               Sequence* query;
+               vector<Sequence*> refSeqs;
+               vector<Sequence*> db;
+               int numWanted, matchScore, misMatchPenalty, minCoverage;
+               float minDivR, percentIdenticalQueryChimera;
+               vector<results> outputResults;
+               
+               vector<Sequence*> minCoverageFilter(vector<Sequence*>);  //removes top matches that do not have minimum coverage with query.
+               int computeChimeraPenalty();
+               void verticalFilter(vector<Sequence*>);
+               
+               vector< vector<score_struct> > buildScoreMatrix(int, int);
+               void fillScoreMatrix(vector<vector<score_struct> >&, vector<Sequence*>, int);
+               vector<score_struct> extractHighestPath(vector<vector<score_struct> >);
+               vector<trace_struct> mapTraceRegionsToAlignment(vector<score_struct>, vector<Sequence*>);
+               string constructChimericSeq(vector<trace_struct>, vector<Sequence*>);
+               float computePercentID(string, string);
+               
+};
+
+/***********************************************************************/
+
+#endif
+
index 99ff5c6ee54e6e25c402bed6c19685f65b01d86f..46189271b470a5b58562a66438db48372214720b 100644 (file)
@@ -43,7 +43,7 @@ int main(int argc, char *argv[]){
                //header
                mothurOut("mothur v.1.6.0");
                mothurOutEndLine();             
-               mothurOut("Last updated: 9/15/2009");
+               mothurOut("Last updated: 10/6/2009");
                mothurOutEndLine();     
                mothurOutEndLine();             
                mothurOut("by");
index 7ae3ff6dbb96224197dff97db4b435880738d87b..e92e19f519fb31ec9833736e1223f3f76c9281f3 100644 (file)
@@ -439,28 +439,9 @@ vector<Sequence*> Pintail::findPairs(int start, int end) {
                vector<Sequence*> seqsMatches;  
                
                for(int i = start; i < end; i++){
-               
-                       float smallest = 10000.0;
-                       Sequence query = *(querySeqs[i]);
-                       Sequence* match;
-                       
-                       for(int j = 0; j < templateSeqs.size(); j++){
-                               
-                               Sequence temp = *(templateSeqs[j]);
-                               
-                               distcalculator->calcDist(query, temp);
-                               float dist = distcalculator->getDist();
-                               
-                               if (dist < smallest) { 
-                                       match = templateSeqs[j];
-                                       smallest = dist;
-                               }
-                       }
-                       
-                       //make copy so trimSeqs doesn't overtrim
-                       Sequence* copy = new Sequence(match->getName(), match->getAligned());
                        
-                       seqsMatches.push_back(copy);
+                       vector<Sequence*> copy = decalc->findClosest(querySeqs[i], templateSeqs, 1);
+                       seqsMatches.push_back(copy[0]);
                }
                
                return seqsMatches;
index c3f99d2704bec1fc11f0383a69f015768857aad2..2ce9bbcded2ba51409f9d130489953a3753ea438 100644 (file)
@@ -66,6 +66,9 @@ int SharedCommand::execute(){
                if (SharedList->getNumSeqs() != groupMap->getNumSeqs()) {  
                        mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); mothurOutEndLine(); 
                        
+                       out.close();
+                       remove(filename.c_str()); //remove blank shared file you made
+                       
                        createMisMatchFile();
                        
                        //delete memory
diff --git a/slayer.cpp b/slayer.cpp
new file mode 100644 (file)
index 0000000..d838a2e
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ *  slayer.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 9/25/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "slayer.h"
+
+/***********************************************************************/
+Slayer::Slayer(int win, int increment, int parentThreshold, float div) :
+               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div) {}
+/***********************************************************************/
+void Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
+       try {
+               
+               for (int i = 0; i < refSeqs.size(); i++) {
+               
+                       for (int j = i+1; j < refSeqs.size(); j++) {
+                       
+                               //make copies of query and each parent because runBellerophon removes gaps and messes them up
+                               Sequence* q = new Sequence(query->getName(), query->getAligned());
+                               Sequence* leftParent = new Sequence(refSeqs[i]->getName(), refSeqs[i]->getAligned());
+                               Sequence* rightParent = new Sequence(refSeqs[j]->getName(), refSeqs[j]->getAligned());
+                               
+                               vector<data_struct> divs = runBellerophon(q, leftParent, rightParent);
+                               
+                               delete q;
+                               delete leftParent;
+                               delete rightParent;
+                       }
+                       
+               }
+               
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "getResults");
+               exit(1);
+       }
+}
+/***********************************************************************/
+vector<data_struct> Slayer::runBellerophon(Sequence* query, Sequence* parentA, Sequence* parentB) {
+       try{
+               
+               vector<data_struct> data;
+               
+               //vertical filter
+               vector<Sequence*> temp;
+               verticalFilter(temp);
+               
+               int alignLength = query->getAligned().length();
+               
+               
+               
+               
+               return data;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "runBellerophon");
+               exit(1);
+       }
+}
+/***********************************************************************/
+float Slayer::computePercentID(string queryFrag, string parent, int left, int right) {
+       try {
+               int total = 0;
+               int matches = 0;
+       
+               for (int i = left; i <= right; i++) {
+                       total++;
+                       if (queryFrag[i] == parent[i]) {
+                               matches++;
+                       }
+               }
+
+               float percentID =( matches/(float)total) * 100;
+               
+               return percentID;
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "computePercentID");
+               exit(1);
+       }
+}
+/***********************************************************************/
+//this is a vertical filter
+void Slayer::verticalFilter(vector<Sequence*> seqs) {
+       try {
+               vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
+               
+               string filterString = (string(seqs[0]->getAligned().length(), '1'));
+               
+               //for each sequence
+               for (int i = 0; i < seqs.size(); i++) {
+               
+                       string seqAligned = seqs[i]->getAligned();
+                       
+                       for (int j = 0; j < seqAligned.length(); j++) {
+                               //if this spot is a gap
+                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.'))   {       gaps[j]++;      }
+                       }
+               }
+               
+               //zero out spot where all sequences have blanks
+               int numColRemoved = 0;
+               for(int i = 0; i < seqs[0]->getAligned().length(); i++){
+                       if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
+               }
+               
+               //for each sequence
+               for (int i = 0; i < seqs.size(); i++) {
+               
+                       string seqAligned = seqs[i]->getAligned();
+                       string newAligned = "";
+                       
+                       for (int j = 0; j < seqAligned.length(); j++) {
+                               //if this spot is not a gap
+                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+                       }
+                       
+                       seqs[i]->setAligned(newAligned);
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "verticalFilter");
+               exit(1);
+       }
+}
+/***********************************************************************/
diff --git a/slayer.h b/slayer.h
new file mode 100644 (file)
index 0000000..e898989
--- /dev/null
+++ b/slayer.h
@@ -0,0 +1,58 @@
+#ifndef SLAYER_H
+#define SLAYER_H
+/*
+ *  slayer.h
+ *  Mothur
+ *
+ *  Created by westcott on 9/25/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "sequence.hpp"
+
+/***********************************************************************/
+//This class was modeled after the chimeraSlayer written by the Broad Institute
+/***********************************************************************/
+struct data_struct { //not right needs work...
+       int regionStart;
+       int regionEnd;
+       string parent;
+       float queryToParent;
+       float queryToParentLocal;
+       float divR;
+};
+/***********************************************************************/
+
+
+class Slayer {
+
+       public:
+               
+               Slayer(int, int, int, float);
+               ~Slayer() {};
+               
+               void getResults(Sequence*, vector<Sequence*>);
+               //float getPercentID() {        return percentIdenticalQueryChimera;    }
+               //vector<results> getOutput()  {        return outputResults;                   }
+               
+                               
+       private:
+               
+               int windowSize, windowStep, parentFragmentThreshold;
+               float divRThreshold; 
+               
+               void verticalFilter(vector<Sequence*>);
+               float computePercentID(string, string, int, int);
+               
+               vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*);
+               
+                               
+};
+
+/***********************************************************************/
+
+#endif
+
+
index d25c46adde9b46ce3044a115d1c816dd552ac146..e15d56ba82268e2b5d8d95b4f5bb562df4c70868 100644 (file)
@@ -188,9 +188,9 @@ void TreeGroupCommand::help(){
 TreeGroupCommand::~TreeGroupCommand(){
        if (abort == false) {
                
-               if (format == "sharedfile") { delete read;  delete input; globaldata->ginput = NULL;}
+               if (format == "sharedfile") { delete read;  delete input; globaldata->ginput = NULL; }
                else { delete readMatrix;  delete matrix; delete list; }
-               delete tmap;
+               delete tmap;  globaldata->gTreemap = NULL;
                delete validCalculator;
        }