]> git.donarmstrong.com Git - mothur.git/commitdiff
working on chimera.slayer
authorwestcott <westcott>
Fri, 29 Apr 2011 18:14:49 +0000 (18:14 +0000)
committerwestcott <westcott>
Fri, 29 Apr 2011 18:14:49 +0000 (18:14 +0000)
12 files changed:
blastdb.cpp
chimera.h
chimerarealigner.cpp
chimeraslayer.cpp
chimeraslayer.h
chimeraslayercommand.cpp
decalc.cpp
decalc.h
maligner.cpp
maligner.h
slayer.cpp
slayer.h

index f162cdfd254d084e13ae14f3ff866db4d0805c1e..fdb5456050bcce78b620e9d3d697a334626a54ae 100644 (file)
@@ -187,7 +187,9 @@ void BlastDB::generateDB() {
                //m->mothurOut("Generating the temporary BLAST database...\t"); cout.flush();
                
                path = m->argv;
-               path = path.substr(0, (path.find_last_of('m')));
+               string tempPath = path;
+               for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
+               path = path.substr(0, (tempPath.find_last_of('m')));
        
                string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
                system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
index 58d637ec9698fa33d7c522c27cbc539595d42e5f..3f4eb4ef9f430ed9191f7de78e41df3427c4bec4 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -45,10 +45,10 @@ struct data_struct {
 struct data_results {
        vector<data_struct> results;
        string flag;
-       map<int, int> spotMap;
        Sequence trimQuery;
+       //results malignerResults;
        
-       data_results(vector<data_struct> d, string f, map<int, int> s, Sequence t) : results(d), flag(f), spotMap(s), trimQuery(t) {}
+       data_results(vector<data_struct> d, string f, map<int, int> s, Sequence t) : results(d), flag(f), trimQuery(t) {}
        data_results() {}
 };
 /***********************************************************************/
@@ -137,7 +137,7 @@ class Chimera {
        public:
        
                Chimera(){ m = MothurOut::getInstance(); length = 0; unaligned = false;  }
-               virtual ~Chimera(){     for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i];  } };
+               virtual ~Chimera(){     for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i];  } for (int i = 0; i < filteredTemplateSeqs.size(); i++) { delete filteredTemplateSeqs[i];  } };
                virtual bool getUnaligned()                             {       return unaligned;                       }
                virtual int getLength()                                 {   return length;      }
                virtual vector<Sequence*> readSeqs(string);
@@ -162,6 +162,7 @@ class Chimera {
        protected:
                
                vector<Sequence*> templateSeqs;
+               vector<Sequence*> filteredTemplateSeqs;
                bool filter, unaligned; 
                int length; 
                string seqMask, filterString, outputDir, templateFileName; 
index a07e43392a1d96727d4870154bccef452119755b..48d9bc16a14b3afa8ad7588141cc9ffaaf2e03c2 100644 (file)
@@ -82,7 +82,6 @@ void ChimeraReAligner::reAlign(Sequence* query, vector<results> parents) {
                        //make sure you don't cutoff end of query 
                        if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1);  }
                        
-                       //set query to new aligned string
                        query->setAligned(newQuery);
 
                        //free memory
index 7aa7cd4271f0ccbdcb443ae580b7ddd046b255cf..fe63ad0a9696cddc6ea1699bdc77922ef4f055c2 100644 (file)
@@ -98,8 +98,14 @@ int ChimeraSlayer::doPrep() {
                
                        if (m->control_pressed) {  return 0; } 
                
-                       //run filter on template
-                       for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  return 0; }  runFilter(templateSeqs[i]);  }
+                       //run filter on template copying templateSeqs into filteredTemplateSeqs
+                       for (int i = 0; i < templateSeqs.size(); i++) {  
+                               if (m->control_pressed) {  return 0; }
+                               
+                               Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+                               filteredTemplateSeqs.push_back(newSeq);
+                               runFilter(newSeq);  
+                       }
                }
                string  kmerDBNameLeft;
                string  kmerDBNameRight;
@@ -219,7 +225,7 @@ int ChimeraSlayer::doPrep() {
        }
 }
 //***************************************************************************************************************
-vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
+vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
        try {
                
                //when template=self, the query file is sorted from most abundance to least abundant
@@ -236,7 +242,10 @@ vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
                        if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
                        
                        //if its am not chimeric add it
-                       if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { userTemplate.push_back(templateSeqs[i]); }
+                       if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
+                               userTemplate.push_back(templateSeqs[i]); 
+                               if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
+                       }
                }
                
                string  kmerDBNameLeft;
@@ -360,15 +369,15 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
                                        if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
                                        
                                        if (trimChimera) {  
-                                               int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
-                                               int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
+                                               int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
+                                               int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
                                                
                                                string newAligned = trim->getAligned();
 
                                                if (lengthLeft > lengthRight) { //trim right
-                                                       for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                       for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                }else { //trim left
-                                                       for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
+                                                       for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
                                                }
                                                trim->setAligned(newAligned);
                                        }
@@ -434,41 +443,41 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP
                                                                                                
                                                //right side is fine so keep that
                                                if ((leftChimeric) && (!rightChimeric)) {
-                                                       for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } 
+                                                       for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
                                                }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
-                                                       for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                       for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                }else { //both sides are chimeric, keep longest piece
                                                        
-                                                       int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
-                                                       int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
+                                                       int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
+                                                       int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
                                                        
                                                        int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
                                                        int length = lengthLeftLeft;
                                                        if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
                                                        
-                                                       int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
-                                                       int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
+                                                       int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
+                                                       int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
                                                        
                                                        if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
                                                        if (lengthRightRight > length) { longest = 4; }
                                                        
                                                        if (longest == 1) { //leftleft
-                                                               for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                        }else if (longest == 2) { //leftright
                                                                //get rid of leftleft
-                                                               for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
                                                                //get rid of right
-                                                               for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                        }else if (longest == 3) { //rightleft
                                                                //get rid of left
-                                                               for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } 
+                                                               for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
                                                                //get rid of rightright
-                                                               for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                        }else { //rightright
                                                                //get rid of left
-                                                               for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } 
+                                                               for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
                                                                //get rid of rightleft
-                                                               for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
                                                        }
                                                }
                                                        
@@ -554,41 +563,41 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef
                                                
                                                //right side is fine so keep that
                                                if ((leftChimeric) && (!rightChimeric)) {
-                                                       for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } 
+                                                       for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
                                                }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
-                                                       for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                       for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                }else { //both sides are chimeric, keep longest piece
                                                        
-                                                       int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
-                                                       int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
+                                                       int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
+                                                       int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
                                                        
                                                        int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
                                                        int length = lengthLeftLeft;
                                                        if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
                                                        
-                                                       int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
-                                                       int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
+                                                       int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
+                                                       int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
                                                        
                                                        if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
                                                        if (lengthRightRight > length) { longest = 4; }
                                                        
                                                        if (longest == 1) { //leftleft
-                                                               for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                        }else if (longest == 2) { //leftright
                                                                //get rid of leftleft
-                                                               for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
                                                                //get rid of right
-                                                               for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                        }else if (longest == 3) { //rightleft
                                                                //get rid of left
-                                                               for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } 
+                                                               for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
                                                                //get rid of rightright
-                                                               for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                        }else { //rightright
                                                                //get rid of left
-                                                               for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; } 
+                                                               for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
                                                                //get rid of rightleft
-                                                               for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+                                                               for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
                                                        }
                                                }
                                                
@@ -664,14 +673,14 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
                                        delete buf2;
                                        
                                        if (trimChimera) {  
-                                               int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
-                                               int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
+                                               int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
+                                               int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
                                                
                                                string newAligned = trim->getAligned();
                                                if (lengthLeft > lengthRight) { //trim right
-                                                       for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+                                                       for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
                                                }else { //trim left
-                                                       for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+                                                       for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
                                                }
                                                trim->setAligned(newAligned);   
                                        }
@@ -719,25 +728,24 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                
                chimeraFlags = "no";
                printResults.flag = "no";
-
-               //filter query
-               spotMap = runFilter(query);     
-               printResults.spotMap = spotMap;
                
                querySeq = query;
                
                //you must create a template
                vector<Sequence*> thisTemplate;
-               if (templateFileName != "self") { thisTemplate = templateSeqs; }
-               else {  thisTemplate = getTemplate(query);  } //fills this template and creates the databases
+               vector<Sequence*> thisFilteredTemplate;
+               if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
+               else {  thisTemplate = getTemplate(query, thisFilteredTemplate);  } //fills this template and creates the databases
                
                if (m->control_pressed) {  return 0;  }
                
                if (thisTemplate.size() == 0) {  return 0; } //not chimeric
                
-               //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
-               Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
-               Slayer slayer(window, increment, minSim, divR, iters, minSNP);
+               //moved this out of maligner - 4/29/11
+               vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
+                       
+               Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
+               Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
                
                if (templateFileName == "self") {
                        if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
@@ -752,11 +760,8 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                
                vector<results> Results = maligner.getOutput();
                
-               //cout << query->getName() << endl;
-               //for (int i = 0; i < Results.size(); i++) {
-                       //cout << Results[i].parent << '\t' << Results[i].regionStart << '\t' << Results[i].regionEnd << '\t' << Results[i].nastRegionStart << '\t' << Results[i].nastRegionEnd << '\t' << Results[i].queryToParent << '\t' << Results[i].queryToParentLocal << endl;
-               //}
-               //cout << "done\n" << endl;
+               for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
+               
                if (chimeraFlag == "yes") {
                        
                        if (realign) {
@@ -809,24 +814,8 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        
                        //put seqs into vector to send to slayer
                        vector<Sequence*> seqsForSlayer;
-                       
                        for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
                        
-                       //mask then send to slayer...
-                       if (seqMask != "") {
-                               decalc->setMask(seqMask);
-                               
-                               //mask querys
-                               decalc->runMask(query);
-                               
-                               //mask parents
-                               for (int k = 0; k < seqsForSlayer.size(); k++) {
-                                       decalc->runMask(seqsForSlayer[k]);
-                               }
-                               
-                               spotMap = decalc->getMaskMap();
-                       }
-                       
                        if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
 
                        //send to slayer
@@ -834,12 +823,11 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        if (m->control_pressed) {  return 0;  }
                        chimeraResults = slayer.getOutput();
                        
-                       //free memory
-                       for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
-                       
-                       printResults.spotMap = spotMap;
                        printResults.flag = chimeraFlags;
                        printResults.results = chimeraResults;
+                       
+                       //free memory
+                       for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
                }
                
                return 0;
@@ -858,7 +846,7 @@ void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
                out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
                out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
                
-               out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
+               out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
                
        }
        catch(exception& e) {
@@ -877,7 +865,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo
                        out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
                        out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
                
-                       out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
+                       out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
                
                }else if ((!leftChimeric) && (rightChimeric)) {  //print right
                        out << querySeq->getName() << '\t';
@@ -886,7 +874,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo
                        out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
                        out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
                        
-                       out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';                      
+                       out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
                        
                }else  { //print both results
                        if (leftdata.flag == "yes") {
@@ -896,7 +884,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo
                                out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
                                out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
                                
-                               out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
+                               out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
                        }
                        
                        if (rightdata.flag == "yes") {
@@ -908,7 +896,7 @@ void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bo
                                out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
                                out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
                                
-                               out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';                      
+                               out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
                
                        }
                }
@@ -931,7 +919,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo
                        out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
                        out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
                        
-                       out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
+                       out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
                        
                }else if ((!leftChimeric) && (rightChimeric)) {  //print right
                        out += querySeq->getName() + "\t";
@@ -940,7 +928,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo
                        out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
                        out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
                        
-                       out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";                       
+                       out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
                        
                }else  { //print both results
                        
@@ -951,7 +939,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo
                                out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
                                out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
                                
-                               out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
+                               out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
                        }
                        
                        if (rightdata.flag == "yes") {
@@ -962,7 +950,7 @@ string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bo
                                out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
                                out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
                                
-                               out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";                       
+                               out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
                        }
                }
                
@@ -986,7 +974,7 @@ string ChimeraSlayer::getBlock(data_struct data, string flag){
                outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
                outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
                
-               outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
+               outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
                
                return outputString;
        }
@@ -995,5 +983,163 @@ string ChimeraSlayer::getBlock(data_struct data, string flag){
                exit(1);
        }
 }
+//***************************************************************************************************************
+vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
+       try {
+               
+               vector<Sequence*> refSeqs;
+               
+               if (searchMethod == "distance") {
+                       //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
+                       Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
+                       runFilter(newSeq);
+                       refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted);
+               }else if (searchMethod == "blast")  {
+                       refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
+               }else if (searchMethod == "kmer") {
+                       refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
+               }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
+               
+               return refSeqs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
+               exit(1);
+       }
+}
 //***************************************************************************************************************/
+vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
+       try {   
+               
+               vector<Sequence*> refResults;
+               
+               //get parts of query
+               string queryUnAligned = q->getUnaligned();
+               string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
+               string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
+               
+               Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery);
+               Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
+               
+               vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
+               vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
+               
+               vector<int> smaller;
+               vector<int> larger;
+               
+               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
+               else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
+               
+               //merge results         
+               map<int, int> seen;
+               map<int, int>::iterator it;
+               vector<int> mergedResults;
+               for (int i = 0; i < smaller.size(); i++) {
+                       //add left if you havent already
+                       it = seen.find(smaller[i]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(smaller[i]);
+                               seen[smaller[i]] = smaller[i];
+                       }
+                       
+                       //add right if you havent already
+                       it = seen.find(larger[i]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(larger[i]);
+                               seen[larger[i]] = larger[i];
+                       }
+               }
+               
+               for (int i = smaller.size(); i < larger.size(); i++) {
+                       //add right if you havent already
+                       it = seen.find(larger[i]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(larger[i]);
+                               seen[larger[i]] = larger[i];
+                       }
+               }
+               //numWanted = mergedResults.size();
+
+               //cout << q->getName() << " merged results size = " << mergedResults.size() << '\t' << "numwanted = " << numWanted <<  endl;            
+               for (int i = 0; i < mergedResults.size(); i++) {
+                       //cout << db[mergedResults[i]]->getName()  << '\t' << mergedResults[i] << endl; 
+                       
+                       if (db[mergedResults[i]]->getName() != q->getName()) { 
+                               Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
+                               refResults.push_back(temp);
+                               //cout << db[mergedResults[i]]->getName() << endl;
+                       }
+                       
+                       //cout << mergedResults[i] << endl;
+               }
+               //cout << "done " << q->getName()  << endl;             
+               delete queryRight;
+               delete queryLeft;
+               
+               return refResults;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
+       try {   
+               
+               //get parts of query
+               string queryUnAligned = q->getUnaligned();
+               string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
+               string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
+               
+               Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
+               Sequence* queryRight = new Sequence(q->getName(), rightQuery);
+               
+               vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, numWanted);
+               vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, numWanted);
+               
+               //merge results         
+               map<int, int> seen;
+               map<int, int>::iterator it;
+                       vector<int> mergedResults;
+               for (int i = 0; i < tempIndexesLeft.size(); i++) {
+                       //add left if you havent already
+                       it = seen.find(tempIndexesLeft[i]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(tempIndexesLeft[i]);
+                               seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
+                       }
+                       
+                       //add right if you havent already
+                       it = seen.find(tempIndexesRight[i]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(tempIndexesRight[i]);
+                               seen[tempIndexesRight[i]] = tempIndexesRight[i];
+                       }
+               }
+               
+               //numWanted = mergedResults.size();
+                       
+               //cout << q->getName() << endl;         
+               vector<Sequence*> refResults;
+               for (int i = 0; i < mergedResults.size(); i++) {
+                       //cout << db[mergedResults[i]]->getName() << endl;      
+                       if (db[mergedResults[i]]->getName() != q->getName()) { 
+                               Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
+                               refResults.push_back(temp);
+                       }
+               }
+               //cout << endl;         
+               delete queryRight;
+               delete queryLeft;
+               
+               return refResults;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
 
index ace98ce615b7f8106a2b401db75499be2e55d534..ded0f89b1cb57c88c66777405d1c0d8de9df088d 100644 (file)
@@ -43,12 +43,11 @@ class ChimeraSlayer : public Chimera {
                Sequence* querySeq;
                Sequence trimQuery;
                DeCalculator* decalc;
-               map<int, int>  spotMap;
                Database* databaseRight;
                Database* databaseLeft;
                map<string, int> priority; //for template=self, seqname, seqAligned, abundance
                set<string> chimericSeqs; //for template=self, so we don't add chimeric sequences to the userTemplate set
-               
+       
                vector<data_struct>  chimeraResults;
                data_results printResults;
                string chimeraFlags, searchMethod, fastafile;
@@ -61,7 +60,10 @@ class ChimeraSlayer : public Chimera {
                string getBlock(data_struct, string);
                string getBlock(data_results, data_results, bool, bool, string);
                //int readNameFile(string);
-               vector<Sequence*> getTemplate(Sequence*);
+               vector<Sequence*> getTemplate(Sequence*, vector<Sequence*>&);
+               vector<Sequence*> getRefSeqs(Sequence*, vector<Sequence*>&, vector<Sequence*>&);
+               vector<Sequence*> getBlastSeqs(Sequence*, vector<Sequence*>&, int);
+               vector<Sequence*> getKmerSeqs(Sequence*, vector<Sequence*>&, int);
                
 };
 
index cf219951f08e314f57ebb2e6c27985d3f5409167..11193329c604d13a20d4094a08292954eb2c54bd 100644 (file)
@@ -342,7 +342,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
                        convert(temp, parents); 
                        
-                       temp = validParameter.validFile(parameters, "realign", true);                   if (temp == "not found") { temp = "t"; }
+                       temp = validParameter.validFile(parameters, "realign", false);                  if (temp == "not found") { temp = "t"; }
                        realign = m->isTrue(temp); 
                        
                        temp = validParameter.validFile(parameters, "trim", false);                             if (temp == "not found") { temp = "f"; }
@@ -374,7 +374,6 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
 
 int ChimeraSlayerCommand::execute(){
        try{
-               
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                for (int s = 0; s < fastaFileNames.size(); s++) {
index 52607fcb668dd9d90b903007f6ca978755798ec5..def8097745a576681717c7443340d9af5bb62d81 100644 (file)
@@ -683,9 +683,9 @@ float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
 }
 //***************************************************************************************************************
 //gets closest matches to each end, since chimeras will most likely have different parents on each end
-vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int& numWanted, vector<int>& indexes) {
+vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate, int& numWanted) {
        try {
-               indexes.clear();
+               //indexes.clear();
                
                vector<Sequence*> seqsMatches;  
                
@@ -744,14 +744,14 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
                Sequence queryRight(querySeq->getName(), rightQuery);
 //cout << querySeq->getName() << '\t' << leftSpot << '\t' << rightSpot << '\t' << firstBaseSpot << '\t' << lastBaseSpot << endl;
 //cout << queryUnAligned.length() << '\t' << queryLeft.getUnaligned().length() << '\t' << queryRight.getUnaligned().length() << endl;
-               for(int j = 0; j < db.size(); j++){
+               for(int j = 0; j < thisFilteredTemplate.size(); j++){
                        
-                       string dbAligned = db[j]->getAligned();
+                       string dbAligned = thisFilteredTemplate[j]->getAligned();
                        string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence
                        string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot)); //last 1/3 of the sequence
                        
-                       Sequence dbLeft(db[j]->getName(), leftDB);
-                       Sequence dbRight(db[j]->getName(), rightDB);
+                       Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB);
+                       Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB);
 
                        distcalculator->calcDist(queryLeft, dbLeft);
                        float distLeft = distcalculator->getDist();
@@ -780,14 +780,6 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
                //sort by smallest distance
                sort(distsRight.begin(), distsRight.end(), compareSeqDist);
                sort(distsLeft.begin(), distsLeft.end(), compareSeqDist);
-//             cout << distsLeft.size() << '\t' << distsRight.size() << endl;
-//             for(int i=0;i<15;i++){
-//                     cout << "left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl;
-//             }
-//             for(int i=0;i<15;i++){
-//                     cout << "right\t" << db[distsLeft[i].index]->getName() << '\t' << distsRight[i].dist << endl;
-//             }
-               
                
                //merge results         
                map<string, string> seen;
@@ -799,80 +791,36 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
                //int lasti = 0;
                for (int i = 0; i < numWanted+1; i++) {
                        //add left if you havent already
-                       it = seen.find(db[distsLeft[i].index]->getName());
+                       it = seen.find(thisTemplate[distsLeft[i].index]->getName());
                        if (it == seen.end()) {  
                                dists.push_back(distsLeft[i]);
-                               seen[db[distsLeft[i].index]->getName()] = db[distsLeft[i].index]->getName();
+                               seen[thisTemplate[distsLeft[i].index]->getName()] = thisTemplate[distsLeft[i].index]->getName();
                                lastLeft =  distsLeft[i].dist;
 //                             cout << "loop-left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl;
                        }
 
                        //add right if you havent already
-                       it = seen.find(db[distsRight[i].index]->getName());
+                       it = seen.find(thisTemplate[distsRight[i].index]->getName());
                        if (it == seen.end()) {  
                                dists.push_back(distsRight[i]);
-                               seen[db[distsRight[i].index]->getName()] = db[distsRight[i].index]->getName();
+                               seen[thisTemplate[distsRight[i].index]->getName()] = thisTemplate[distsRight[i].index]->getName();
                                lastRight =  distsRight[i].dist;
 //                             cout << "loop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl;
                        }
                        
-                       //if (dists.size() > numWanted) { lasti = i; break; } //you have enough results
                }
                
-//             cout << "lastLeft\t" << lastLeft << endl;
-               
-               //add in sequences with same distance as last sequence added
-       /*      lasti++;
-               int i = lasti;
-               while (i < distsLeft.size()) {  
-                       if (distsLeft[i].dist == lastLeft) {
-                               it = seen.find(db[distsLeft[i].index]->getName());
 
-                               if (it == seen.end()) {  
-//                                     cout << "newLoop-left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist <<  endl;
-                                       dists.push_back(distsLeft[i]);
-                                       seen[db[distsRight[i].index]->getName()] = db[distsLeft[i].index]->getName();
-//                                     numWanted++; 
-                               }
-                       }
-                       else { break; }
-                       i++;
-               }
-               
-//             cout << "lastRight\t" << lastRight << endl;
-               //add in sequences with same distance as last sequence added
-               i = lasti;
-               while (i < distsRight.size()) {  
-                       if (distsRight[i].dist == lastRight) {
-                               it = seen.find(db[distsRight[i].index]->getName());
-                               
-                               if (it == seen.end()) {  
-//                                     cout << "newLoop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl;
-                                       dists.push_back(distsRight[i]);
-                                       seen[db[distsRight[i].index]->getName()] = db[distsRight[i].index]->getName();
-//                                     numWanted++; 
-                               }
-                       }
-                       else { break; }
-                       i++;
-               }
-*/
-               numWanted = dists.size();
-               
-               if (numWanted > dists.size()) { 
-                       //m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine(); 
-                       numWanted = dists.size();
-               }
+               //numWanted = dists.size();
 
-//cout << numWanted << endl;
-               for (int i = 0; i < numWanted; i++) {
+               //cout << numWanted << endl;
+               for (int i = 0; i < dists.size(); i++) {
 //                     cout << db[dists[i].index]->getName() << '\t' << dists[i].dist << endl;
 
-                       if (db[dists[i].index]->getName() != querySeq->getName()) {
-                               Sequence* temp = new Sequence(db[dists[i].index]->getName(), db[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
+                       if (thisTemplate[dists[i].index]->getName() != querySeq->getName()) {
+                               Sequence* temp = new Sequence(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
                        
                                seqsMatches.push_back(temp);
-                               indexes.push_back(dists[i].index);
                        }
 
                }
index 2d0d529dfb7136e22894ecc46a0f36be01eef353..34a470fa5f33470db6e69a296bfa21e137f7b7f7 100644 (file)
--- a/decalc.h
+++ b/decalc.h
@@ -39,7 +39,7 @@ class DeCalculator {
                DeCalculator() { m = MothurOut::getInstance(); }
                ~DeCalculator() {};
                
-               vector<Sequence*> findClosest(Sequence*, vector<Sequence*>, int&, vector<int>&);  //takes querySeq, a reference db, numWanted and indexes 
+               vector<Sequence*> findClosest(Sequence*, vector<Sequence*>&, vector<Sequence*>&, int&);  //takes querySeq, a reference db, filteredRefDB, numWanted 
                Sequence* findClosest(Sequence*, vector<Sequence*>);
                set<int> getPos() {  return h;  }
                void setMask(string); 
index c5cc83a4d216b9c5a89eb6f60de8a15e5906d06c..2c2cb4bb30decaed945ba21e7bbdb171c86f817f 100644 (file)
@@ -8,23 +8,14 @@
  */
 
 #include "maligner.h"
-#include "kmerdb.hpp"
-#include "blastdb.hpp"
 
-/***********************************************************************/
-Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, float div, int ms, int minCov, string mode, Database* dataLeft, Database* dataRight) :
-               db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov), searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight) { 
-                       
+/***********************************************************************/ //int num, int match, int misMatch, , string mode, Database* dataLeft, Database* dataRight
+Maligner::Maligner(vector<Sequence*> temp, int match, int misMatch, float div, int ms, int minCov) : db(temp), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov) { 
+                       //numWanted(num),  , searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight)
                        
                        m = MothurOut::getInstance(); 
                        
-//                     cout << matchScore << '\t' << misMatchPenalty << endl;
-//                     
-//                     matchScore = 1;
-//                     misMatchPenalty = -1;
-                       
-               }
-
+}
 /***********************************************************************/
 string Maligner::getResults(Sequence* q, DeCalculator* decalc) {
        try {
@@ -36,21 +27,14 @@ string Maligner::getResults(Sequence* q, DeCalculator* decalc) {
                
                string chimera;
                
-               if (searchMethod == "distance") {
-                       //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
-                       refSeqs = decalc->findClosest(query, db, numWanted, indexes);
-               }else if (searchMethod == "blast")  {
-                       refSeqs = getBlastSeqs(query, numWanted); //fills indexes
-               }else if (searchMethod == "kmer") {
-                       refSeqs = getKmerSeqs(query, numWanted); //fills indexes
-               }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
-               
-               if (m->control_pressed) { return chimera;  }
+               //copy refSeqs so that filter does not effect original
+               for(int i = 0; i < db.size(); i++) {  
+                       Sequence* newSeq = new Sequence(db[i]->getName(), db[i]->getAligned());
+                       refSeqs.push_back(newSeq);
+               }
                
                refSeqs = minCoverageFilter(refSeqs);
                
-               
-               
                if (refSeqs.size() < 2)  { 
                        for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
                        percentIdenticalQueryChimera = 0.0;
@@ -58,7 +42,7 @@ string Maligner::getResults(Sequence* q, DeCalculator* decalc) {
                }
                
                int chimeraPenalty = computeChimeraPenalty();
-               //cout << "chimeraPenalty = " << chimeraPenalty << endl;
+               
                //fills outputResults
                chimera = chimeraMaligner(chimeraPenalty, decalc);
                
@@ -127,7 +111,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
                        results temp;
                        
                        temp.parent = refSeqs[seqIndex]->getName();
-                       temp.parentAligned = db[indexes[seqIndex]]->getAligned();
+                       temp.parentAligned = db[seqIndex]->getAligned();
                        temp.nastRegionStart = spotMap[regionStart];
                        temp.nastRegionEnd = spotMap[regionEnd];
                        temp.regionStart = regionStart;
@@ -146,7 +130,8 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
                        parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1));
                        
                        temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion);
-               
+                       
+                       //cout << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << endl;
                        outputResults.push_back(temp);
                }
                
@@ -626,256 +611,3 @@ float Maligner::computePercentID(string queryAlign, string chimera) {
        }
 }
 //***************************************************************************************************************
-vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
-       try {   
-               indexes.clear();
-               vector<Sequence*> refResults;
-                               
-               //get parts of query
-               string queryUnAligned = q->getUnaligned();
-               string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
-               string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
-               
-               Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery);
-               Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
-
-               vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
-               vector<float> leftScores = databaseLeft->getSearchScores();
-               vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
-               vector<float> rightScores = databaseLeft->getSearchScores();
-
-               //if ((tempIndexesRight.size() == 0) && (tempIndexesLeft.size() == 0))  {  m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to process sequence " + q->getName()); m->mothurOutEndLine(); return refResults; }
-               
-               vector<int> smaller;
-               vector<float> smallerScores;
-               vector<int> larger;
-               vector<float> largerScores;
-               
-               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; smallerScores = rightScores; larger = tempIndexesLeft; largerScores = leftScores; }
-               else { smaller = tempIndexesLeft; smallerScores = leftScores; larger = tempIndexesRight; largerScores = rightScores; } 
-               
-               //for (int i = 0; i < smaller.size(); i++) { cout << "smaller = " << smaller[i] << '\t' << smallerScores[i] << endl; }
-               //cout << endl;
-               //for (int i = 0; i < larger.size(); i++) { cout << "larger = " << larger[i] << '\t' << largerScores[i] << endl; }
-               
-               //merge results         
-               map<int, int> seen;
-               map<int, int>::iterator it;
-               float lastSmaller = smallerScores[0];
-               float lastLarger = largerScores[0];
-               int lasti = 0;
-               vector<int> mergedResults;
-               for (int i = 0; i < smaller.size(); i++) {
-                       //add left if you havent already
-                       it = seen.find(smaller[i]);
-                       if (it == seen.end()) {  
-                               mergedResults.push_back(smaller[i]);
-                               seen[smaller[i]] = smaller[i];
-                               lastSmaller = smallerScores[i];
-                       }
-
-                       //add right if you havent already
-                       it = seen.find(larger[i]);
-                       if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
-                               lastLarger = largerScores[i];
-                       }
-                       
-                       lasti = i;
-                       //if (mergedResults.size() > num) { break; }
-               }
-               
-               //save lasti for smaller ties below
-               /*lasti++;
-               int iSmaller = lasti;
-               
-               if (!(mergedResults.size() > num)) { //if we still need more results.  
-                       for (int i = smaller.size(); i < larger.size(); i++) {
-                               it = seen.find(larger[i]);
-                               if (it == seen.end()) {  
-                                       mergedResults.push_back(larger[i]);
-                                       seen[larger[i]] = larger[i];
-                                       lastLarger = largerScores[i];
-                               }
-                               
-                               lasti = i;
-                               if (mergedResults.size() > num) {  break; }
-                       }
-               }
-               
-               
-               //add in any ties from smaller
-               while (iSmaller < smaller.size()) {
-                       if (smallerScores[iSmaller] == lastSmaller) {
-                               it = seen.find(smaller[iSmaller]);
-                               
-                               if (it == seen.end()) {  
-                                       mergedResults.push_back(smaller[iSmaller]);
-                                       seen[smaller[iSmaller]] = smaller[iSmaller];
-                               }
-                       }
-                       else { break; }
-                       iSmaller++;                     
-               }
-               
-               lasti++;
-               //add in any ties from larger
-               while (lasti < larger.size()) {
-                       if (largerScores[lasti] == lastLarger) { //is it a tie
-                               it = seen.find(larger[lasti]);
-                               
-                               if (it == seen.end()) {  //we haven't already seen it
-                                       mergedResults.push_back(larger[lasti]);
-                                       seen[larger[lasti]] = larger[lasti];
-                               }
-                       }
-                       else { break; }
-                       lasti++;                        
-               }
-               */
-               
-               for (int i = smaller.size(); i < larger.size(); i++) {
-                       //add right if you havent already
-                       it = seen.find(larger[i]);
-                       if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
-                               lastLarger = largerScores[i];
-                       }
-               }
-               numWanted = mergedResults.size();
-               
-               if (mergedResults.size() < numWanted) { numWanted = mergedResults.size(); }
-//cout << q->getName() << " merged results size = " << mergedResults.size() << '\t' << "numwanted = " << numWanted <<  endl;           
-               for (int i = 0; i < numWanted; i++) {
-//cout << db[mergedResults[i]]->getName()  << '\t' << mergedResults[i] << endl;        
-                       
-                       if (db[mergedResults[i]]->getName() != q->getName()) { 
-                               Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
-                               refResults.push_back(temp);
-                               indexes.push_back(mergedResults[i]);
-                               //cout << db[mergedResults[i]]->getName() << endl;
-                       }
-                       
-//cout << mergedResults[i] << endl;
-               }
-//cout << "done " << q->getName()  << endl;            
-               delete queryRight;
-               delete queryLeft;
-                       
-               return refResults;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Maligner", "getBlastSeqs");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-vector<Sequence*> Maligner::getKmerSeqs(Sequence* q, int num) {
-       try {   
-               indexes.clear();
-               
-               //get parts of query
-               string queryUnAligned = q->getUnaligned();
-               string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
-               string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
-
-               Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
-               Sequence* queryRight = new Sequence(q->getName(), rightQuery);
-               
-               vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, numWanted);
-               vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, numWanted);
-               vector<float> scoresLeft = databaseLeft->getSearchScores();
-               vector<float> scoresRight = databaseRight->getSearchScores();
-               
-               //merge results         
-               map<int, int> seen;
-               map<int, int>::iterator it;
-               float lastRight = scoresRight[0];
-               float lastLeft = scoresLeft[0];
-               //int lasti = 0;
-               vector<int> mergedResults;
-               for (int i = 0; i < tempIndexesLeft.size(); i++) {
-                       //add left if you havent already
-                       it = seen.find(tempIndexesLeft[i]);
-                       if (it == seen.end()) {  
-                               mergedResults.push_back(tempIndexesLeft[i]);
-                               seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
-                               lastLeft = scoresLeft[i];
-                       }
-
-                       //add right if you havent already
-                       it = seen.find(tempIndexesRight[i]);
-                       if (it == seen.end()) {  
-                               mergedResults.push_back(tempIndexesRight[i]);
-                               seen[tempIndexesRight[i]] = tempIndexesRight[i];
-                               lastRight = scoresRight[i];
-                       }
-                       
-                       //if (mergedResults.size() > numWanted) { lasti = i; break; } //you have enough results
-               }
-               
-               //add in sequences with same distance as last sequence added
-               /*lasti++;
-               int i = lasti;
-               while (i < tempIndexesLeft.size()) {  
-                       if (scoresLeft[i] == lastLeft) {
-                               it = seen.find(tempIndexesLeft[i]);
-                               
-                               if (it == seen.end()) {  
-                                       mergedResults.push_back(tempIndexesLeft[i]);
-                                       seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
-                               }
-                       }
-                       else { break; }
-                       i++;
-               }
-               
-               //              cout << "lastRight\t" << lastRight << endl;
-               //add in sequences with same distance as last sequence added
-               i = lasti;
-               while (i < tempIndexesRight.size()) {  
-                       if (scoresRight[i] == lastRight) {
-                               it = seen.find(tempIndexesRight[i]);
-                               
-                               if (it == seen.end()) {  
-                                       mergedResults.push_back(tempIndexesRight[i]);
-                                       seen[tempIndexesRight[i]] = tempIndexesRight[i];
-                               }
-                       }
-                       else { break; }
-                       i++;
-               }*/
-               
-               numWanted = mergedResults.size();
-               
-               if (numWanted > mergedResults.size()) { 
-                       //m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine(); 
-                       numWanted = mergedResults.size();
-               }
-               
-               
-//cout << q->getName() << endl;                
-               vector<Sequence*> refResults;
-               for (int i = 0; i < numWanted; i++) {
-//cout << db[mergedResults[i]]->getName() << endl;     
-                       if (db[mergedResults[i]]->getName() != q->getName()) { 
-                               Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
-                               refResults.push_back(temp);
-                               indexes.push_back(mergedResults[i]);
-                       }
-               }
-//cout << endl;                
-               delete queryRight;
-               delete queryLeft;
-               
-               return refResults;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Maligner", "getKmerSeqs");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-
index 6ddd6acebdff359294deda6b4cc1b4c2c5ae788f..3deacfe875eb6ed266dca8514109ea515f035fc9 100644 (file)
@@ -20,7 +20,7 @@ class Maligner {
 
        public:
                
-               Maligner(vector<Sequence*>, int, int, int, float, int, int, string, Database*, Database*);
+               Maligner(vector<Sequence*>, int, int, float, int, int); //int, int, int, , string, Database*, Database*
                ~Maligner() {};
                
                string getResults(Sequence*, DeCalculator*);
@@ -32,14 +32,10 @@ class Maligner {
                Sequence* query;
                vector<Sequence*> refSeqs;
                vector<Sequence*> db;
-               int numWanted, matchScore, misMatchPenalty, minCoverage, minSimilarity;
-               string searchMethod;
+               int minCoverage, minSimilarity, matchScore, misMatchPenalty;
                float minDivR, percentIdenticalQueryChimera;
                vector<results> outputResults;
-               vector<int> indexes;  //stores index into template seqs of the refSeqs, so we can return the whole sequence rather than the trimmed and filtered one
                map<int, int> spotMap;
-               Database* databaseLeft;
-               Database* databaseRight;
                
                vector<Sequence*> minCoverageFilter(vector<Sequence*>);  //removes top matches that do not have minimum coverage with query.
                int computeChimeraPenalty();
@@ -53,8 +49,6 @@ class Maligner {
                string constructAntiChimericSeq(vector<trace_struct>, vector<Sequence*>);
                float computePercentID(string, string);
                string chimeraMaligner(int, DeCalculator*);
-               vector<Sequence*> getBlastSeqs(Sequence*, int);
-               vector<Sequence*> getKmerSeqs(Sequence*, int);
                MothurOut* m;
                
 };
index 8575faf4312450c78cc8db249259b036edb91e0d..9e5a61eaccfe04fdb079e105939bdb7d5be560a1 100644 (file)
 #include "slayer.h"
 
 /***********************************************************************/
-Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i, int snp) :
-               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i), percentSNPSample(snp){ m = MothurOut::getInstance(); }
+Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i, int snp, int mi) :
+               minBS(mi), windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i), percentSNPSample(snp){ m = MothurOut::getInstance(); }
 /***********************************************************************/
 string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
        try {
                vector<data_struct> all; all.clear();
+               myQuery = *query;
+               /*cout << "here" << endl;       
+               query->setName("S000381746"); query->setAligned("...............................................................................................................................................A-C-GC--TGG-C--G-GC-A-GG--C----C-T--AACACA-T-GC-A-AGT-CGA-G-CG----------G-CAG-CG-G---------------------------GA-GG-A-AG----------------------------------------------------CTT-G----------------------------------------------------------------------------------CTT-CCTC----------------G-CC--G--GC--G--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AT-GT-C-T-G-GG---G-A--T-CT-G--C-C-CGA--TG-G------------------------------------------------------------------A-GG----GGG-AT-AA-CCA-------------------------C-T-G-----------------------GAA-A---CGG-TGG-CTAA-TA---CC-G--C-AT-A----------A--------------------C-------------------------------------GT-C-----------------------------------------------------------------------------------------------------------------------G-CA-A--------------------------------------------------------------------------------------------------------------------------------------G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-T-G-GG-----G--GA-C--C--------------------------------------------------------------------------------------------------------------------TTC-G----------------------------------------------------------------------------------------------------------------------G-G--CC-TC--A---C-A--------------C----C-A---T-CG-G---AT---G-A-----A-CCC-AGA--T-GGG--A------TT--A--G-CT-A----G---TAGG-T-G-GG-G-T----AAT-GG-C-T-C-ACCT--A-GG-C-G--A-CG-A------------TCC-C-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AC-C-AG-CCAC-A-CTGGA--A-C-TG-A-GA-C-AC-G-G-TCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGCA-C-AA-T-GG--GC-GC-A----A-G-CC-T-GA-TG-CA-GCCA-TGCC-G-CG-T---G-T-G--T--GA-A-G--A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-CAC--------TT-TC-A-G--C-GAG----GA-G--G---AA-GGTG---GTGA-GC----T--T--AA-T---A----------CG-CTCAT-CAA-TT-GA-CG-TT-A-C-TC-G-CA-G---------AA-----------GAAGC-ACC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GGT-GCA-A-G-CG-TTAA-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGC-AC--G-CA-G-G-C-G------------G--T-TT-G-T-T-AA----G-T-C-A---G-ATG-TG-A-AA-TC--CC-CGA-G--------------------------------------------------------------------CT-T-AA-------------------------------------------------------------------------CT-T-G-GG-AA-C----T-G-C-A-T-T--------T--GA-A-A-C-T-G-GCA--A-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T-C-----T-CG--TA-G-A------------G-GG-G-GG-T----AG--AATT-CCA-G-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TC-T-GGA--GG-A-AT-A-CC-GG--T--G--GC-GAA-G--G-C---G----G--C-C-CCCTG------G-AC-GA--------------------------------------------------------------AG-A-C-T--GA--CG-----CT-CA-GG--T-G-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CCG-T-AAA--C-GATG-TC--GA-TT---------T-GG--A--G-G-TT-G-TG-C--C--------------------------------------------------------------------------------------CTT-GA--------------------------------------------------------------------------------------------------------------------------------------------------G-G-C-GT--G-G-C-T-TC-C------GG--A----GC-TAA--CG-C-G-T--T--AA-AT--C----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----C-C--G-C-A-A-GGT-T--AAA-ACTC-AAA---------TGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-ATG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-TACTC-TT-G-AC-A-T-C--------------CAG-A-G-------------A-AC-T-T-T--CC--A-GA-G-A-T--G-G-A--T-T-G-G--T-G-----CC-------------------------------------T--TC-G------------------------------------------GG----A----A---CT-CTG---A--GA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TT-G--TGA-AA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TA--TC--C-TTTG--T-T-G-C-C---AG-C-G--G--T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TCG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G----C----C-G------------G----G---A-A--CT---------------C-A-A-A-G-GA-G--AC-T-G-CCA--G-T------------------------------------G-A---TAA----------------------------------A-C-T-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---ATC-A-T-G-G-C-C-CTT----AC-G--AG-T-A-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CGTA-T-A--C-AAA-GA-GA--------------------------------------------------------------------------------------------------A-G-C-G-A--ACCT-G-C--G---------------------------------------A-GG-G-C-----------A--A-G-CG---G----------A--CCT-C------A-T-AAAGT-AC-G-T-C-G-TAG-TCC--------GGA-T-TGGAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTCC-A-T-G-AA-G-TC-GGAAT-CG-C-TA--G-TA-AT-C-G-T----AGA-TC-A-G--A------AT--GCT-AC-G-GT-G-AAT-ACGT-T-CCCGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---TGG-G-TT-GC-AAA--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C-------------------------------------------------------------T-TC-G------------------------------------------------------------------------------------------------------GG-A--GG-G--C---GC-TTA--CC--ACT-T----T-GT..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................");
+               refSeqs.resize(2);
+               refSeqs[1]->setName("S000381740"); refSeqs[1]->setAligned("...............................................................................................................................................a-c-gc--tgg-c--g-gc-a-gg--c----c-t--aacaca-t-gc-a-agt-cga-g-cg----------g-tag-ca-c----------------------------agga-g-ag----------------------------------------------------ctt-g----------------------------------------------------------------------------------ctc-tctg----------------g-gt--g--ac--g--ag-c-gg-c-gg-a--c-------------ggg-tgagt-a--at-gt-c-t-g-gg---a-a--a-ct-g--c-c-tga--tg-g------------------------------------------------------------------a-gg----ggg-at-aa-cta-------------------------c-t-g-----------------------gaa-a---cgg-tag-ctaa-ta---cc-g--c-at-a----------a--------------------c-------------------------------------gt-c-----------------------------------------------------------------------------------------------------------------------t-ac-g--------------------------------------------------------------------------------------------------------------------------------------g-a-c---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------caaa--g-t-g-gg-----g--ga-c--c--------------------------------------------------------------------------------------------------------------------ttc-g----------------------------------------------------------------------------------------------------------------------g-g--cc-tc--a---c-g--------------c----c-a---t-ca-g---at---g-t-----g-ccc-aga--t-ggg--a------tt--a--g-ct-a----g---tagg-t-g-gg-g-t----aat-gg-c-t-c-acct--a-gg-c-g--a-cg-a------------tcc-c-t------ag-ct-g-g-tct-g-ag----a--gg-at--g-ac-c-ag-ccac-a-ctgga--a-c-tg-a-ga-c-ac-g-g-tccaga-ctcc-tac-g--g-g-a-g-gc-a-gc-a-g-tg---gg-g-a-ata-ttgca-c-aa-t-gg--gc-gc-a----a-g-cc-t-ga-tg-ca-gcca-tgcc-g-cg-t---g-t-g--t--ga-a-g--a--a-g-g-cc-----tt-cg---------g-g-t-t-g-t--a---aa-g-cac--------tt-tc-a-g--c-gag----ga-g--g---aa-gggc---gatg-tc----t--t--aa-t---a----c-----gg-c-agc-gca-tt-ga-cg-tt-a-c-tc-g-ca-g---------aa-----------gaagc-acc-gg-c-taa---c--t-ccgt--gcca--g-c---a--gccg---c-gg--ta-at--ac---gg-ag-ggt-gca-a-g-cg-ttaa-t-cgg-aa-tt-a--c-t--gggc-gta----aa-gcgc-ac--g-ca-g-g-c-g------------g--t-tt-g-t-t-aa----g-t-c-a---g-atg-tg-a-aa-tc--cc-cgc-g--------------------------------------------------------------------ct-t-aa-------------------------------------------------------------------------cg-t-g-gg-aa-c----t-g-c-a-t-t--------t--ga-a-a-c-t-g-gca--a-g-c---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------t-a-g-a-g-t-c-----t-cg--ta-g-a------------g-gg-g-gg-t----ag--aatt-cca-g-gt--gt-a-gcg-gtgaaa-tg-cgt-agag-a-tc-t-gga--gg-a-at-a-cc-gg--t--g--gc-gaa-g--g-c---g----g--c-c-ccctg------g-ac-ga--------------------------------------------------------------ag-a-c-t--ga--cg-----ct-ca-gg--t-g-cga--aa-g-c--------------g-tggg-gag-c-a-aaca--gg-atta-g-ata-c-----cc-t-g-gta-g-t----c-ca--c-g-ctg-t-aaa--c-gatg-tc--ga-tt---------t-gg--a--g-g-tt-g-tg-c--c--------------------------------------------------------------------------------------ctt-ga--------------------------------------------------------------------------------------------------------------------------------------------------g-g-c-gt--g-g-c-t-tc-c------gg--a----gc-taa--cg-c-g-t--t--aa-at--c----g-acc-gcc-t-g-gg-gag-ta---cgg-----c-c--g-c-a-a-ggt-t--aaa-actc-aaa---------tgaa-ttg-acggg-g-g-cccg----c-a--c-a-a-gcg-gt-g--g--ag-ca-t--gt-ggt-tt-aatt-c-g-atg-caac-g-cg-a-ag-a-a-cc-tt-a-cc-tactc-tt-g-ac-a-t-c--------------cag-a-g-------------a-ac-t-t-t--cc--a-ga-g-a-t--g-g-a--t-t-g-g--t-g-----cc-------------------------------------t--tc-g------------------------------------------gg----a----a---ct-ctg---a--ga---------------------------------------------------c-a-g-g-t-gctg-ca-tgg-ct--gtc-gtc-a-gc-tc---g-tg-tt-g--tga-aa-tgt-t-gg-g-tt-aa-gt-cccgc-aa--------c-gag-cgc-a-acc-c-t-ta--tc--c-tttg--t-t-g-c-c---ag-c-g--a--t-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------tcg------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------g----t----c-g------------g----g---a-a--ct---------------c-a-a-a-g-ga-g--ac-t-g-ccg--g-t------------------------------------g-a---taa----------------------------------a-c-c-g--g-a-gg-a--agg-t--gggg-a-tgac-gtc--aagt-c---atc-a-t-g-g-c-c-ctt----ac-g--ag-t-a-gg-gc-ta-cac-acgtg-c--ta--caatg---g-cgta-t-a--c-aaa-ga-ga--------------------------------------------------------------------------------------------------a-g-c-g-a--a-ctcg-c--g---------------------------------------a-ga-g-c-----------a--a-g-cg---g----------a--cct-c------a-t-aaagt-ac-g-t-c-g-tag-tcc--------gga-t-tggag-tc--t-gcaa-ct-c-------------------------------------------------------------------------------------------------g-actcc-a-t-g-aa-g-tc-ggaat-cg-c-ta--g-ta-at-c-g-t----aga-tc-a-g--a------at--gct-ac-g-gt-g-aat-acgt-t-cccgggcct-tgta----cacaccg-ccc-gtc-----a---ca--cca-tg-gg-a--g---tgg-g-tt-gc-aaa--a-gaa------g--t-agg-ta-g-c-t-t-aa-c-c-------------------------------------------------------------t-tc-g------------------------------------------------------------------------------------------------------gg-a--gg-g--c---gc-tta--cc--act-t----t-gtg-at-tca------------------------t.........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................");
+               refSeqs[0]->setName("7000004131500404"); refSeqs[0]->setAligned(".........................................................................................................................................AT-TGAA-C-GC--TGG-C--G-GC-A-GG--C----C-T--AACACA-T-GC-A-AGT-CGA-G-CG----------G-CAG-CG-G----------------------------AAAG-A-AG----------------------------------------------------CTT-G---------------------------------------------------------------------------------ACTT-CTTT----------------G-CC--G--GC--G--AG-C-GG-C-GG-A--C-------------GGG-TGAGT-A--AT-GT-C-T-G-GG---G-A--T-CT-G--C-C-CGA--TG-G------------------------------------------------------------------A-GG----GGG-AT-AA-CTA-------------------------C-T-G-----------------------GAA-A---CGG-TAG-CTAA-TA---CC-G--C-AT-A----------A--------------------C-------------------------------------GT-C-----------------------------------------------------------------------------------------------------------------------G-CA-A--------------------------------------------------------------------------------------------------------------------------------------G-A-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CAAA--G-T-G-GG-----G--GA-C--C--------------------------------------------------------------------------------------------------------------------TTC-G----------------------------------------------------------------------------------------------------------------------G-G--CC-TC--A---C-A--------------C----C-A---T-CG-G---AT---G-A-----A-CCC-AGA--T-GGG--A------TT--A--G-CT-A----G---TAGG-T-G-GG-G-T----AAT-GG-C-T-C-ACCT--A-GG-C-G--A-CG-A------------TCC-C-T------AG-CT-G-G-TCT-G-AG----A--GG-AT--G-AC-C-AG-CCAC-A-CTGGA--A-C-TG-A-GA-C-AC-G-G-TCCAGA-CTCC-TAC-G--G-G-A-G-GC-A-GC-A-G-TG---GG-G-A-ATA-TTGCA-C-AA-T-GG--GG-GA-A----A-C-CC-T-GA-TG-CA-GCCA-TGCC-G-CG-T---G-T-G--T--GA-A-G--A--A-G-G-CC-----TT-CG---------G-G-T-T-G-T--A---AA-G-CAC--------TT-TC-A-G--C-GGG----GA-A--G---AA-GGCG---TT-A-GC---GT--T--AA-C---A----G-----CG-C-TAT-CGA-TT-GA-CG-TT-A-C-CT-G-CA-G---------AA-----------GAAGC-ACC-GG-C-TAA---C--T-CCGT--GCCA--G-C---A--GCCG---C-GG--TA-AT--AC---GG-AG-GGT-GCA-A-G-CG-TTAA-T-CGG-AA-TT-A--C-T--GGGC-GTA----AA-GCGT-AC--G-CA-G-G-C-G------------G--T-CT-G-T-T-AA----G-T-C-A---G-ATG-TG-A-AA-TC--CC-CGG-G--------------------------------------------------------------------CT-T-AA-------------------------------------------------------------------------CC-T-G-GG-AA-C----T-G-C-A-T-T--------T--GA-A-A-C-T-G-GCA--G-G-C---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------T-A-G-A-G-T-C-----T-CG--TA-G-A------------G-GG-G-GG-T----AG--AATT-CCA-G-GT--GT-A-GCG-GTGAAA-TG-CGT-AGAG-A-TC-T-GGA--GG-A-AT-A-CC-GG--T--G--GC-GAA-G--G-C---G----G--C-C-CCCTG------G-AC-GA--------------------------------------------------------------AG-A-C-T--GA--CG-----CT-CA-GG--T-A-CGA--AA-G-C--------------G-TGGG-GAG-C-A-AACA--GG-ATTA-G-ATA-C-----CC-T-G-GTA-G-T----C-CA--C-G-CTG-T-AAA--C-GATG-TC--GA-TT---------T-GA--A--G-G-TT-G-TG-G--C--------------------------------------------------------------------------------------CTT-GA--------------------------------------------------------------------------------------------------------------------------------------------------G-C-T-GT--G-G-C-T-TT-C------GG--A----GC-TAA--CG-C-G-T--T--AA-AT--C----G-ACC-GCC-T-G-GG-GAG-TA---CGG-----C-C--G-C-A-A-GGT-T--AAA-ACTC-AAA---------TGAA-TTG-ACGGG-G-G-CCCG----C-A--C-A-A-GCG-GT-G--G--AG-CA-T--GT-GGT-TT-AATT-C-G-ATG-CAAC-G-CG-A-AG-A-A-CC-TT-A-CC-TACTC-TT-G-AC-A-T-C--------------CAG-A-G-------------A-AC-T-T-G--GC--A-GA-G-A-T--G-C-C--T-T-G-G--T-G-----CC-------------------------------------T--TC-G------------------------------------------GG----A----G---CT-CTG---A--GA---------------------------------------------------C-A-G-G-T-GCTG-CA-TGG-CT--GTC-GTC-A-GC-TC---G-TG-TT-G--TGA-AA-TGT-T-GG-G-TT-AA-GT-CCCGC-AA--------C-GAG-CGC-A-ACC-C-T-TA--TC--C-TTTG--T-T-G-C-C---AG-C-G--A--T-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------G----T----C-G------------G----G---A-A--CT---------------C-A-A-A-G-GA-G--AC-T-G-CCG--G-T------------------------------------G-A---TAA----------------------------------A-C-C-G--G-A-GG-A--AGG-T--GGGG-A-TGAC-GTC--AAGT-C---ATC-A-T-G-G-C-C-CTT----AC-G--AG-T-A-GG-GC-TA-CAC-ACGTG-C--TA--CAATG---G-CGCA-T-A--C-AAA-GA-GA--------------------------------------------------------------------------------------------------A-G-C-G-A--T-CTCG-C--G---------------------------------------A-GA-G-T-----------C--A-G-CG---G----------A--CCT-C------A-C-AAAGT-GC-G-T-C-G-TAG-TCC--------GGA-T-TGGAG-TC--T-GCAA-CT-C-------------------------------------------------------------------------------------------------G-ACTCC-A-T-G-AA-G-TC-GGAAT-CG-C-TA--G-TA-AT-C-G-T----GGA-TC-A-G--A------AT--GCC-AC-G-GT-G-AAT-ACGT-T-CCTGGGCCT-TGTA----CACACCG-CCC-GTC-----A---CA--CCA-TG-GG-A--G---TGG-G-TT-GC-AAA--A-GAA------G--T-AGG-TA-G-C-T-T-AA-C-C-------------------------------------------------------------T-TC-G------------------------------------------------------------------------------------------------------GG-A--GG-G--C---GC-TTA--CC--ACT-T----T-GTG-AT-TCA------------------------TG--ACT-GGGG-TG-AAG-TCGTAACAA-GGTAA-CCGT-AGGGGAA-CCT......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................");          
+               for (int i = 0; i < refSeqs.size(); i++) {
+                       string newAligned = "";
+                       for (int j = 0; j < refSeqs[i]->getAligned().length(); j++) {
+                               newAligned += toupper(refSeqs[i]->getAligned()[j]);
+                       }
+                       refSeqs[i]->setAligned(newAligned);
+               }*/
                
                for (int i = 0; i < refSeqs.size(); i++) {
                
@@ -30,7 +43,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
 
                                map<int, int> spots;  //map from spot in original sequence to spot in filtered sequence for query and both parents
                                vector<data_struct> divs = runBellerophon(q, leftParent, rightParent, spots);
-                               
+                               //cout << divs.size() << endl;
                                if (m->control_pressed) { 
                                        delete q;
                                        delete leftParent;
@@ -71,7 +84,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                                //if (abs(logR) < 1 ) {  
                                                        
                                                        float BS_A, BS_B;
-                                                       bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B);
+                                                       bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B, iters);
                                                        
                                                        if (m->control_pressed) { 
                                                                delete q;
@@ -289,7 +302,7 @@ vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int l
        }
 }
 /***********************************************************************/
-int Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, float& BSB) {
+int Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, float& BSB, int numIters) {
        try {
 
                srand((unsigned)time( NULL ));
@@ -300,7 +313,7 @@ int Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, flo
                int numLeft = max(1, int(left.size() * percentSNPSample/(float)100 + 0.5));
                int numRight = max(1, int(right.size() * percentSNPSample/(float)100 + 0.5));
 
-               for (int i = 0; i < iters; i++) {
+               for (int i = 0; i < numIters; i++) {
                        //random sampling with replacement.
                
                        if (m->control_pressed) { return 0;  }
@@ -368,6 +381,15 @@ int Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, flo
                BSA = (float) count_A / (float) iters * 100;
                BSB = (float) count_B / (float) iters * 100;
 //cout << "bsa = " << BSA << " bsb = " << BSB << endl;
+               
+               //run borderline bootstrap values longer
+               //if (numIters < 1000) {
+                       //are you within 10 points of min bootstrap value cutoff
+               //      if (((abs((double)(BSA - minBS))) <= 5) || ((abs((double)(BSB - minBS))) <= 5)) {
+               //              m->mothurOut("extending bootstrap for " + myQuery.getName()); m->mothurOutEndLine();
+               //              bootstrapSNPS(left, right, BSA, BSB, 1000);
+               //      }
+               //}
 
                return 0;
        
index 875e4a9d93c044339abfb71b8b3d0b009be0995a..107b02785fffb395662508dc8875e49013103b47 100644 (file)
--- a/slayer.h
+++ b/slayer.h
@@ -29,7 +29,7 @@ class Slayer {
 
        public:
                
-               Slayer(int, int, int, float, int, int);
+               Slayer(int, int, int, float, int, int, int);
                ~Slayer() {};
                
                string getResults(Sequence*, vector<Sequence*>);
@@ -38,17 +38,18 @@ class Slayer {
                                
        private:
                
-               int windowSize, windowStep, parentFragmentThreshold, iters, percentSNPSample;
+               int windowSize, windowStep, parentFragmentThreshold, iters, percentSNPSample, minBS;
                float divRThreshold; 
                vector<data_struct>  outputResults;
                vector< map<int, int> > baseSpots;
+               Sequence myQuery;
                
                map<int, int> verticalFilter(vector<Sequence*>);
                float computePercentID(string, string, int, int);
                
                vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*, map<int, int>&);
                vector<snps> getSNPS(string, string, string, int, int);
-               int bootstrapSNPS(vector<snps>, vector<snps>, float&, float&);
+               int bootstrapSNPS(vector<snps>, vector<snps>, float&, float&, int);
                float snpQA(vector<snps>);
                float snpQB(vector<snps>);
                float snpAB(vector<snps>);