From 4b6e3f7b5543822a2cca4fb076ab6af2ce8ca62d Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 7 Jan 2011 15:37:56 +0000 Subject: [PATCH] working on chimera change to add trim feature, fixed bug in print of distance file for unifrac.weighted and unifrac.unweighted that caused distances to be in the wrong order. --- aligncommand.cpp | 2 +- bellerophon.cpp | 5 +- bellerophon.h | 4 +- ccode.cpp | 10 ++-- ccode.h | 4 +- chimera.h | 6 ++- chimerabellerophoncommand.cpp | 6 ++- chimeraccodecommand.cpp | 4 +- chimeracheckcommand.cpp | 2 +- chimeracheckrdp.cpp | 8 +-- chimeracheckrdp.h | 4 +- chimerapintailcommand.cpp | 4 +- chimeraslayer.cpp | 50 +++++++++++++++--- chimeraslayer.h | 11 ++-- chimeraslayercommand.cpp | 97 +++++++++++++++++++++++++---------- chimeraslayercommand.h | 8 +-- classifyseqscommand.cpp | 2 +- clustersplitcommand.cpp | 1 - engine.cpp | 1 - filterseqscommand.cpp | 3 +- pairwiseseqscommand.cpp | 4 +- pintail.cpp | 13 ++--- pintail.h | 4 +- screenseqscommand.cpp | 2 +- unifracunweightedcommand.cpp | 2 +- unifracweightedcommand.cpp | 2 +- 26 files changed, 171 insertions(+), 88 deletions(-) diff --git a/aligncommand.cpp b/aligncommand.cpp index 3940842..b6a2c0b 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -310,7 +310,7 @@ int AlignCommand::execute(){ int start = time(NULL); #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; + int pid, numSeqsPerProcessor; int tag = 2001; vector MPIPos; MPIWroteAccnos = false; diff --git a/bellerophon.cpp b/bellerophon.cpp index 6e1e242..f4d04ea 100644 --- a/bellerophon.cpp +++ b/bellerophon.cpp @@ -75,7 +75,7 @@ Bellerophon::Bellerophon(string name, bool filterSeqs, bool c, int win, int inc } //*************************************************************************************************************** -int Bellerophon::print(ostream& out, ostream& outAcc) { +int Bellerophon::print(ostream& out, ostream& outAcc, string s) { try { int above1 = 0; @@ -130,7 +130,7 @@ int Bellerophon::print(ostream& out, ostream& outAcc) { } #ifdef USE_MPI //*************************************************************************************************************** -int Bellerophon::print(MPI_File& out, MPI_File& outAcc) { +int Bellerophon::print(MPI_File& out, MPI_File& outAcc, string s) { try { int pid; @@ -535,7 +535,6 @@ int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* spar int Bellerophon::generatePreferences(vector left, vector right, int mid){ try { - //float dme = 0.0; SeqMap::iterator itR; SeqMap::iterator itL; diff --git a/bellerophon.h b/bellerophon.h index 41c1019..08dca2b 100644 --- a/bellerophon.h +++ b/bellerophon.h @@ -28,10 +28,10 @@ class Bellerophon : public Chimera { ~Bellerophon() { delete distCalculator; for (int i = 0; i < seqs.size(); i++) { delete seqs[i]; } seqs.clear(); } int getChimeras(); - int print(ostream&, ostream&); + int print(ostream&, ostream&, string); #ifdef USE_MPI - int print(MPI_File&, MPI_File&); + int print(MPI_File&, MPI_File&, string); #endif private: diff --git a/ccode.cpp b/ccode.cpp index 15e482a..e820184 100644 --- a/ccode.cpp +++ b/ccode.cpp @@ -74,7 +74,7 @@ Ccode::~Ccode() { #endif } //*************************************************************************************************************** -int Ccode::print(ostream& out, ostream& outAcc) { +Sequence* Ccode::print(ostream& out, ostream& outAcc) { try { ofstream out2; @@ -155,7 +155,7 @@ int Ccode::print(ostream& out, ostream& outAcc) { //free memory for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } - return results; + return NULL; } catch(exception& e) { m->errorOut(e, "Ccode", "print"); @@ -164,7 +164,7 @@ int Ccode::print(ostream& out, ostream& outAcc) { } #ifdef USE_MPI //*************************************************************************************************************** -int Ccode::print(MPI_File& out, MPI_File& outAcc) { +Sequence* Ccode::print(MPI_File& out, MPI_File& outAcc) { try { string outMapString = ""; @@ -263,7 +263,7 @@ int Ccode::print(MPI_File& out, MPI_File& outAcc) { //free memory for (int i = 0; i < closest.size(); i++) { delete closest[i].seq; } - return results; + return NULL; } catch(exception& e) { m->errorOut(e, "Ccode", "print"); @@ -280,6 +280,8 @@ int Ccode::printMapping(string& output) { MPI_File_write_shared(outMap, buf, length, MPI_CHAR, &status); delete buf; + + return 0; } catch(exception& e) { diff --git a/ccode.h b/ccode.h index 91ef845..60feaf4 100644 --- a/ccode.h +++ b/ccode.h @@ -28,10 +28,10 @@ class Ccode : public Chimera { ~Ccode(); int getChimeras(Sequence* query); - int print(ostream&, ostream&); + Sequence* print(ostream&, ostream&); #ifdef USE_MPI - int print(MPI_File&, MPI_File&); + Sequence* print(MPI_File&, MPI_File&); #endif private: diff --git a/chimera.h b/chimera.h index 6ac6191..8eec3cd 100644 --- a/chimera.h +++ b/chimera.h @@ -101,10 +101,12 @@ class Chimera { virtual void printHeader(ostream&){}; virtual int getChimeras(Sequence*){ return 0; } virtual int getChimeras(){ return 0; } - virtual int print(ostream&, ostream&){ return 0; } + virtual Sequence* print(ostream&, ostream&){ return NULL; } + virtual int print(ostream&, ostream&, string){ return 0; } #ifdef USE_MPI - virtual int print(MPI_File&, MPI_File&){ return 0; } + virtual Sequence* print(MPI_File&, MPI_File&){ return 0; } + virtual int print(MPI_File&, MPI_File&, string){ return 0; } #endif diff --git a/chimerabellerophoncommand.cpp b/chimerabellerophoncommand.cpp index 3409615..5d7d6d4 100644 --- a/chimerabellerophoncommand.cpp +++ b/chimerabellerophoncommand.cpp @@ -53,6 +53,7 @@ ChimeraBellerophonCommand::ChimeraBellerophonCommand(){ vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "ChimeraBellerophonCommand", "ChimeraBellerophonCommand"); @@ -87,6 +88,7 @@ ChimeraBellerophonCommand::ChimeraBellerophonCommand(string option) { vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -241,7 +243,7 @@ int ChimeraBellerophonCommand::execute(){ MPI_File_open(MPI_COMM_WORLD, FileName, outMode, MPI_INFO_NULL, &outMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); - numSeqs = chimera->print(outMPI, outMPIAccnos); + numSeqs = chimera->print(outMPI, outMPIAccnos, ""); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); @@ -254,7 +256,7 @@ int ChimeraBellerophonCommand::execute(){ ofstream out2; m->openOutputFile(accnosFileName, out2); - numSeqs = chimera->print(out, out2); + numSeqs = chimera->print(out, out2, ""); out.close(); out2.close(); diff --git a/chimeraccodecommand.cpp b/chimeraccodecommand.cpp index 286681a..223ddb2 100644 --- a/chimeraccodecommand.cpp +++ b/chimeraccodecommand.cpp @@ -272,7 +272,7 @@ int ChimeraCcodeCommand::execute(){ #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; + int pid, numSeqsPerProcessor; int tag = 2001; vector MPIPos; @@ -550,7 +550,7 @@ int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File if (m->control_pressed) { delete candidateSeq; return 1; } //print results - bool isChimeric = chimera->print(outMPI, outAccMPI); + chimera->print(outMPI, outAccMPI); } } delete candidateSeq; diff --git a/chimeracheckcommand.cpp b/chimeracheckcommand.cpp index abf6b90..3104fac 100644 --- a/chimeracheckcommand.cpp +++ b/chimeracheckcommand.cpp @@ -301,7 +301,7 @@ int ChimeraCheckCommand::execute(){ #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; + int pid, numSeqsPerProcessor; int tag = 2001; vector MPIPos; diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp index 1840c38..16d5f92 100644 --- a/chimeracheckrdp.cpp +++ b/chimeracheckrdp.cpp @@ -47,7 +47,7 @@ ChimeraCheckRDP::~ChimeraCheckRDP() { } } //*************************************************************************************************************** -int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { +Sequence* ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { try { m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine(); @@ -72,7 +72,7 @@ int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { } } - return 0; + return NULL; } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "print"); @@ -81,7 +81,7 @@ int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { } #ifdef USE_MPI //*************************************************************************************************************** -int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) { +Sequence* ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) { try { cout << "Processing: " << querySeq->getName() << endl; @@ -115,7 +115,7 @@ int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) { } } - return 0; + return NULL; } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "print"); diff --git a/chimeracheckrdp.h b/chimeracheckrdp.h index f17c7b1..4805ba1 100644 --- a/chimeracheckrdp.h +++ b/chimeracheckrdp.h @@ -29,10 +29,10 @@ class ChimeraCheckRDP : public Chimera { ~ChimeraCheckRDP(); int getChimeras(Sequence*); - int print(ostream&, ostream&); + Sequence* print(ostream&, ostream&); #ifdef USE_MPI - int print(MPI_File&, MPI_File&); + Sequence* print(MPI_File&, MPI_File&); #endif private: diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index abab7a3..c5e579e 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -382,7 +382,7 @@ int ChimeraPintailCommand::execute(){ templateSeqsLength = chimera->getLength(); #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; + int pid, numSeqsPerProcessor; int tag = 2001; vector MPIPos; @@ -631,7 +631,7 @@ int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fi if (m->control_pressed) { delete candidateSeq; return 1; } //print results - bool isChimeric = chimera->print(outMPI, outAccMPI); + chimera->print(outMPI, outAccMPI); } } delete candidateSeq; diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 78c9f9e..020b8a6 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -13,7 +13,7 @@ #include "blastdb.hpp" //*************************************************************************************************************** -ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div, +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { try { fastafile = file; @@ -33,6 +33,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num increment = inc; numWanted = numw; realign = r; + trimChimera = trim; decalc = new DeCalculator(); @@ -44,7 +45,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num } } //*************************************************************************************************************** -ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, +ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() { try { fastafile = file; templateSeqs = readSeqs(fastafile); @@ -65,6 +66,7 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, numWanted = numw; realign = r; includeAbunds = abunds; + trimChimera = trim; //read name file and create nameMapRank readNameFile(name); @@ -414,8 +416,11 @@ void ChimeraSlayer::printHeader(ostream& out) { out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n"; } //*************************************************************************************************************** -int ChimeraSlayer::print(ostream& out, ostream& outAcc) { +Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) { try { + Sequence* trim = NULL; + if (trimChimera) { trim = new Sequence(trimQuery->getName(), trimQuery->getAligned()); } + if (chimeraFlags == "yes") { string chimeraFlag = "no"; if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR) @@ -427,6 +432,21 @@ int ChimeraSlayer::print(ostream& out, ostream& outAcc) { if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) { m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine(); outAcc << querySeq->getName() << endl; + + if (trimChimera) { + int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart]; + int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart]; + + string newAligned = trim->getAligned(); + + if (lengthLeft > lengthRight) { //trim right + for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //trim left + for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; } + } + trim->setAligned(newAligned); + } + } } @@ -434,7 +454,7 @@ int ChimeraSlayer::print(ostream& out, ostream& outAcc) { out << endl; }else { out << querySeq->getName() << "\tno" << endl; } - return 0; + return trim; } catch(exception& e) { @@ -444,13 +464,16 @@ int ChimeraSlayer::print(ostream& out, ostream& outAcc) { } #ifdef USE_MPI //*************************************************************************************************************** -int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { +Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { try { MPI_Status status; bool results = false; string outAccString = ""; string outputString = ""; + Sequence* trim = NULL; + if (trimChimera) { trim = new Sequence(trimQuery->getName(), trimQuery->getAligned()); } + if (chimeraFlags == "yes") { string chimeraFlag = "no"; if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR) @@ -471,6 +494,19 @@ int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status); delete buf2; + + if (trimChimera) { + int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart]; + int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart]; + + string newAligned = trim->getAligned(); + if (lengthLeft > lengthRight) { //trim right + for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; } + }else { //trim left + for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; } + } + trim->setAligned(newAligned); + } } } @@ -498,7 +534,7 @@ int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { } - return results; + return trim; } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "print"); @@ -510,6 +546,8 @@ int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) { //*************************************************************************************************************** int ChimeraSlayer::getChimeras(Sequence* query) { try { + trimQuery = query; + chimeraFlags = "no"; //filter query diff --git a/chimeraslayer.h b/chimeraslayer.h index fbe880f..1e03d92 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -25,22 +25,23 @@ class ChimeraSlayer : public Chimera { public: - ChimeraSlayer(string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); - ChimeraSlayer(string, string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); + ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); + ChimeraSlayer(string, string, bool, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool); ~ChimeraSlayer(); int getChimeras(Sequence*); - int print(ostream&, ostream&); + Sequence* print(ostream&, ostream&); void printHeader(ostream&); int doPrep(); #ifdef USE_MPI - int print(MPI_File&, MPI_File&); + Sequence* print(MPI_File&, MPI_File&); #endif private: Sequence* querySeq; + Sequence* trimQuery; DeCalculator* decalc; map spotMap; Database* databaseRight; @@ -49,7 +50,7 @@ class ChimeraSlayer : public Chimera { vector chimeraResults; string chimeraFlags, searchMethod, fastafile, includeAbunds; - bool realign; + bool realign, trimChimera; int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment; float divR; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index b94d445..39483b1 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -14,7 +14,7 @@ //********************************************************************************************************************** vector ChimeraSlayerCommand::getValidParameters(){ try { - string AlignArray[] = {"fasta", "processors", "name","window", "include","template","numwanted", "ksize", "match","mismatch", + string AlignArray[] = {"fasta", "processors","trim", "name","window", "include","template","numwanted", "ksize", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" }; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); return myArray; @@ -30,6 +30,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(){ vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand"); @@ -69,7 +70,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { else { //valid paramters for this command - string Array[] = {"fasta", "processors","name", "include","window", "template","numwanted", "ksize", "match","mismatch", + string Array[] = {"fasta", "processors","name", "include","trim", "window", "template","numwanted", "ksize", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" }; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); @@ -87,6 +88,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -267,6 +269,9 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { temp = validParameter.validFile(parameters, "realign", false); if (temp == "not found") { temp = "f"; } realign = m->isTrue(temp); + temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found") { temp = "f"; } + trim = m->isTrue(temp); + search = validParameter.validFile(parameters, "search", false); if (search == "not found") { search = "distance"; } temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "100"; } @@ -293,7 +298,7 @@ void ChimeraSlayerCommand::help(){ m->mothurOut("The chimera.slayer command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n"); m->mothurOut("This command was modeled after the chimeraSlayer written by the Broad Institute.\n"); - m->mothurOut("The chimera.slayer command parameters are fasta, name, template, processors, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign, + m->mothurOut("The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment and numwanted.\n"); //realign, m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n"); m->mothurOut("The name parameter allows you to provide a name file, if you are using template=self. \n"); m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"); @@ -302,6 +307,7 @@ void ChimeraSlayerCommand::help(){ #ifdef USE_MPI m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n"); #endif + m->mothurOut("The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest peice, default=F. \n"); m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=50. \n"); m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=5.\n"); m->mothurOut("The numwanted parameter allows you to specify how many sequences you would each query sequence compared with, default=15.\n"); @@ -346,10 +352,10 @@ int ChimeraSlayerCommand::execute(){ int start = time(NULL); if (templatefile != "self") { //you want to run slayer with a refernce template - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); }else { if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); }else { m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -371,13 +377,14 @@ int ChimeraSlayerCommand::execute(){ string nameFile = filenames["name"][0]; fastaFileNames[s] = filenames["fasta"][0]; - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); } } if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimeras"; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera"; string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos"; + string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta"; if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } @@ -389,7 +396,7 @@ int ChimeraSlayerCommand::execute(){ templateSeqsLength = chimera->getLength(); #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; + int pid, numSeqsPerProcessor; int tag = 2001; vector MPIPos; @@ -400,6 +407,7 @@ int ChimeraSlayerCommand::execute(){ MPI_File inMPI; MPI_File outMPI; MPI_File outMPIAccnos; + MPI_File outMPIFasta; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; @@ -409,6 +417,9 @@ int ChimeraSlayerCommand::execute(){ char outAccnosFilename[1024]; strcpy(outAccnosFilename, accnosFileName.c_str()); + + char outFastaFilename[1024]; + strcpy(outFastaFilename, trimFastaFileName.c_str()); char inFileName[1024]; strcpy(inFileName, fastaFileNames[s].c_str()); @@ -416,8 +427,9 @@ int ChimeraSlayerCommand::execute(){ MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); + if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); } - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } if (pid == 0) { //you are the root process m->mothurOutEndLine(); @@ -448,9 +460,9 @@ int ChimeraSlayerCommand::execute(){ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } //do your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); delete chimera; return 0; } }else{ //you are a child process MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); @@ -463,15 +475,16 @@ int ChimeraSlayerCommand::execute(){ if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } //do your part - driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); + driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, outMPIFasta, MPIPos); - if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } + if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; } } //close files MPI_File_close(&inMPI); MPI_File_close(&outMPI); - MPI_File_close(&outMPIAccnos); + MPI_File_close(&outMPIAccnos); + if (trim) { MPI_File_close(&outMPIFasta); } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else @@ -491,17 +504,18 @@ int ChimeraSlayerCommand::execute(){ //break up file #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ - numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); + numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); - if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } + if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } }else{ processIDS.resize(0); - numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); + numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str()); + if (trim) { rename((trimFastaFileName + toString(processIDS[0]) + ".temp").c_str(), trimFastaFileName.c_str()); } //append output files for(int i=1;icontrol_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } + if (trim) { + for(int i=1;iappendFiles((trimFastaFileName + toString(processIDS[i]) + ".temp"), trimFastaFileName); + remove((trimFastaFileName + toString(processIDS[i]) + ".temp").c_str()); + } + } + + if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } } #else - numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); + numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); - if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } + if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } #endif @@ -538,6 +559,7 @@ int ChimeraSlayerCommand::execute(){ outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); + if (trim) { outputNames.push_back(trimFastaFileName); outputTypes["fasta"].push_back(trimFastaFileName); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } @@ -557,7 +579,7 @@ int ChimeraSlayerCommand::execute(){ } //********************************************************************************************************************** -int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){ +int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos, string fasta){ try { ofstream out; m->openOutputFile(outputFName, out); @@ -565,6 +587,9 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f ofstream out2; m->openOutputFile(accnos, out2); + ofstream out3; + if (trim) { m->openOutputFile(fasta, out3); } + ifstream inFASTA; m->openInputFile(filename, inFASTA); @@ -575,7 +600,7 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f while (!done) { - if (m->control_pressed) { return 1; } + if (m->control_pressed) { out.close(); out2.close(); if (trim) { out3.close(); } inFASTA.close(); return 1; } Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA); @@ -590,7 +615,9 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f if (m->control_pressed) { delete candidateSeq; return 1; } //print results - chimera->print(out, out2); + Sequence* trimmed = chimera->print(out, out2); + + if (trim) { trimmed->printSequence(out3); delete trimmed; } } count++; } @@ -611,6 +638,7 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f out.close(); out2.close(); + if (trim) { out3.close(); } inFASTA.close(); return count; @@ -622,7 +650,7 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f } //********************************************************************************************************************** #ifdef USE_MPI -int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector& MPIPos){ +int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, MPI_File& outFastaMPI, vector& MPIPos){ try { MPI_Status status; int pid; @@ -656,9 +684,22 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil chimera->getChimeras(candidateSeq); if (m->control_pressed) { delete candidateSeq; return 1; } - //cout << "about to print" << endl; + //print results - bool isChimeric = chimera->print(outMPI, outAccMPI); + Sequence* trimmed = chimera->print(outMPI, outAccMPI); + + if (trim) { + string outputString = ">" + trimmed->getName() + "\n" + trimmed->getAligned() + "\n"; + + //write to accnos file + int length = outputString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outputString.c_str(), length); + + MPI_File_write_shared(outFastaMPI, buf2, length, MPI_CHAR, &status); + delete buf2; + } + } } delete candidateSeq; @@ -681,7 +722,7 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil /**************************************************************************************************/ -int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos) { +int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta) { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 0; @@ -695,7 +736,7 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp"); + num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp", fasta + toString(getpid()) + ".temp"); //pass numSeqs to parent ofstream out; diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index 9567fca..0de118f 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -41,14 +41,14 @@ private: vector processIDS; //processid vector lines; - int driver(linePair*, string, string, string); - int createProcesses(string, string, string); + int driver(linePair*, string, string, string, string); + int createProcesses(string, string, string, string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); #endif - bool abort, realign; + bool abort, realign, trim; string fastafile, templatefile, outputDir, search, namefile, includeAbunds; int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength; float divR; diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index bbdf811..0a1ef88 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -461,7 +461,7 @@ int ClassifySeqsCommand::execute(){ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; + int pid, numSeqsPerProcessor; int tag = 2001; vector MPIPos; diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index 0aceb8b..c36e280 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -380,7 +380,6 @@ int ClusterSplitCommand::execute(){ //for each file group figure out which process will complete it //want to divide the load intelligently so the big files are spread between processes - int count = 1; for (int i = 0; i < distName.size(); i++) { int processToAssign = (i+1) % processors; if (processToAssign == 0) { processToAssign = processors; } diff --git a/engine.cpp b/engine.cpp index 2b1c802..ad7a7ad 100644 --- a/engine.cpp +++ b/engine.cpp @@ -531,7 +531,6 @@ bool ScriptEngine::getInput(){ mout->executing = true; #ifdef USE_MPI int pid, numProcesses; - MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); MPI_Comm_size(MPI_COMM_WORLD, &numProcesses); diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index b9436f8..f5c7bd6 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -329,7 +329,7 @@ int FilterSeqsCommand::filterSequences() { string filteredFasta = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])) + "filter.fasta"; #ifdef USE_MPI - int pid, start, end, numSeqsPerProcessor, num; + int pid, numSeqsPerProcessor, num; int tag = 2001; vectorMPIPos; @@ -338,7 +338,6 @@ int FilterSeqsCommand::filterSequences() { MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_File outMPI; - MPI_File tempMPI; MPI_File inMPI; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; diff --git a/pairwiseseqscommand.cpp b/pairwiseseqscommand.cpp index f261dd9..7a1ba4d 100644 --- a/pairwiseseqscommand.cpp +++ b/pairwiseseqscommand.cpp @@ -770,7 +770,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); - int startTime = time(NULL); + string outputString = ""; size = 0; @@ -846,7 +846,7 @@ int PairwiseSeqsCommand::driverMPI(int startLine, int endLine, string file, unsi MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); - int startTime = time(NULL); + string outputString = ""; size = 0; diff --git a/pintail.cpp b/pintail.cpp index 2fa1ca7..b04efea 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -249,8 +249,9 @@ int Pintail::doPrep() { } } //*************************************************************************************************************** -int Pintail::print(ostream& out, ostream& outAcc) { +Sequence* Pintail::print(ostream& out, ostream& outAcc) { try { + int index = ceil(deviation); //is your DE value higher than the 95% @@ -279,7 +280,7 @@ int Pintail::print(ostream& out, ostream& outAcc) { for (int m = 0; m < expectedDistance.size(); m++) { out << expectedDistance[m] << '\t'; } out << endl; - return 0; + return NULL; } catch(exception& e) { @@ -289,9 +290,9 @@ int Pintail::print(ostream& out, ostream& outAcc) { } #ifdef USE_MPI //*************************************************************************************************************** -int Pintail::print(MPI_File& out, MPI_File& outAcc) { +Sequence* Pintail::print(MPI_File& out, MPI_File& outAcc) { try { - bool results = false; + string outputString = ""; int index = ceil(deviation); @@ -319,7 +320,7 @@ int Pintail::print(MPI_File& out, MPI_File& outAcc) { MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); delete buf; - results = true; + return NULL; } outputString += "Observed\t"; @@ -339,7 +340,7 @@ int Pintail::print(MPI_File& out, MPI_File& outAcc) { MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); delete buf2; - return results; + return NULL; } catch(exception& e) { m->errorOut(e, "Pintail", "print"); diff --git a/pintail.h b/pintail.h index 3da5245..6bbc1a3 100644 --- a/pintail.h +++ b/pintail.h @@ -28,13 +28,13 @@ class Pintail : public Chimera { ~Pintail(); int getChimeras(Sequence*); - int print(ostream&, ostream&); + Sequence* print(ostream&, ostream&); void setCons(string c) { consfile = c; } void setQuantiles(string q) { quanfile = q; } #ifdef USE_MPI - int print(MPI_File&, MPI_File&); + Sequence* print(MPI_File&, MPI_File&); #endif private: diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index bdc6658..65b5956 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -267,7 +267,7 @@ int ScreenSeqsCommand::execute(){ int start = time(NULL); #ifdef USE_MPI - int pid, end, numSeqsPerProcessor; + int pid, numSeqsPerProcessor; int tag = 2001; vector MPIPos; diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index b80a3ab..79ee042 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -406,7 +406,7 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { //flip it so you can print it int count = 0; for (int r=0; rGroups.size(); r++) { - for (int l = r+1; l < globaldata->Groups.size(); l++) { + for (int l = 0; l < r; l++) { dists[r][l] = utreeScores[count][0]; dists[l][r] = utreeScores[count][0]; count++; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index b2fd628..bee10cb 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -527,7 +527,7 @@ void UnifracWeightedCommand::createPhylipFile() { //flip it so you can print it for (int r=0; rGroups.size(); r++) { - for (int l = r+1; l < globaldata->Groups.size(); l++) { + for (int l = 0; l < r; l++) { dists[r][l] = utreeScores[count]; dists[l][r] = utreeScores[count]; count++; -- 2.39.2