From 814675616c10a40911886afb9dfe216f0245e3a2 Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 14 Jun 2010 14:30:17 +0000 Subject: [PATCH] added fasta method to cluster.split --- clustersplitcommand.cpp | 10 +++++----- splitmatrix.cpp | 34 +++++++++++++++++++++++----------- splitmatrix.h | 4 ++-- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index f6bf105..8d21d1a 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -113,7 +113,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { fastafile = validParameter.validFile(parameters, "fasta", true); if (fastafile == "not open") { abort = true; } else if (fastafile == "not found") { fastafile = ""; } - else { splitmethod = "fasta"; } + else { distfile = fastafile; splitmethod = "fasta"; } taxFile = validParameter.validFile(parameters, "taxonomy", true); if (taxFile == "not open") { abort = true; } @@ -271,10 +271,10 @@ int ClusterSplitCommand::execute(){ //split matrix into non-overlapping groups SplitMatrix* split; - if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); } - else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); } - else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors); } - else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; } + if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); } + else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); } + else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors, outputDir); } + else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; } split->split(); diff --git a/splitmatrix.cpp b/splitmatrix.cpp index 9e53c51..6e40e97 100644 --- a/splitmatrix.cpp +++ b/splitmatrix.cpp @@ -24,7 +24,7 @@ SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, stri } /***********************************************************************/ -SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p){ +SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string t, int p, string output){ m = MothurOut::getInstance(); fastafile = ffile; namefile = name; @@ -32,6 +32,7 @@ SplitMatrix::SplitMatrix(string ffile, string name, string tax, float c, string cutoff = c; method = t; processors = p; + outputDir = output; } /***********************************************************************/ @@ -163,7 +164,7 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG query.printSequence(outFile); outFile.close(); - copyGroups.erase(it); + copyGroups.erase(query.getName()); } } } @@ -180,7 +181,7 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG //process each distance file for (int i = 0; i < numGroups; i++) { - string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors); + string options = "fasta=" + (fastafile + "." + toString(i) + ".temp") + ", processors=" + toString(processors) + ", cutoff=" + toString(cutoff); Command* command = new DistanceCommand(options); command->execute(); @@ -219,30 +220,41 @@ int SplitMatrix::createDistanceFilesFromTax(map& seqGroup, int numG } bigNameFile.close(); - remainingNames.close(); - if (!wroteExtra) { - remove(singleton.c_str()); - singleton = "none"; - } - for(int i=0;i temp; temp[tempDistFile] = tempNameFile; dists.push_back(temp); + }else { + ifstream in; + openInputFile(tempNameFile, in); + + while(!in.eof()) { + in >> name >> nameList; gobble(in); + wroteExtra = true; + remainingNames << name << '\t' << nameList << endl; + } + in.close(); + remove(tempNameFile.c_str()); } } fileHandle.close(); } + remainingNames.close(); + if (!wroteExtra) { + remove(singleton.c_str()); + singleton = "none"; + } + if (m->control_pressed) { for (int i = 0; i < dists.size(); i++) { remove((dists[i].begin()->first).c_str()); remove((dists[i].begin()->second).c_str()); } dists.clear(); } return 0; diff --git a/splitmatrix.h b/splitmatrix.h index b98bdfb..acaa8c5 100644 --- a/splitmatrix.h +++ b/splitmatrix.h @@ -20,7 +20,7 @@ class SplitMatrix { public: SplitMatrix(string, string, string, float, string, bool); //column formatted distance file, namesfile, cutoff, method, large - SplitMatrix(string, string, string, float, string, int); //fastafile, namefile, taxFile, cutoff, method, processors + SplitMatrix(string, string, string, float, string, int, string); //fastafile, namefile, taxFile, cutoff, method, processors, outputDir ~SplitMatrix(); int split(); @@ -30,7 +30,7 @@ class SplitMatrix { private: MothurOut* m; - string distFile, namefile, singleton, method, taxFile, fastafile; + string distFile, namefile, singleton, method, taxFile, fastafile, outputDir; vector< map< string, string> > dists; float cutoff; bool large; -- 2.39.2