From 1f0e54b53b714781f3f2fee7d01177fade98a625 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 29 Jan 2013 11:22:12 -0500 Subject: [PATCH] added get.dists and remove.dists commands. fixed bug in trim.seqs windows paralellization that caused blank groups in group file. added list and shared file to get.otulabels and remove.otulabels. added list file to list.otulabels. --- Mothur.xcodeproj/project.pbxproj | 12 + catchallcommand.cpp | 10 +- commandfactory.cpp | 12 +- getdistscommand.cpp | 442 ++++++++++++++++++++++++++++++ getdistscommand.h | 48 ++++ getotulabelscommand.cpp | 348 +++++++++++++++++++++++- getotulabelscommand.h | 11 +- listotulabelscommand.cpp | 143 +++++++++- listotulabelscommand.h | 4 +- removedistscommand.cpp | 450 +++++++++++++++++++++++++++++++ removedistscommand.h | 48 ++++ removeotulabelscommand.cpp | 348 +++++++++++++++++++++++- removeotulabelscommand.h | 11 +- trimseqscommand.h | 2 +- 14 files changed, 1861 insertions(+), 28 deletions(-) create mode 100644 getdistscommand.cpp create mode 100644 getdistscommand.h create mode 100644 removedistscommand.cpp create mode 100644 removedistscommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 6fed2b9..15a5cff 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -17,6 +17,7 @@ A70056E6156A93D000924A2D /* getotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056E5156A93D000924A2D /* getotulabelscommand.cpp */; }; A70056EB156AB6E500924A2D /* removeotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */; }; A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; }; + A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7128B1C16B7002600723BE4 /* getdistscommand.cpp */; }; A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; }; A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; }; A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; }; @@ -64,6 +65,7 @@ A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; }; A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; }; A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; }; + A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0231416B8244B006BA09E /* removedistscommand.cpp */; }; A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; }; A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; }; A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; }; @@ -402,6 +404,8 @@ A70056E9156AB6D400924A2D /* removeotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removeotulabelscommand.h; sourceTree = ""; }; A70056EA156AB6E500924A2D /* removeotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removeotulabelscommand.cpp; sourceTree = ""; }; A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = ""; }; + A7128B1A16B7001200723BE4 /* getdistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getdistscommand.h; sourceTree = ""; }; + A7128B1C16B7002600723BE4 /* getdistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getdistscommand.cpp; sourceTree = ""; }; A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = ""; }; A713EBAB12DC7613000092AC /* readphylipvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readphylipvector.cpp; sourceTree = ""; }; A713EBEB12DC7C5E000092AC /* nmdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nmdscommand.h; sourceTree = ""; }; @@ -499,6 +503,8 @@ A7A61F2B130062E000E05B6B /* amovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = amovacommand.h; sourceTree = ""; }; A7A61F2C130062E000E05B6B /* amovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = amovacommand.cpp; sourceTree = ""; }; A7AACFBA132FE008003D6C4D /* currentfile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = currentfile.h; sourceTree = ""; }; + A7B0231416B8244B006BA09E /* removedistscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removedistscommand.cpp; sourceTree = ""; }; + A7B0231716B8245D006BA09E /* removedistscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removedistscommand.h; sourceTree = ""; }; A7BF221214587886000AD524 /* myPerseus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myPerseus.cpp; sourceTree = ""; }; A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = ""; }; A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = ""; }; @@ -1358,6 +1364,8 @@ 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */, A7FE7C3E1330EA1000F7B327 /* getcurrentcommand.h */, A7FE7C3F1330EA1000F7B327 /* getcurrentcommand.cpp */, + A7128B1A16B7001200723BE4 /* getdistscommand.h */, + A7128B1C16B7002600723BE4 /* getdistscommand.cpp */, A7E9B6F312D37EC400DA6239 /* getgroupcommand.h */, A7E9B6F212D37EC400DA6239 /* getgroupcommand.cpp */, A7E9B6F512D37EC400DA6239 /* getgroupscommand.h */, @@ -1466,6 +1474,8 @@ A7E9B7AB12D37EC400DA6239 /* rarefactcommand.cpp */, A7E9B7AF12D37EC400DA6239 /* rarefactsharedcommand.h */, A7E9B7AE12D37EC400DA6239 /* rarefactsharedcommand.cpp */, + A7B0231716B8245D006BA09E /* removedistscommand.h */, + A7B0231416B8244B006BA09E /* removedistscommand.cpp */, A7E9B7C412D37EC400DA6239 /* removegroupscommand.h */, A7E9B7C312D37EC400DA6239 /* removegroupscommand.cpp */, A7E9B7C612D37EC400DA6239 /* removelineagecommand.h */, @@ -2316,6 +2326,8 @@ A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */, A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */, A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */, + A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */, + A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/catchallcommand.cpp b/catchallcommand.cpp index 584f798..a914f73 100644 --- a/catchallcommand.cpp +++ b/catchallcommand.cpp @@ -80,6 +80,7 @@ CatchAllCommand::CatchAllCommand(){ outputTypes["models"] = tempOutNames; outputTypes["bubble"] = tempOutNames; outputTypes["summary"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "CatchAllCommand", "CatchAllCommand"); @@ -118,6 +119,7 @@ CatchAllCommand::CatchAllCommand(string option) { outputTypes["models"] = tempOutNames; outputTypes["bubble"] = tempOutNames; outputTypes["summary"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter @@ -237,7 +239,7 @@ int CatchAllCommand::execute() { catchAllTest = m->getFullPathName(catchAllTest); #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - catchAllCommandExe += "mono " + catchAllTest + " "; + catchAllCommandExe += "mono \"" + catchAllTest + "\" "; #else catchAllCommandExe += "\"" + catchAllTest + "\" "; #endif @@ -291,7 +293,7 @@ int CatchAllCommand::execute() { //create system command string catchAllCommand = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1"; + catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + + "\" 1"; #else //removes extra '\\' catchall doesnt like that vector tempNames; @@ -354,7 +356,7 @@ int CatchAllCommand::execute() { //create system command string catchAllCommand = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1"; + catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + + "\" 1"; #else //removes extra '\\' catchall doesnt like that vector tempNames; @@ -439,7 +441,7 @@ int CatchAllCommand::execute() { //create system command string catchAllCommand = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - catchAllCommand += catchAllCommandExe + filename + " " + outputPath + " 1"; + catchAllCommand += catchAllCommandExe + "\"" + filename + "\" \"" + outputPath + + "\" 1"; #else //removes extra '\\' catchall doesnt like that vector tempNames; diff --git a/commandfactory.cpp b/commandfactory.cpp index ce51d72..42e99d1 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -138,6 +138,8 @@ #include "classifysharedcommand.h" #include "filtersharedcommand.h" #include "primerdesigncommand.h" +#include "getdistscommand.h" +#include "removedistscommand.h" /*******************************************************/ @@ -298,7 +300,9 @@ CommandFactory::CommandFactory(){ commands["quit"] = "MPIEnabled"; commands["classify.shared"] = "classify.shared"; commands["filter.shared"] = "filter.shared"; - commands["primer.design"] = "primer.design"; + commands["primer.design"] = "primer.design"; + commands["get.dists"] = "get.dists"; + commands["remove.dists"] = "remove.dists"; } @@ -516,6 +520,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "classify.shared") { command = new ClassifySharedCommand(optionString); } else if(commandName == "filter.shared") { command = new FilterSharedCommand(optionString); } else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); } + else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); } + else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -674,6 +680,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "classify.shared") { pipecommand = new ClassifySharedCommand(optionString); } else if(commandName == "filter.shared") { pipecommand = new FilterSharedCommand(optionString); } else if(commandName == "primer.design") { pipecommand = new PrimerDesignCommand(optionString); } + else if(commandName == "get.dists") { pipecommand = new GetDistsCommand(optionString); } + else if(commandName == "remove.dists") { pipecommand = new RemoveDistsCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -818,6 +826,8 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "classify.shared") { shellcommand = new ClassifySharedCommand(); } else if(commandName == "filter.shared") { shellcommand = new FilterSharedCommand(); } else if(commandName == "primer.design") { shellcommand = new PrimerDesignCommand(); } + else if(commandName == "get.dists") { shellcommand = new GetDistsCommand(); } + else if(commandName == "remove.dists") { shellcommand = new RemoveDistsCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/getdistscommand.cpp b/getdistscommand.cpp new file mode 100644 index 0000000..77cfba4 --- /dev/null +++ b/getdistscommand.cpp @@ -0,0 +1,442 @@ +// +// getdistscommand.cpp +// Mothur +// +// Created by Sarah Westcott on 1/28/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#include "getdistscommand.h" + +//********************************************************************************************************************** +vector GetDistsCommand::setParameters(){ + try { + CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip); + CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn); + CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string GetDistsCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The get.dists command selects distances from a phylip or column file related to groups or sequences listed in an accnos file.\n"; + helpString += "The get.dists command parameters are accnos, phylip and column.\n"; + helpString += "The get.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n"; + helpString += "Example get.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n"; + helpString += "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string GetDistsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "phylip") { pattern = "[filename],pick,[extension]"; } + else if (type == "column") { pattern = "[filename],pick,[extension]"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** +GetDistsCommand::GetDistsCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["phylip"] = tempOutNames; + outputTypes["column"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "GetDistsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +GetDistsCommand::GetDistsCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["column"] = tempOutNames; + outputTypes["phylip"] = tempOutNames; + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("phylip"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip"] = inputDir + it->second; } + } + + it = parameters.find("column"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["column"] = inputDir + it->second; } + } + + it = parameters.find("accnos"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["accnos"] = inputDir + it->second; } + } + } + + + //check for required parameters + accnosfile = validParameter.validFile(parameters, "accnos", true); + if (accnosfile == "not open") { abort = true; } + else if (accnosfile == "not found") { + accnosfile = m->getAccnosFile(); + if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); + abort = true; + } + }else { m->setAccnosFile(accnosfile); } + + phylipfile = validParameter.validFile(parameters, "phylip", true); + if (phylipfile == "not open") { phylipfile = ""; abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; } + else { m->setPhylipFile(phylipfile); } + + columnfile = validParameter.validFile(parameters, "column", true); + if (columnfile == "not open") { columnfile = ""; abort = true; } + else if (columnfile == "not found") { columnfile = ""; } + else { m->setColumnFile(columnfile); } + + if ((phylipfile == "") && (columnfile == "")) { + //is there are current file available for either of these? + //give priority to column, then phylip + columnfile = m->getColumnFile(); + if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); } + else { + phylipfile = m->getPhylipFile(); + if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); + abort = true; + } + } + } + } + + } + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "GetDistsCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +int GetDistsCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + //get names you want to keep + names = m->readAccnos(accnosfile); + + if (m->control_pressed) { return 0; } + + //read through the correct file and output lines you want to keep + if (phylipfile != "") { readPhylip(); } + if (columnfile != "") { readColumn(); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + + if (outputNames.size() != 0) { + m->mothurOutEndLine(); + m->mothurOut("Output File names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("phylip"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); } + } + + itTypes = outputTypes.find("column"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); } + } + } + + return 0; + } + + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** +int GetDistsCommand::readPhylip(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(phylipfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile)); + variables["[extension]"] = m->getExtension(phylipfile); + string outputFileName = getOutputFileName("phylip", variables); + + ifstream in; + m->openInputFile(phylipfile, in); + + float distance; + int square, nseqs; + string name; + unsigned int row; + set rows; //converts names in names to a index + row = 0; + + string numTest; + in >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + if (names.count(name) != 0) { rows.insert(row); } + row++; + + //is the matrix square? + char d; + while((d=in.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + in.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + //map name to row/column + if(square == 0){ + for(int i=1;i> name; + if (names.count(name) != 0) { rows.insert(row); } + row++; + + for(int j=0;jcontrol_pressed) { in.close(); return 0; } + in >> distance; + } + } + } + else{ + for(int i=1;i> name; + if (names.count(name) != 0) { rows.insert(row); } + row++; + for(int j=0;jcontrol_pressed) { in.close(); return 0; } + in >> distance; + } + } + } + in.close(); + + if (m->control_pressed) { return 0; } + + //read through file only printing rows and columns of seqs in names + ifstream inPhylip; + m->openInputFile(phylipfile, inPhylip); + + inPhylip >> numTest; + + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName); + out << names.size() << endl; + + unsigned int count = 0; + if(square == 0){ + for(int i=0;i> name; + bool ignoreRow = false; + + if (names.count(name) == 0) { ignoreRow = true; } + else{ out << name << '\t'; count++; } + + for(int j=0;jcontrol_pressed) { inPhylip.close(); out.close(); return 0; } + inPhylip >> distance; + if (!ignoreRow) { + //is this a column we want + if(rows.count(j) != 0) { out << distance << '\t'; } + } + } + if (!ignoreRow) { out << endl; } + } + } + else{ + for(int i=0;i> name; + + bool ignoreRow = false; + + if (names.count(name) == 0) { ignoreRow = true; } + else{ out << name << '\t'; count++; } + + for(int j=0;jcontrol_pressed) { inPhylip.close(); out.close(); return 0; } + inPhylip >> distance; + if (!ignoreRow) { + //is this a column we want + if(rows.count(j) != 0) { out << distance << '\t'; } + } + } + if (!ignoreRow) { out << endl; } + } + } + inPhylip.close(); + out.close(); + + if (count == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); } + else if (count != names.size()) { + m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine(); + //rewrite with new number + m->renameFile(outputFileName, outputFileName+".temp"); + ofstream out2; + m->openOutputFile(outputFileName, out2); + out2 << count << endl; + + ifstream in3; + m->openInputFile(outputFileName+".temp", in3); + in3 >> nseqs; m->gobble(in3); + char buffer[4096]; + while (!in3.eof()) { + in3.read(buffer, 4096); + out2.write(buffer, in3.gcount()); + } + in3.close(); + out2.close(); + m->mothurRemove(outputFileName+".temp"); + } + + m->mothurOut("Selected " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "readPhylip"); + exit(1); + } +} +//********************************************************************************************************************** +int GetDistsCommand::readColumn(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(columnfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile)); + variables["[extension]"] = m->getExtension(columnfile); + string outputFileName = getOutputFileName("column", variables); + outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); + + ofstream out; + m->openOutputFile(outputFileName, out); + + ifstream in; + m->openInputFile(columnfile, in); + + set foundNames; + string firstName, secondName; + float distance; + while (!in.eof()) { + + if (m->control_pressed) { out.close(); in.close(); return 0; } + + in >> firstName >> secondName >> distance; m->gobble(in); + + //are both names in the accnos file + if ((names.count(firstName) != 0) && (names.count(secondName) != 0)) { + out << firstName << '\t' << secondName << '\t' << distance << endl; + foundNames.insert(firstName); + foundNames.insert(secondName); + } + } + in.close(); + out.close(); + + if (foundNames.size() == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); } + else if (foundNames.size() != names.size()) { + m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(foundNames.size()) + " of them in the column file."); m->mothurOutEndLine(); + } + + m->mothurOut("Selected " + toString(foundNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GetDistsCommand", "readColumn"); + exit(1); + } +} +//********************************************************************************************************************** + + diff --git a/getdistscommand.h b/getdistscommand.h new file mode 100644 index 0000000..8767067 --- /dev/null +++ b/getdistscommand.h @@ -0,0 +1,48 @@ +// +// getdistscommand.h +// Mothur +// +// Created by Sarah Westcott on 1/28/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#ifndef Mothur_getdistscommand_h +#define Mothur_getdistscommand_h + +#include "command.hpp" + +class GetDistsCommand : public Command { + +public: + + GetDistsCommand(string); + GetDistsCommand(); + ~GetDistsCommand(){} + + vector setParameters(); + string getCommandName() { return "get.dists"; } + string getCommandCategory() { return "General"; } + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "http://www.mothur.org/wiki/Get.dists"; } + string getDescription() { return "gets distances from a phylip or column file related to groups or sequences listed in an accnos file"; } + + + int execute(); + void help() { m->mothurOut(getHelpString()); } + + +private: + set names; + string accnosfile, phylipfile, columnfile, outputDir; + bool abort; + vector outputNames; + + int readPhylip(); + int readColumn(); + +}; + + +#endif diff --git a/getotulabelscommand.cpp b/getotulabelscommand.cpp index 0f042c0..b6253b0 100644 --- a/getotulabelscommand.cpp +++ b/getotulabelscommand.cpp @@ -13,8 +13,11 @@ vector GetOtuLabelsCommand::setParameters(){ try { CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true, true); parameters.push_back(paccnos); CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy); + CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared); CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none","otucorr",false,false, true); parameters.push_back(potucorr); CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none","corraxes",false,false, true); parameters.push_back(pcorraxes); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -31,11 +34,12 @@ vector GetOtuLabelsCommand::setParameters(){ string GetOtuLabelsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n"; - helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n"; + helpString += "The get.otulabels command can be used to select specific otus with the output from classify.otu, otu.association, or corr.axes commands. It can also be used to select a set of otus from a shared or list file.\n"; + helpString += "The get.otulabels parameters are: constaxonomy, otucorr, corraxes, shared, list, label and accnos.\n"; helpString += "The constaxonomy parameter is used to input the results of the classify.otu command.\n"; helpString += "The otucorr parameter is used to input the results of the otu.association command.\n"; helpString += "The corraxes parameter is used to input the results of the corr.axes command.\n"; + helpString += "The label parameter is used to analyze specific labels in your input. \n"; helpString += "The get.otulabels commmand should be in the following format: \n"; helpString += "get.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n"; return helpString; @@ -50,9 +54,11 @@ string GetOtuLabelsCommand::getOutputPattern(string type) { try { string pattern = ""; - if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; } - else if (type == "otucorr") { pattern = "[filename],pick,[extension]"; } + if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; } + else if (type == "otucorr") { pattern = "[filename],pick,[extension]"; } else if (type == "corraxes") { pattern = "[filename],pick,[extension]"; } + else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } + else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -72,6 +78,8 @@ GetOtuLabelsCommand::GetOtuLabelsCommand(){ outputTypes["constaxonomy"] = tempOutNames; outputTypes["otucorr"] = tempOutNames; outputTypes["corraxes"] = tempOutNames; + outputTypes["shared"] = tempOutNames; + outputTypes["list"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "GetOtuLabelsCommand", "GetOtuLabelsCommand"); @@ -141,12 +149,30 @@ GetOtuLabelsCommand::GetOtuLabelsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["otucorr"] = inputDir + it->second; } } + + it = parameters.find("list"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["list"] = inputDir + it->second; } + } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } } vector tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["otucorr"] = tempOutNames; outputTypes["corraxes"] = tempOutNames; + outputTypes["shared"] = tempOutNames; + outputTypes["list"] = tempOutNames; //check for parameters accnosfile = validParameter.validFile(parameters, "accnos", true); @@ -171,12 +197,26 @@ GetOtuLabelsCommand::GetOtuLabelsCommand(string option) { otucorrfile = validParameter.validFile(parameters, "otucorr", true); if (otucorrfile == "not open") { otucorrfile = ""; abort = true; } else if (otucorrfile == "not found") { otucorrfile = ""; } - + + listfile = validParameter.validFile(parameters, "list", true); + if (listfile == "not open") { listfile = ""; abort = true; } + else if (listfile == "not found") { listfile = ""; } + else { m->setListFile(listfile); } + + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + else { m->setSharedFile(sharedfile); } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } - if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; } + if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "") && (sharedfile == "") && (listfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes, otucorr, shared or list."); m->mothurOutEndLine(); abort = true; } + + if ((sharedfile != "") || (listfile != "")) { + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; } + } } } @@ -201,6 +241,8 @@ int GetOtuLabelsCommand::execute(){ if (constaxonomyfile != "") { readClassifyOtu(); } if (corraxesfile != "") { readCorrAxes(); } if (otucorrfile != "") { readOtuAssociation(); } + if (listfile != "") { readList(); } + if (sharedfile != "") { readShared(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -210,6 +252,17 @@ int GetOtuLabelsCommand::execute(){ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); + string current = ""; + itTypes = outputTypes.find("list"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } + } + + itTypes = outputTypes.find("shared"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); } + } + return 0; } catch(exception& e) { @@ -383,3 +436,286 @@ int GetOtuLabelsCommand::readCorrAxes(){ } } //********************************************************************************************************************** +int GetOtuLabelsCommand::readShared(){ + try { + + getShared(); + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + + vector newLabels; + + //create new "filtered" lookup + vector newLookup; + for (int i = 0; i < lookup.size(); i++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(lookup[i]->getLabel()); + temp->setGroup(lookup[i]->getGroup()); + newLookup.push_back(temp); + } + + bool wroteSomething = false; + int numSelected = 0; + for (int i = 0; i < lookup[0]->getNumBins(); i++) { + + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; } + + //is this otu on the list + if (labels.count(m->currentBinLabels[i]) != 0) { + numSelected++; wroteSomething = true; + newLabels.push_back(m->currentBinLabels[i]); + for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup + newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup()); + } + } + } + + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[extension]"] = m->getExtension(sharedfile); + variables["[distance]"] = lookup[0]->getLabel(); + string outputFileName = getOutputFileName("shared", variables); + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); + + for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } + + m->currentBinLabels = newLabels; + + newLookup[0]->printHeaders(out); + + for (int i = 0; i < newLookup.size(); i++) { + out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t'; + newLookup[i]->print(out); + } + out.close(); + + for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } + + if (wroteSomething == false) { m->mothurOut("Your file does not contain any OTUs from the .accnos file."); m->mothurOutEndLine(); } + + m->mothurOut("Selected " + toString(numSelected) + " OTUs from your shared file."); m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetOtuLabelsCommand", "readShared"); + exit(1); + } +} +//********************************************************************************************************************** +int GetOtuLabelsCommand::readList(){ + try { + getListVector(); + + if (m->control_pressed) { delete list; return 0;} + + ListVector newList; + newList.setLabel(list->getLabel()); + int selectedCount = 0; + bool wroteSomething = false; + string snumBins = toString(list->getNumBins()); + + for (int i = 0; i < list->getNumBins(); i++) { + + if (m->control_pressed) { delete list; return 0;} + + //create a label for this otu + string otuLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { otuLabel += "0"; } + } + otuLabel += sbinNumber; + + if (labels.count(otuLabel) != 0) { + selectedCount++; + newList.push_back(list->get(i)); + } + } + + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); + variables["[extension]"] = m->getExtension(listfile); + variables["[distance]"] = list->getLabel(); + string outputFileName = getOutputFileName("list", variables); + ofstream out; + m->openOutputFile(outputFileName, out); + + delete list; + //print new listvector + if (newList.getNumBins() != 0) { + wroteSomething = true; + newList.print(out); + } + out.close(); + + if (wroteSomething == false) { m->mothurOut("Your file does not contain any OTUs from the .accnos file."); m->mothurOutEndLine(); } + outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); + + m->mothurOut("Selected " + toString(selectedCount) + " OTUs from your list file."); m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetOtuLabelsCommand", "readList"); + exit(1); + } + } +//********************************************************************************************************************** +int GetOtuLabelsCommand::getListVector(){ + try { + InputData input(listfile, "list"); + list = input.getListVector(); + string lastLabel = list->getLabel(); + + if (label == "") { label = lastLabel; return 0; } + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set labels; labels.insert(label); + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((list != NULL) && (userLabels.size() != 0)) { + if (m->control_pressed) { return 0; } + + if(labels.count(list->getLabel()) == 1){ + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + break; + } + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + break; + } + + lastLabel = list->getLabel(); + + //get next line to process + //prevent memory leak + delete list; + list = input.getListVector(); + } + + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + delete list; + list = input.getListVector(lastLabel); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetOtuLabelsCommand", "getListVector"); + exit(1); + } +} +//********************************************************************************************************************** +int GetOtuLabelsCommand::getShared(){ + try { + InputData input(sharedfile, "sharedfile"); + lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + if (label == "") { label = lastLabel; return 0; } + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set labels; labels.insert(label); + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && (userLabels.size() != 0)) { + if (m->control_pressed) { return 0; } + + if(labels.count(lookup[0]->getLabel()) == 1){ + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + break; + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + break; + } + + lastLabel = lookup[0]->getLabel(); + + //get next line to process + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(); + } + + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "GetOtuLabelsCommand", "getShared"); + exit(1); + } +} +//********************************************************************************************************************** diff --git a/getotulabelscommand.h b/getotulabelscommand.h index 8ce0300..06ad271 100644 --- a/getotulabelscommand.h +++ b/getotulabelscommand.h @@ -11,6 +11,9 @@ #include "command.hpp" +#include "inputdata.h" +#include "listvector.hpp" +#include "sharedrabundvector.h" /**************************************************************************************************/ @@ -34,13 +37,19 @@ public: private: bool abort; - string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile; + string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile, listfile, sharedfile, label; vector outputNames; set labels; + ListVector* list; + vector lookup; int readClassifyOtu(); int readOtuAssociation(); int readCorrAxes(); + int readList(); + int readShared(); + int getListVector(); + int getShared(); }; /**************************************************************************************************/ diff --git a/listotulabelscommand.cpp b/listotulabelscommand.cpp index e50196b..037c822 100644 --- a/listotulabelscommand.cpp +++ b/listotulabelscommand.cpp @@ -14,6 +14,7 @@ vector ListOtuLabelsCommand::setParameters(){ try { CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","otulabels",false,false,true); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","otulabels",false,false); parameters.push_back(prelabund); + CommandParameter plist("list", "InputTypes", "", "", "SharedRel", "SharedRel", "none","otulabels",false,false); parameters.push_back(plist); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. @@ -33,7 +34,7 @@ vector ListOtuLabelsCommand::setParameters(){ string ListOtuLabelsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The list.otulabels lists otu labels from shared or relabund file. The results can be used by the get.otulabels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n"; + helpString += "The list.otulabels lists otu labels from shared, relabund or list file. The results can be used by the get.otulabels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n"; helpString += "The list.otulabels parameters are: shared, relabund, label and groups.\n"; helpString += "The label parameter is used to analyze specific labels in your input.\n"; helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n"; @@ -122,6 +123,14 @@ ListOtuLabelsCommand::ListOtuLabelsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["shared"] = inputDir + it->second; } } + + it = parameters.find("list"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["list"] = inputDir + it->second; } + } } vector tempOutNames; @@ -138,7 +147,13 @@ ListOtuLabelsCommand::ListOtuLabelsCommand(string option) { else if (relabundfile == "not found") { relabundfile = ""; } else { inputFileName = relabundfile; format = "relabund"; m->setRelAbundFile(relabundfile); } - if ((relabundfile == "") && (sharedfile == "")) { + listfile = validParameter.validFile(parameters, "list", true); + if (listfile == "not open") { abort = true; } + else if (listfile == "not found") { listfile = ""; } + else { inputFileName = listfile; format = "list"; m->setListFile(listfile); } + + + if ((relabundfile == "") && (sharedfile == "") && (listfile== "")) { //is there are current file available for either of these? //give priority to shared, then relabund sharedfile = m->getSharedFile(); @@ -147,8 +162,12 @@ ListOtuLabelsCommand::ListOtuLabelsCommand(string option) { relabundfile = m->getRelAbundFile(); if (relabundfile != "") { inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); } else { - m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine(); - abort = true; + listfile = m->getListFile(); + if (listfile != "") { inputFileName = listfile; format="list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a shared, list or relabund."); m->mothurOutEndLine(); + abort = true; + } } } } @@ -261,7 +280,7 @@ int ListOtuLabelsCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - }else { + }else if (format == "sharedfile") { vector lookup = input.getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); @@ -337,6 +356,81 @@ int ListOtuLabelsCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + }else { + ListVector* list = input.getListVector(); + string lastLabel = list->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + if(allLines == 1 || labels.count(list->getLabel()) == 1){ + + m->mothurOut(list->getLabel()); m->mothurOutEndLine(); + + createList(list); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + } + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + m->mothurOut(list->getLabel()); m->mothurOutEndLine(); + + createList(list); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + } + + lastLabel = list->getLabel(); + //prevent memory leak + delete list; list = NULL; + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //get next line to process + list = input.getListVector(); + } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + delete list; + list = input.getListVector(lastLabel); + + m->mothurOut(list->getLabel()); m->mothurOutEndLine(); + + createList(list); + + delete list; + } } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -374,7 +468,7 @@ int ListOtuLabelsCommand::createList(vector& lookup){ return 0; } catch(exception& e) { - m->errorOut(e, "ListOtuLabelsCommand", "createTable"); + m->errorOut(e, "ListOtuLabelsCommand", "createList"); exit(1); } } @@ -398,7 +492,42 @@ int ListOtuLabelsCommand::createList(vector& lookup){ return 0; } catch(exception& e) { - m->errorOut(e, "ListOtuLabelsCommand", "createTable"); + m->errorOut(e, "ListOtuLabelsCommand", "createList"); + exit(1); + } +} +//********************************************************************************************************************** +int ListOtuLabelsCommand::createList(ListVector*& list){ + try { + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFileName)); + variables["[distance]"] = list->getLabel(); + string outputFileName = getOutputFileName("otulabels",variables); + outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); + ofstream out; + m->openOutputFile(outputFileName, out); + + string snumBins = toString(list->getNumBins()); + for (int i = 0; i < list->getNumBins(); i++) { + if (m->control_pressed) { break; } + + string otuLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { otuLabel += "0"; } + } + otuLabel += sbinNumber; + + out << otuLabel << endl; + } + + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "createList"); exit(1); } } diff --git a/listotulabelscommand.h b/listotulabelscommand.h index b1150a8..d380914 100644 --- a/listotulabelscommand.h +++ b/listotulabelscommand.h @@ -12,6 +12,7 @@ #include "command.hpp" #include "sharedrabundvector.h" +#include "listvector.hpp" /**************************************************************************************************/ @@ -36,13 +37,14 @@ public: private: bool abort, allLines; - string outputDir, sharedfile, relabundfile, label, inputFileName, format; + string outputDir, sharedfile, relabundfile, label, inputFileName, format, listfile; vector outputNames; vector Groups; set labels; int createList(vector&); int createList(vector&); + int createList(ListVector*&); }; diff --git a/removedistscommand.cpp b/removedistscommand.cpp new file mode 100644 index 0000000..2bb8046 --- /dev/null +++ b/removedistscommand.cpp @@ -0,0 +1,450 @@ +// +// removedistscommand.cpp +// Mothur +// +// Created by Sarah Westcott on 1/29/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#include "removedistscommand.h" + +//********************************************************************************************************************** +vector RemoveDistsCommand::setParameters(){ + try { + CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip); + CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn); + CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string RemoveDistsCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The remove.dists command removes distances from a phylip or column file related to groups or sequences listed in an accnos file.\n"; + helpString += "The remove.dists command parameters are accnos, phylip and column.\n"; + helpString += "The remove.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n"; + helpString += "Example remove.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n"; + helpString += "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string RemoveDistsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "phylip") { pattern = "[filename],pick,[extension]"; } + else if (type == "column") { pattern = "[filename],pick,[extension]"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** +RemoveDistsCommand::RemoveDistsCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["phylip"] = tempOutNames; + outputTypes["column"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +RemoveDistsCommand::RemoveDistsCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["column"] = tempOutNames; + outputTypes["phylip"] = tempOutNames; + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("phylip"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip"] = inputDir + it->second; } + } + + it = parameters.find("column"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["column"] = inputDir + it->second; } + } + + it = parameters.find("accnos"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["accnos"] = inputDir + it->second; } + } + } + + + //check for required parameters + accnosfile = validParameter.validFile(parameters, "accnos", true); + if (accnosfile == "not open") { abort = true; } + else if (accnosfile == "not found") { + accnosfile = m->getAccnosFile(); + if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); + abort = true; + } + }else { m->setAccnosFile(accnosfile); } + + phylipfile = validParameter.validFile(parameters, "phylip", true); + if (phylipfile == "not open") { phylipfile = ""; abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; } + else { m->setPhylipFile(phylipfile); } + + columnfile = validParameter.validFile(parameters, "column", true); + if (columnfile == "not open") { columnfile = ""; abort = true; } + else if (columnfile == "not found") { columnfile = ""; } + else { m->setColumnFile(columnfile); } + + if ((phylipfile == "") && (columnfile == "")) { + //is there are current file available for either of these? + //give priority to column, then phylip + columnfile = m->getColumnFile(); + if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); } + else { + phylipfile = m->getPhylipFile(); + if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); + abort = true; + } + } + } + } + + } + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +int RemoveDistsCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + //get names you want to keep + names = m->readAccnos(accnosfile); + + if (m->control_pressed) { return 0; } + + //read through the correct file and output lines you want to keep + if (phylipfile != "") { readPhylip(); } + if (columnfile != "") { readColumn(); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + + if (outputNames.size() != 0) { + m->mothurOutEndLine(); + m->mothurOut("Output File names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("phylip"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); } + } + + itTypes = outputTypes.find("column"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); } + } + } + + return 0; + } + + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** +int RemoveDistsCommand::readPhylip(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(phylipfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile)); + variables["[extension]"] = m->getExtension(phylipfile); + string outputFileName = getOutputFileName("phylip", variables); + + ifstream in; + m->openInputFile(phylipfile, in); + + float distance; + int square, nseqs; + string name; + unsigned int row; + set rows; //converts names in names to a index + row = 0; + + string numTest; + in >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + //not one we want to remove + if (names.count(name) == 0) { rows.insert(row); } + row++; + + //is the matrix square? + char d; + while((d=in.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + in.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + //map name to row/column + if(square == 0){ + for(int i=1;i> name; + if (names.count(name) == 0) { rows.insert(row); } + row++; + + for(int j=0;jcontrol_pressed) { in.close(); return 0; } + in >> distance; + } + } + } + else{ + for(int i=1;i> name; + if (names.count(name) == 0) { rows.insert(row); } + row++; + for(int j=0;jcontrol_pressed) { in.close(); return 0; } + in >> distance; + } + } + } + in.close(); + + if (m->control_pressed) { return 0; } + + //read through file only printing rows and columns of seqs in names + ifstream inPhylip; + m->openInputFile(phylipfile, inPhylip); + + inPhylip >> numTest; + + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName); + out << names.size() << endl; + + unsigned int count = 0; + unsigned int keptCount = 0; + if(square == 0){ + for(int i=0;i> name; + bool ignoreRow = false; + + if (names.count(name) != 0) { ignoreRow = true; count++; } + else{ out << name << '\t'; keptCount++; } + + for(int j=0;jcontrol_pressed) { inPhylip.close(); out.close(); return 0; } + inPhylip >> distance; + if (!ignoreRow) { + //is this a column we want + if(rows.count(j) != 0) { out << distance << '\t'; } + } + } + if (!ignoreRow) { out << endl; } + } + } + else{ + for(int i=0;i> name; + + bool ignoreRow = false; + + if (names.count(name) != 0) { ignoreRow = true; count++; } + else{ out << name << '\t'; keptCount++; } + + for(int j=0;jcontrol_pressed) { inPhylip.close(); out.close(); return 0; } + inPhylip >> distance; + if (!ignoreRow) { + //is this a column we want + if(rows.count(j) != 0) { out << distance << '\t'; } + } + } + if (!ignoreRow) { out << endl; } + } + } + inPhylip.close(); + out.close(); + + if (keptCount == 0) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); } + else if (count != names.size()) { + m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine(); + //rewrite with new number + m->renameFile(outputFileName, outputFileName+".temp"); + ofstream out2; + m->openOutputFile(outputFileName, out2); + out2 << keptCount << endl; + + ifstream in3; + m->openInputFile(outputFileName+".temp", in3); + in3 >> nseqs; m->gobble(in3); + char buffer[4096]; + while (!in3.eof()) { + in3.read(buffer, 4096); + out2.write(buffer, in3.gcount()); + } + in3.close(); + out2.close(); + m->mothurRemove(outputFileName+".temp"); + } + + m->mothurOut("Removed " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "readPhylip"); + exit(1); + } +} +//********************************************************************************************************************** +int RemoveDistsCommand::readColumn(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(columnfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile)); + variables["[extension]"] = m->getExtension(columnfile); + string outputFileName = getOutputFileName("column", variables); + outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); + + ofstream out; + m->openOutputFile(outputFileName, out); + + ifstream in; + m->openInputFile(columnfile, in); + + set removeNames; + string firstName, secondName; + float distance; + bool wrote = false; + while (!in.eof()) { + + if (m->control_pressed) { out.close(); in.close(); return 0; } + + in >> firstName >> secondName >> distance; m->gobble(in); + + //is either names in the accnos file + if (names.count(firstName) != 0) { + removeNames.insert(firstName); + if (names.count(secondName) != 0) { removeNames.insert(secondName); } } + else if (names.count(secondName) != 0) { + removeNames.insert(secondName); + if (names.count(firstName) != 0) { removeNames.insert(firstName); } } + else { + wrote = true; + out << firstName << '\t' << secondName << '\t' << distance << endl; + } + } + in.close(); + out.close(); + + if (!wrote) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); } + else if (removeNames.size() != names.size()) { + m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(removeNames.size()) + " of them in the column file."); m->mothurOutEndLine(); + } + + m->mothurOut("Removed " + toString(removeNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "RemoveDistsCommand", "readColumn"); + exit(1); + } +} +//********************************************************************************************************************** + + diff --git a/removedistscommand.h b/removedistscommand.h new file mode 100644 index 0000000..513a9e9 --- /dev/null +++ b/removedistscommand.h @@ -0,0 +1,48 @@ +// +// removedistscommand.h +// Mothur +// +// Created by Sarah Westcott on 1/29/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#ifndef Mothur_removedistscommand_h +#define Mothur_removedistscommand_h + +#include "command.hpp" + +class RemoveDistsCommand : public Command { + +public: + + RemoveDistsCommand(string); + RemoveDistsCommand(); + ~RemoveDistsCommand(){} + + vector setParameters(); + string getCommandName() { return "remove.dists"; } + string getCommandCategory() { return "General"; } + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "http://www.mothur.org/wiki/Remove.dists"; } + string getDescription() { return "removes distances from a phylip or column file related to groups or sequences listed in an accnos file"; } + + + int execute(); + void help() { m->mothurOut(getHelpString()); } + + +private: + set names; + string accnosfile, phylipfile, columnfile, outputDir; + bool abort; + vector outputNames; + + int readPhylip(); + int readColumn(); + +}; + + +#endif diff --git a/removeotulabelscommand.cpp b/removeotulabelscommand.cpp index 5359db8..148758a 100644 --- a/removeotulabelscommand.cpp +++ b/removeotulabelscommand.cpp @@ -15,6 +15,9 @@ vector RemoveOtuLabelsCommand::setParameters(){ CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "FNGLT", "none","constaxonomy",false,false); parameters.push_back(pconstaxonomy); CommandParameter potucorr("otucorr", "InputTypes", "", "", "none", "FNGLT", "none","otucorr",false,false); parameters.push_back(potucorr); CommandParameter pcorraxes("corraxes", "InputTypes", "", "", "none", "FNGLT", "none","corraxes",false,false); parameters.push_back(pcorraxes); + CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -31,11 +34,12 @@ vector RemoveOtuLabelsCommand::setParameters(){ string RemoveOtuLabelsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The remove.otulabels command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes.\n"; - helpString += "The remove.otulabels parameters are: constaxonomy, otucorr, corraxes, and accnos.\n"; + helpString += "The remove.otulabels command can be used to remove specific otus with the output from classify.otu, otu.association, or corr.axes. It can also be used to select a set of otus from a shared or list file.\n"; + helpString += "The remove.otulabels parameters are: constaxonomy, otucorr, corraxes, shared, list, label and accnos.\n"; helpString += "The constaxonomy parameter is input the results of the classify.otu command.\n"; helpString += "The otucorr parameter is input the results of the otu.association command.\n"; helpString += "The corraxes parameter is input the results of the corr.axes command.\n"; + helpString += "The label parameter is used to analyze specific labels in your input. \n"; helpString += "The remove.otulabels commmand should be in the following format: \n"; helpString += "remove.otulabels(accnos=yourListOfOTULabels, corraxes=yourCorrAxesFile)\n"; return helpString; @@ -50,9 +54,11 @@ string RemoveOtuLabelsCommand::getOutputPattern(string type) { try { string pattern = ""; - if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; } - else if (type == "otucorr") { pattern = "[filename],pick,[extension]"; } - else if (type == "corraxes") { pattern = "[filename],pick,[extension]"; } + if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; } + else if (type == "otucorr") { pattern = "[filename],pick,[extension]"; } + else if (type == "corraxes") { pattern = "[filename],pick,[extension]"; } + else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; } + else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -71,6 +77,8 @@ RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(){ outputTypes["constaxonomy"] = tempOutNames; outputTypes["otucorr"] = tempOutNames; outputTypes["corraxes"] = tempOutNames; + outputTypes["shared"] = tempOutNames; + outputTypes["list"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "RemoveOtuLabelsCommand", "RemoveOtuLabelsCommand"); @@ -140,12 +148,31 @@ RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["otucorr"] = inputDir + it->second; } } + + it = parameters.find("list"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["list"] = inputDir + it->second; } + } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } } vector tempOutNames; outputTypes["constaxonomy"] = tempOutNames; outputTypes["otucorr"] = tempOutNames; outputTypes["corraxes"] = tempOutNames; + outputTypes["shared"] = tempOutNames; + outputTypes["list"] = tempOutNames; + //check for parameters accnosfile = validParameter.validFile(parameters, "accnos", true); @@ -171,11 +198,25 @@ RemoveOtuLabelsCommand::RemoveOtuLabelsCommand(string option) { if (otucorrfile == "not open") { otucorrfile = ""; abort = true; } else if (otucorrfile == "not found") { otucorrfile = ""; } + listfile = validParameter.validFile(parameters, "list", true); + if (listfile == "not open") { listfile = ""; abort = true; } + else if (listfile == "not found") { listfile = ""; } + else { m->setListFile(listfile); } + + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + else { m->setSharedFile(sharedfile); } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } - if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes or otucorr."); m->mothurOutEndLine(); abort = true; } + if ((constaxonomyfile == "") && (corraxesfile == "") && (otucorrfile == "") && (sharedfile == "") && (listfile == "")) { m->mothurOut("You must provide one of the following: constaxonomy, corraxes, otucorr, shared or list."); m->mothurOutEndLine(); abort = true; } + + if ((sharedfile != "") || (listfile != "")) { + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; } + } } } @@ -200,6 +241,8 @@ int RemoveOtuLabelsCommand::execute(){ if (constaxonomyfile != "") { readClassifyOtu(); } if (corraxesfile != "") { readCorrAxes(); } if (otucorrfile != "") { readOtuAssociation(); } + if (listfile != "") { readList(); } + if (sharedfile != "") { readShared(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -209,6 +252,17 @@ int RemoveOtuLabelsCommand::execute(){ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); + string current = ""; + itTypes = outputTypes.find("list"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } + } + + itTypes = outputTypes.find("shared"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); } + } + return 0; } catch(exception& e) { @@ -375,6 +429,288 @@ int RemoveOtuLabelsCommand::readCorrAxes(){ } } //********************************************************************************************************************** +int RemoveOtuLabelsCommand::readShared(){ + try { + + getShared(); + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + + vector newLabels; + + //create new "filtered" lookup + vector newLookup; + for (int i = 0; i < lookup.size(); i++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(lookup[i]->getLabel()); + temp->setGroup(lookup[i]->getGroup()); + newLookup.push_back(temp); + } + + bool wroteSomething = false; + int numRemoved = 0; + for (int i = 0; i < lookup[0]->getNumBins(); i++) { + + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; } + + //is this otu on the list + if (labels.count(m->currentBinLabels[i]) == 0) { + wroteSomething = true; + newLabels.push_back(m->currentBinLabels[i]); + for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup + newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup()); + } + }else { numRemoved++; } + } + + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[extension]"] = m->getExtension(sharedfile); + variables["[distance]"] = lookup[0]->getLabel(); + string outputFileName = getOutputFileName("shared", variables); + ofstream out; + m->openOutputFile(outputFileName, out); + outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName); + + for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } + + m->currentBinLabels = newLabels; + + newLookup[0]->printHeaders(out); + + for (int i = 0; i < newLookup.size(); i++) { + out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t'; + newLookup[i]->print(out); + } + out.close(); + + for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } + + if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file."); m->mothurOutEndLine(); } + + m->mothurOut("Removed " + toString(numRemoved) + " OTUs from your shared file."); m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveOtuLabelsCommand", "readShared"); + exit(1); + } +} +//********************************************************************************************************************** +int RemoveOtuLabelsCommand::readList(){ + try { + getListVector(); + + if (m->control_pressed) { delete list; return 0;} + + ListVector newList; + newList.setLabel(list->getLabel()); + int removedCount = 0; + bool wroteSomething = false; + string snumBins = toString(list->getNumBins()); + + for (int i = 0; i < list->getNumBins(); i++) { + + if (m->control_pressed) { delete list; return 0;} + + //create a label for this otu + string otuLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { otuLabel += "0"; } + } + otuLabel += sbinNumber; + + if (labels.count(otuLabel) == 0) { + newList.push_back(list->get(i)); + }else { removedCount++; } + } + + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); + variables["[extension]"] = m->getExtension(listfile); + variables["[distance]"] = list->getLabel(); + string outputFileName = getOutputFileName("list", variables); + ofstream out; + m->openOutputFile(outputFileName, out); + + delete list; + //print new listvector + if (newList.getNumBins() != 0) { + wroteSomething = true; + newList.print(out); + } + out.close(); + + if (wroteSomething == false) { m->mothurOut("Your file contains only OTUs from the .accnos file."); m->mothurOutEndLine(); } + outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); + + m->mothurOut("Removed " + toString(removedCount) + " OTUs from your list file."); m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveOtuLabelsCommand", "readList"); + exit(1); + } +} +//********************************************************************************************************************** +int RemoveOtuLabelsCommand::getListVector(){ + try { + InputData input(listfile, "list"); + list = input.getListVector(); + string lastLabel = list->getLabel(); + + if (label == "") { label = lastLabel; return 0; } + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set labels; labels.insert(label); + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((list != NULL) && (userLabels.size() != 0)) { + if (m->control_pressed) { return 0; } + + if(labels.count(list->getLabel()) == 1){ + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + break; + } + + if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = list->getLabel(); + + delete list; + list = input.getListVector(lastLabel); + + processedLabels.insert(list->getLabel()); + userLabels.erase(list->getLabel()); + + //restore real lastlabel to save below + list->setLabel(saveLabel); + break; + } + + lastLabel = list->getLabel(); + + //get next line to process + //prevent memory leak + delete list; + list = input.getListVector(); + } + + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + delete list; + list = input.getListVector(lastLabel); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveOtuLabelsCommand", "getListVector"); + exit(1); + } +} +//********************************************************************************************************************** +int RemoveOtuLabelsCommand::getShared(){ + try { + InputData input(sharedfile, "sharedfile"); + lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + if (label == "") { label = lastLabel; return 0; } + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set labels; labels.insert(label); + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && (userLabels.size() != 0)) { + if (m->control_pressed) { return 0; } + + if(labels.count(lookup[0]->getLabel()) == 1){ + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + break; + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + break; + } + + lastLabel = lookup[0]->getLabel(); + + //get next line to process + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(); + } + + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveOtuLabelsCommand", "getShared"); + exit(1); + } +} +//********************************************************************************************************************** diff --git a/removeotulabelscommand.h b/removeotulabelscommand.h index 03d2da3..fd03d10 100644 --- a/removeotulabelscommand.h +++ b/removeotulabelscommand.h @@ -11,6 +11,9 @@ // #include "command.hpp" +#include "inputdata.h" +#include "listvector.hpp" +#include "sharedrabundvector.h" /**************************************************************************************************/ @@ -34,13 +37,19 @@ public: private: bool abort; - string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile; + string outputDir, accnosfile, constaxonomyfile, otucorrfile, corraxesfile, listfile, sharedfile, label; vector outputNames; set labels; + ListVector* list; + vector lookup; int readClassifyOtu(); int readOtuAssociation(); int readCorrAxes(); + int readList(); + int readShared(); + int getListVector(); + int getShared(); }; /**************************************************************************************************/ diff --git a/trimseqscommand.h b/trimseqscommand.h index 2ab7de3..80e1ebe 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -399,7 +399,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ string thisGroup = ""; if (pDataArray->createGroup) { if(pDataArray->barcodes.size() != 0){ - string thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; + thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; if (pDataArray->primers.size() != 0) { if (pDataArray->primerNameVector[primerIndex] != "") { if(thisGroup != "") { -- 2.39.2