From: Sarah Westcott Date: Wed, 29 May 2013 12:11:22 +0000 (-0400) Subject: added rename.seqs command. X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;ds=sidebyside;h=ef4c1b4dfeecc17a0ad5c1c114c748a0443a88ac;p=mothur.git added rename.seqs command. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index e1a4dfa..77cfb8c 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -76,6 +76,7 @@ A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; }; A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; }; A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; }; + A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */; }; A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; }; A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; }; A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */; }; @@ -531,6 +532,8 @@ A7C3DC0E14FE469500FE1924 /* trialswap2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trialswap2.h; sourceTree = ""; }; A7C7DAB615DA75760059B0CF /* sffmultiplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sffmultiplecommand.h; sourceTree = ""; }; A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffmultiplecommand.cpp; sourceTree = ""; }; + A7CFA42F1755400500D9ED4D /* renameseqscommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = renameseqscommand.h; sourceTree = ""; }; + A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = renameseqscommand.cpp; sourceTree = ""; }; A7D755D71535F665009BF21A /* treereader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treereader.h; sourceTree = ""; }; A7D755D91535F679009BF21A /* treereader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treereader.cpp; sourceTree = ""; }; A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = ""; }; @@ -1516,6 +1519,8 @@ A727864312E9E28C00F86ABA /* removerarecommand.cpp */, A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */, A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */, + A7CFA42F1755400500D9ED4D /* renameseqscommand.h */, + A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */, A7E9B7CE12D37EC400DA6239 /* reversecommand.h */, A7E9B7CD12D37EC400DA6239 /* reversecommand.cpp */, A7E9B7D212D37EC400DA6239 /* screenseqscommand.h */, @@ -2364,6 +2369,7 @@ A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */, A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */, A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */, + A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index 387ee10..a0796a3 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -144,6 +144,7 @@ #include "getmetacommunitycommand.h" #include "sparcccommand.h" #include "makelookupcommand.h" +#include "renameseqscommand.h" /*******************************************************/ @@ -311,6 +312,7 @@ CommandFactory::CommandFactory(){ commands["get.metacommunity"] = "get.metacommunity"; commands["sparcc"] = "sparcc"; commands["make.lookup"] = "make.lookup"; + commands["rename.seqs"] = "rename.seqs"; } @@ -534,6 +536,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "get.metacommunity") { command = new GetMetaCommunityCommand(optionString); } else if(commandName == "sparcc") { command = new SparccCommand(optionString); } else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); } + else if(commandName == "rename.seqs") { command = new RenameSeqsCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -698,6 +701,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "get.metacommunity") { pipecommand = new GetMetaCommunityCommand(optionString); } else if(commandName == "sparcc") { pipecommand = new SparccCommand(optionString); } else if(commandName == "make.lookup") { pipecommand = new MakeLookupCommand(optionString); } + else if(commandName == "rename.seqs") { pipecommand = new RenameSeqsCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -848,6 +852,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "get.metacommunity") { shellcommand = new GetMetaCommunityCommand(); } else if(commandName == "sparcc") { shellcommand = new SparccCommand(); } else if(commandName == "make.lookup") { shellcommand = new MakeLookupCommand(); } + else if(commandName == "rename.seqs") { shellcommand = new RenameSeqsCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/groupmap.cpp b/groupmap.cpp index 9d27364..9b8aa3f 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -405,7 +405,66 @@ int GroupMap::getNumSeqs(string group) { exit(1); } } - +/************************************************************/ +int GroupMap::renameSeq(string oldName, string newName) { + try { + + map::iterator itName; + + itName = groupmap.find(oldName); + + if (itName == groupmap.end()) { + m->mothurOut("[ERROR]: cannot find " + toString(oldName) + " in group file"); + m->control_pressed = true; + return 0; + }else { + string group = itName->second; + groupmap.erase(itName); + groupmap[newName] = group; + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "renameSeq"); + exit(1); + } +} +/************************************************************/ +int GroupMap::print(ofstream& out) { + try { + + for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { + out << itName->first << '\t' << itName->second << endl; + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "print"); + exit(1); + } +} +/************************************************************/ +int GroupMap::print(ofstream& out, vector userGroups) { + try { + + for (map::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) { + if (m->inUsersGroups(itName->second, userGroups)) { + out << itName->first << '\t' << itName->second << endl; + } + } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "GroupMap", "print"); + exit(1); + } +} /************************************************************/ vector GroupMap::getNamesSeqs(){ try { diff --git a/groupmap.h b/groupmap.h index bdcd61e..5397e02 100644 --- a/groupmap.h +++ b/groupmap.h @@ -40,6 +40,9 @@ public: vector getNamesSeqs(vector); //get names of seqs belonging to a group or set of groups int getNumSeqs(string); //return the number of seqs in a given group int getCopy(GroupMap*); + int renameSeq(string, string); + int print(ofstream&); + int print(ofstream&, vector); //print certain groups map groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays and libshuff commands. diff --git a/renameseqscommand.cpp b/renameseqscommand.cpp new file mode 100644 index 0000000..449454b --- /dev/null +++ b/renameseqscommand.cpp @@ -0,0 +1,343 @@ +// +// renameseqscommand.cpp +// Mothur +// +// Created by SarahsWork on 5/28/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#include "renameseqscommand.h" +#include "sequence.hpp" +#include "groupmap.h" + +//********************************************************************************************************************** +vector RenameSeqsCommand::setParameters(){ + try { + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "RenameSeqsCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string RenameSeqsCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The rename.seqs command reads a fastafile and groupfile with an optional namefile, and creates files with the sequence names concatenated with the group. For example if a line in the group file is 'seq1 group1', the new sequence name will be seq1_group1.\n"; + helpString += "The rename.seqs command parameters are fasta, name and group. Fasta and group are required, unless a current file is available for both.\n"; + helpString += "The rename.seqs command should be in the following format: \n"; + helpString += "rename.seqs(fasta=yourFastaFile, group=yourGroupFile) \n"; + helpString += "Example rename.seqs(fasta=abrecovery.unique.fasta, group=abrecovery.group).\n"; + helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "RenameSeqsCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string RenameSeqsCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fasta") { pattern = "[filename],renamed,[extension]"; } + else if (type == "name") { pattern = "[filename],renamed,[extension]"; } + else if (type == "group") { pattern = "[filename],renamed,[extension]"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "RenameSeqsCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** +RenameSeqsCommand::RenameSeqsCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["group"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "RenameSeqsCommand", "RenameSeqsCommand"); + exit(1); + } +} +/**************************************************************************************/ +RenameSeqsCommand::RenameSeqsCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["group"] = tempOutNames; + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("fasta"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fasta"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } + + } + + + //check for required parameters + fastaFile = validParameter.validFile(parameters, "fasta", true); + if (fastaFile == "not open") { abort = true; } + else if (fastaFile == "not found") { + fastaFile = m->getFastaFile(); + if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setFastaFile(fastaFile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it + } + + groupfile = validParameter.validFile(parameters, "group", true); + if (groupfile == "not open") { abort = true; } + else if (groupfile == "not found") { + groupfile = m->getGroupFile(); + if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current groupfile and the group parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setGroupFile(groupfile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + nameFile = validParameter.validFile(parameters, "name", true); + if (nameFile == "not open") { abort = true; } + else if (nameFile == "not found"){ nameFile =""; } + else { m->setNameFile(nameFile); } + + if (nameFile == "") { + vector files; files.push_back(fastaFile); + parser.getNameFile(files); + } + + } + + } + catch(exception& e) { + m->errorOut(e, "RenameSeqsCommand", "RenameSeqsCommand"); + exit(1); + } +} +/**************************************************************************************/ +int RenameSeqsCommand::execute() { + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + GroupMap groupMap(groupfile); + groupMap.readMap(); + + //prepare filenames and open files + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(fastaFile); } + string outFastaFile = thisOutputDir + m->getRootName(m->getSimpleName(fastaFile)); + map variables; + variables["[filename]"] = outFastaFile; + variables["[extension]"] = m->getExtension(fastaFile); + outFastaFile = getOutputFileName("fasta", variables); + outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); + + ofstream outFasta; + m->openOutputFile(outFastaFile, outFasta); + + ifstream in; + m->openInputFile(fastaFile, in); + + while (!in.eof()) { + if (m->control_pressed) { break; } + + Sequence seq(in); m->gobble(in); + string group = groupMap.getGroup(seq.getName()); + if (group == "not found") { m->mothurOut("[ERROR]: " + seq.getName() + " is not in your group file, please correct.\n"); m->control_pressed = true; } + else { + string newName = seq.getName() + "_" + group; + seq.setName(newName); + seq.printSequence(outFasta); + } + + } + in.close(); + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + bool notDone = true; + if (nameFile != "") { + thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(nameFile); } + string outNameFile = thisOutputDir + m->getRootName(m->getSimpleName(nameFile)); + variables["[filename]"] = outNameFile; + variables["[extension]"] = m->getExtension(nameFile); + outNameFile = getOutputFileName("group", variables); + outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile); + + ofstream outName; + m->openOutputFile(outNameFile, outName); + + map > nameMap; + m->readNames(nameFile, nameMap); + + //process name file changing names + for (map >::iterator it = nameMap.begin(); it != nameMap.end(); it++) { + for (int i = 0; i < (it->second).size()-1; i++) { + if (m->control_pressed) { break; } + string group = groupMap.getGroup((it->second)[i]); + if (group == "not found") { m->mothurOut("[ERROR]: " + (it->second)[i] + " is not in your group file, please correct.\n"); m->control_pressed = true; } + else { + string newName = (it->second)[i] + "_" + group; + groupMap.renameSeq((it->second)[i], newName); //change in group file + (it->second)[i] = newName; //change in namefile + } + if (i == 0) { outName << (it->second)[i] << '\t' << (it->second)[i] << ','; } + else { outName << (it->second)[i] << ','; } + } + + //print last one + if ((it->second).size() == 1) { + string group = groupMap.getGroup((it->second)[0]); + if (group == "not found") { m->mothurOut("[ERROR]: " + (it->second)[0] + " is not in your group file, please correct.\n"); m->control_pressed = true; } + else { + string newName = (it->second)[0] + "_" + group; + groupMap.renameSeq((it->second)[0], newName); //change in group file + (it->second)[0] = newName; //change in namefile + + outName << (it->second)[0] << '\t' << (it->second)[0] << endl; + } + } + else { + string group = groupMap.getGroup((it->second)[(it->second).size()-1]); + if (group == "not found") { m->mothurOut("[ERROR]: " + (it->second)[(it->second).size()-1] + " is not in your group file, please correct.\n"); m->control_pressed = true; } + else { + string newName = (it->second)[(it->second).size()-1] + "_" + group; + groupMap.renameSeq((it->second)[(it->second).size()-1], newName); //change in group file + (it->second)[(it->second).size()-1] = newName; //change in namefile + + outName << (it->second)[(it->second).size()-1] << endl; + } + } + } + notDone = false; + outName.close(); + } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + if (notDone) { + vector seqs = groupMap.getNamesSeqs(); + for (int i = 0; i < seqs.size(); i++) { + if (m->control_pressed) { break; } + string group = groupMap.getGroup(seqs[i]); + string newName = seqs[i] + "_" + group; + groupMap.renameSeq(seqs[i], newName); + } + } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); } + string outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); + variables["[filename]"] = outGroupFile; + variables["[extension]"] = m->getExtension(groupfile); + outGroupFile = getOutputFileName("group", variables); + outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile); + + ofstream outGroup; + m->openOutputFile(outGroupFile, outGroup); + groupMap.print(outGroup); + outGroup.close(); + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + //set fasta file as new current fastafile + string current = ""; + itTypes = outputTypes.find("fasta"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } + } + + itTypes = outputTypes.find("name"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } + } + + itTypes = outputTypes.find("group"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RenameSeqsCommand", "execute"); + exit(1); + } +} +/**************************************************************************************/ + diff --git a/renameseqscommand.h b/renameseqscommand.h new file mode 100644 index 0000000..a492404 --- /dev/null +++ b/renameseqscommand.h @@ -0,0 +1,46 @@ +// +// renameseqscommand.h +// Mothur +// +// Created by SarahsWork on 5/28/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#ifndef Mothur_renameseqscommand_h +#define Mothur_renameseqscommand_h + +#include "command.hpp" + +class RenameSeqsCommand : public Command { + +public: + RenameSeqsCommand(string); + RenameSeqsCommand(); + ~RenameSeqsCommand() {} + + vector setParameters(); + string getCommandName() { return "rename.seqs"; } + string getCommandCategory() { return "Sequence Processing"; } + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "http://www.mothur.org/wiki/Rename.seqs"; } + string getDescription() { return "rename sequences"; } + + + int execute(); + void help() { m->mothurOut(getHelpString()); } + + +private: + + string fastaFile, nameFile, groupfile, outputDir; + vector outputNames; + bool abort; + + map nameMap; +}; + + + +#endif