X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=deuniqueseqscommand.cpp;h=a6c151c38318f4a98917e5b46f81f5b8cd7cfc27;hp=f72a7c9c7e1264265d4d28e1072e087cf501c288;hb=ac03f1f6c27b5bfdf2cfb6d45c3667c3e0281f51;hpb=fe08eacea5a2d34cd4bd27e8451fd2872102a80a diff --git a/deuniqueseqscommand.cpp b/deuniqueseqscommand.cpp index f72a7c9..a6c151c 100644 --- a/deuniqueseqscommand.cpp +++ b/deuniqueseqscommand.cpp @@ -9,12 +9,14 @@ #include "deuniqueseqscommand.h" #include "sequence.hpp" +#include "counttable.h" //********************************************************************************************************************** vector DeUniqueSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pname); + CommandParameter pname("name", "InputTypes", "", "", "namecount", "namecount", "none","name",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "namecount", "namecount", "none","group",false,false,true); parameters.push_back(pcount); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -31,8 +33,8 @@ vector DeUniqueSeqsCommand::setParameters(){ string DeUniqueSeqsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The deunique.seqs command reads a fastafile and namefile, and creates a fastafile containing all the sequences.\n"; - helpString += "The deunique.seqs command parameters are fasta and name, both are required, unless you have valid current name and fasta files.\n"; + helpString += "The deunique.seqs command reads a fastafile and namefile or countfile, and creates a fastafile containing all the sequences. It you provide a count file with group information a group file is also created.\n"; + helpString += "The deunique.seqs command parameters are fasta, name and count. Fasta is required and you must provide either a name or count file.\n"; helpString += "The deunique.seqs command should be in the following format: \n"; helpString += "deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n"; helpString += "Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n"; @@ -49,7 +51,8 @@ string DeUniqueSeqsCommand::getOutputPattern(string type) { try { string pattern = ""; - if (type == "fasta") { pattern = "[filename],redundant.fasta"; } + if (type == "fasta") { pattern = "[filename],redundant.fasta"; } + else if (type == "group") { pattern = "[filename],redundant.groups"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -66,6 +69,7 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(){ setParameters(); vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["group"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "DeUniqueSeqsCommand", "DeconvoluteCommand"); @@ -98,6 +102,7 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["group"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -119,6 +124,14 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } @@ -134,16 +147,31 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option) { //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; - outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it } nameFile = validParameter.validFile(parameters, "name", true); if (nameFile == "not open") { abort = true; } - else if (nameFile == "not found"){ - nameFile = m->getNameFile(); + else if (nameFile == "not found"){ nameFile = ""; } + else { m->setNameFile(nameFile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((countfile != "") && (nameFile != "")) { m->mothurOut("When executing a deunique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; } + + + if ((countfile == "") && (nameFile == "")) { //look for currents + nameFile = m->getNameFile(); if (nameFile != "") { m->mothurOut("Using " + nameFile + " as input file for the name parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; } - }else { m->setNameFile(nameFile); } + else { + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("[ERROR]: You have no current name or count files one is required."); m->mothurOutEndLine(); abort = true; } + } + } + } } @@ -160,7 +188,9 @@ int DeUniqueSeqsCommand::execute() { //prepare filenames and open files ofstream out; - string outFastaFile = m->getRootName(m->getSimpleName(fastaFile)); + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(fastaFile); } + string outFastaFile = thisOutputDir + m->getRootName(m->getSimpleName(fastaFile)); int pos = outFastaFile.find("unique"); if (pos != string::npos) { outFastaFile = outputDir + outFastaFile.substr(0, pos); } else { outFastaFile = outputDir + outFastaFile; } @@ -169,55 +199,92 @@ int DeUniqueSeqsCommand::execute() { outFastaFile = getOutputFileName("fasta", variables); m->openOutputFile(outFastaFile, out); - readNamesFile(); - if (m->control_pressed) { out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; } + map nameMap; + CountTable ct; + ofstream outGroup; + string outGroupFile; + vector groups; + if (nameFile != "") { m->readNames(nameFile, nameMap); } + else { + ct.readTable(countfile, true, false); + + if (ct.hasGroupInfo()) { + thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } + outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); + variables["[filename]"] = outGroupFile; + outGroupFile = getOutputFileName("group", variables); + m->openOutputFile(outGroupFile, outGroup); + groups = ct.getNamesOfGroups(); + } + } + + if (m->control_pressed) { out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; } ifstream in; m->openInputFile(fastaFile, in); while (!in.eof()) { - if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; } + if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; } Sequence seq(in); m->gobble(in); if (seq.getName() != "") { - //look for sequence name in nameMap - map::iterator it = nameMap.find(seq.getName()); - - if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; } - else { - vector names; - m->splitAtComma(it->second, names); - - //output sequences - for (int i = 0; i < names.size(); i++) { - out << ">" << names[i] << endl; - out << seq.getAligned() << endl; - } - - //remove seq from name map so we can check for seqs in namefile not in fastafile later - nameMap.erase(it); - } + if (nameFile != "") { + //look for sequence name in nameMap + map::iterator it = nameMap.find(seq.getName()); + + if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; } + else { + vector names; + m->splitAtComma(it->second, names); + + //output sequences + for (int i = 0; i < names.size(); i++) { + out << ">" << names[i] << endl; + out << seq.getAligned() << endl; + } + + //remove seq from name map so we can check for seqs in namefile not in fastafile later + nameMap.erase(it); + } + }else { + if (ct.hasGroupInfo()) { + vector groupCounts = ct.getGroupCounts(seq.getName()); + int count = 1; + for (int i = 0; i < groups.size(); i++) { + for (int j = 0; j < groupCounts[i]; j++) { + outGroup << seq.getName()+"_"+toString(count) << '\t' << groups[i] << endl; count++; + } + } + + } + + int numReps = ct.getNumSeqs(seq.getName()); //will report error and set m->control_pressed if not found + for (int i = 0; i < numReps; i++) { + out << ">" << seq.getName()+"_"+toString(i+1) << endl; + out << seq.getAligned() << endl; + } + } } } in.close(); - out.close(); + out.close(); + if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); } } - if (nameMap.size() != 0) { //then there are names in the namefile not in the fastafile - for (map::iterator it = nameMap.begin(); it != nameMap.end(); it++) { - m->mothurOut(it->first + " is not in your fasta file, but is in your name file. Please correct."); m->mothurOutEndLine(); - } - } - - if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; } + + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { m->mothurRemove(outGroupFile); } }return 0; } + outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); + if (countfile != "") { if (ct.hasGroupInfo()) { outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile); } } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outFastaFile); m->mothurOutEndLine(); - outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); - m->mothurOutEndLine(); + for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + //set fasta file as new current fastafile string current = ""; @@ -225,47 +292,18 @@ int DeUniqueSeqsCommand::execute() { if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } - - return 0; - } - catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "execute"); - exit(1); - } -} -//********************************************************************************************************************** -int DeUniqueSeqsCommand::readNamesFile() { - try { - - ifstream inNames; - m->openInputFile(nameFile, inNames); - - string name, names; - map::iterator it; - - while(inNames){ - - if(m->control_pressed) { break; } - - inNames >> name; m->gobble(inNames); - inNames >> names; - - it = nameMap.find(name); - - if (it == nameMap.end()) { nameMap[name] = names; } - else { m->mothurOut("[ERROR]: Your namefile already contains " + name + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; } - - m->gobble(inNames); + + itTypes = outputTypes.find("group"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } - inNames.close(); + return 0; - } catch(exception& e) { - m->errorOut(e, "DeUniqueSeqsCommand", "readNamesFile"); + m->errorOut(e, "DeUniqueSeqsCommand", "execute"); exit(1); } } - /**************************************************************************************/