X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=deconvolutecommand.cpp;h=98109ef33c488a071b04ad5ada84ffcab9d2e1f8;hp=6f7657f3da640514fb5125527b17cfd779a5cad0;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=f06fdb807822f8e06db003ed809c87250905cfc8 diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp index 6f7657f..98109ef 100644 --- a/deconvolutecommand.cpp +++ b/deconvolutecommand.cpp @@ -13,10 +13,11 @@ //********************************************************************************************************************** vector DeconvoluteCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -31,7 +32,7 @@ vector DeconvoluteCommand::setParameters(){ string DeconvoluteCommand::getHelpString(){ try { string helpString = ""; - helpString += "The unique.seqs command reads a fastafile and creates a namesfile.\n"; + helpString += "The unique.seqs command reads a fastafile and creates a name or count file.\n"; helpString += "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. \n"; helpString += "If the sequence is unique the second column will just contain its name. \n"; helpString += "The unique.seqs command parameters are fasta and name. fasta is required, unless there is a valid current fasta file.\n"; @@ -44,6 +45,24 @@ string DeconvoluteCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string DeconvoluteCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fasta") { pattern = "[filename],unique,[extension]"; } + else if (type == "name") { pattern = "[filename],names-[filename],[tag],names"; } + else if (type == "count") { pattern = "[filename],count_table-[filename],[tag],count_table"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "DeconvoluteCommand", "getOutputPattern"); + exit(1); + } +} + //********************************************************************************************************************** DeconvoluteCommand::DeconvoluteCommand(){ try { @@ -52,6 +71,7 @@ DeconvoluteCommand::DeconvoluteCommand(){ vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; + outputTypes["count"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "DeconvoluteCommand", "DeconvoluteCommand"); @@ -85,6 +105,7 @@ DeconvoluteCommand::DeconvoluteCommand(string option) { vector tempOutNames; outputTypes["fasta"] = tempOutNames; outputTypes["name"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -106,6 +127,14 @@ DeconvoluteCommand::DeconvoluteCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["name"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } @@ -116,7 +145,7 @@ DeconvoluteCommand::DeconvoluteCommand(string option) { inFastaName = m->getFastaFile(); if (inFastaName != "") { m->mothurOut("Using " + inFastaName + " as input file for the fasta parameter."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; } - } + }else { m->setFastaFile(inFastaName); } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ @@ -125,8 +154,25 @@ DeconvoluteCommand::DeconvoluteCommand(string option) { } oldNameMapFName = validParameter.validFile(parameters, "name", true); - if (oldNameMapFName == "not open") { abort = true; } + if (oldNameMapFName == "not open") { oldNameMapFName = ""; abort = true; } else if (oldNameMapFName == "not found"){ oldNameMapFName = ""; } + else { m->setNameFile(oldNameMapFName); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { abort = true; countfile = ""; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((countfile != "") && (oldNameMapFName != "")) { m->mothurOut("When executing a unique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; } + + + if (countfile == "") { + if (oldNameMapFName == "") { + vector files; files.push_back(inFastaName); + parser.getNameFile(files); + } + } + } } @@ -142,12 +188,35 @@ int DeconvoluteCommand::execute() { if (abort == true) { if (calledHelp) { return 0; } return 2; } //prepare filenames and open files - string outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "names"; - string outFastaFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique" + m->getExtension(inFastaName); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inFastaName)); + string outNameFile = getOutputFileName("name", variables); + string outCountFile = getOutputFileName("count", variables); + variables["[extension]"] = m->getExtension(inFastaName); + string outFastaFile = getOutputFileName("fasta", variables); map nameMap; map::iterator itNames; - if (oldNameMapFName != "") { m->readNames(oldNameMapFName, nameMap); } + if (oldNameMapFName != "") { + m->readNames(oldNameMapFName, nameMap); + if (oldNameMapFName == outNameFile){ + //prepare filenames and open files + map mvariables; + mvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inFastaName)); + mvariables["[tag]"] = "unique"; + outNameFile = getOutputFileName("name", mvariables); + } + } + CountTable ct; + if (countfile != "") { + ct.readTable(countfile, true, false); + if (countfile == outCountFile){ + //prepare filenames and open files + map mvariables; + mvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inFastaName)); + mvariables["[tag]"] = "unique"; + outCountFile = getOutputFileName("count", mvariables); } + } if (m->control_pressed) { return 0; } @@ -161,10 +230,11 @@ int DeconvoluteCommand::execute() { map::iterator itStrings; set nameInFastaFile; //for sanity checking set::iterator itname; + vector nameFileOrder; int count = 0; while (!in.eof()) { - if (m->control_pressed) { in.close(); outFasta.close(); remove(outFastaFile.c_str()); return 0; } + if (m->control_pressed) { in.close(); outFasta.close(); m->mothurRemove(outFastaFile); return 0; } Sequence seq(in); @@ -188,8 +258,12 @@ int DeconvoluteCommand::execute() { m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine(); }else { sequenceStrings[seq.getAligned()] = itNames->second; + nameFileOrder.push_back(seq.getAligned()); } - }else { sequenceStrings[seq.getAligned()] = seq.getName(); } + }else if (countfile != "") { + ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table + sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned()); + }else { sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned()); } }else { //this is a dup if (oldNameMapFName != "") { itNames = nameMap.find(seq.getName()); @@ -199,7 +273,12 @@ int DeconvoluteCommand::execute() { }else { sequenceStrings[seq.getAligned()] += "," + itNames->second; } - }else { sequenceStrings[seq.getAligned()] += "," + seq.getName(); } + }else if (countfile != "") { + int num = ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table + if (num != 0) { //its in the table + ct.mergeCounts(itStrings->second, seq.getName()); //merges counts and saves in uniques name + } + }else { sequenceStrings[seq.getAligned()] += "," + seq.getName(); } } count++; @@ -215,33 +294,39 @@ int DeconvoluteCommand::execute() { in.close(); outFasta.close(); - if (m->control_pressed) { remove(outFastaFile.c_str()); return 0; } + if (m->control_pressed) { m->mothurRemove(outFastaFile); return 0; } //print new names file ofstream outNames; - m->openOutputFile(outNameFile, outNames); + if (countfile == "") { m->openOutputFile(outNameFile, outNames); outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile); } + else { m->openOutputFile(outCountFile, outNames); ct.printHeaders(outNames); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile); } - for (itStrings = sequenceStrings.begin(); itStrings != sequenceStrings.end(); itStrings++) { - if (m->control_pressed) { outputTypes.clear(); remove(outFastaFile.c_str()); outNames.close(); remove(outNameFile.c_str()); return 0; } + for (int i = 0; i < nameFileOrder.size(); i++) { + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); outNames.close(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - //get rep name - int pos = (itStrings->second).find_first_of(','); + itStrings = sequenceStrings.find(nameFileOrder[i]); - if (pos == string::npos) { // only reps itself - outNames << itStrings->second << '\t' << itStrings->second << endl; - }else { - outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl; - } + if (itStrings != sequenceStrings.end()) { + if (countfile == "") { + //get rep name + int pos = (itStrings->second).find_first_of(','); + + if (pos == string::npos) { // only reps itself + outNames << itStrings->second << '\t' << itStrings->second << endl; + }else { + outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl; + } + }else { ct.printSeq(outNames, itStrings->second); } + }else{ m->mothurOut("[ERROR]: mismatch in namefile print."); m->mothurOutEndLine(); m->control_pressed = true; } } outNames.close(); - if (m->control_pressed) { outputTypes.clear(); remove(outFastaFile.c_str()); remove(outNameFile.c_str()); return 0; } + if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - m->mothurOut(outFastaFile); m->mothurOutEndLine(); - m->mothurOut(outNameFile); m->mothurOutEndLine(); - outputNames.push_back(outFastaFile); outputNames.push_back(outNameFile); outputTypes["fasta"].push_back(outFastaFile); outputTypes["name"].push_back(outNameFile); + outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile @@ -255,6 +340,11 @@ int DeconvoluteCommand::execute() { if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } + + itTypes = outputTypes.find("count"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } + } return 0; }