From: Sarah Westcott Date: Thu, 17 Oct 2013 17:45:05 +0000 (-0400) Subject: added fastq to list.seqs, get.seqs and remove.seqs. fixed bug where venn command... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=2ff2d03fbe46ce8cf2738bff4403a52be4f31e2f added fastq to list.seqs, get.seqs and remove.seqs. fixed bug where venn command overwrote sharedotus files. fixed bug with sff.multiple setting processors=1 for future commands. Not using file redirects in commands it runs. --- diff --git a/commandoptionparser.cpp b/commandoptionparser.cpp index dfad533..0038a71 100644 --- a/commandoptionparser.cpp +++ b/commandoptionparser.cpp @@ -31,7 +31,7 @@ CommandOptionParser::CommandOptionParser(string input){ optionString = input.substr((openParen+1), (closeParen-openParen-1)); //optionString contains everything between "(" and ")". } else if (openParen == -1) { m->mothurOut("[ERROR]: You are missing ("); m->mothurOutEndLine(); } - else if (closeParen == -1) { m->mothurOut("[ERROR]:You are missing )"); m->mothurOutEndLine(); } + else if (closeParen == -1) { m->mothurOut("[ERROR]: You are missing )"); m->mothurOutEndLine(); } } catch(exception& e) { m->errorOut(e, "CommandOptionParser", "CommandOptionParser"); diff --git a/getseqscommand.cpp b/getseqscommand.cpp index 82f9710..b97bd58 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -16,6 +16,7 @@ vector GetSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta); + CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); 
parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup); @@ -42,9 +43,9 @@ vector GetSeqsCommand::setParameters(){ string GetSeqsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n"; + helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n"; helpString += "It outputs a file containing only the sequences in the .accnos file.\n"; - helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n"; + helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, fastq and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n"; helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. 
\n"; helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n"; helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n"; @@ -64,6 +65,7 @@ GetSeqsCommand::GetSeqsCommand(){ setParameters(); vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["fastq"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; @@ -84,6 +86,7 @@ string GetSeqsCommand::getOutputPattern(string type) { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } + else if (type == "fastq") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } @@ -127,6 +130,7 @@ GetSeqsCommand::GetSeqsCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["fastq"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; @@ -223,6 +227,14 @@ GetSeqsCommand::GetSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["count"] = inputDir + it->second; } } + + it = parameters.find("fastq"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. 
+ if (path == "") { parameters["fastq"] = inputDir + it->second; } + } } @@ -273,6 +285,10 @@ GetSeqsCommand::GetSeqsCommand(string option) { if (qualfile == "not open") { abort = true; } else if (qualfile == "not found") { qualfile = ""; } else { m->setQualFile(qualfile); } + + fastqfile = validParameter.validFile(parameters, "fastq", true); + if (fastqfile == "not open") { abort = true; } + else if (fastqfile == "not found") { fastqfile = ""; } accnosfile2 = validParameter.validFile(parameters, "accnos2", true); if (accnosfile2 == "not open") { abort = true; } @@ -296,7 +312,7 @@ GetSeqsCommand::GetSeqsCommand(string option) { string temp = validParameter.validFile(parameters, "dups", false); if (temp == "not found") { temp = "true"; usedDups = ""; } dups = m->isTrue(temp); - if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; } + if ((fastqfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality, fastq or listfile."); m->mothurOutEndLine(); abort = true; } if (countfile == "") { if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){ @@ -333,6 +349,7 @@ int GetSeqsCommand::execute(){ //read through the correct file and output lines you want to keep if (namefile != "") { readName(); } if (fastafile != "") { readFasta(); } + if (fastqfile != "") { readFastq(); } if (groupfile != "") { readGroup(); } if (countfile != "") { readCount(); } if (alignfile != "") { readAlign(); } @@ -398,6 +415,71 @@ int 
GetSeqsCommand::execute(){
 		exit(1);
 	}
 }
+//**********************************************************************************************************************
+int GetSeqsCommand::readFastq(){	//writes every fastq record whose name IS in names to the "pick" output file; always returns 0
+	try {
+		bool wroteSomething = false;	//true once at least one record has been written
+		int selectedCount = 0;	//number of records written to the output
+
+		ifstream in;
+		m->openInputFile(fastqfile, in);
+
+		string thisOutputDir = outputDir;
+		if (outputDir == "") { thisOutputDir += m->hasPath(fastqfile); }	//no outputdir given: use the path of the input file
+		map<string, string> variables;
+		variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
+		variables["[extension]"] = m->getExtension(fastqfile);
+		string outputFileName = getOutputFileName("fastq", variables);
+		ofstream out;
+		m->openOutputFile(outputFileName, out);
+
+
+		while(!in.eof()){
+
+			if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }	//interrupted: close both streams and discard the partial output
+
+			//read sequence name
+			string input = m->getline(in); m->gobble(in);
+
+			string outputString = input + "\n";
+
+			if (!input.empty() && input[0] == '@') {	//header line of a record; the empty() test keeps input[0] in bounds when a blank line is read
+				//get rest of lines
+				outputString += m->getline(in) + "\n"; m->gobble(in);
+				outputString += m->getline(in) + "\n"; m->gobble(in);
+				outputString += m->getline(in) + "\n"; m->gobble(in);
+
+				vector<string> splits = m->splitWhiteSpace(input);
+				string name = splits[0];	//sequence name is the first whitespace-delimited token of the header
+				name = name.substr(1);	//drop the leading '@'
+				m->checkName(name);	//NOTE(review): helper is defined outside this diff - presumably normalizes the name, confirm
+
+				if (names.count(name) != 0) {	//listed in names, so keep the whole 4-line record
+					wroteSomething = true;
+					selectedCount++;
+					out << outputString;
+				}
+			}
+
+			m->gobble(in);
+		}
+		in.close();
+		out.close();
+
+
+		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+		outputNames.push_back(outputFileName); outputTypes["fastq"].push_back(outputFileName);	//output is registered even when nothing was written
+
+		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
+
+		return 0;
+
+	}
+	catch(exception& e) {
+		m->errorOut(e, "GetSeqsCommand", "readFastq");
+		exit(1);
+	}
+}
//********************************************************************************************************************** int GetSeqsCommand::readFasta(){ diff --git a/getseqscommand.h b/getseqscommand.h index c5b6ca4..9895432 100644 --- a/getseqscommand.h +++ b/getseqscommand.h @@ -36,13 +36,14 @@ class GetSeqsCommand : public Command { private: set names; vector outputNames; - string accnosfile, accnosfile2, fastafile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir; + string accnosfile, accnosfile2, fastafile, fastqfile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir; bool abort, dups; map uniqueMap; //for debug map > sanity; //maps file type to names chosen for file. something like "fasta" -> vector. If running in debug mode this is filled and we check to make sure all the files have the same names. If they don't we output the differences for the user. int readFasta(); + int readFastq(); int readName(); int readGroup(); int readCount(); diff --git a/listseqscommand.cpp b/listseqscommand.cpp index 343d4fd..0ead54a 100644 --- a/listseqscommand.cpp +++ b/listseqscommand.cpp @@ -16,6 +16,7 @@ //********************************************************************************************************************** vector ListSeqsCommand::setParameters(){ try { + CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq); CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount); @@ -39,8 +40,8 @@ vector ListSeqsCommand::setParameters(){ string ListSeqsCommand::getHelpString(){ 
try { string helpString = ""; - helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n"; - helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport. You must provide one of these parameters.\n"; + helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq or alignreport file and outputs a .accnos file containing sequence names.\n"; + helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq and alignreport. You must provide one of these parameters.\n"; helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n"; helpString += "Example list.seqs(fasta=amazon.fasta).\n"; helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n"; @@ -169,6 +170,14 @@ ListSeqsCommand::ListSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["count"] = inputDir + it->second; } } + + it = parameters.find("fastq"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. 
+ if (path == "") { parameters["fastq"] = inputDir + it->second; } + } } //check for required parameters @@ -205,8 +214,12 @@ ListSeqsCommand::ListSeqsCommand(string option) { if (countfile == "not open") { abort = true; } else if (countfile == "not found") { countfile = ""; } else { m->setCountTableFile(countfile); } + + fastqfile = validParameter.validFile(parameters, "fastq", true); + if (fastqfile == "not open") { abort = true; } + else if (fastqfile == "not found") { fastqfile = ""; } - if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; } + if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; } int okay = 1; if (outputDir != "") { okay++; } @@ -230,6 +243,7 @@ int ListSeqsCommand::execute(){ //read functions fill names vector if (fastafile != "") { inputFileName = fastafile; readFasta(); } + else if (fastqfile != "") { inputFileName = fastqfile; readFastq(); } else if (namefile != "") { inputFileName = namefile; readName(); } else if (groupfile != "") { inputFileName = groupfile; readGroup(); } else if (alignfile != "") { inputFileName = alignfile; readAlign(); } @@ -285,6 +299,53 @@ int ListSeqsCommand::execute(){ exit(1); } } +//********************************************************************************************************************** +int ListSeqsCommand::readFastq(){ + try { + + ifstream in; + m->openInputFile(fastqfile, in); + string name; + + //ofstream out; + //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta"; + //m->openOutputFile(newFastaName, out); + int count = 1; + //string lastName = ""; + + while(!in.eof()){ + + if 
(m->control_pressed) { in.close(); return 0; } + + //read sequence name + string name = m->getline(in); m->gobble(in); + + if (name[0] == '@') { + vector splits = m->splitWhiteSpace(name); + name = splits[0]; + name = name.substr(1); + m->checkName(name); + names.push_back(name); + //get rest of lines + name = m->getline(in); m->gobble(in); + name = m->getline(in); m->gobble(in); + name = m->getline(in); m->gobble(in); + } + + m->gobble(in); + if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + name + "\n"; } + } + in.close(); + //out.close(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "ListSeqsCommand", "readFastq"); + exit(1); + } +} //********************************************************************************************************************** int ListSeqsCommand::readFasta(){ diff --git a/listseqscommand.h b/listseqscommand.h index b8a79c0..9d320d9 100644 --- a/listseqscommand.h +++ b/listseqscommand.h @@ -35,7 +35,7 @@ class ListSeqsCommand : public Command { private: vector names, outputNames; - string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile; + string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile, fastqfile; bool abort; int readFasta(); @@ -45,7 +45,7 @@ class ListSeqsCommand : public Command { int readList(); int readTax(); int readCount(); - + int readFastq(); }; #endif diff --git a/mothurout.cpp b/mothurout.cpp index 3d4504d..66ee704 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -3013,6 +3013,64 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){ } } /***********************************************************************/ +bool MothurOut::checkLocations(string& filename, string inputDir){ + try { + filename = getFullPathName(filename); + + int ableToOpen; + ifstream in; + ableToOpen = openInputFile(filename, in, "noerror"); + in.close(); + + //if you can't open it, try input 
location + if (ableToOpen == 1) { + if (inputDir != "") { //default path is set + string tryPath = inputDir + getSimpleName(filename); + mothurOut("Unable to open " + filename + ". Trying input directory " + tryPath); mothurOutEndLine(); + ifstream in2; + ableToOpen = openInputFile(tryPath, in2, "noerror"); + in2.close(); + filename = tryPath; + } + } + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (getDefaultPath() != "") { //default path is set + string tryPath = getDefaultPath() + getSimpleName(filename); + mothurOut("Unable to open " + filename + ". Trying default " + tryPath); mothurOutEndLine(); + ifstream in2; + ableToOpen = openInputFile(tryPath, in2, "noerror"); + in2.close(); + filename = tryPath; + } + } + + //if you can't open it its not in current working directory or inputDir, try mothur excutable location + if (ableToOpen == 1) { + string exepath = argv; + string tempPath = exepath; + for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); } + exepath = exepath.substr(0, (tempPath.find_last_of('m'))); + + string tryPath = getFullPathName(exepath) + getSimpleName(filename); + mothurOut("Unable to open " + filename + ". 
Trying mothur's executable location " + tryPath); mothurOutEndLine(); + ifstream in2; + ableToOpen = openInputFile(tryPath, in2, "noerror"); + in2.close(); + filename = tryPath; + } + + if (ableToOpen == 1) { mothurOut("Unable to open " + filename + "."); mothurOutEndLine(); return false; } + + return true; + } + catch(exception& e) { + errorOut(e, "MothurOut", "checkLocations"); + exit(1); + } +} +/***********************************************************************/ //This function parses the estimator options and puts them in a vector void MothurOut::splitAtChar(string& estim, vector& container, char symbol) { diff --git a/mothurout.h b/mothurout.h index f58f4e8..db931b5 100644 --- a/mothurout.h +++ b/mothurout.h @@ -97,7 +97,9 @@ class MothurOut { int openOutputFileAppend(string, ofstream&); int openOutputFileBinaryAppend(string, ofstream&); int openInputFile(string, ifstream&); - int openInputFile(string, ifstream&, string); //no error given + int openInputFile(string, ifstream&, string); //no error given + + bool checkLocations(string&, string); //filename, inputDir. checks for file in ./, inputdir, default and mothur's exe location. Returns false if cant be found. 
If found completes name with location string getline(ifstream&); string getline(istringstream&); void gobble(istream&); diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp index fa7e6dd..019a659 100644 --- a/removeseqscommand.cpp +++ b/removeseqscommand.cpp @@ -15,6 +15,7 @@ //********************************************************************************************************************** vector RemoveSeqsCommand::setParameters(){ try { + CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname); CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount); @@ -41,9 +42,9 @@ vector RemoveSeqsCommand::setParameters(){ string RemoveSeqsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n"; + helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n"; helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n"; - helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport and dups. You must provide accnos and at least one of the file parameters.\n"; + helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport, fastq and dups. 
You must provide accnos and at least one of the file parameters.\n"; helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n"; helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n"; helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n"; @@ -61,6 +62,7 @@ string RemoveSeqsCommand::getOutputPattern(string type) { string pattern = ""; if (type == "fasta") { pattern = "[filename],pick,[extension]"; } + else if (type == "fastq") { pattern = "[filename],pick,[extension]"; } else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } else if (type == "name") { pattern = "[filename],pick,[extension]"; } else if (type == "group") { pattern = "[filename],pick,[extension]"; } @@ -84,6 +86,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(){ setParameters(); vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["fastq"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; @@ -123,6 +126,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["fastq"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; outputTypes["name"] = tempOutNames; outputTypes["group"] = tempOutNames; @@ -210,6 +214,14 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["count"] = inputDir + it->second; } } + + it = parameters.find("fastq"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. 
+ if (path == "") { parameters["fastq"] = inputDir + it->second; } + } } @@ -258,6 +270,10 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option) { if (qualfile == "not open") { abort = true; } else if (qualfile == "not found") { qualfile = ""; } else { m->setQualFile(qualfile); } + + fastqfile = validParameter.validFile(parameters, "fastq", true); + if (fastqfile == "not open") { abort = true; } + else if (fastqfile == "not found") { fastqfile = ""; } string usedDups = "true"; string temp = validParameter.validFile(parameters, "dups", false); @@ -280,7 +296,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option) { m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; } - if ((countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; } + if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport, fastq or list."); m->mothurOutEndLine(); abort = true; } if (countfile == "") { if ((fastafile != "") && (namefile == "")) { @@ -317,6 +333,7 @@ int RemoveSeqsCommand::execute(){ //read through the correct file and output lines you want to keep if (namefile != "") { readName(); } if (fastafile != "") { readFasta(); } + if (fastqfile != "") { readFastq(); } if (groupfile != "") { readGroup(); } if (alignfile != "") { readAlign(); } if (listfile != "") { readList(); } @@ -438,6 +455,71 @@ int RemoveSeqsCommand::readFasta(){ } } 
//**********************************************************************************************************************
+int RemoveSeqsCommand::readFastq(){	//writes every fastq record whose name is NOT in names to the "pick" output file; always returns 0
+	try {
+		bool wroteSomething = false;	//true once at least one record has been kept
+		int removedCount = 0;	//number of records dropped because their name is in names
+
+		ifstream in;
+		m->openInputFile(fastqfile, in);
+
+		string thisOutputDir = outputDir;
+		if (outputDir == "") { thisOutputDir += m->hasPath(fastqfile); }	//no outputdir given: use the path of the input file
+		map<string, string> variables;
+		variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
+		variables["[extension]"] = m->getExtension(fastqfile);
+		string outputFileName = getOutputFileName("fastq", variables);
+		ofstream out;
+		m->openOutputFile(outputFileName, out);
+
+
+		while(!in.eof()){
+
+			if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }	//interrupted: close both streams and discard the partial output
+
+			//read sequence name
+			string input = m->getline(in); m->gobble(in);
+
+			string outputString = input + "\n";
+
+			if (!input.empty() && input[0] == '@') {	//header line of a record; the empty() test keeps input[0] in bounds when a blank line is read
+				//get rest of lines
+				outputString += m->getline(in) + "\n"; m->gobble(in);
+				outputString += m->getline(in) + "\n"; m->gobble(in);
+				outputString += m->getline(in) + "\n"; m->gobble(in);
+
+				vector<string> splits = m->splitWhiteSpace(input);
+				string name = splits[0];	//sequence name is the first whitespace-delimited token of the header
+				name = name.substr(1);	//drop the leading '@'
+				m->checkName(name);	//NOTE(review): helper is defined outside this diff - presumably normalizes the name, confirm
+
+				if (names.count(name) == 0) {	//not listed in names, so keep the whole 4-line record
+					wroteSomething = true;
+					out << outputString;
+				}else { removedCount++; }
+			}
+
+			m->gobble(in);
+		}
+		in.close();
+		out.close();
+
+
+		if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
+		outputTypes["fastq"].push_back(outputFileName); outputNames.push_back(outputFileName);	//register under the same "fastq" type that was passed to getOutputFileName above
+
+		m->mothurOut("Removed " + toString(removedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
+
+
+		return 0;
+
+	}
+	catch(exception& e) {
+		m->errorOut(e, "RemoveSeqsCommand", "readFastq");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
 int
RemoveSeqsCommand::readQual(){ try { string thisOutputDir = outputDir; diff --git a/removeseqscommand.h b/removeseqscommand.h index 1a6365d..7aaff17 100644 --- a/removeseqscommand.h +++ b/removeseqscommand.h @@ -35,12 +35,13 @@ class RemoveSeqsCommand : public Command { private: set names; - string accnosfile, fastafile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir; + string accnosfile, fastafile, fastqfile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir; bool abort, dups; vector outputNames; map uniqueMap; int readFasta(); + int readFastq(); int readName(); int readGroup(); int readCount(); diff --git a/sffmultiplecommand.cpp b/sffmultiplecommand.cpp index d8a4b96..f75662b 100644 --- a/sffmultiplecommand.cpp +++ b/sffmultiplecommand.cpp @@ -166,7 +166,7 @@ SffMultipleCommand::SffMultipleCommand(string option) { outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); + inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { string path; @@ -303,54 +303,9 @@ SffMultipleCommand::SffMultipleCommand(string option) { path += "lookupFiles\\"; #endif lookupFileName = m->getFullPathName(path) + "LookUp_Titanium.pat"; - - int ableToOpen; - ifstream in; - ableToOpen = m->openInputFile(lookupFileName, in, "noerror"); - in.close(); - - //if you can't open it, try input location - if (ableToOpen == 1) { - if (inputDir != "") { //default path is set - string tryPath = inputDir + m->getSimpleName(lookupFileName); - m->mothurOut("Unable to open " + lookupFileName + ". 
Trying input directory " + tryPath); m->mothurOutEndLine(); - ifstream in2; - ableToOpen = m->openInputFile(tryPath, in2, "noerror"); - in2.close(); - lookupFileName = tryPath; - } - } - - //if you can't open it, try default location - if (ableToOpen == 1) { - if (m->getDefaultPath() != "") { //default path is set - string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName); - m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine(); - ifstream in2; - ableToOpen = m->openInputFile(tryPath, in2, "noerror"); - in2.close(); - lookupFileName = tryPath; - } - } - - //if you can't open it its not in current working directory or inputDir, try mothur excutable location - if (ableToOpen == 1) { - string exepath = m->argv; - string tempPath = exepath; - for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); } - exepath = exepath.substr(0, (tempPath.find_last_of('m'))); - - string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName); - m->mothurOut("Unable to open " + lookupFileName + ". 
Trying mothur's executable location " + tryPath); m->mothurOutEndLine(); - ifstream in2; - ableToOpen = m->openInputFile(tryPath, in2, "noerror"); - in2.close(); - lookupFileName = tryPath; - } - - if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; } - } - else if(temp == "not open") { + bool ableToOpen = m->checkLocations(lookupFileName, inputDir); + if (!ableToOpen) { abort=true; } + }else if(temp == "not open") { lookupFileName = validParameter.validFile(parameters, "lookup", false); @@ -384,8 +339,9 @@ int SffMultipleCommand::execute(){ vector sffFiles, oligosFiles; readFile(sffFiles, oligosFiles); - outputDir = m->hasPath(filename); - string fileroot = outputDir + m->getRootName(m->getSimpleName(filename)); + string thisOutputDir = outputDir; + if (thisOutputDir == "") { thisOutputDir = m->hasPath(filename); } + string fileroot = thisOutputDir + m->getRootName(m->getSimpleName(filename)); map variables; variables["[filename]"] = fileroot; string fasta = getOutputFileName("fasta",variables); @@ -414,6 +370,8 @@ int SffMultipleCommand::execute(){ if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); } } + m->setProcessors(toString(processors)); + //report output filenames m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -443,11 +401,11 @@ int SffMultipleCommand::readFile(vector& sffFiles, vector& oligo in >> sff; - sff = m->getFullPathName(sff); - //ignore file pairing if(sff[0] == '#'){ while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } m->gobble(in); } else { //check for oligos file + bool ableToOpenSff = m->checkLocations(sff, inputDir); + oligos = ""; // get rest of line in case there is a oligos filename @@ -456,11 +414,18 @@ int SffMultipleCommand::readFile(vector& sffFiles, vector& oligo if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab 
else { oligos += c; } - } - sffFiles.push_back(sff); - if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false; } - if (oligos == "") { allFull = false; } - oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file + } + + if (ableToOpenSff) { + sffFiles.push_back(sff); + if (oligos != "") { + bool ableToOpenOligos = m->checkLocations(oligos, inputDir); + if (ableToOpenOligos) { allBlank = false; } + else { m->mothurOut("Can not find " + oligos + ". Ignoring.\n"); oligos = ""; } + } + if (oligos == "") { allFull = false; } + oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file + }else { m->mothurOut("Can not find " + sff + ". Ignoring.\n"); } } m->gobble(in); } @@ -490,8 +455,12 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n"); //run sff.info + string redirects = ""; + if (inputDir != "") { redirects += ", inputdir=" + inputDir; } + if (outputDir != "") { redirects += ", outputdir=" + outputDir; } string inputString = "sff=" + sff + ", flow=T"; if (trim) { inputString += ", trim=T"; } + if (redirects != "") { inputString += redirects; } m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; @@ -507,6 +476,9 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil m->mothurCalling = false; m->mothurOutEndLine(); + redirects = ""; + if (outputDir != "") { redirects += ", outputdir=" + outputDir; } + //run summary.seqs on the fasta file string fastaFile = ""; map >::iterator it = filenames.find("fasta"); @@ -514,6 +486,7 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil else { m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); 
m->control_pressed = true; break; } inputString = "fasta=" + fastaFile + ", processors=1"; + if (redirects != "") { inputString += redirects; } m->mothurOutEndLine(); m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; @@ -542,7 +515,7 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows); inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs); inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1"; - + if (redirects != "") { inputString += redirects; } m->mothurOutEndLine(); m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; @@ -589,7 +562,7 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil inputString += ", sigma=" +toString(sigma); inputString += ", mindelta=" + toString(minDelta); inputString += ", order=" + flowOrder + ", processors=1"; - + if (redirects != "") { inputString += redirects; } //run shhh.flows m->mothurOutEndLine(); m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine(); @@ -637,7 +610,7 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); } if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); } inputString += ", processors=1"; - + if (redirects != "") { inputString += redirects; } //run trim.seqs m->mothurOutEndLine(); m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine(); @@ -688,6 +661,7 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil } inputString = "fasta=" + fastaFile + ", 
processors=1, name=" + nameFile; + if (redirects != "") { inputString += redirects; } m->mothurOutEndLine(); m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; @@ -711,13 +685,14 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil m->appendFiles(nameFile, name); if (makeGroup) { m->appendFiles(groupFile, group); } } - count++; + for (it = filenames.begin(); it != filenames.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]); } } + count++; } return count; diff --git a/sffmultiplecommand.h b/sffmultiplecommand.h index ee83d72..1a492a8 100644 --- a/sffmultiplecommand.h +++ b/sffmultiplecommand.h @@ -42,7 +42,8 @@ private: int end; linePair(int i, int j) : start(i), end(j) {} }; - + + string inputDir; string filename, outputDir, flowOrder, lookupFileName, minDelta; vector outputNames; bool abort, trim, large, flip, allFiles, keepforward, append, makeGroup; diff --git a/venn.cpp b/venn.cpp index 66dbb8e..8e91b8a 100644 --- a/venn.cpp +++ b/venn.cpp @@ -162,7 +162,9 @@ vector Venn::getPic(vector lookup, vectorgetName() == "sharedsobs") { singleCalc = new Sobs(); if (sharedOtus && (labels.size() != 0)) { - string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus"; + string groupsTag = ""; + for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; } groupsTag += lookup[lookup.size()-1]->getGroup(); + string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." 
+ groupsTag + ".sharedotus"; outputNames.push_back(filenameShared); ofstream outShared; @@ -455,7 +457,9 @@ vector Venn::getPic(vector lookup, vectorgetName() == "sharedsobs")) { - string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus"; + string groupsTag = ""; + for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; } groupsTag += lookup[lookup.size()-1]->getGroup(); + string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus"; outputNames.push_back(filenameShared); @@ -660,7 +664,9 @@ vector Venn::getPic(vector lookup, vectorgetName() == "sharedsobs")) { - string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus"; + string groupsTag = ""; + for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; } groupsTag += lookup[lookup.size()-1]->getGroup(); + string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus"; outputNames.push_back(filenameShared);