From: westcott Date: Mon, 22 Nov 2010 13:24:01 +0000 (+0000) Subject: added group option to trim.seqs so you can provide your own groupfile instead of... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=d9b668f68b99f92ecdc71dd8cd363cb4e27107f9 added group option to trim.seqs so you can provide your own groupfile instead of using an oligos file to make one. --- diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index 7f6dc21..4c9baee 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -164,13 +164,14 @@ IndicatorCommand::IndicatorCommand(string option) { void IndicatorCommand::help(){ try { - /*m->mothurOut("The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. \n"); - m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n"); - m->mothurOut("The read.tree command parameters are tree, group and name.\n"); - m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n"); - m->mothurOut("The tree and group parameters are both required, if no group file is given then one group is assumed.\n"); - m->mothurOut("The name parameter allows you to enter a namefile.\n"); - m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n"); */ + m->mothurOut("The indicator command reads a shared or relabund file and a tree file, and outputs a .indicator.tre and .indicator.summary file. \n"); + m->mothurOut("The new tree contains labels at each internal node. The label is the OTU number of the indicator OTU.\n"); + m->mothurOut("The summary file lists the indicator value for each OTU for each node.\n"); + m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree and label parameter are required as well as either shared or relabund.\n"); + m->mothurOut("The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed. The groups may be entered separated by dashes.\n"); + m->mothurOut("The label parameter indicates at what distance your tree relates to the shared or relabund.\n"); + m->mothurOut("The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n"); } catch(exception& e) { m->errorOut(e, "IndicatorCommand", "help"); diff --git a/mothur b/mothur index bb3b522..ff1c875 100755 Binary files a/mothur and b/mothur differ diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 8aacd9d..a09e402 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -13,7 +13,7 @@ //********************************************************************************************************************** vector TrimSeqsCommand::getValidParameters(){ try { - string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + string Array[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; @@ -74,7 +74,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else { //valid paramters for this command - string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", + string AlignArray[] = {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -124,6 +124,14 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["qfile"] = inputDir + it->second; } } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } } @@ -151,6 +159,11 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { else if(temp == "not open"){ abort = true; } else { oligoFile = temp; } + temp = validParameter.validFile(parameters, "group", true); + if (temp == "not found"){ groupfile = ""; } + else if(temp == "not open"){ abort = true; } + else { groupfile = temp; } + temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; } convert(temp, maxAmbig); @@ -206,8 +219,13 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); - if(allFiles && oligoFile == ""){ - m->mothurOut("You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request."); m->mothurOutEndLine(); + if ((oligoFile != "") && (groupfile != "")) { + m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true; + } + + + if(allFiles && (oligoFile == "") && (groupfile == "")){ + m->mothurOut("You selected allfiles, but didn't enter an oligos or group file. Ignoring the allfiles request."); m->mothurOutEndLine(); } if((qAverage != 0 && qThreshold != 0) && qFileName == ""){ m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine(); @@ -231,8 +249,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option) { void TrimSeqsCommand::help(){ try { m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n"); - m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); + m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n"); m->mothurOut("The fasta parameter is required.\n"); + m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n"); m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n"); m->mothurOut("The oligos parameter .... The default is "".\n"); m->mothurOut("The maxambig parameter .... The default is -1.\n"); @@ -275,6 +294,8 @@ int TrimSeqsCommand::execute(){ numFPrimers = 0; //this needs to be initialized numRPrimers = 0; + vector fastaFileNames; + vector qualFileNames; string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta"; outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile); @@ -283,10 +304,35 @@ int TrimSeqsCommand::execute(){ string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual"; string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual"; if (qFileName != "") { outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile); outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); } - string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; + string groupFile = ""; + if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; } + else{ + groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups"; + outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + + if(allFiles){ + for (int i = 0; i < groupMap->namesOfGroups.size(); i++) { + groupToIndex[groupMap->namesOfGroups[i]] = i; + groupVector.push_back(groupMap->namesOfGroups[i]); + fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupMap->namesOfGroups[i] + ".fasta")); + + //we append later, so we want to clear file + ofstream outRemove; + m->openOutputFile(fastaFileNames[i], outRemove); + outRemove.close(); + if(qFileName != ""){ + qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupMap->namesOfGroups[i] + ".qual")); + ofstream outRemove2; + m->openOutputFile(qualFileNames[i], outRemove2); + outRemove2.close(); + } + } + } + comboStarts = fastaFileNames.size()-1; + } - vector fastaFileNames; - vector qualFileNames; if(oligoFile != ""){ outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile); getOligos(fastaFileNames, qualFileNames); @@ -384,7 +430,7 @@ int TrimSeqsCommand::execute(){ if (m->control_pressed) { return 0; } #endif - + for(int i=0;iisBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); } @@ -643,6 +689,32 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } } } + + if (groupfile != "") { + string thisGroup = groupMap->getGroup(currSeq.getName()); + + if (thisGroup != "not found") { + outGroups << currSeq.getName() << '\t' << thisGroup << endl; + if (allFiles) { + ofstream outTemp; + m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp); + currSeq.printSequence(outTemp); + outTemp.close(); + if(qFileName != ""){ + ofstream outTemp2; + m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2); + currQual.printQScores(outTemp2); + outTemp2.close(); + } + } + }else{ + m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine(); + outGroups << currSeq.getName() << '\t' << "XXX" << endl; + if (allFiles) { + m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine(); + } + } + } } else{ currSeq.setName(currSeq.getName() + '|' + trashCode); diff --git a/trimseqscommand.h b/trimseqscommand.h index 20b11a8..2f8fbd7 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -14,6 +14,7 @@ #include "command.hpp" #include "sequence.hpp" #include "qualityscores.h" +#include "groupmap.h" class TrimSeqsCommand : public Command { public: @@ -28,7 +29,9 @@ public: void help(); private: - + + GroupMap* groupMap; + struct linePair { unsigned long int start; unsigned long int end; @@ -47,7 +50,7 @@ private: map > outputTypes; bool abort; - string fastaFile, oligoFile, qFileName, outputDir; + string fastaFile, oligoFile, qFileName, groupfile, outputDir; bool flip, allFiles, qtrim; int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, comboStarts; @@ -59,6 +62,7 @@ private: vector groupVector; map primers; map combos; + map groupToIndex; vector processIDS; //processid vector lines;