From: Sarah Westcott Date: Tue, 15 May 2012 17:54:41 +0000 (-0400) Subject: added list.labels command. started work on make.contigs command. fixed fastq.info... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=4c302368ef34f0d897afefc7853edf86fb45b9f3 added list.labels command. started work on make.contigs command. fixed fastq.info and make.fastq quality scores control character. fixed bug in classify.otu that made bootstrap values for "unknown" taxon too high --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index ae1df1e..f40cb4b 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -37,6 +37,8 @@ A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; }; A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; }; A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; }; + A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; }; + A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */; }; A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */; }; A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */; }; A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; }; @@ -419,6 +421,10 @@ A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = ""; }; A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = ""; }; A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = ""; }; + A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = listotulabelscommand.cpp; sourceTree = ""; }; + A7A0671C156294810095C8C5 /* listotulabelscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listotulabelscommand.h; sourceTree = ""; }; + A7A0671D1562AC230095C8C5 /* makecontigscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makecontigscommand.h; sourceTree = ""; }; + A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makecontigscommand.cpp; sourceTree = ""; }; A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sortseqscommand.cpp; sourceTree = ""; }; A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sortseqscommand.h; sourceTree = ""; }; A7A3C8C714D041AD00B1BFBE /* otuassociationcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = otuassociationcommand.cpp; sourceTree = ""; }; @@ -1299,10 +1305,14 @@ A7E9B73B12D37EC400DA6239 /* libshuffcommand.cpp */, A7E9B73E12D37EC400DA6239 /* listseqscommand.h */, A7E9B73D12D37EC400DA6239 /* listseqscommand.cpp */, + A7A0671C156294810095C8C5 /* listotulabelscommand.h */, + A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */, A7FA10001302E096003860FE /* mantelcommand.h */, A7FA10011302E096003860FE /* mantelcommand.cpp */, A724D2B4153C8600000A826F /* makebiomcommand.h */, A724D2B6153C8628000A826F /* makebiomcommand.cpp */, + A7A0671D1562AC230095C8C5 /* makecontigscommand.h */, + A7A0671E1562AC3E0095C8C5 /* makecontigscommand.cpp */, A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */, A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */, A7E9B74412D37EC400DA6239 /* makegroupcommand.h */, @@ -2168,6 +2178,8 @@ A724D2B7153C8628000A826F /* makebiomcommand.cpp in Sources */, 219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */, 219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */, + A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */, + A7A0671F1562AC3E0095C8C5 /* makecontigscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Mothur.xcodeproj/xcuserdata/SarahsWork.xcuserdatad/xcschemes/Mothur.xcscheme b/Mothur.xcodeproj/xcuserdata/SarahsWork.xcuserdatad/xcschemes/Mothur.xcscheme index 5184c66..3ced6e4 100644 --- a/Mothur.xcodeproj/xcuserdata/SarahsWork.xcuserdatad/xcschemes/Mothur.xcscheme +++ b/Mothur.xcodeproj/xcuserdata/SarahsWork.xcuserdatad/xcschemes/Mothur.xcscheme @@ -14,7 +14,7 @@ @@ -32,7 +32,7 @@ @@ -50,7 +50,7 @@ @@ -68,7 +68,7 @@ diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp index c889637..ec22de8 100644 --- a/classifyotucommand.cpp +++ b/classifyotucommand.cpp @@ -475,6 +475,9 @@ vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th } } + + //phylotree adds an extra unknown so we want to remove that + if (bestChild.name == "unknown") { bestChildSize--; } //is this taxonomy above cutoff int consensusConfidence = ceil((bestChildSize / (float) size) * 100); diff --git a/commandfactory.cpp b/commandfactory.cpp index d70c10a..e73d60b 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -132,6 +132,7 @@ #include "createdatabasecommand.h" #include "makebiomcommand.h" #include "getcoremicrobiomecommand.h" +#include "listotulabelscommand.h" /*******************************************************/ @@ -284,7 +285,8 @@ CommandFactory::CommandFactory(){ commands["pcr.seqs"] = "pcr.seqs"; commands["create.database"] = "create.database"; commands["make.biom"] = "make.biom"; - commands["get.coremicrobiome"] = "get.coremicrobiome"; + commands["get.coremicrobiome"] = "get.coremicrobiome"; + commands["list.otulabels"] = "list.otulabels"; commands["quit"] = "MPIEnabled"; } @@ -496,6 +498,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "create.database") { command = new CreateDatabaseCommand(optionString); } else if(commandName == "make.biom") { command = new MakeBiomCommand(optionString); } else if(commandName == "get.coremicrobiome") { command = new GetCoreMicroBiomeCommand(optionString); } + else if(commandName == "list.otulabels") { command = new ListOtuLabelsCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -648,6 +651,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "create.database") { pipecommand = new CreateDatabaseCommand(optionString); } else if(commandName == "make.biom") { pipecommand = new MakeBiomCommand(optionString); } else if(commandName == "get.coremicrobiome") { pipecommand = new GetCoreMicroBiomeCommand(optionString); } + else if(commandName == "list.otulabels") { pipecommand = new ListOtuLabelsCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -786,6 +790,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "create.database") { shellcommand = new CreateDatabaseCommand(); } else if(commandName == "make.biom") { shellcommand = new MakeBiomCommand(); } else if(commandName == "get.coremicrobiome") { shellcommand = new GetCoreMicroBiomeCommand(); } + else if(commandName == "list.otulabels") { shellcommand = new ListOtuLabelsCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/listotulabelscommand.cpp b/listotulabelscommand.cpp new file mode 100644 index 0000000..becfae3 --- /dev/null +++ b/listotulabelscommand.cpp @@ -0,0 +1,387 @@ +// +// listotucommand.cpp +// Mothur +// +// Created by Sarah Westcott on 5/15/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "listotulabelscommand.h" +#include "inputdata.h" + +//********************************************************************************************************************** +vector ListOtuLabelsCommand::setParameters(){ + try { + CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared); + CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund); + CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); + CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); + //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string ListOtuLabelsCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The list.labels lists otu labels from shared or relabund file. The results can be used by the get.labels to select specific otus with the output from classify.otu, otu.association, or corr.axes.\n"; + helpString += "The list.labels parameters are: shared, relabund, label and groups.\n"; + helpString += "The label parameter is used to analyze specific labels in your input.\n"; + helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n"; + helpString += "The list.labels commmand should be in the following format: \n"; + helpString += "list.otulabels(shared=yourSharedFile, groups=yourGroup1-yourGroup2)\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +ListOtuLabelsCommand::ListOtuLabelsCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["otulabels"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +ListOtuLabelsCommand::ListOtuLabelsCommand(string option) { + try { + abort = false; calledHelp = false; + allLines = 1; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + //valid paramters for this command + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + + //edit file types below to include only the types you added as parameters + + string path; + it = parameters.find("relabund"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["relabund"] = inputDir + it->second; } + } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + vector tempOutNames; + outputTypes["otulabels"] = tempOutNames; + + //check for parameters + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { abort = true; } + else if (sharedfile == "not found") { sharedfile = ""; } + else { inputFileName = sharedfile; format = "sharedfile"; m->setSharedFile(sharedfile); } + + relabundfile = validParameter.validFile(parameters, "relabund", true); + if (relabundfile == "not open") { abort = true; } + else if (relabundfile == "not found") { relabundfile = ""; } + else { inputFileName = relabundfile; format = "relabund"; m->setRelAbundFile(relabundfile); } + + if ((relabundfile == "") && (sharedfile == "")) { + //is there are current file available for either of these? + //give priority to shared, then relabund + sharedfile = m->getSharedFile(); + if (sharedfile != "") { inputFileName = sharedfile; format="sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { + relabundfile = m->getRelAbundFile(); + if (relabundfile != "") { inputFileName = relabundfile; format="relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine(); + abort = true; + } + } + } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = m->hasPath(inputFileName); //if user entered a file with a path then preserve it + } + + string groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { m->splitAtDash(groups, Groups); } + m->setGroups(Groups); + + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + } + + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "ListOtuLabelsCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +int ListOtuLabelsCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + InputData input(inputFileName, format); + + if (format == "relabund") { + vector lookup = input.getSharedRAbundFloatVectors(); + string lastLabel = lookup[0]->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + createList(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundFloatVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + createList(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //get next line to process + lookup = input.getSharedRAbundFloatVectors(); + } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundFloatVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + createList(lookup); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + }else { + + vector lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + createList(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + createList(lookup); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //get next line to process + lookup = input.getSharedRAbundVectors(); + } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + createList(lookup); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + + //output files created by command + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; + + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** + +int ListOtuLabelsCommand::createList(vector& lookup){ + try { + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels"; + outputNames.push_back(outputFileName); outputTypes["otulabels"].push_back(outputFileName); + ofstream out; + m->openOutputFile(outputFileName, out); + + for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; } + + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "createTable"); + exit(1); + } +} + +//********************************************************************************************************************** + +int ListOtuLabelsCommand::createList(vector& lookup){ + try { + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + lookup[0]->getLabel() + ".otu.labels"; + outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName); + ofstream out; + m->openOutputFile(outputFileName, out); + + for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; } + + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ListOtuLabelsCommand", "createTable"); + exit(1); + } +} + +//********************************************************************************************************************** + diff --git a/listotulabelscommand.h b/listotulabelscommand.h new file mode 100644 index 0000000..ea41539 --- /dev/null +++ b/listotulabelscommand.h @@ -0,0 +1,54 @@ +#ifndef Mothur_listotucommand_h +#define Mothur_listotucommand_h + +// +// listotucommand.h +// Mothur +// +// Created by Sarah Westcott on 5/15/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + + +#include "command.hpp" +#include "sharedrabundvector.h" + +/**************************************************************************************************/ + +class ListOtuLabelsCommand : public Command { +public: + ListOtuLabelsCommand(string); + ListOtuLabelsCommand(); + ~ListOtuLabelsCommand(){} + + vector setParameters(); + string getCommandName() { return "list.otulabels"; } + string getCommandCategory() { return "OTU-Based Approaches"; } + //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/List.otulabels"; } + string getDescription() { return "lists otu labels from shared or relabund file. Can be used with output from classify.otu, otu.association, or corr.axes to select specific otus."; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + +private: + bool abort, allLines; + string outputDir, sharedfile, relabundfile, label, inputFileName, format; + vector outputNames; + vector Groups; + set labels; + + int createList(vector&); + int createList(vector&); + +}; + +/**************************************************************************************************/ + + + + + + +#endif diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp new file mode 100644 index 0000000..431fc42 --- /dev/null +++ b/makecontigscommand.cpp @@ -0,0 +1,389 @@ +// +// makecontigscommand.cpp +// Mothur +// +// Created by Sarah Westcott on 5/15/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "makecontigscommand.h" + +//********************************************************************************************************************** +vector MakeContigsCommand::setParameters(){ + try { + CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); + CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta); + CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign); + CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch); + CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch); + CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen); + CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string MakeContigsCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The make.contigs command reads a forward fastq file and a reverse fastq file and outputs new fasta and quality files.\n"; + helpString += "The make.contigs command parameters are ffastq, rfastq, align, match, mismatch, gapopen, gapextend and processors.\n"; + helpString += "The ffastq and rfastq parameter is required.\n"; + helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n"; + helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n"; + helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n"; + helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n"; + helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"; + helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; + helpString += "The make.contigs command should be in the following format: \n"; + helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n"; + helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "getHelpString"); + exit(1); + } +} + +//********************************************************************************************************************** +MakeContigsCommand::MakeContigsCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +MakeContigsCommand::MakeContigsCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter("pairwise.seqs"); + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["fasta"] = tempOutNames; + outputTypes["qfile"] = tempOutNames; + + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("ffastq"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["ffastq"] = inputDir + it->second; } + } + + it = parameters.find("rfastq"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["rfastq"] = inputDir + it->second; } + } + } + + ffastqfile = validParameter.validFile(parameters, "ffastq", true); + if (ffastqfile == "not open") { ffastqfile = ""; abort = true; } + else if (ffastqfile == "not found") { ffastqfile = ""; abort=true; m->mothurOut("The ffastq parameter is required.\n"); } + + rfastqfile = validParameter.validFile(parameters, "rfastq", true); + if (rfastqfile == "not open") { rfastqfile = ""; abort = true; } + else if (rfastqfile == "not found") { rfastqfile = ""; abort=true; m->mothurOut("The rfastq parameter is required.\n"); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(ffastqfile); } + + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + string temp; + temp = validParameter.validFile(parameters, "match", false); if (temp == "not found"){ temp = "1.0"; } + m->mothurConvert(temp, match); + + temp = validParameter.validFile(parameters, "mismatch", false); if (temp == "not found"){ temp = "-1.0"; } + m->mothurConvert(temp, misMatch); + if (misMatch > 0) { m->mothurOut("[ERROR]: mismatch must be negative.\n"); abort=true; } + + temp = validParameter.validFile(parameters, "gapopen", false); if (temp == "not found"){ temp = "-2.0"; } + m->mothurConvert(temp, gapOpen); + if (gapOpen > 0) { m->mothurOut("[ERROR]: gapopen must be negative.\n"); abort=true; } + + temp = validParameter.validFile(parameters, "gapextend", false); if (temp == "not found"){ temp = "-1.0"; } + m->mothurConvert(temp, gapExtend); + if (gapExtend > 0) { m->mothurOut("[ERROR]: gapextend must be negative.\n"); abort=true; } + + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } + m->setProcessors(temp); + m->mothurConvert(temp, processors); + + align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; } + if ((align != "needleman") && (align != "blast") && (align != "gotoh") && (align != "noalign")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman, blast, gotoh and noalign. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; } + } + + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand"); + exit(1); + } +} +//********************************************************************************************************************** +int MakeContigsCommand::execute(){ + try { + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + //read ffastq and rfastq files creating fasta and qual files. + //this function will create a forward and reverse, fasta and qual files for each processor. + //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual + vector< vector > files = readFastqFiles(); + + + + string currentFasta = ""; + itTypes = outputTypes.find("fasta"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); } + } + + string currentQual = ""; + itTypes = outputTypes.find("qfile"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { currentQual = (itTypes->second)[0]; m->setQualFile(currentQual); } + } + + //output files created by command + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +vector< vector > MakeContigsCommand::readFastqFiles(){ + try { + vector< vector > files; + + //maps processors number to file pointer + map > tempfiles; //tempfiles[0] = forwardFasta, [1] = forwardQual, [2] = reverseFasta, [3] = reverseQual + map >::iterator it; + + //create files to write to + for (int i = 0; i < processors; i++) { + vector temp; + ofstream* outFF = new ofstream; temp.push_back(outFF); + ofstream* outFQ = new ofstream; temp.push_back(outFQ); + ofstream* outRF = new ofstream; temp.push_back(outRF); + ofstream* outRQ = new ofstream; temp.push_back(outRQ); + tempfiles[i] = temp; + + vector names; + string ffastafilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "ffasta.temp"; + string rfastafilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rfasta.temp"; + string fqualfilename = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + toString(i) + "fqual.temp"; + string rqualfilename = outputDir + m->getRootName(m->getSimpleName(rfastqfile)) + toString(i) + "rqual.temp"; + names.push_back(ffastafilename); names.push_back(fqualfilename); + names.push_back(rfastafilename); names.push_back(rqualfilename); + files.push_back(names); + + m->openOutputFile(ffastafilename, *outFF); + m->openOutputFile(rfastafilename, *outRF); + m->openOutputFile(fqualfilename, *outFQ); + m->openOutputFile(rqualfilename, *outRQ); + } + + if (m->control_pressed) { + //close files, delete ofstreams + for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } + //remove files + for (int i = 0; i < files.size(); i++) { + for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } + } + } + + ifstream inForward; + m->openInputFile(ffastqfile, inForward); + + ifstream inReverse; + m->openInputFile(rfastqfile, inReverse); + + int count = 0; + while ((!inForward.eof()) && (!inReverse.eof())) { + + if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; } + + //get a read from forward and reverse fastq files + fastqRead fread = readFastq(inForward); + fastqRead rread = readFastq(inReverse); + checkReads(fread, rread); + + if (m->control_pressed) { for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } for (int i = 0; i < files.size(); i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } } inForward.close(); inReverse.close(); return files; } + + //if the reads are okay write to output files + int process = count % processors; + + *(tempfiles[process][0]) << ">" << fread.name << endl << fread.sequence << endl; + *(tempfiles[process][1]) << ">" << fread.name << endl; + for (int i = 0; i < fread.scores.size(); i++) { *(tempfiles[process][1]) << fread.scores[i] << " "; } + *(tempfiles[process][1]) << endl; + *(tempfiles[process][2]) << ">" << rread.name << endl << rread.sequence << endl; + *(tempfiles[process][3]) << ">" << rread.name << endl; + for (int i = 0; i < rread.scores.size(); i++) { *(tempfiles[process][3]) << rread.scores[i] << " "; } + *(tempfiles[process][3]) << endl; + + count++; + } + + //close files, delete ofstreams + for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close(); delete (it->second)[i]; } } + inForward.close(); + inReverse.close(); + + //adjust for really large processors or really small files + if (count < processors) { + for (int i = count; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); } files[i].clear(); } + files.resize(count); + processors = count; + } + + return files; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "readFastqFiles"); + exit(1); + } +} + +//********************************************************************************************************************** +fastqRead MakeContigsCommand::readFastq(ifstream& in){ + try { + fastqRead read; + + //read sequence name + string name = m->getline(in); m->gobble(in); + if (name == "") { m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; return read; } + else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; } + else { name = name.substr(1); } + + //read sequence + string sequence = m->getline(in); m->gobble(in); + if (sequence == "") { m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; return read; } + + //read sequence name + string name2 = m->getline(in); m->gobble(in); + if (name2 == "") { m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; return read; } + else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; return read; } + else { name2 = name2.substr(1); } + + //read quality scores + string quality = m->getline(in); m->gobble(in); + if (quality == "") { m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; return read; } + + //sanity check sequence length and number of quality scores match + if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; return read; } } + if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; return read; } + + vector qualScores; + int controlChar = int('@'); + for (int i = 0; i < quality.length(); i++) { + int temp = int(quality[i]); + temp -= controlChar; + + qualScores.push_back(temp); + } + + read.name = name; + read.sequence = sequence; + read.scores = qualScores; + + return read; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "readFastq"); + exit(1); + } +} +//********************************************************************************************************************** +bool MakeContigsCommand::checkReads(fastqRead& forward, fastqRead& reverse){ + try { + bool good = true; + + //fix names + if ((forward.name.length() > 2) && (reverse.name.length() > 2)) { + forward.name = forward.name.substr(0, forward.name.length()-2); + reverse.name = reverse.name.substr(0, reverse.name.length()-2); + }else { good = false; m->control_pressed = true; } + + //do names match + if (forward.name != reverse.name) { + m->mothurOut("[ERROR]: read " + forward.name + " from " + ffastqfile + ", but read " + reverse.name + " from " + rfastqfile + ".\n"); + good = false; m->control_pressed = true; + } + + //do sequence lengths match + if (forward.sequence.length() != reverse.sequence.length()) { + m->mothurOut("[ERROR]: For sequence " + forward.name + " I read a sequence of length " + toString(forward.sequence.length()) + " from " + ffastqfile + ", but read a sequence of length " + toString(reverse.sequence.length()) + " from " + rfastqfile + ".\n"); + good = false; m->control_pressed = true; + } + + //do number of qual scores match + if (forward.scores.size() != reverse.scores.size()) { + m->mothurOut("[ERROR]: For sequence " + forward.name + " I read " + toString(forward.scores.size()) + " quality scores from " + ffastqfile + ", but read " + toString(reverse.scores.size()) + " quality scores from " + rfastqfile + ".\n"); + good = false; m->control_pressed = true; + } + + return good; + } + catch(exception& e) { + m->errorOut(e, "MakeContigsCommand", "readFastq"); + exit(1); + } +} +//********************************************************************************************************************** + + + + diff --git a/makecontigscommand.h b/makecontigscommand.h new file mode 100644 index 0000000..5d123d5 --- /dev/null +++ b/makecontigscommand.h @@ -0,0 +1,63 @@ +#ifndef Mothur_makecontigscommand_h +#define Mothur_makecontigscommand_h + +// +// makecontigscommand.h +// Mothur +// +// Created by Sarah Westcott on 5/15/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#include "command.hpp" + +struct fastqRead { + vector scores; + string name; + string sequence; + + fastqRead() { name = ""; sequence = ""; scores.clear(); }; + fastqRead(string n, string s, vector sc) : name(n), sequence(s), scores(sc) {}; + ~fastqRead() {}; +}; + +/**************************************************************************************************/ + +class MakeContigsCommand : public Command { +public: + MakeContigsCommand(string); + MakeContigsCommand(); + ~MakeContigsCommand(){} + + vector setParameters(); + string getCommandName() { return "make.contigs"; } + string getCommandCategory() { return "Sequence Processing"; } + //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Make.contigs"; } + string getDescription() { return "description"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + +private: + bool abort; + string outputDir, ffastqfile, rfastqfile, align; + float match, misMatch, gapOpen, gapExtend; + int processors, longestBase; + vector outputNames; + + fastqRead readFastq(ifstream&); + vector< vector > readFastqFiles(); + bool checkReads(fastqRead&, fastqRead&); +}; + +/**************************************************************************************************/ + + + + + + + +#endif diff --git a/makefastqcommand.cpp b/makefastqcommand.cpp index 8e1a7f5..0f6e030 100644 --- a/makefastqcommand.cpp +++ b/makefastqcommand.cpp @@ -201,7 +201,7 @@ string MakeFastQCommand::convertQual(vector qual) { try { string qualScores; - int controlChar = int('!'); + int controlChar = int('@'); for (int i = 0; i < qual.size(); i++) { int temp = qual[i] + controlChar; diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp index e6d7ce6..06ea359 100644 --- a/parsefastaqcommand.cpp +++ b/parsefastaqcommand.cpp @@ -216,7 +216,7 @@ vector ParseFastaQCommand::convertQual(string qual) { try { vector qualScores; - int controlChar = int('!'); + int controlChar = int('@'); for (int i = 0; i < qual.length(); i++) { int temp = int(qual[i]);