From: westcott Date: Wed, 21 Jan 2009 16:12:57 +0000 (+0000) Subject: deconvolute command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=67b6343929b6dbed97c4b26c3bb849725d573f6d deconvolute command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 8dcff47..601ab36 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -11,6 +11,7 @@ 372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; }; 372E12C10F2648250095CF7E /* readlistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12C00F2648250095CF7E /* readlistcommand.cpp */; }; 372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12EC0F264D320095CF7E /* commandfactory.cpp */; }; + 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; }; 37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; }; 37D928560F21331F001D4494 /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BA0F21331F001D4494 /* averagelinkage.cpp */; }; 37D928570F21331F001D4494 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BB0F21331F001D4494 /* bootstrap.cpp */; }; @@ -103,6 +104,8 @@ 372E12BF0F2648250095CF7E /* readlistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readlistcommand.h; sourceTree = ""; }; 372E12C00F2648250095CF7E /* readlistcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readlistcommand.cpp; sourceTree = ""; }; 372E12EC0F264D320095CF7E /* commandfactory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = commandfactory.cpp; sourceTree = ""; }; + 37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = ""; }; + 37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = ""; }; 37D927B80F21331F001D4494 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; 37D927B90F21331F001D4494 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = ""; }; 37D927BA0F21331F001D4494 /* averagelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = averagelinkage.cpp; sourceTree = ""; }; @@ -277,55 +280,55 @@ children = ( 37D927BA0F21331F001D4494 /* averagelinkage.cpp */, 37D928A60F2133C0001D4494 /* calculators */, - 37D927C10F21331F001D4494 /* cluster.cpp */, 37D927C20F21331F001D4494 /* cluster.hpp */, + 37D927C10F21331F001D4494 /* cluster.cpp */, 37D928A90F2133E5001D4494 /* commands */, - 37D927C50F21331F001D4494 /* collect.cpp */, 37D927C60F21331F001D4494 /* collect.h */, + 37D927C50F21331F001D4494 /* collect.cpp */, 37D927C90F21331F001D4494 /* collectdisplay.h */, 37D928AC0F213420001D4494 /* containers */, 37D927CA0F21331F001D4494 /* collectorscurvedata.h */, 37D927CF0F21331F001D4494 /* commandfactory.hpp */, 372E12EC0F264D320095CF7E /* commandfactory.cpp */, - 37D927D00F21331F001D4494 /* commandoptionparser.cpp */, 37D927D10F21331F001D4494 /* commandoptionparser.hpp */, + 37D927D00F21331F001D4494 /* commandoptionparser.cpp */, 37D927D20F21331F001D4494 /* completelinkage.cpp */, - 37D927D30F21331F001D4494 /* database.cpp */, 37D927D40F21331F001D4494 /* database.hpp */, + 37D927D30F21331F001D4494 /* database.cpp */, 37D927D60F21331F001D4494 /* display.h */, - 37D927D70F21331F001D4494 /* engine.cpp */, 37D927D80F21331F001D4494 /* engine.hpp */, + 37D927D70F21331F001D4494 /* engine.cpp */, 37D928B10F213472001D4494 /* errorcheckor */, - 37D927DD0F21331F001D4494 /* fileoutput.cpp */, 37D927DE0F21331F001D4494 /* fileoutput.h */, - 37D927DF0F21331F001D4494 /* globaldata.cpp */, + 37D927DD0F21331F001D4494 /* fileoutput.cpp */, 37D927E00F21331F001D4494 /* globaldata.hpp */, - 37D927E50F21331F001D4494 /* inputdata.cpp */, + 37D927DF0F21331F001D4494 /* globaldata.cpp */, 37D927E60F21331F001D4494 /* inputdata.h */, - 37D927E90F21331F001D4494 /* kmer.cpp */, + 37D927E50F21331F001D4494 /* inputdata.cpp */, 37D927EA0F21331F001D4494 /* kmer.hpp */, - 37D927EB0F21331F001D4494 /* kmerdb.cpp */, + 37D927E90F21331F001D4494 /* kmer.cpp */, 37D927EC0F21331F001D4494 /* kmerdb.hpp */, + 37D927EB0F21331F001D4494 /* kmerdb.cpp */, 37D927EF0F21331F001D4494 /* mothur.cpp */, - 37D927F00F21331F001D4494 /* nameassignment.cpp */, 37D927F10F21331F001D4494 /* nameassignment.hpp */, + 37D927F00F21331F001D4494 /* nameassignment.cpp */, 37D927F60F21331F001D4494 /* observable.h */, - 37D927FB0F21331F001D4494 /* progress.cpp */, 37D927FC0F21331F001D4494 /* progress.hpp */, - 37D928030F21331F001D4494 /* raredisplay.cpp */, + 37D927FB0F21331F001D4494 /* progress.cpp */, 37D928040F21331F001D4494 /* raredisplay.h */, - 37D928050F21331F001D4494 /* rarefact.cpp */, + 37D928030F21331F001D4494 /* raredisplay.cpp */, 37D928060F21331F001D4494 /* rarefact.h */, + 37D928050F21331F001D4494 /* rarefact.cpp */, 37D928090F21331F001D4494 /* rarefactioncurvedata.h */, - 37D928120F21331F001D4494 /* readmatrix.cpp */, 37D928130F21331F001D4494 /* readmatrix.hpp */, - 37D9281C0F21331F001D4494 /* sequence.cpp */, + 37D928120F21331F001D4494 /* readmatrix.cpp */, 37D9281D0F21331F001D4494 /* sequence.hpp */, - 37D928200F21331F001D4494 /* shared.cpp */, + 37D9281C0F21331F001D4494 /* sequence.cpp */, 37D928210F21331F001D4494 /* shared.h */, + 37D928200F21331F001D4494 /* shared.cpp */, 37D928420F21331F001D4494 /* singlelinkage.cpp */, - 37D928440F21331F001D4494 /* sparsematrix.cpp */, 37D928450F21331F001D4494 /* sparsematrix.hpp */, + 37D928440F21331F001D4494 /* sparsematrix.cpp */, 37D928480F21331F001D4494 /* summarydata.h */, 37D928490F21331F001D4494 /* summarydisplay.h */, 37D9284C0F21331F001D4494 /* utilities.hpp */, @@ -352,41 +355,41 @@ 37D927BB0F21331F001D4494 /* bootstrap.cpp */, 37D927C00F21331F001D4494 /* chao1.h */, 37D927BF0F21331F001D4494 /* chao1.cpp */, - 37D927E70F21331F001D4494 /* jackknife.cpp */, 37D927E80F21331F001D4494 /* jackknife.h */, - 37D927F40F21331F001D4494 /* npshannon.cpp */, + 37D927E70F21331F001D4494 /* jackknife.cpp */, 37D927F50F21331F001D4494 /* npshannon.h */, - 37D928010F21331F001D4494 /* rarecalc.cpp */, + 37D927F40F21331F001D4494 /* npshannon.cpp */, 37D928020F21331F001D4494 /* rarecalc.h */, - 37D9281E0F21331F001D4494 /* shannon.cpp */, + 37D928010F21331F001D4494 /* rarecalc.cpp */, 37D9281F0F21331F001D4494 /* shannon.h */, - 37D928220F21331F001D4494 /* sharedace.cpp */, + 37D9281E0F21331F001D4494 /* shannon.cpp */, 37D928230F21331F001D4494 /* sharedace.h */, - 37D928240F21331F001D4494 /* sharedchao1.cpp */, + 37D928220F21331F001D4494 /* sharedace.cpp */, 37D928250F21331F001D4494 /* sharedchao1.h */, - 37D928280F21331F001D4494 /* sharedjabund.cpp */, + 37D928240F21331F001D4494 /* sharedchao1.cpp */, 37D928290F21331F001D4494 /* sharedjabund.h */, - 37D9282A0F21331F001D4494 /* sharedjclass.cpp */, + 37D928280F21331F001D4494 /* sharedjabund.cpp */, 37D9282B0F21331F001D4494 /* sharedjclass.h */, - 37D9282C0F21331F001D4494 /* sharedjest.cpp */, + 37D9282A0F21331F001D4494 /* sharedjclass.cpp */, 37D9282D0F21331F001D4494 /* sharedjest.h */, - 37D928340F21331F001D4494 /* sharedsobs.cpp */, + 37D9282C0F21331F001D4494 /* sharedjest.cpp */, 37D928350F21331F001D4494 /* sharedsobs.h */, - 37D928360F21331F001D4494 /* sharedsorabund.cpp */, + 37D928340F21331F001D4494 /* sharedsobs.cpp */, 37D928370F21331F001D4494 /* sharedsorabund.h */, - 37D928380F21331F001D4494 /* sharedsorclass.cpp */, + 37D928360F21331F001D4494 /* sharedsorabund.cpp */, 37D928390F21331F001D4494 /* sharedsorclass.h */, - 37D9283A0F21331F001D4494 /* sharedsorest.cpp */, + 37D928380F21331F001D4494 /* sharedsorclass.cpp */, 37D9283B0F21331F001D4494 /* sharedsorest.h */, + 37D9283A0F21331F001D4494 /* sharedsorest.cpp */, 37D9283C0F21331F001D4494 /* sharedthetan.cpp */, 37D9283D0F21331F001D4494 /* sharedthetan.h */, - 37D9283E0F21331F001D4494 /* sharedthetayc.cpp */, 37D9283F0F21331F001D4494 /* sharedthetayc.h */, - 37D928400F21331F001D4494 /* simpson.cpp */, + 37D9283E0F21331F001D4494 /* sharedthetayc.cpp */, 37D928410F21331F001D4494 /* simpson.h */, + 37D928400F21331F001D4494 /* simpson.cpp */, 37D928430F21331F001D4494 /* sobs.h */, - 37D9284D0F21331F001D4494 /* uvest.cpp */, 37D9284E0F21331F001D4494 /* uvest.h */, + 37D9284D0F21331F001D4494 /* uvest.cpp */, ); name = calculators; sourceTree = ""; @@ -399,32 +402,34 @@ 37D927C30F21331F001D4494 /* clustercommand.cpp */, 37D927C80F21331F001D4494 /* collectcommand.h */, 37D927C70F21331F001D4494 /* collectcommand.cpp */, - 37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */, 37D927CC0F21331F001D4494 /* collectsharedcommand.h */, - 37D927E30F21331F001D4494 /* helpcommand.cpp */, + 37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */, + 37B28F660F27590100808A62 /* deconvolutecommand.h */, + 37B28F670F27590100808A62 /* deconvolutecommand.cpp */, 37D927E40F21331F001D4494 /* helpcommand.h */, + 37D927E30F21331F001D4494 /* helpcommand.cpp */, 37D927F20F21331F001D4494 /* nocommand.cpp */, 37D927F30F21331F001D4494 /* nocommand.h */, - 37D927F90F21331F001D4494 /* parselistcommand.cpp */, 37D927FA0F21331F001D4494 /* parselistcommand.h */, - 37D927FD0F21331F001D4494 /* quitcommand.cpp */, + 37D927F90F21331F001D4494 /* parselistcommand.cpp */, 37D927FE0F21331F001D4494 /* quitcommand.h */, - 37D928070F21331F001D4494 /* rarefactcommand.cpp */, + 37D927FD0F21331F001D4494 /* quitcommand.cpp */, 37D928080F21331F001D4494 /* rarefactcommand.h */, - 37D9280A0F21331F001D4494 /* rarefactsharedcommand.cpp */, + 37D928070F21331F001D4494 /* rarefactcommand.cpp */, 37D9280B0F21331F001D4494 /* rarefactsharedcommand.h */, + 37D9280A0F21331F001D4494 /* rarefactsharedcommand.cpp */, 372E12940F263D5A0095CF7E /* readdistcommand.h */, 372E12950F263D5A0095CF7E /* readdistcommand.cpp */, 372E126E0F26365B0095CF7E /* readotucommand.h */, 372E126F0F26365B0095CF7E /* readotucommand.cpp */, 372E12BF0F2648250095CF7E /* readlistcommand.h */, 372E12C00F2648250095CF7E /* readlistcommand.cpp */, - 37D928260F21331F001D4494 /* sharedcommand.cpp */, 37D928270F21331F001D4494 /* sharedcommand.h */, - 37D928460F21331F001D4494 /* summarycommand.cpp */, + 37D928260F21331F001D4494 /* sharedcommand.cpp */, 37D928470F21331F001D4494 /* summarycommand.h */, - 37D9284A0F21331F001D4494 /* summarysharedcommand.cpp */, + 37D928460F21331F001D4494 /* summarycommand.cpp */, 37D9284B0F21331F001D4494 /* summarysharedcommand.h */, + 37D9284A0F21331F001D4494 /* summarysharedcommand.cpp */, ); name = commands; sourceTree = ""; @@ -433,24 +438,24 @@ isa = PBXGroup; children = ( 37D927D50F21331F001D4494 /* datavector.hpp */, - 37D927DB0F21331F001D4494 /* fastamap.cpp */, 37D927DC0F21331F001D4494 /* fastamap.h */, - 37D927E10F21331F001D4494 /* groupmap.cpp */, + 37D927DB0F21331F001D4494 /* fastamap.cpp */, 37D927E20F21331F001D4494 /* groupmap.h */, - 37D927ED0F21331F001D4494 /* listvector.cpp */, + 37D927E10F21331F001D4494 /* groupmap.cpp */, 37D927EE0F21331F001D4494 /* listvector.hpp */, - 37D927F70F21331F001D4494 /* ordervector.cpp */, + 37D927ED0F21331F001D4494 /* listvector.cpp */, 37D927F80F21331F001D4494 /* ordervector.hpp */, - 37D927FF0F21331F001D4494 /* rabundvector.cpp */, + 37D927F70F21331F001D4494 /* ordervector.cpp */, 37D928000F21331F001D4494 /* rabundvector.hpp */, - 37D9281A0F21331F001D4494 /* sabundvector.cpp */, + 37D927FF0F21331F001D4494 /* rabundvector.cpp */, 37D9281B0F21331F001D4494 /* sabundvector.hpp */, - 37D9282E0F21331F001D4494 /* sharedordervector.cpp */, + 37D9281A0F21331F001D4494 /* sabundvector.cpp */, 37D9282F0F21331F001D4494 /* sharedordervector.h */, - 37D928300F21331F001D4494 /* sharedrabundvector.cpp */, + 37D9282E0F21331F001D4494 /* sharedordervector.cpp */, 37D928310F21331F001D4494 /* sharedrabundvector.h */, - 37D928320F21331F001D4494 /* sharedsabundvector.cpp */, + 37D928300F21331F001D4494 /* sharedrabundvector.cpp */, 37D928330F21331F001D4494 /* sharedsabundvector.h */, + 37D928320F21331F001D4494 /* sharedsabundvector.cpp */, ); name = containers; sourceTree = ""; @@ -458,14 +463,14 @@ 37D928B10F213472001D4494 /* errorcheckor */ = { isa = PBXGroup; children = ( - 37D927D90F21331F001D4494 /* errorchecking.cpp */, 37D927DA0F21331F001D4494 /* errorchecking.h */, - 37D9284F0F21331F001D4494 /* validcalculator.cpp */, + 37D927D90F21331F001D4494 /* errorchecking.cpp */, 37D928500F21331F001D4494 /* validcalculator.h */, - 37D928510F21331F001D4494 /* validcommands.cpp */, + 37D9284F0F21331F001D4494 /* validcalculator.cpp */, 37D928520F21331F001D4494 /* validcommands.h */, - 37D928530F21331F001D4494 /* validparameter.cpp */, + 37D928510F21331F001D4494 /* validcommands.cpp */, 37D928540F21331F001D4494 /* validparameter.h */, + 37D928530F21331F001D4494 /* validparameter.cpp */, ); name = errorcheckor; sourceTree = ""; @@ -593,6 +598,7 @@ 372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */, 372E12C10F2648250095CF7E /* readlistcommand.cpp in Sources */, 372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */, + 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index edb3fc3..49d197c 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -23,6 +23,7 @@ #include "quitcommand.h" #include "helpcommand.h" #include "commandfactory.hpp" +#include "deconvolutecommand.h" #include @@ -48,10 +49,11 @@ Command* CommandFactory::getCommand(string commandName){ try { delete command; //delete the old command - if(commandName == "read.dist") { command = new ReadDistCommand(); } - else if(commandName == "read.otu") { command = new ReadOtuCommand(); } + if(commandName == "read.dist") { command = new ReadDistCommand(); } + else if(commandName == "read.otu") { command = new ReadOtuCommand(); } else if(commandName == "read.list") { command = new ReadListFileCommand(); } else if(commandName == "cluster") { command = new ClusterCommand(); } + else if(commandName == "deconvolute") { command = new DeconvoluteCommand(); } else if(commandName == "help") { command = new HelpCommand(); } else if(commandName == "quit") { command = new QuitCommand(); } else if(commandName == "collect.single") { command = new CollectCommand(); } diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp new file mode 100644 index 0000000..f2dfdd5 --- /dev/null +++ b/deconvolutecommand.cpp @@ -0,0 +1,45 @@ +/* + * deconvolute.cpp + * Mothur + * + * Created by Sarah Westcott on 1/21/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "deconvolutecommand.h" + +/**************************************************************************************/ +int DeconvoluteCommand::execute() { + try { + globaldata = GlobalData::getInstance(); + + //prepare filenames and open files + filename = globaldata->getFastaFile(); + outputFileName = (getRootName(filename) + "names"); + openInputFile(filename, in); + openOutputFile(outputFileName, out); + + //constructor reads in file and store internally + fastamap = new FastaMap(); + + //two columns separated by tabs sequence name and then sequence + fastamap->readFastaFile(in); + + //print out new names file + //file contains 2 columns separated by tabs. the first column is the groupname(name of first sequence found. + //the second column is the list of names of identical sequences separated by ','. + fastamap->print(out); + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the DeconvoluteCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/**************************************************************************************/ diff --git a/deconvolutecommand.h b/deconvolutecommand.h new file mode 100644 index 0000000..6b05704 --- /dev/null +++ b/deconvolutecommand.h @@ -0,0 +1,40 @@ +#ifndef DECONVOLUTECOMMAND_H +#define DECONVOLUTECOMMAND_H +/* + * deconvolute.h + * Mothur + * + * Created by Sarah Westcott on 1/21/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include +#include +#include "command.hpp" +#include "utilities.hpp" +#include "fastamap.h" +#include "globaldata.hpp" + +/* The deconvolute command reads a fasta file, finds the duplicate sequences and outputs a names file + containing 2 columns. The first being the groupname and the second the list of identical sequence names. */ + +using namespace std; + +class DeconvoluteCommand : public Command { + +public: + DeconvoluteCommand() {}; + ~DeconvoluteCommand() { delete fastamap; }; + int execute(); + +private: + GlobalData* globaldata; + FastaMap* fastamap; + ifstream in; + ofstream out; + string filename, outputFileName; + +}; + +#endif \ No newline at end of file diff --git a/errorchecking.cpp b/errorchecking.cpp index 903c581..8a56c15 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -26,6 +26,7 @@ ErrorCheck::ErrorCheck() { namefile = globaldata->getNameFile(); groupfile = globaldata->getGroupFile(); orderfile = globaldata->getOrderFile(); + fastafile = globaldata->getFastaFile(); cutoff = globaldata->getCutOff(); format = globaldata->getFormat(); method = globaldata->getMethod(); @@ -77,6 +78,7 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "sabundfile" ) { sabundfile = value; } if (parameter == "namefile" ) { namefile = value; } if (parameter == "orderfile" ) { orderfile = value; } + if (parameter == "fastafile" ) { fastafile = value; } if (parameter == "groupfile" ) { groupfile = value; } if (parameter == "cutoff" ) { cutoff = value; } if (parameter == "precision" ) { precision = value; } @@ -145,6 +147,7 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "namefile" ) { namefile = value; } if (parameter == "orderfile" ) { orderfile = value; } if (parameter == "groupfile" ) { groupfile = value; } + if (parameter == "fastafile" ) { fastafile = value; } if (parameter == "cutoff" ) { cutoff = value; } if (parameter == "precision" ) { precision = value; } if (parameter == "iters" ) { iters = value; } @@ -211,6 +214,8 @@ bool ErrorCheck::checkInput(string input) { validateReadPhil(); }else if (commandName == "read.list") { validateParseFiles(); //checks the listfile and groupfile parameters + }else if (commandName == "deconvolute") { + validateReadFiles(); } //are you trying to cluster before you have read something @@ -285,6 +290,12 @@ void ErrorCheck::validateReadFiles() { //unable to open if (ableToOpen == 1) { errorFree = false; } else { globaldata->inputFileName = sabundfile; } + }else if (fastafile != "") { + ableToOpen = openInputFile(fastafile, filehandle); + filehandle.close(); + //unable to open + if (ableToOpen == 1) { errorFree = false; } + else { globaldata->inputFileName = fastafile; } }else{ //no file given errorFree = false; } diff --git a/errorchecking.h b/errorchecking.h index 58161d4..f7ddfa7 100644 --- a/errorchecking.h +++ b/errorchecking.h @@ -36,7 +36,7 @@ class ErrorCheck { void validateReadPhil(); void validateParseFiles(); void clear(); - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, cutoff, format; + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, cutoff, format; string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary; string commandName, optionText; bool errorFree; diff --git a/fastamap.cpp b/fastamap.cpp index 4f4bf33..0e6c22e 100644 --- a/fastamap.cpp +++ b/fastamap.cpp @@ -10,65 +10,78 @@ #include "fastamap.h" /*******************************************************************************/ - FastaMap::FastaMap(ifstream& in) { - //int numberOfSequences = 0; +void FastaMap::readFastaFile(ifstream& in) { + try { + string name, sequence, line; + sequence = ""; - string name, sequence, line; - sequence = ""; + getline(in, line); + name = line.substr(1, line.length()); //rips off '>' - getline(in, line); - name = line.substr(1, line.length()); //rips off '>' - - //read through file - while (getline(in, line)) { - if (isalnum(line.at(0))){ //if it's a sequence line - sequence += line; - } - else{ - //input sequence info into map - it = data.find(sequence); - if (it == data.end()) { //it's unique. - data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. - data[sequence].groupnumber = 1; - data[sequence].names = name; - }else { // its a duplicate. - data[sequence].names += "," + name; - data[sequence].groupnumber++; + //read through file + while (getline(in, line)) { + if (isalnum(line.at(0))){ //if it's a sequence line + sequence += line; + } + else{ + //input sequence info into map + it = data.find(sequence); + if (it == data.end()) { //it's unique. + data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. + data[sequence].groupnumber = 1; + data[sequence].names = name; + }else { // its a duplicate. + data[sequence].names += "," + name; + data[sequence].groupnumber++; + } + name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>' + sequence = ""; } - name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>' } - } - //store last sequence and name info. - it = data.find(sequence); - if (it == data.end()) { //it's unique. - data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. - data[sequence].groupnumber = 1; - data[sequence].names = name; - }else { // its a duplicate. - data[sequence].names += "," + name; - data[sequence].groupnumber++; - } + //store last sequence and name info. + it = data.find(sequence); + if (it == data.end()) { //it's unique. + data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. + data[sequence].groupnumber = 1; + data[sequence].names = name; + }else { // its a duplicate. + data[sequence].names += "," + name; + data[sequence].groupnumber++; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } } /*******************************************************************************/ string FastaMap::getGroupName(string seq) { //pass a sequence name get its group return data[seq].groupname; } /*******************************************************************************/ -int FastaMap::getGroupNumber(string seq) { //pass a sequence name get number of sequence in its group - return data[seq].groupnumber; -} -/*******************************************************************************/ string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s. return data[seq].names; } /*******************************************************************************/ +int FastaMap::getGroupNumber(string seq) { //pass a sequence get the number of identical sequences. + return data[seq].groupnumber; +} +/*******************************************************************************/ void FastaMap::push_back(string seq, string Name) {//sequencename, name data[seq].groupname = Name; - data[seq].groupnumber = 1; data[seq].names = Name; } /*******************************************************************************/ +void FastaMap::set(string seq, string groupName, string Names) { + data[seq].groupname = groupName; + data[seq].names = Names; +} +/*******************************************************************************/ void FastaMap::clear() { //clears out data data.clear(); } @@ -77,7 +90,20 @@ int FastaMap::size(){ //returns datas size which is the number of unique sequenc return data.size(); } /*******************************************************************************/ -void FastaMap::print(ostream&){ //prints data - +void FastaMap::print(ostream& out){ //prints data + try { + // two column file created with groupname and them list of identical sequence names + for (it = data.begin(); it != data.end(); it++) { + out << it->second.groupname << '\t' << it->second.names << endl; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } } /*******************************************************************************/ diff --git a/fastamap.h b/fastamap.h index bd262c4..fbda6ae 100644 --- a/fastamap.h +++ b/fastamap.h @@ -23,26 +23,27 @@ class FastaMap { public: FastaMap() {}; - FastaMap(ifstream&); ~FastaMap() {}; string getGroupName(string); //pass a sequence name get its group int getGroupNumber(string); //pass a sequence name get number of sequence in its group string getNames(string); //pass a sequence get the string of names in the group separated by ','s. void push_back(string, string); //sequencename, groupname + void set(string, string, string); //sequencename, groupname, groupnumber, names. void clear(); int size(); //returns number of unique sequences void print(ostream&); + void readFastaFile(ifstream&); private: struct group { string groupname; //the group name for identical sequences, will be set to the first sequence found. - int groupnumber; //the number of sequences in that group. + int groupnumber; //the number of sequence names with the same sequence. string names; //the names of the sequence separated by ','. }; - map data; //sequence, group - map::iterator it; + map data; //sequence, groupinfo + map::iterator it; }; #endif \ No newline at end of file diff --git a/globaldata.cpp b/globaldata.cpp index aba9944..6d67d0d 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -79,8 +79,10 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ allLines = 1; commandName = commandString; //save command name to be used by other classes - //clears out data from previous read and sets format - setReadFormat(commandName); + //clears out data from previous read + if ((commandName == "read.dist") || (commandName == "read.otu") || (commandName == "read.list")) { + clear(); + } //saves help request if (commandName =="help") { @@ -99,6 +101,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } + if (key == "fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } if (key == "namefile" ) { namefile = value; } if (key == "orderfile" ) { orderfile = value; } if (key == "groupfile" ) { groupfile = value; } @@ -162,6 +165,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } + if (key == "fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } if (key == "namefile" ) { namefile = value; } if (key == "orderfile" ) { orderfile = value; } if (key == "groupfile" ) { groupfile = value; } @@ -253,30 +257,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ } /*******************************************************/ -/******************************************************/ -void GlobalData::setReadFormat(string command){ - try { - if (command == "read.dist") { - clear(); - }else if (command == "read.otu") { - clear(); - }else if (command == "read.shared") { - clear(); - format = "shared"; - }else if (command == "parselist") { format = "list"; } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function setReadFormat. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the GlobalData class function setReadFormat. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - -/*******************************************************/ - /******************************************************/ // These functions give you the option parameters of the commands string GlobalData::getPhylipFile() { return phylipfile; } @@ -287,6 +267,7 @@ string GlobalData::getSabundFile() { return sabundfile; } string GlobalData::getNameFile() { return namefile; } string GlobalData::getGroupFile() { return groupfile; } string GlobalData::getOrderFile() { return orderfile; } +string GlobalData::getFastaFile() { return fastafile; } string GlobalData::getCutOff() { return cutoff; } string GlobalData::getFormat() { return format; } string GlobalData::getPrecision() { return precision; } @@ -328,6 +309,7 @@ void GlobalData::clear() { namefile = ""; groupfile = ""; orderfile = ""; + fastafile = ""; cutoff = "10.00"; format = ""; precision = "100"; diff --git a/globaldata.hpp b/globaldata.hpp index a5477ff..a2b9173 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -39,6 +39,7 @@ public: string getNameFile(); string getGroupFile(); string getOrderFile(); + string getFastaFile(); string getCutOff(); string getFormat(); string getPrecision(); @@ -68,7 +69,7 @@ public: void splitAtDash(string&, set&); private: - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, line, label; + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile,line, label; string cutoff, format, precision, method, fileroot, iters, jumble, freq, single, rarefaction, shared, summary, sharedsummary, sharedrarefaction; static GlobalData* _uniqueInstance; GlobalData( const GlobalData& ); // Disable copy constructor @@ -77,9 +78,8 @@ private: ~GlobalData(); ListVector* gListVector; SparseMatrix* gSparseMatrix; - void setReadFormat(string); - }; +}; //********************************************************************************************************************** diff --git a/helpcommand.cpp b/helpcommand.cpp index b2acc37..565d742 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -50,6 +50,13 @@ int HelpCommand::execute(){ cout << "The cluster command should be in the following format: " << "\n"; cout << "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) " << "\n"; cout << "The acceptable cluster methods are furthest, nearest and average. If no method is provided then furthest is assumed." << "\n" << "\n"; + }else if (globaldata->helpRequest == "deconvolute") { + cout << "The deconvolute command reads a fastafile and creates a namesfile." << "\n"; + cout << "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. " << "\n"; + cout << "If the sequence is unique the second column will just contain its name. " << "\n"; + cout << "The deconvolute command parameter is fastafile and it is required." << "\n"; + cout << "The deconvolute command should be in the following format: " << "\n"; + cout << "deconvolute(fastafile=yourFastaFile) " << "\n"; }else if (globaldata->helpRequest == "collect.single") { cout << "The collect.single command can only be executed after a successful read.list read.rabund or rad.sabund command. WITH ONE EXECEPTION. " << "\n"; cout << "The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; @@ -114,7 +121,7 @@ int HelpCommand::execute(){ cout << "Valid commands are read.dist(), read.list(), read.otu(), cluster(), collect.single(), rarefaction.single(), summary.single(), collect.shared(), rarefaction.shared(), summary.shared(), quit(), help()." << "\n"; cout << "For more information about a specific command type 'help(commandName)' i.e. 'help(read.phylip)'" << endl; }else { - cout << "not a valid command" << endl; + cout << globaldata->helpRequest << " is not a valid command" << endl; } cout << endl << "For further assistance please refer to the Mothur manual, or contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; diff --git a/validcommands.cpp b/validcommands.cpp index 72a38f7..70a6502 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -18,6 +18,7 @@ ValidCommands::ValidCommands() { commands["read.otu"] = "read.otu"; commands["read.list"] = "read.list"; commands["cluster"] = "cluster"; + commands["deconvolute"] = "deconvolute"; commands["help"] = "help"; commands["quit"] = "quit"; commands["collect.single"] = "collect.single"; diff --git a/validparameter.cpp b/validparameter.cpp index dc4ec62..b58a8ff 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -22,6 +22,7 @@ ValidParameters::ValidParameters() { parameters["namefile"] = "namefile"; parameters["groupfile"] = "groupfile"; parameters["orderfile"] = "orderfile"; + parameters["fastafile"] = "fastafile"; parameters["fileroot"] = "fileroot"; parameters["cutoff"] = "cutoff"; parameters["method"] = "method";