]> git.donarmstrong.com Git - mothur.git/commitdiff
deconvolute command
author westcott <westcott>
Wed, 21 Jan 2009 16:12:57 +0000 (16:12 +0000)
committer westcott <westcott>
Wed, 21 Jan 2009 16:12:57 +0000 (16:12 +0000)
13 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
deconvolutecommand.cpp [new file with mode: 0644]
deconvolutecommand.h [new file with mode: 0644]
errorchecking.cpp
errorchecking.h
fastamap.cpp
fastamap.h
globaldata.cpp
globaldata.hpp
helpcommand.cpp
validcommands.cpp
validparameter.cpp

index 8dcff47032eb79dfd55eaff7658f1b52083d6abb..601ab369e55fe1d9509a4b44893ebc42320fadaa 100644 (file)
@@ -11,6 +11,7 @@
                372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; };
                372E12C10F2648250095CF7E /* readlistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12C00F2648250095CF7E /* readlistcommand.cpp */; };
                372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12EC0F264D320095CF7E /* commandfactory.cpp */; };
+               37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; };
                37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; };
                37D928560F21331F001D4494 /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BA0F21331F001D4494 /* averagelinkage.cpp */; };
                37D928570F21331F001D4494 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BB0F21331F001D4494 /* bootstrap.cpp */; };
                372E12BF0F2648250095CF7E /* readlistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readlistcommand.h; sourceTree = "<group>"; };
                372E12C00F2648250095CF7E /* readlistcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readlistcommand.cpp; sourceTree = "<group>"; };
                372E12EC0F264D320095CF7E /* commandfactory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = commandfactory.cpp; sourceTree = "<group>"; };
+               37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = "<group>"; };
+               37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = "<group>"; };
                37D927B80F21331F001D4494 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                37D927B90F21331F001D4494 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
                37D927BA0F21331F001D4494 /* averagelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = averagelinkage.cpp; sourceTree = "<group>"; };
                        children = (
                                37D927BA0F21331F001D4494 /* averagelinkage.cpp */,
                                37D928A60F2133C0001D4494 /* calculators */,
-                               37D927C10F21331F001D4494 /* cluster.cpp */,
                                37D927C20F21331F001D4494 /* cluster.hpp */,
+                               37D927C10F21331F001D4494 /* cluster.cpp */,
                                37D928A90F2133E5001D4494 /* commands */,
-                               37D927C50F21331F001D4494 /* collect.cpp */,
                                37D927C60F21331F001D4494 /* collect.h */,
+                               37D927C50F21331F001D4494 /* collect.cpp */,
                                37D927C90F21331F001D4494 /* collectdisplay.h */,
                                37D928AC0F213420001D4494 /* containers */,
                                37D927CA0F21331F001D4494 /* collectorscurvedata.h */,
                                37D927CF0F21331F001D4494 /* commandfactory.hpp */,
                                372E12EC0F264D320095CF7E /* commandfactory.cpp */,
-                               37D927D00F21331F001D4494 /* commandoptionparser.cpp */,
                                37D927D10F21331F001D4494 /* commandoptionparser.hpp */,
+                               37D927D00F21331F001D4494 /* commandoptionparser.cpp */,
                                37D927D20F21331F001D4494 /* completelinkage.cpp */,
-                               37D927D30F21331F001D4494 /* database.cpp */,
                                37D927D40F21331F001D4494 /* database.hpp */,
+                               37D927D30F21331F001D4494 /* database.cpp */,
                                37D927D60F21331F001D4494 /* display.h */,
-                               37D927D70F21331F001D4494 /* engine.cpp */,
                                37D927D80F21331F001D4494 /* engine.hpp */,
+                               37D927D70F21331F001D4494 /* engine.cpp */,
                                37D928B10F213472001D4494 /* errorcheckor */,
-                               37D927DD0F21331F001D4494 /* fileoutput.cpp */,
                                37D927DE0F21331F001D4494 /* fileoutput.h */,
-                               37D927DF0F21331F001D4494 /* globaldata.cpp */,
+                               37D927DD0F21331F001D4494 /* fileoutput.cpp */,
                                37D927E00F21331F001D4494 /* globaldata.hpp */,
-                               37D927E50F21331F001D4494 /* inputdata.cpp */,
+                               37D927DF0F21331F001D4494 /* globaldata.cpp */,
                                37D927E60F21331F001D4494 /* inputdata.h */,
-                               37D927E90F21331F001D4494 /* kmer.cpp */,
+                               37D927E50F21331F001D4494 /* inputdata.cpp */,
                                37D927EA0F21331F001D4494 /* kmer.hpp */,
-                               37D927EB0F21331F001D4494 /* kmerdb.cpp */,
+                               37D927E90F21331F001D4494 /* kmer.cpp */,
                                37D927EC0F21331F001D4494 /* kmerdb.hpp */,
+                               37D927EB0F21331F001D4494 /* kmerdb.cpp */,
                                37D927EF0F21331F001D4494 /* mothur.cpp */,
-                               37D927F00F21331F001D4494 /* nameassignment.cpp */,
                                37D927F10F21331F001D4494 /* nameassignment.hpp */,
+                               37D927F00F21331F001D4494 /* nameassignment.cpp */,
                                37D927F60F21331F001D4494 /* observable.h */,
-                               37D927FB0F21331F001D4494 /* progress.cpp */,
                                37D927FC0F21331F001D4494 /* progress.hpp */,
-                               37D928030F21331F001D4494 /* raredisplay.cpp */,
+                               37D927FB0F21331F001D4494 /* progress.cpp */,
                                37D928040F21331F001D4494 /* raredisplay.h */,
-                               37D928050F21331F001D4494 /* rarefact.cpp */,
+                               37D928030F21331F001D4494 /* raredisplay.cpp */,
                                37D928060F21331F001D4494 /* rarefact.h */,
+                               37D928050F21331F001D4494 /* rarefact.cpp */,
                                37D928090F21331F001D4494 /* rarefactioncurvedata.h */,
-                               37D928120F21331F001D4494 /* readmatrix.cpp */,
                                37D928130F21331F001D4494 /* readmatrix.hpp */,
-                               37D9281C0F21331F001D4494 /* sequence.cpp */,
+                               37D928120F21331F001D4494 /* readmatrix.cpp */,
                                37D9281D0F21331F001D4494 /* sequence.hpp */,
-                               37D928200F21331F001D4494 /* shared.cpp */,
+                               37D9281C0F21331F001D4494 /* sequence.cpp */,
                                37D928210F21331F001D4494 /* shared.h */,
+                               37D928200F21331F001D4494 /* shared.cpp */,
                                37D928420F21331F001D4494 /* singlelinkage.cpp */,
-                               37D928440F21331F001D4494 /* sparsematrix.cpp */,
                                37D928450F21331F001D4494 /* sparsematrix.hpp */,
+                               37D928440F21331F001D4494 /* sparsematrix.cpp */,
                                37D928480F21331F001D4494 /* summarydata.h */,
                                37D928490F21331F001D4494 /* summarydisplay.h */,
                                37D9284C0F21331F001D4494 /* utilities.hpp */,
                                37D927BB0F21331F001D4494 /* bootstrap.cpp */,
                                37D927C00F21331F001D4494 /* chao1.h */,
                                37D927BF0F21331F001D4494 /* chao1.cpp */,
-                               37D927E70F21331F001D4494 /* jackknife.cpp */,
                                37D927E80F21331F001D4494 /* jackknife.h */,
-                               37D927F40F21331F001D4494 /* npshannon.cpp */,
+                               37D927E70F21331F001D4494 /* jackknife.cpp */,
                                37D927F50F21331F001D4494 /* npshannon.h */,
-                               37D928010F21331F001D4494 /* rarecalc.cpp */,
+                               37D927F40F21331F001D4494 /* npshannon.cpp */,
                                37D928020F21331F001D4494 /* rarecalc.h */,
-                               37D9281E0F21331F001D4494 /* shannon.cpp */,
+                               37D928010F21331F001D4494 /* rarecalc.cpp */,
                                37D9281F0F21331F001D4494 /* shannon.h */,
-                               37D928220F21331F001D4494 /* sharedace.cpp */,
+                               37D9281E0F21331F001D4494 /* shannon.cpp */,
                                37D928230F21331F001D4494 /* sharedace.h */,
-                               37D928240F21331F001D4494 /* sharedchao1.cpp */,
+                               37D928220F21331F001D4494 /* sharedace.cpp */,
                                37D928250F21331F001D4494 /* sharedchao1.h */,
-                               37D928280F21331F001D4494 /* sharedjabund.cpp */,
+                               37D928240F21331F001D4494 /* sharedchao1.cpp */,
                                37D928290F21331F001D4494 /* sharedjabund.h */,
-                               37D9282A0F21331F001D4494 /* sharedjclass.cpp */,
+                               37D928280F21331F001D4494 /* sharedjabund.cpp */,
                                37D9282B0F21331F001D4494 /* sharedjclass.h */,
-                               37D9282C0F21331F001D4494 /* sharedjest.cpp */,
+                               37D9282A0F21331F001D4494 /* sharedjclass.cpp */,
                                37D9282D0F21331F001D4494 /* sharedjest.h */,
-                               37D928340F21331F001D4494 /* sharedsobs.cpp */,
+                               37D9282C0F21331F001D4494 /* sharedjest.cpp */,
                                37D928350F21331F001D4494 /* sharedsobs.h */,
-                               37D928360F21331F001D4494 /* sharedsorabund.cpp */,
+                               37D928340F21331F001D4494 /* sharedsobs.cpp */,
                                37D928370F21331F001D4494 /* sharedsorabund.h */,
-                               37D928380F21331F001D4494 /* sharedsorclass.cpp */,
+                               37D928360F21331F001D4494 /* sharedsorabund.cpp */,
                                37D928390F21331F001D4494 /* sharedsorclass.h */,
-                               37D9283A0F21331F001D4494 /* sharedsorest.cpp */,
+                               37D928380F21331F001D4494 /* sharedsorclass.cpp */,
                                37D9283B0F21331F001D4494 /* sharedsorest.h */,
+                               37D9283A0F21331F001D4494 /* sharedsorest.cpp */,
                                37D9283C0F21331F001D4494 /* sharedthetan.cpp */,
                                37D9283D0F21331F001D4494 /* sharedthetan.h */,
-                               37D9283E0F21331F001D4494 /* sharedthetayc.cpp */,
                                37D9283F0F21331F001D4494 /* sharedthetayc.h */,
-                               37D928400F21331F001D4494 /* simpson.cpp */,
+                               37D9283E0F21331F001D4494 /* sharedthetayc.cpp */,
                                37D928410F21331F001D4494 /* simpson.h */,
+                               37D928400F21331F001D4494 /* simpson.cpp */,
                                37D928430F21331F001D4494 /* sobs.h */,
-                               37D9284D0F21331F001D4494 /* uvest.cpp */,
                                37D9284E0F21331F001D4494 /* uvest.h */,
+                               37D9284D0F21331F001D4494 /* uvest.cpp */,
                        );
                        name = calculators;
                        sourceTree = "<group>";
                                37D927C30F21331F001D4494 /* clustercommand.cpp */,
                                37D927C80F21331F001D4494 /* collectcommand.h */,
                                37D927C70F21331F001D4494 /* collectcommand.cpp */,
-                               37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */,
                                37D927CC0F21331F001D4494 /* collectsharedcommand.h */,
-                               37D927E30F21331F001D4494 /* helpcommand.cpp */,
+                               37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */,
+                               37B28F660F27590100808A62 /* deconvolutecommand.h */,
+                               37B28F670F27590100808A62 /* deconvolutecommand.cpp */,
                                37D927E40F21331F001D4494 /* helpcommand.h */,
+                               37D927E30F21331F001D4494 /* helpcommand.cpp */,
                                37D927F20F21331F001D4494 /* nocommand.cpp */,
                                37D927F30F21331F001D4494 /* nocommand.h */,
-                               37D927F90F21331F001D4494 /* parselistcommand.cpp */,
                                37D927FA0F21331F001D4494 /* parselistcommand.h */,
-                               37D927FD0F21331F001D4494 /* quitcommand.cpp */,
+                               37D927F90F21331F001D4494 /* parselistcommand.cpp */,
                                37D927FE0F21331F001D4494 /* quitcommand.h */,
-                               37D928070F21331F001D4494 /* rarefactcommand.cpp */,
+                               37D927FD0F21331F001D4494 /* quitcommand.cpp */,
                                37D928080F21331F001D4494 /* rarefactcommand.h */,
-                               37D9280A0F21331F001D4494 /* rarefactsharedcommand.cpp */,
+                               37D928070F21331F001D4494 /* rarefactcommand.cpp */,
                                37D9280B0F21331F001D4494 /* rarefactsharedcommand.h */,
+                               37D9280A0F21331F001D4494 /* rarefactsharedcommand.cpp */,
                                372E12940F263D5A0095CF7E /* readdistcommand.h */,
                                372E12950F263D5A0095CF7E /* readdistcommand.cpp */,
                                372E126E0F26365B0095CF7E /* readotucommand.h */,
                                372E126F0F26365B0095CF7E /* readotucommand.cpp */,
                                372E12BF0F2648250095CF7E /* readlistcommand.h */,
                                372E12C00F2648250095CF7E /* readlistcommand.cpp */,
-                               37D928260F21331F001D4494 /* sharedcommand.cpp */,
                                37D928270F21331F001D4494 /* sharedcommand.h */,
-                               37D928460F21331F001D4494 /* summarycommand.cpp */,
+                               37D928260F21331F001D4494 /* sharedcommand.cpp */,
                                37D928470F21331F001D4494 /* summarycommand.h */,
-                               37D9284A0F21331F001D4494 /* summarysharedcommand.cpp */,
+                               37D928460F21331F001D4494 /* summarycommand.cpp */,
                                37D9284B0F21331F001D4494 /* summarysharedcommand.h */,
+                               37D9284A0F21331F001D4494 /* summarysharedcommand.cpp */,
                        );
                        name = commands;
                        sourceTree = "<group>";
                        isa = PBXGroup;
                        children = (
                                37D927D50F21331F001D4494 /* datavector.hpp */,
-                               37D927DB0F21331F001D4494 /* fastamap.cpp */,
                                37D927DC0F21331F001D4494 /* fastamap.h */,
-                               37D927E10F21331F001D4494 /* groupmap.cpp */,
+                               37D927DB0F21331F001D4494 /* fastamap.cpp */,
                                37D927E20F21331F001D4494 /* groupmap.h */,
-                               37D927ED0F21331F001D4494 /* listvector.cpp */,
+                               37D927E10F21331F001D4494 /* groupmap.cpp */,
                                37D927EE0F21331F001D4494 /* listvector.hpp */,
-                               37D927F70F21331F001D4494 /* ordervector.cpp */,
+                               37D927ED0F21331F001D4494 /* listvector.cpp */,
                                37D927F80F21331F001D4494 /* ordervector.hpp */,
-                               37D927FF0F21331F001D4494 /* rabundvector.cpp */,
+                               37D927F70F21331F001D4494 /* ordervector.cpp */,
                                37D928000F21331F001D4494 /* rabundvector.hpp */,
-                               37D9281A0F21331F001D4494 /* sabundvector.cpp */,
+                               37D927FF0F21331F001D4494 /* rabundvector.cpp */,
                                37D9281B0F21331F001D4494 /* sabundvector.hpp */,
-                               37D9282E0F21331F001D4494 /* sharedordervector.cpp */,
+                               37D9281A0F21331F001D4494 /* sabundvector.cpp */,
                                37D9282F0F21331F001D4494 /* sharedordervector.h */,
-                               37D928300F21331F001D4494 /* sharedrabundvector.cpp */,
+                               37D9282E0F21331F001D4494 /* sharedordervector.cpp */,
                                37D928310F21331F001D4494 /* sharedrabundvector.h */,
-                               37D928320F21331F001D4494 /* sharedsabundvector.cpp */,
+                               37D928300F21331F001D4494 /* sharedrabundvector.cpp */,
                                37D928330F21331F001D4494 /* sharedsabundvector.h */,
+                               37D928320F21331F001D4494 /* sharedsabundvector.cpp */,
                        );
                        name = containers;
                        sourceTree = "<group>";
                37D928B10F213472001D4494 /* errorcheckor */ = {
                        isa = PBXGroup;
                        children = (
-                               37D927D90F21331F001D4494 /* errorchecking.cpp */,
                                37D927DA0F21331F001D4494 /* errorchecking.h */,
-                               37D9284F0F21331F001D4494 /* validcalculator.cpp */,
+                               37D927D90F21331F001D4494 /* errorchecking.cpp */,
                                37D928500F21331F001D4494 /* validcalculator.h */,
-                               37D928510F21331F001D4494 /* validcommands.cpp */,
+                               37D9284F0F21331F001D4494 /* validcalculator.cpp */,
                                37D928520F21331F001D4494 /* validcommands.h */,
-                               37D928530F21331F001D4494 /* validparameter.cpp */,
+                               37D928510F21331F001D4494 /* validcommands.cpp */,
                                37D928540F21331F001D4494 /* validparameter.h */,
+                               37D928530F21331F001D4494 /* validparameter.cpp */,
                        );
                        name = errorcheckor;
                        sourceTree = "<group>";
                                372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */,
                                372E12C10F2648250095CF7E /* readlistcommand.cpp in Sources */,
                                372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */,
+                               37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index edb3fc358b78394f24634c1398e76872324f6df3..49d197c40b9fe215f8067fa8d8a8f6ddb262f45c 100644 (file)
@@ -23,6 +23,7 @@
 #include "quitcommand.h"
 #include "helpcommand.h"
 #include "commandfactory.hpp"
+#include "deconvolutecommand.h"
 #include <exception>
 
 
@@ -48,10 +49,11 @@ Command* CommandFactory::getCommand(string commandName){
        try {
                delete command;   //delete the old command
 
-                        if(commandName == "read.dist")                         {       command = new ReadDistCommand();        }
-               else if(commandName == "read.otu")                              {       command = new ReadOtuCommand(); }
+                        if(commandName == "read.dist")                         {       command = new ReadDistCommand();                }
+               else if(commandName == "read.otu")                              {       command = new ReadOtuCommand();                 }
                else if(commandName == "read.list")                             {       command = new ReadListFileCommand();    }
                else if(commandName == "cluster")                               {       command = new ClusterCommand();                 }
+               else if(commandName == "deconvolute")                   {       command = new DeconvoluteCommand();             }
                else if(commandName == "help")                                  {       command = new HelpCommand();                    }
                else if(commandName == "quit")                                  {       command = new QuitCommand();                    }
                else if(commandName == "collect.single")                {       command = new CollectCommand();                 }
diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp
new file mode 100644 (file)
index 0000000..f2dfdd5
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ *  deconvolutecommand.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 1/21/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "deconvolutecommand.h"
+
+/**************************************************************************************/
+int DeconvoluteCommand::execute() {    
+       try {
+               globaldata = GlobalData::getInstance();
+       
+               //prepare filenames and open files
+               filename = globaldata->getFastaFile();
+               outputFileName = (getRootName(filename) + "names");
+               openInputFile(filename, in);
+               openOutputFile(outputFileName, out);
+       
+               //create the map; readFastaFile below reads in the file and stores it internally
+               fastamap = new FastaMap();
+       
+               //two columns separated by tabs sequence name and then sequence
+               fastamap->readFastaFile(in);
+               
+               //print out new names file 
+               //file contains 2 columns separated by tabs.  the first column is the groupname (name of the first sequence found).
+               //the second column is the list of names of identical sequences separated by ','.
+               fastamap->print(out);
+       
+               return 0;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the DeconvoluteCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+/**************************************************************************************/
diff --git a/deconvolutecommand.h b/deconvolutecommand.h
new file mode 100644 (file)
index 0000000..6b05704
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef DECONVOLUTECOMMAND_H
+#define DECONVOLUTECOMMAND_H
+/*
+ *  deconvolutecommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 1/21/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include <iostream>
+#include <fstream>
+#include "command.hpp"
+#include "utilities.hpp"
+#include "fastamap.h"
+#include "globaldata.hpp"
+
+/* The deconvolute command reads a fasta file, finds the duplicate sequences and outputs a names file
+       containing 2 columns.  The first being the groupname and the second the list of identical sequence names. */ 
+
+using namespace std;
+
+class DeconvoluteCommand : public Command {
+
+public:
+       DeconvoluteCommand() {};        
+       ~DeconvoluteCommand() { delete fastamap; };
+       int execute();  
+       
+private:
+       GlobalData* globaldata;
+       FastaMap* fastamap;
+       ifstream in;
+       ofstream out;
+       string filename, outputFileName;
+
+};
+
+#endif
\ No newline at end of file
index 903c58137c1de81036dd43adf6cc3619a861a3c6..8a56c15c9d977de80c230922405c97589251324a 100644 (file)
@@ -26,6 +26,7 @@ ErrorCheck::ErrorCheck() {
        namefile = globaldata->getNameFile();
        groupfile = globaldata->getGroupFile();
        orderfile = globaldata->getOrderFile();
+       fastafile = globaldata->getFastaFile();
        cutoff = globaldata->getCutOff();
        format = globaldata->getFormat();
        method = globaldata->getMethod();
@@ -77,6 +78,7 @@ bool ErrorCheck::checkInput(string input) {
                                if (parameter == "sabundfile" )         { sabundfile = value; }
                                if (parameter == "namefile" )           { namefile = value; }
                                if (parameter == "orderfile" )          { orderfile = value; }
+                               if (parameter == "fastafile" )          { fastafile = value; }
                                if (parameter == "groupfile" )          { groupfile = value; }
                                if (parameter == "cutoff" )                     { cutoff = value; }
                                if (parameter == "precision" )          { precision = value; }
@@ -145,6 +147,7 @@ bool ErrorCheck::checkInput(string input) {
                                if (parameter == "namefile" )           { namefile = value; }
                                if (parameter == "orderfile" )          { orderfile = value; }
                                if (parameter == "groupfile" )          { groupfile = value; }
+                               if (parameter == "fastafile" )          { fastafile = value; }
                                if (parameter == "cutoff" )                     { cutoff = value; }
                                if (parameter == "precision" )          { precision = value; }
                                if (parameter == "iters" )                      { iters = value; }
@@ -211,6 +214,8 @@ bool ErrorCheck::checkInput(string input) {
                        validateReadPhil();     
                }else if (commandName == "read.list") { 
                        validateParseFiles(); //checks the listfile and groupfile parameters
+               }else if (commandName == "deconvolute") {
+                       validateReadFiles();
                }
                
                //are you trying to cluster before you have read something                      
@@ -285,6 +290,12 @@ void ErrorCheck::validateReadFiles() {
                        //unable to open
                        if (ableToOpen == 1) {  errorFree = false; }
                        else { globaldata->inputFileName = sabundfile; }
+               }else if (fastafile != "") {
+                       ableToOpen = openInputFile(fastafile, filehandle);
+                       filehandle.close();
+                       //unable to open
+                       if (ableToOpen == 1) {  errorFree = false; }
+                       else { globaldata->inputFileName = fastafile; }
                }else{ //no file given
                        errorFree = false;
                }
index 58161d46886a6b99b73f853d0a36465a6f4b056e..f7ddfa7f8cb4aa6ddf7d2d967e77bd7a8c23bc7e 100644 (file)
@@ -36,7 +36,7 @@ class ErrorCheck {
                void validateReadPhil();
                void validateParseFiles();
                void clear();
-               string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, cutoff, format; 
+               string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, cutoff, format; 
                string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary;
                string commandName, optionText;
                bool errorFree;
index 4f4bf33206d77e437dd32bd2ff14b7f5f1290042..0e6c22e21f48aec62b4a365db1e2454cd65ce7c9 100644 (file)
 #include "fastamap.h"
 
 /*******************************************************************************/
- FastaMap::FastaMap(ifstream& in) {
-       //int numberOfSequences = 0;
+void FastaMap::readFastaFile(ifstream& in) {  //reads fasta records from in and groups the sequence names by identical sequence text
+       try {
+               string name, sequence, line;
+               sequence = "";
        
-       string name, sequence, line;
-       sequence = "";
+               getline(in, line);
+               name = line.substr(1, line.length());  //rips off '>' (drops the first character whatever it is); NOTE(review): substr(1, ...) throws out_of_range when this first line is empty, e.g. an empty file, which ends in the catch below
        
-       getline(in, line);
-       name = line.substr(1, line.length());  //rips off '>'
-       
-       //read through file
-       while (getline(in, line)) {
-               if (isalnum(line.at(0))){  //if it's a sequence line
-                       sequence += line;
-               }
-               else{
-                       //input sequence info into map
-                       it = data.find(sequence);
-                       if (it == data.end()) {         //it's unique.
-                               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-                               data[sequence].groupnumber = 1;
-                               data[sequence].names = name;
-                       }else { // its a duplicate.
-                               data[sequence].names += "," + name;
-                               data[sequence].groupnumber++;   
+               //read through file
+               while (getline(in, line)) {
+                       if (isalnum(line.at(0))){  //if it's a sequence line; NOTE(review): at(0) throws on a blank line, and a line that starts with a non-alphanumeric character (for example a gap '-' or '.') falls into the name branch below
+                               sequence += line;  //a sequence may span several lines, so keep appending
+                       }
+                       else{
+                               //input sequence info into map
+                               it = data.find(sequence);
+                               if (it == data.end()) {         //it's unique.
+                                       data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+                                       data[sequence].groupnumber = 1;
+                                       data[sequence].names = name;
+                               }else { // its a duplicate.
+                                       data[sequence].names += "," + name;
+                                       data[sequence].groupnumber++;
+                               }
+                               name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
+                               sequence = "";  //start accumulating the sequence of the record that was just opened
                        }
-                       name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
                }
-       }
        
-       //store last sequence and name info.
-       it = data.find(sequence);
-       if (it == data.end()) {         //it's unique.
-               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-               data[sequence].groupnumber = 1;
-               data[sequence].names = name;
-       }else { // its a duplicate.
-               data[sequence].names += "," + name;
-               data[sequence].groupnumber++;   
-       }       
+               //store last sequence and name info (the final record has no following name line to trigger the store inside the loop).
+               it = data.find(sequence);
+               if (it == data.end()) {         //it's unique.
+                       data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+                       data[sequence].groupnumber = 1;
+                       data[sequence].names = name;
+               }else { // its a duplicate.
+                       data[sequence].names += "," + name;
+                       data[sequence].groupnumber++;
+               }       
+       }
+       catch(exception& e) {  //any std::exception raised above is reported on cout and the program exits with status 1
+               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
 }
 /*******************************************************************************/
 string FastaMap::getGroupName(string seq) {  //pass a sequence name get its group
        return data[seq].groupname;
 }
 /*******************************************************************************/
-int FastaMap::getGroupNumber(string seq) {  //pass a sequence name get number of sequence in its group
-       return data[seq].groupnumber;
-}
-/*******************************************************************************/
 string FastaMap::getNames(string seq) {        //pass a sequence get the string of names in the group separated by ','s.
        return data[seq].names;
 }
 /*******************************************************************************/
+int FastaMap::getGroupNumber(string seq) {     //pass a sequence (the map key, not a sequence name), get the number of identical sequences counted for it.
+       return data[seq].groupnumber;  //NOTE(review): data[seq] adds an empty entry when seq is not already a key (map operator[] semantics) - confirm callers only pass known sequences
+}
+/*******************************************************************************/
 void FastaMap::push_back(string seq, string Name) {//sequencename, name
        data[seq].groupname = Name;
-       data[seq].groupnumber = 1;
        data[seq].names = Name;
 }
 /*******************************************************************************/
+void FastaMap::set(string seq, string groupName, string Names) {  //sequence (map key), group name, names string; overwrites both strings stored for seq
+       data[seq].groupname = groupName;
+       data[seq].names = Names;  //groupnumber is not written by this function
+}
+/*******************************************************************************/
 void FastaMap::clear() { //clears out data
        data.clear();
 }
@@ -77,7 +90,20 @@ int FastaMap::size(){ //returns datas size which is the number of unique sequenc
        return data.size();
 }
 /*******************************************************************************/
-void FastaMap::print(ostream&){ //prints data
-
+void FastaMap::print(ostream& out){ //prints data: one line per entry of the map
+       try {
+               // two column, tab separated output: the groupname, then the list of identical sequence names
+               for (it = data.begin(); it != data.end(); it++) {  //walks the map with the member iterator it, which is left at data.end() afterwards
+                       out << it->second.groupname << '\t' << it->second.names << endl;  //endl also flushes out after every row
+               }
+       }
+       catch(exception& e) {  //errors are reported on cout and the program exits with status 1
+               cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
 }
 /*******************************************************************************/
index bd262c4d0660671899d7d576327e3065051a04aa..fbda6aee6cd6b383da0d577f3d1b98d8ec1614e7 100644 (file)
@@ -23,26 +23,27 @@ class FastaMap  {
 
 public:
        FastaMap() {};
-       FastaMap(ifstream&);
        ~FastaMap() {};
        
        string getGroupName(string);  //pass a sequence name get its group
        int getGroupNumber(string);  //pass a sequence name get number of sequence in its group
        string getNames(string);        //pass a sequence get the string of names in the group separated by ','s.
        void push_back(string, string); //sequencename, groupname
+       void set(string, string, string); //sequence, groupname, names (groupnumber is not a parameter).
        void clear();
        int size();                                     //returns number of unique sequences
        void print(ostream&);
+       void readFastaFile(ifstream&);  //reads fasta records from the stream into the map; takes over the work of the removed FastaMap(ifstream&) constructor
 
 private:
        struct group {
                string groupname;                                       //the group name for identical sequences, will be set to the first sequence found.
-               int groupnumber;                                        //the number of sequences in that group.
+               int groupnumber;                                        //the number of sequence names with the same sequence.
                string names;                                           //the names of the sequence separated by ','.
        };
 
-       map<string, group>  data;  //sequence, group
-       map<string, group>::iterator it;        
+       map<string, group>  data;  //sequence, groupinfo
+       map<string, group>::iterator it;
 };
 
 #endif
\ No newline at end of file
index aba99444049cbad7c94576314e6c1347469e0d2f..6d67d0da0fd41c2978a073a5f377746331b0d9a6 100644 (file)
@@ -79,8 +79,10 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                allLines = 1;
                commandName = commandString; //save command name to be used by other classes
                
-               //clears out data from previous read and sets format
-               setReadFormat(commandName);
+               //clears out data from previous read
+               if ((commandName == "read.dist") || (commandName == "read.otu") || (commandName == "read.list")) { 
+                       clear();
+               }
                
                //saves help request
                if (commandName =="help") {
@@ -99,6 +101,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                                if (key == "listfile" )         { listfile = value; inputFileName = value; fileroot = value; format = "list"; }
                                if (key == "rabundfile" )       { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; }
                                if (key == "sabundfile" )       { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } 
+                               if (key == "fastafile" )        { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } 
                                if (key == "namefile" )         { namefile = value; }
                                if (key == "orderfile" )        { orderfile = value; }
                                if (key == "groupfile" )        { groupfile = value; }
@@ -162,6 +165,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                        if (key == "listfile" )         { listfile = value; inputFileName = value; fileroot = value; format = "list"; }
                        if (key == "rabundfile" )       { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; }
                        if (key == "sabundfile" )       { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; }
+                       if (key == "fastafile" )        { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } 
                        if (key == "namefile" )         { namefile = value; }
                        if (key == "orderfile" )        { orderfile = value; }
                        if (key == "groupfile" )        { groupfile = value; }
@@ -253,30 +257,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
 }
 /*******************************************************/
 
-/******************************************************/
-void GlobalData::setReadFormat(string command){
-       try {
-               if (command == "read.dist") { 
-                       clear();
-               }else if (command == "read.otu") { 
-                       clear();
-               }else if (command == "read.shared") { 
-                       clear();
-                       format = "shared";
-               }else if (command == "parselist") { format = "list"; }
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function setReadFormat. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the GlobalData class function setReadFormat. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-}
-
-/*******************************************************/
-
 /******************************************************/
 // These functions give you the option parameters of the commands
 string GlobalData::getPhylipFile()             {       return phylipfile;      }
@@ -287,6 +267,7 @@ string GlobalData::getSabundFile()          {       return sabundfile;      }
 string GlobalData::getNameFile()               {       return namefile;        }
 string GlobalData::getGroupFile()              {       return groupfile;       }
 string GlobalData::getOrderFile()              {       return orderfile;       }
+string GlobalData::getFastaFile()              {       return fastafile;       }  //value stored for the fastafile parameter; clear() resets it to ""
 string GlobalData::getCutOff()                 {       return cutoff;          }
 string GlobalData::getFormat()                 {       return format;          }
 string GlobalData::getPrecision()              {       return precision;       }
@@ -328,6 +309,7 @@ void GlobalData::clear() {
        namefile                =       "";
        groupfile               =       ""; 
        orderfile               =       "";
+       fastafile               =   "";
        cutoff                  =       "10.00";
        format                  =       "";
        precision               =       "100";
index a5477fff1d06b95aa35b6c1dc7d697bb0288246a..a2b91738e15d29d02ccdc87559b3e4441c5349db 100644 (file)
@@ -39,6 +39,7 @@ public:
        string getNameFile();
        string getGroupFile();
        string getOrderFile();
+       string getFastaFile();
        string getCutOff();
        string getFormat();
        string getPrecision();
@@ -68,7 +69,7 @@ public:
        void splitAtDash(string&, set<string>&);
        
 private:
-       string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, line, label;
+       string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile,line, label;
        string cutoff, format, precision, method, fileroot, iters, jumble, freq, single, rarefaction, shared, summary, sharedsummary, sharedrarefaction;
        static GlobalData* _uniqueInstance;
        GlobalData( const GlobalData& ); // Disable copy constructor
@@ -77,9 +78,8 @@ private:
        ~GlobalData();
        ListVector* gListVector;
        SparseMatrix* gSparseMatrix;
-       void setReadFormat(string);
        
-       };
+};
 
 //**********************************************************************************************************************
 
index b2acc375ed9bbaf60f5cec2caeeb34fa59e0d905..565d742a0fda164d559be4315b0e114a01050211 100644 (file)
@@ -50,6 +50,13 @@ int HelpCommand::execute(){
                cout << "The cluster command should be in the following format: " << "\n";
                cout << "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) " << "\n";
                cout << "The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed." << "\n" << "\n";
+       }else if (globaldata->helpRequest == "deconvolute") {
+               cout << "The deconvolute command reads a fastafile and creates a namesfile." << "\n";
+               cout << "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. " << "\n";
+               cout << "If the sequence is unique the second column will just contain its name. " << "\n";
+               cout << "The deconvolute command parameter is fastafile and it is required." << "\n";
+               cout << "The deconvolute command should be in the following format: " << "\n";
+               cout << "deconvolute(fastafile=yourFastaFile) " << "\n";
        }else if (globaldata->helpRequest == "collect.single") {
                cout << "The collect.single command can only be executed after a successful read.list read.rabund or rad.sabund command. WITH ONE EXECEPTION. " << "\n";
                cout << "The collect.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster." << "\n";
@@ -114,7 +121,7 @@ int HelpCommand::execute(){
                cout << "Valid commands are read.dist(), read.list(), read.otu(), cluster(), collect.single(), rarefaction.single(), summary.single(), collect.shared(), rarefaction.shared(), summary.shared(), quit(), help()." << "\n";
                cout << "For more information about a specific command type 'help(commandName)' i.e. 'help(read.phylip)'" << endl;
        }else {
-               cout << "not a valid command" << endl;
+               cout << globaldata->helpRequest << " is not a valid command" << endl;
        }
        
        cout << endl << "For further assistance please refer to the Mothur manual, or contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
index 72a38f71977e949e38d9adfecfd1729b29dab061..70a6502424a5524bbd517d53aa791c6924f552a5 100644 (file)
@@ -18,6 +18,7 @@ ValidCommands::ValidCommands() {
                commands["read.otu"]                    = "read.otu"; 
                commands["read.list"]                   = "read.list"; 
                commands["cluster"]                             = "cluster"; 
+               commands["deconvolute"]                 = "deconvolute"; 
                commands["help"]                                = "help"; 
                commands["quit"]                                = "quit"; 
                commands["collect.single"]              = "collect.single"; 
index dc4ec6276a5b23d38358096373c802830c8e3756..b58a8ff8878b76da789a0f2a1ebc1784a11781bf 100644 (file)
@@ -22,6 +22,7 @@ ValidParameters::ValidParameters() {
                parameters["namefile"]                  = "namefile"; 
                parameters["groupfile"]                 = "groupfile"; 
                parameters["orderfile"]                 = "orderfile"; 
+               parameters["fastafile"]                 = "fastafile"; 
                parameters["fileroot"]                  = "fileroot";
                parameters["cutoff"]                    = "cutoff"; 
                parameters["method"]                    = "method";