372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; };
372E12C10F2648250095CF7E /* readlistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12C00F2648250095CF7E /* readlistcommand.cpp */; };
372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12EC0F264D320095CF7E /* commandfactory.cpp */; };
+ 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; };
37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; };
37D928560F21331F001D4494 /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BA0F21331F001D4494 /* averagelinkage.cpp */; };
37D928570F21331F001D4494 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BB0F21331F001D4494 /* bootstrap.cpp */; };
372E12BF0F2648250095CF7E /* readlistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readlistcommand.h; sourceTree = "<group>"; };
372E12C00F2648250095CF7E /* readlistcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readlistcommand.cpp; sourceTree = "<group>"; };
372E12EC0F264D320095CF7E /* commandfactory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = commandfactory.cpp; sourceTree = "<group>"; };
+ 37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = "<group>"; };
+ 37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = "<group>"; };
37D927B80F21331F001D4494 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
37D927B90F21331F001D4494 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
37D927BA0F21331F001D4494 /* averagelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = averagelinkage.cpp; sourceTree = "<group>"; };
children = (
37D927BA0F21331F001D4494 /* averagelinkage.cpp */,
37D928A60F2133C0001D4494 /* calculators */,
- 37D927C10F21331F001D4494 /* cluster.cpp */,
37D927C20F21331F001D4494 /* cluster.hpp */,
+ 37D927C10F21331F001D4494 /* cluster.cpp */,
37D928A90F2133E5001D4494 /* commands */,
- 37D927C50F21331F001D4494 /* collect.cpp */,
37D927C60F21331F001D4494 /* collect.h */,
+ 37D927C50F21331F001D4494 /* collect.cpp */,
37D927C90F21331F001D4494 /* collectdisplay.h */,
37D928AC0F213420001D4494 /* containers */,
37D927CA0F21331F001D4494 /* collectorscurvedata.h */,
37D927CF0F21331F001D4494 /* commandfactory.hpp */,
372E12EC0F264D320095CF7E /* commandfactory.cpp */,
- 37D927D00F21331F001D4494 /* commandoptionparser.cpp */,
37D927D10F21331F001D4494 /* commandoptionparser.hpp */,
+ 37D927D00F21331F001D4494 /* commandoptionparser.cpp */,
37D927D20F21331F001D4494 /* completelinkage.cpp */,
- 37D927D30F21331F001D4494 /* database.cpp */,
37D927D40F21331F001D4494 /* database.hpp */,
+ 37D927D30F21331F001D4494 /* database.cpp */,
37D927D60F21331F001D4494 /* display.h */,
- 37D927D70F21331F001D4494 /* engine.cpp */,
37D927D80F21331F001D4494 /* engine.hpp */,
+ 37D927D70F21331F001D4494 /* engine.cpp */,
37D928B10F213472001D4494 /* errorcheckor */,
- 37D927DD0F21331F001D4494 /* fileoutput.cpp */,
37D927DE0F21331F001D4494 /* fileoutput.h */,
- 37D927DF0F21331F001D4494 /* globaldata.cpp */,
+ 37D927DD0F21331F001D4494 /* fileoutput.cpp */,
37D927E00F21331F001D4494 /* globaldata.hpp */,
- 37D927E50F21331F001D4494 /* inputdata.cpp */,
+ 37D927DF0F21331F001D4494 /* globaldata.cpp */,
37D927E60F21331F001D4494 /* inputdata.h */,
- 37D927E90F21331F001D4494 /* kmer.cpp */,
+ 37D927E50F21331F001D4494 /* inputdata.cpp */,
37D927EA0F21331F001D4494 /* kmer.hpp */,
- 37D927EB0F21331F001D4494 /* kmerdb.cpp */,
+ 37D927E90F21331F001D4494 /* kmer.cpp */,
37D927EC0F21331F001D4494 /* kmerdb.hpp */,
+ 37D927EB0F21331F001D4494 /* kmerdb.cpp */,
37D927EF0F21331F001D4494 /* mothur.cpp */,
- 37D927F00F21331F001D4494 /* nameassignment.cpp */,
37D927F10F21331F001D4494 /* nameassignment.hpp */,
+ 37D927F00F21331F001D4494 /* nameassignment.cpp */,
37D927F60F21331F001D4494 /* observable.h */,
- 37D927FB0F21331F001D4494 /* progress.cpp */,
37D927FC0F21331F001D4494 /* progress.hpp */,
- 37D928030F21331F001D4494 /* raredisplay.cpp */,
+ 37D927FB0F21331F001D4494 /* progress.cpp */,
37D928040F21331F001D4494 /* raredisplay.h */,
- 37D928050F21331F001D4494 /* rarefact.cpp */,
+ 37D928030F21331F001D4494 /* raredisplay.cpp */,
37D928060F21331F001D4494 /* rarefact.h */,
+ 37D928050F21331F001D4494 /* rarefact.cpp */,
37D928090F21331F001D4494 /* rarefactioncurvedata.h */,
- 37D928120F21331F001D4494 /* readmatrix.cpp */,
37D928130F21331F001D4494 /* readmatrix.hpp */,
- 37D9281C0F21331F001D4494 /* sequence.cpp */,
+ 37D928120F21331F001D4494 /* readmatrix.cpp */,
37D9281D0F21331F001D4494 /* sequence.hpp */,
- 37D928200F21331F001D4494 /* shared.cpp */,
+ 37D9281C0F21331F001D4494 /* sequence.cpp */,
37D928210F21331F001D4494 /* shared.h */,
+ 37D928200F21331F001D4494 /* shared.cpp */,
37D928420F21331F001D4494 /* singlelinkage.cpp */,
- 37D928440F21331F001D4494 /* sparsematrix.cpp */,
37D928450F21331F001D4494 /* sparsematrix.hpp */,
+ 37D928440F21331F001D4494 /* sparsematrix.cpp */,
37D928480F21331F001D4494 /* summarydata.h */,
37D928490F21331F001D4494 /* summarydisplay.h */,
37D9284C0F21331F001D4494 /* utilities.hpp */,
37D927BB0F21331F001D4494 /* bootstrap.cpp */,
37D927C00F21331F001D4494 /* chao1.h */,
37D927BF0F21331F001D4494 /* chao1.cpp */,
- 37D927E70F21331F001D4494 /* jackknife.cpp */,
37D927E80F21331F001D4494 /* jackknife.h */,
- 37D927F40F21331F001D4494 /* npshannon.cpp */,
+ 37D927E70F21331F001D4494 /* jackknife.cpp */,
37D927F50F21331F001D4494 /* npshannon.h */,
- 37D928010F21331F001D4494 /* rarecalc.cpp */,
+ 37D927F40F21331F001D4494 /* npshannon.cpp */,
37D928020F21331F001D4494 /* rarecalc.h */,
- 37D9281E0F21331F001D4494 /* shannon.cpp */,
+ 37D928010F21331F001D4494 /* rarecalc.cpp */,
37D9281F0F21331F001D4494 /* shannon.h */,
- 37D928220F21331F001D4494 /* sharedace.cpp */,
+ 37D9281E0F21331F001D4494 /* shannon.cpp */,
37D928230F21331F001D4494 /* sharedace.h */,
- 37D928240F21331F001D4494 /* sharedchao1.cpp */,
+ 37D928220F21331F001D4494 /* sharedace.cpp */,
37D928250F21331F001D4494 /* sharedchao1.h */,
- 37D928280F21331F001D4494 /* sharedjabund.cpp */,
+ 37D928240F21331F001D4494 /* sharedchao1.cpp */,
37D928290F21331F001D4494 /* sharedjabund.h */,
- 37D9282A0F21331F001D4494 /* sharedjclass.cpp */,
+ 37D928280F21331F001D4494 /* sharedjabund.cpp */,
37D9282B0F21331F001D4494 /* sharedjclass.h */,
- 37D9282C0F21331F001D4494 /* sharedjest.cpp */,
+ 37D9282A0F21331F001D4494 /* sharedjclass.cpp */,
37D9282D0F21331F001D4494 /* sharedjest.h */,
- 37D928340F21331F001D4494 /* sharedsobs.cpp */,
+ 37D9282C0F21331F001D4494 /* sharedjest.cpp */,
37D928350F21331F001D4494 /* sharedsobs.h */,
- 37D928360F21331F001D4494 /* sharedsorabund.cpp */,
+ 37D928340F21331F001D4494 /* sharedsobs.cpp */,
37D928370F21331F001D4494 /* sharedsorabund.h */,
- 37D928380F21331F001D4494 /* sharedsorclass.cpp */,
+ 37D928360F21331F001D4494 /* sharedsorabund.cpp */,
37D928390F21331F001D4494 /* sharedsorclass.h */,
- 37D9283A0F21331F001D4494 /* sharedsorest.cpp */,
+ 37D928380F21331F001D4494 /* sharedsorclass.cpp */,
37D9283B0F21331F001D4494 /* sharedsorest.h */,
+ 37D9283A0F21331F001D4494 /* sharedsorest.cpp */,
37D9283C0F21331F001D4494 /* sharedthetan.cpp */,
37D9283D0F21331F001D4494 /* sharedthetan.h */,
- 37D9283E0F21331F001D4494 /* sharedthetayc.cpp */,
37D9283F0F21331F001D4494 /* sharedthetayc.h */,
- 37D928400F21331F001D4494 /* simpson.cpp */,
+ 37D9283E0F21331F001D4494 /* sharedthetayc.cpp */,
37D928410F21331F001D4494 /* simpson.h */,
+ 37D928400F21331F001D4494 /* simpson.cpp */,
37D928430F21331F001D4494 /* sobs.h */,
- 37D9284D0F21331F001D4494 /* uvest.cpp */,
37D9284E0F21331F001D4494 /* uvest.h */,
+ 37D9284D0F21331F001D4494 /* uvest.cpp */,
);
name = calculators;
sourceTree = "<group>";
37D927C30F21331F001D4494 /* clustercommand.cpp */,
37D927C80F21331F001D4494 /* collectcommand.h */,
37D927C70F21331F001D4494 /* collectcommand.cpp */,
- 37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */,
37D927CC0F21331F001D4494 /* collectsharedcommand.h */,
- 37D927E30F21331F001D4494 /* helpcommand.cpp */,
+ 37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */,
+ 37B28F660F27590100808A62 /* deconvolutecommand.h */,
+ 37B28F670F27590100808A62 /* deconvolutecommand.cpp */,
37D927E40F21331F001D4494 /* helpcommand.h */,
+ 37D927E30F21331F001D4494 /* helpcommand.cpp */,
37D927F20F21331F001D4494 /* nocommand.cpp */,
37D927F30F21331F001D4494 /* nocommand.h */,
- 37D927F90F21331F001D4494 /* parselistcommand.cpp */,
37D927FA0F21331F001D4494 /* parselistcommand.h */,
- 37D927FD0F21331F001D4494 /* quitcommand.cpp */,
+ 37D927F90F21331F001D4494 /* parselistcommand.cpp */,
37D927FE0F21331F001D4494 /* quitcommand.h */,
- 37D928070F21331F001D4494 /* rarefactcommand.cpp */,
+ 37D927FD0F21331F001D4494 /* quitcommand.cpp */,
37D928080F21331F001D4494 /* rarefactcommand.h */,
- 37D9280A0F21331F001D4494 /* rarefactsharedcommand.cpp */,
+ 37D928070F21331F001D4494 /* rarefactcommand.cpp */,
37D9280B0F21331F001D4494 /* rarefactsharedcommand.h */,
+ 37D9280A0F21331F001D4494 /* rarefactsharedcommand.cpp */,
372E12940F263D5A0095CF7E /* readdistcommand.h */,
372E12950F263D5A0095CF7E /* readdistcommand.cpp */,
372E126E0F26365B0095CF7E /* readotucommand.h */,
372E126F0F26365B0095CF7E /* readotucommand.cpp */,
372E12BF0F2648250095CF7E /* readlistcommand.h */,
372E12C00F2648250095CF7E /* readlistcommand.cpp */,
- 37D928260F21331F001D4494 /* sharedcommand.cpp */,
37D928270F21331F001D4494 /* sharedcommand.h */,
- 37D928460F21331F001D4494 /* summarycommand.cpp */,
+ 37D928260F21331F001D4494 /* sharedcommand.cpp */,
37D928470F21331F001D4494 /* summarycommand.h */,
- 37D9284A0F21331F001D4494 /* summarysharedcommand.cpp */,
+ 37D928460F21331F001D4494 /* summarycommand.cpp */,
37D9284B0F21331F001D4494 /* summarysharedcommand.h */,
+ 37D9284A0F21331F001D4494 /* summarysharedcommand.cpp */,
);
name = commands;
sourceTree = "<group>";
isa = PBXGroup;
children = (
37D927D50F21331F001D4494 /* datavector.hpp */,
- 37D927DB0F21331F001D4494 /* fastamap.cpp */,
37D927DC0F21331F001D4494 /* fastamap.h */,
- 37D927E10F21331F001D4494 /* groupmap.cpp */,
+ 37D927DB0F21331F001D4494 /* fastamap.cpp */,
37D927E20F21331F001D4494 /* groupmap.h */,
- 37D927ED0F21331F001D4494 /* listvector.cpp */,
+ 37D927E10F21331F001D4494 /* groupmap.cpp */,
37D927EE0F21331F001D4494 /* listvector.hpp */,
- 37D927F70F21331F001D4494 /* ordervector.cpp */,
+ 37D927ED0F21331F001D4494 /* listvector.cpp */,
37D927F80F21331F001D4494 /* ordervector.hpp */,
- 37D927FF0F21331F001D4494 /* rabundvector.cpp */,
+ 37D927F70F21331F001D4494 /* ordervector.cpp */,
37D928000F21331F001D4494 /* rabundvector.hpp */,
- 37D9281A0F21331F001D4494 /* sabundvector.cpp */,
+ 37D927FF0F21331F001D4494 /* rabundvector.cpp */,
37D9281B0F21331F001D4494 /* sabundvector.hpp */,
- 37D9282E0F21331F001D4494 /* sharedordervector.cpp */,
+ 37D9281A0F21331F001D4494 /* sabundvector.cpp */,
37D9282F0F21331F001D4494 /* sharedordervector.h */,
- 37D928300F21331F001D4494 /* sharedrabundvector.cpp */,
+ 37D9282E0F21331F001D4494 /* sharedordervector.cpp */,
37D928310F21331F001D4494 /* sharedrabundvector.h */,
- 37D928320F21331F001D4494 /* sharedsabundvector.cpp */,
+ 37D928300F21331F001D4494 /* sharedrabundvector.cpp */,
37D928330F21331F001D4494 /* sharedsabundvector.h */,
+ 37D928320F21331F001D4494 /* sharedsabundvector.cpp */,
);
name = containers;
sourceTree = "<group>";
37D928B10F213472001D4494 /* errorcheckor */ = {
isa = PBXGroup;
children = (
- 37D927D90F21331F001D4494 /* errorchecking.cpp */,
37D927DA0F21331F001D4494 /* errorchecking.h */,
- 37D9284F0F21331F001D4494 /* validcalculator.cpp */,
+ 37D927D90F21331F001D4494 /* errorchecking.cpp */,
37D928500F21331F001D4494 /* validcalculator.h */,
- 37D928510F21331F001D4494 /* validcommands.cpp */,
+ 37D9284F0F21331F001D4494 /* validcalculator.cpp */,
37D928520F21331F001D4494 /* validcommands.h */,
- 37D928530F21331F001D4494 /* validparameter.cpp */,
+ 37D928510F21331F001D4494 /* validcommands.cpp */,
37D928540F21331F001D4494 /* validparameter.h */,
+ 37D928530F21331F001D4494 /* validparameter.cpp */,
);
name = errorcheckor;
sourceTree = "<group>";
372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */,
372E12C10F2648250095CF7E /* readlistcommand.cpp in Sources */,
372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */,
+ 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "quitcommand.h"
#include "helpcommand.h"
#include "commandfactory.hpp"
+#include "deconvolutecommand.h"
#include <exception>
try {
delete command; //delete the old command
- if(commandName == "read.dist") { command = new ReadDistCommand(); }
- else if(commandName == "read.otu") { command = new ReadOtuCommand(); }
+ if(commandName == "read.dist") { command = new ReadDistCommand(); }
+ else if(commandName == "read.otu") { command = new ReadOtuCommand(); }
else if(commandName == "read.list") { command = new ReadListFileCommand(); }
else if(commandName == "cluster") { command = new ClusterCommand(); }
+ else if(commandName == "deconvolute") { command = new DeconvoluteCommand(); }
else if(commandName == "help") { command = new HelpCommand(); }
else if(commandName == "quit") { command = new QuitCommand(); }
else if(commandName == "collect.single") { command = new CollectCommand(); }
--- /dev/null
+/*
+ * deconvolutecommand.cpp
+ * Mothur
+ *
+ * Created by Sarah Westcott on 1/21/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "deconvolutecommand.h"
+
+/**************************************************************************************/
+// Runs the deconvolute command: reads the fasta file named in GlobalData, groups the
+// sequence names by identical sequence, and prints the result as a two-column names file.
+// The output file name is getRootName(fastafile) + "names".
+// Returns 0 on success. On any exception an error message is printed and the program
+// exits with status 1.
+int DeconvoluteCommand::execute() {
+	try {
+		globaldata = GlobalData::getInstance();
+
+		//prepare filenames and open files
+		filename = globaldata->getFastaFile();
+		outputFileName = (getRootName(filename) + "names");
+		openInputFile(filename, in);
+		openOutputFile(outputFileName, out);
+
+		//container that groups sequence names by identical sequence
+		fastamap = new FastaMap();
+
+		//fill the container from the fasta file
+		fastamap->readFastaFile(in);
+
+		//print out new names file
+		//file contains 2 columns separated by tabs. the first column is the groupname (name of first sequence found).
+		//the second column is the list of names of identical sequences separated by ','.
+		fastamap->print(out);
+
+		//close both streams now so the names file is complete on disk when execute returns,
+		//instead of waiting for this command object to be destroyed
+		in.close();
+		out.close();
+
+		return 0;
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the DeconvoluteCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+/**************************************************************************************/
--- /dev/null
+#ifndef DECONVOLUTECOMMAND_H
+#define DECONVOLUTECOMMAND_H
+/*
+ * deconvolutecommand.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 1/21/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include <iostream>
+#include <fstream>
+#include "command.hpp"
+#include "utilities.hpp"
+#include "fastamap.h"
+#include "globaldata.hpp"
+
+/* The deconvolute command reads a fasta file, finds the duplicate sequences and outputs a names file
+ containing 2 columns. The first being the groupname and the second the list of identical sequence names. */
+
+using namespace std;
+
+class DeconvoluteCommand : public Command {
+
+public:
+	//both pointers start out null so the destructor's delete is safe even when
+	//execute() was never called on this object
+	DeconvoluteCommand() : globaldata(0), fastamap(0) {};
+	~DeconvoluteCommand() { delete fastamap; };
+	int execute();	//reads the fasta file and prints the names file; returns 0 on success
+
+private:
+	GlobalData* globaldata;	//set from GlobalData::getInstance() in execute(); never deleted by this class
+	FastaMap* fastamap;	//allocated in execute(), deleted in the destructor
+	ifstream in;	//the fasta file being read
+	ofstream out;	//the names file being written
+	string filename, outputFileName;
+
+};
+
+#endif
\ No newline at end of file
namefile = globaldata->getNameFile();
groupfile = globaldata->getGroupFile();
orderfile = globaldata->getOrderFile();
+ fastafile = globaldata->getFastaFile();
cutoff = globaldata->getCutOff();
format = globaldata->getFormat();
method = globaldata->getMethod();
if (parameter == "sabundfile" ) { sabundfile = value; }
if (parameter == "namefile" ) { namefile = value; }
if (parameter == "orderfile" ) { orderfile = value; }
+ if (parameter == "fastafile" ) { fastafile = value; }
if (parameter == "groupfile" ) { groupfile = value; }
if (parameter == "cutoff" ) { cutoff = value; }
if (parameter == "precision" ) { precision = value; }
if (parameter == "namefile" ) { namefile = value; }
if (parameter == "orderfile" ) { orderfile = value; }
if (parameter == "groupfile" ) { groupfile = value; }
+ if (parameter == "fastafile" ) { fastafile = value; }
if (parameter == "cutoff" ) { cutoff = value; }
if (parameter == "precision" ) { precision = value; }
if (parameter == "iters" ) { iters = value; }
validateReadPhil();
}else if (commandName == "read.list") {
validateParseFiles(); //checks the listfile and groupfile parameters
+ }else if (commandName == "deconvolute") {
+ validateReadFiles();
}
//are you trying to cluster before you have read something
//unable to open
if (ableToOpen == 1) { errorFree = false; }
else { globaldata->inputFileName = sabundfile; }
+ }else if (fastafile != "") {
+ ableToOpen = openInputFile(fastafile, filehandle);
+ filehandle.close();
+ //unable to open
+ if (ableToOpen == 1) { errorFree = false; }
+ else { globaldata->inputFileName = fastafile; }
}else{ //no file given
errorFree = false;
}
void validateReadPhil();
void validateParseFiles();
void clear();
- string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, cutoff, format;
+ string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, cutoff, format;
string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary;
string commandName, optionText;
bool errorFree;
#include "fastamap.h"
/*******************************************************************************/
- FastaMap::FastaMap(ifstream& in) {
- //int numberOfSequences = 0;
+// Reads fasta records from `in` and groups sequence names by identical sequence text.
+// The first line is taken as a name line and its first character is dropped.
+// After that, a line starting with '>' begins a new record; every other non-empty line
+// is appended to the current sequence, and blank lines are skipped.
+// The first name seen for a sequence becomes its groupname; later names with the same
+// sequence are appended to `names` separated by ',' and counted in groupnumber.
+// On any exception an error message is printed and the program exits with status 1.
+void FastaMap::readFastaFile(ifstream& in) {
+	try {
+		string name, sequence, line;
+		sequence = "";
- string name, sequence, line;
- sequence = "";
+		getline(in, line);
+		name = line.substr(1, line.length());  //rips off '>'
- getline(in, line);
- name = line.substr(1, line.length()); //rips off '>'
-
- //read through file
- while (getline(in, line)) {
- if (isalnum(line.at(0))){ //if it's a sequence line
- sequence += line;
- }
- else{
- //input sequence info into map
- it = data.find(sequence);
- if (it == data.end()) { //it's unique.
- data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
- data[sequence].groupnumber = 1;
- data[sequence].names = name;
- }else { // its a duplicate.
- data[sequence].names += "," + name;
- data[sequence].groupnumber++;
+		//read through file
+		while (getline(in, line)) {
+			if (line.empty()) { continue; }  //blank line: nothing to store, and line.at(0) would throw
+			if (line.at(0) != '>'){  //anything that is not a name line is sequence, including lines that start with a gap character
+				sequence += line;
+			}
+			else{
+				//input sequence info into map
+				it = data.find(sequence);
+				if (it == data.end()) {	//it's unique.
+					data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+					data[sequence].groupnumber = 1;
+					data[sequence].names = name;
+				}else { // its a duplicate.
+					data[sequence].names += "," + name;
+					data[sequence].groupnumber++;
+				}
+				name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
+				sequence = "";
}
- name = (line.substr(1, (line.npos))); //The line you just read is a new name so rip off '>'
}
- }
- //store last sequence and name info.
- it = data.find(sequence);
- if (it == data.end()) { //it's unique.
- data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found.
- data[sequence].groupnumber = 1;
- data[sequence].names = name;
- }else { // its a duplicate.
- data[sequence].names += "," + name;
- data[sequence].groupnumber++;
- }
+		//store last sequence and name info.
+		it = data.find(sequence);
+		if (it == data.end()) {	//it's unique.
+			data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+			data[sequence].groupnumber = 1;
+			data[sequence].names = name;
+		}else { // its a duplicate.
+			data[sequence].names += "," + name;
+			data[sequence].groupnumber++;
+		}
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the FastaMap class function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
}
/*******************************************************************************/
string FastaMap::getGroupName(string seq) { //pass a sequence name get its group
return data[seq].groupname;
}
/*******************************************************************************/
-int FastaMap::getGroupNumber(string seq) { //pass a sequence name get number of sequence in its group
- return data[seq].groupnumber;
-}
-/*******************************************************************************/
string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s.
return data[seq].names;
}
/*******************************************************************************/
+int FastaMap::getGroupNumber(string seq) {	//pass a sequence get the number of identical sequences.
+	//use find() rather than operator[] so asking about an unknown sequence does not
+	//insert an empty record that would later be counted by size() and written by print()
+	map<string, group>::iterator pos = data.find(seq);
+	if (pos == data.end()) { return 0; }	//unknown sequence: no names recorded
+	return pos->second.groupnumber;
+}
+/*******************************************************************************/
void FastaMap::push_back(string seq, string Name) {//sequencename, name
data[seq].groupname = Name;
- data[seq].groupnumber = 1;
data[seq].names = Name;
}
/*******************************************************************************/
+// Stores groupName and Names for the sequence seq, creating the record if it does not
+// exist yet. The record's groupnumber is not modified here.
+void FastaMap::set(string seq, string groupName, string Names) {
+	group& record = data[seq];	//fetch (or create) the record once
+	record.groupname = groupName;
+	record.names = Names;
+}
+/*******************************************************************************/
void FastaMap::clear() { //clears out data
data.clear();
}
return data.size();
}
/*******************************************************************************/
-void FastaMap::print(ostream&){ //prints data
-
+// Writes one line per stored sequence to `out`: the groupname, a tab, then the list of
+// identical sequence names.
+// On any exception an error message is printed and the program exits with status 1.
+void FastaMap::print(ostream& out){
+	try {
+		it = data.begin();
+		while (it != data.end()) {
+			out << it->second.groupname << '\t' << it->second.names << endl;
+			it++;
+		}
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the FastaMap class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
}
/*******************************************************************************/
public:
FastaMap() {};
- FastaMap(ifstream&);
~FastaMap() {};
string getGroupName(string); //pass a sequence name get its group
int getGroupNumber(string); //pass a sequence name get number of sequence in its group
string getNames(string); //pass a sequence get the string of names in the group separated by ','s.
void push_back(string, string); //sequencename, groupname
+ void set(string, string, string); //sequence, groupname, names.
void clear();
int size(); //returns number of unique sequences
void print(ostream&);
+ void readFastaFile(ifstream&);
private:
struct group {
string groupname; //the group name for identical sequences, will be set to the first sequence found.
- int groupnumber; //the number of sequences in that group.
+ int groupnumber; //the number of sequence names with the same sequence.
string names; //the names of the sequence separated by ','.
};
- map<string, group> data; //sequence, group
- map<string, group>::iterator it;
+ map<string, group> data; //sequence, groupinfo
+ map<string, group>::iterator it;
};
#endif
\ No newline at end of file
allLines = 1;
commandName = commandString; //save command name to be used by other classes
- //clears out data from previous read and sets format
- setReadFormat(commandName);
+ //clears out data from previous read
+ if ((commandName == "read.dist") || (commandName == "read.otu") || (commandName == "read.list")) {
+ clear();
+ }
//saves help request
if (commandName =="help") {
if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; }
if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; }
if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; }
+ if (key == "fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; }
if (key == "namefile" ) { namefile = value; }
if (key == "orderfile" ) { orderfile = value; }
if (key == "groupfile" ) { groupfile = value; }
if (key == "listfile" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; }
if (key == "rabundfile" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; }
if (key == "sabundfile" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; }
+ if (key == "fastafile" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; }
if (key == "namefile" ) { namefile = value; }
if (key == "orderfile" ) { orderfile = value; }
if (key == "groupfile" ) { groupfile = value; }
}
/*******************************************************/
-/******************************************************/
-void GlobalData::setReadFormat(string command){
- try {
- if (command == "read.dist") {
- clear();
- }else if (command == "read.otu") {
- clear();
- }else if (command == "read.shared") {
- clear();
- format = "shared";
- }else if (command == "parselist") { format = "list"; }
- }
- catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the GlobalData class Function setReadFormat. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the GlobalData class function setReadFormat. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-}
-
-/*******************************************************/
-
/******************************************************/
// These functions give you the option parameters of the commands
string GlobalData::getPhylipFile() { return phylipfile; }
string GlobalData::getNameFile() { return namefile; }
string GlobalData::getGroupFile() { return groupfile; }
string GlobalData::getOrderFile() { return orderfile; }
+string GlobalData::getFastaFile() { return fastafile; }
string GlobalData::getCutOff() { return cutoff; }
string GlobalData::getFormat() { return format; }
string GlobalData::getPrecision() { return precision; }
namefile = "";
groupfile = "";
orderfile = "";
+ fastafile = "";
cutoff = "10.00";
format = "";
precision = "100";
string getNameFile();
string getGroupFile();
string getOrderFile();
+ string getFastaFile();
string getCutOff();
string getFormat();
string getPrecision();
void splitAtDash(string&, set<string>&);
private:
- string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, line, label;
+ string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile,line, label;
string cutoff, format, precision, method, fileroot, iters, jumble, freq, single, rarefaction, shared, summary, sharedsummary, sharedrarefaction;
static GlobalData* _uniqueInstance;
GlobalData( const GlobalData& ); // Disable copy constructor
~GlobalData();
ListVector* gListVector;
SparseMatrix* gSparseMatrix;
- void setReadFormat(string);
- };
+};
//**********************************************************************************************************************
cout << "The cluster command should be in the following format: " << "\n";
cout << "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) " << "\n";
cout << "The acceptable cluster methods are furthest, nearest and average. If no method is provided then furthest is assumed." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "deconvolute") {
+ cout << "The deconvolute command reads a fastafile and creates a namesfile." << "\n";
+ cout << "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. " << "\n";
+ cout << "If the sequence is unique the second column will just contain its name. " << "\n";
+ cout << "The deconvolute command parameter is fastafile and it is required." << "\n";
+ cout << "The deconvolute command should be in the following format: " << "\n";
+ cout << "deconvolute(fastafile=yourFastaFile) " << "\n";
}else if (globaldata->helpRequest == "collect.single") {
cout << "The collect.single command can only be executed after a successful read.list read.rabund or rad.sabund command. WITH ONE EXECEPTION. " << "\n";
cout << "The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n";
cout << "Valid commands are read.dist(), read.list(), read.otu(), cluster(), collect.single(), rarefaction.single(), summary.single(), collect.shared(), rarefaction.shared(), summary.shared(), quit(), help()." << "\n";
cout << "For more information about a specific command type 'help(commandName)' i.e. 'help(read.phylip)'" << endl;
}else {
- cout << "not a valid command" << endl;
+ cout << globaldata->helpRequest << " is not a valid command" << endl;
}
cout << endl << "For further assistance please refer to the Mothur manual, or contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
commands["read.otu"] = "read.otu";
commands["read.list"] = "read.list";
commands["cluster"] = "cluster";
+ commands["deconvolute"] = "deconvolute";
commands["help"] = "help";
commands["quit"] = "quit";
commands["collect.single"] = "collect.single";
parameters["namefile"] = "namefile";
parameters["groupfile"] = "groupfile";
parameters["orderfile"] = "orderfile";
+ parameters["fastafile"] = "fastafile";
parameters["fileroot"] = "fileroot";
parameters["cutoff"] = "cutoff";
parameters["method"] = "method";