objects = {
/* Begin PBXBuildFile section */
+ 370B88070F8A4EE4005AB382 /* getoturepcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */; };
372E12700F26365B0095CF7E /* readotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E126F0F26365B0095CF7E /* readotucommand.cpp */; };
372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; };
372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12EC0F264D320095CF7E /* commandfactory.cpp */; };
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
+ 370B88050F8A4EE4005AB382 /* getoturepcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getoturepcommand.h; sourceTree = "<group>"; };
+ 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getoturepcommand.cpp; sourceTree = "<group>"; };
372E126E0F26365B0095CF7E /* readotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readotucommand.h; sourceTree = "<group>"; };
372E126F0F26365B0095CF7E /* readotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readotucommand.cpp; sourceTree = "<group>"; };
372E12940F263D5A0095CF7E /* readdistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readdistcommand.h; sourceTree = "<group>"; };
A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */,
A70B53A90F4CD7AD0064797E /* getlinecommand.h */,
A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */,
+ 370B88050F8A4EE4005AB382 /* getoturepcommand.h */,
+ 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */,
375873F10F7D64800040F377 /* heatmapcommand.h */,
375873F00F7D64800040F377 /* heatmapcommand.cpp */,
37D927E40F21331F001D4494 /* helpcommand.h */,
37519AA10F810D0200FED5E8 /* venncommand.cpp in Sources */,
37519AB50F810FAE00FED5E8 /* venn.cpp in Sources */,
37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */,
+ 370B88070F8A4EE4005AB382 /* getoturepcommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
catch(...) {
- cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
exit(1);
}
}
//**********************************************************************************************************************
+
+
+
#include "mothur.h"
#include "nocommands.h"
#include "binsequencecommand.h"
+#include "getoturepcommand.h"
/***********************************************************/
else if(commandName == "heatmap") { command = new HeatMapCommand(); }
else if(commandName == "venn") { command = new VennCommand(); }
else if(commandName == "bin.seqs") { command = new BinSeqCommand(); }
+ else if(commandName == "get.oturep") { command = new GetOTURepCommand(); }
else { command = new NoCommand(); }
return command;
if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the bin.seqs command." << endl; return false; }
validateBinFiles();
}
+
+ if ((commandName == "get.oturep")) {
+ if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) {
+ cout << "Before you use the get.oturep command, you first need to read in a distance matrix." << endl;
+ errorFree = false;
+ }
+ if (listfile == "") { cout << "list is a required parameter for the get.oturep command." << endl; errorFree = false; }
+ validateBinFiles();
+ }
+
return errorFree;
}
int ableToOpen;
if (fastafile == "") {
- cout << "fasta is a required parameter for bin.seqs." << endl; errorFree = false;
+ cout << "fasta is a required parameter for bin.seqs and get.oturep commands." << endl; errorFree = false;
}else if (fastafile != "") {
//is it a valid filename'
ableToOpen = openInputFile(fastafile, filehandle);
filehandle.close();
//unable to open
if (ableToOpen == 1) { errorFree = false; }
+ }else if (listfile != "") {
+ //is it a valid filename'
+ ableToOpen = openInputFile(listfile, filehandle);
+ filehandle.close();
+ //unable to open
+ if (ableToOpen == 1) { errorFree = false; }
}else if (globaldata->getNameFile() != "") {
//is it a valid filename'
ifstream filehandle;
--- /dev/null
+/*
+ * getoturepcommand.cpp
+ * Mothur
+ *
+ * Created by Sarah Westcott on 4/6/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "getoturepcommand.h"
+
+//**********************************************************************************************************************
+GetOTURepCommand::GetOTURepCommand(){
+ try{
+ globaldata = GlobalData::getInstance();
+
+ if(globaldata->gSparseMatrix != NULL) { matrix = new SparseMatrix(*globaldata->gSparseMatrix); }
+
+ //listOfNames bin 0 = first name read in distance matrix, listOfNames bin 1 = second name read in distance matrix
+ if(globaldata->gListVector != NULL) {
+ listOfNames = new ListVector(*globaldata->gListVector);
+
+ //map names to rows in sparsematrix
+ for (int i = 0; i < listOfNames->size(); i++) {
+ nameToIndex[listOfNames->get(i)] = i;
+ }
+ }else { cout << "error" << endl; }
+
+
+ fastafile = globaldata->getFastaFile();
+ namesfile = globaldata->getNameFile();
+ openInputFile(fastafile, in);
+
+ fasta = new FastaMap();
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the GetOTURepCommand class function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+GetOTURepCommand::~GetOTURepCommand(){
+ delete matrix;
+ delete list;
+ delete input;
+ delete read;
+ delete fasta;
+}
+
+//**********************************************************************************************************************
+
+int GetOTURepCommand::execute(){
+ try {
+ int count = 1;
+ string nameRep, name, sequence;
+
+ //read fastafile
+ fasta->readFastaFile(in);
+
+ //set format to list so input can get listvector
+ globaldata->setFormat("list");
+
+ //if user gave a namesfile then use it
+ if (namesfile != "") {
+ readNamesFile();
+ }
+
+ //read list file
+ read = new ReadPhilFile(globaldata->getListFile());
+ read->read(&*globaldata);
+
+ input = globaldata->ginput;
+ list = globaldata->gListVector;
+
+ while(list != NULL){
+
+ if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
+
+ //create output file
+ string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fastarep";
+ openOutputFile(outputFileName, out);
+
+ cout << list->getLabel() << '\t' << count << endl;
+
+ //for each bin in the list vector
+ for (int i = 0; i < list->size(); i++) {
+ nameRep = FindRep(i);
+
+ //print out name and sequence for that bin
+ sequence = fasta->getSequence(nameRep);
+
+ if (sequence != "not found") {
+ nameRep = nameRep + "bin" + toString(i+1);
+ out << ">" << nameRep << endl;
+ out << sequence << endl;
+ }else {
+ cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl;
+ remove(outputFileName.c_str());
+ return 0;
+ }
+ }
+ }
+
+ list = input->getListVector();
+ count++;
+ }
+
+
+ return 0;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the GetOTURepCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+
+}
+
+//**********************************************************************************************************************
+void GetOTURepCommand::readNamesFile() {
+ try {
+ vector<string> dupNames;
+ openInputFile(namesfile, inNames);
+
+ string name, names, sequence;
+
+ while(inNames){
+ inNames >> name; //read from first column A
+ inNames >> names; //read from second column A,B,C,D
+
+ dupNames.clear();
+
+ //parse names into vector
+ splitAtComma(names, dupNames);
+
+ //store names in fasta map
+ sequence = fasta->getSequence(name);
+ for (int i = 0; i < dupNames.size(); i++) {
+ fasta->push_back(dupNames[i], sequence);
+ }
+
+ gobble(inNames);
+ }
+ inNames.close();
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the GetOTURepCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string GetOTURepCommand::FindRep(int bin) {
+ try{
+ vector<string> names;
+ map<string, float> sums;
+ map<string, float>::iterator it4;
+ map<int, string> binMap; //subset of namesToIndex - just member of this bin
+ string binnames;
+ float min = 10000;
+ string minName;
+
+ binnames = list->get(bin);
+
+ //parse names into vector
+ splitAtComma(binnames, names);
+
+ //if only 1 sequence in bin then that's the rep
+ if (names.size() == 1) { return names[0]; }
+ else {
+ //fill binMap
+ for (int i = 0; i < names.size(); i++) {
+ for (it3 = nameToIndex.begin(); it3 != nameToIndex.end(); it3++) {
+ if (it3->first == names[i]) {
+ binMap[it3->second] = it3->first;
+
+ //initialize sums map
+ sums[it3->first] = 0.0;
+ break;
+ }
+ }
+ }
+
+ //go through each cell in the sparsematrix
+ for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
+ //is this a distance between 2 members of this bin
+ it = binMap.find(currentCell->row);
+ it2 = binMap.find(currentCell->column);
+
+ //sum the distance of the sequences in the bin to eachother
+ if ((it != binMap.end()) && (it2 != binMap.end())) {
+ //this is a cell that repesents the distance between to of this bins members
+ sums[it->second] += currentCell->dist;
+ sums[it2->second] += currentCell->dist;
+ }
+ }
+
+ //smallest sum is the representative
+ for (it4 = sums.begin(); it4 != sums.end(); it4++) {
+ if (it4->second < min) {
+ min = it4->second;
+ minName = it4->first;
+ }
+
+ }
+
+ return minName;
+ }
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the GetOTURepCommand class function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+
+
+
+
+
--- /dev/null
+#ifndef GETOTUREPCOMMAND_H
+#define GETOTUREPCOMMAND_H
+/*
+ * getoturepcommand.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 4/6/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "globaldata.hpp"
+#include "sparsematrix.hpp"
+#include "listvector.hpp"
+#include "inputdata.h"
+#include "readmatrix.hpp"
+#include "fastamap.h"
+
+
+class GlobalData;
+
+typedef list<PCell>::iterator MatData;
+
+class GetOTURepCommand : public Command {
+
+public:
+ GetOTURepCommand();
+ ~GetOTURepCommand();
+ int execute();
+
+private:
+ GlobalData* globaldata;
+ SparseMatrix* matrix;
+ ListVector* list;
+ ListVector* listOfNames;
+ ReadMatrix* read;
+ InputData* input;
+ FastaMap* fasta;
+ string filename, fastafile, namesfile;
+ ofstream out;
+ ifstream in, inNames;
+
+
+ map<string, int> nameToIndex; //maps sequence name to index in sparsematrix
+ map<int, string>::iterator it;
+ map<int, string>::iterator it2;
+ map<string, int>::iterator it3;
+
+ void readNamesFile();
+ string FindRep(int); // returns name of "representative" sequence of given bin.
+
+};
+
+#endif
+
cout << "The default value for line and label are all lines in your inputfile." << "\n";
cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n";
cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "get.oturep") {
+ cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n";
+ cout << "The get.oturep command parameters are list, fasta, name, line and label. The fasta and list parameters are required, and you may not use line and label at the same time." << "\n";
+ cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n";
+ cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n";
+ cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, line=1-3-5, name=amazon.names)." << "\n";
+ cout << "The default value for line and label are all lines in your inputfile." << "\n";
+ cout << "The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
}else if (globaldata->helpRequest == "quit") {
cout << "The quit command will terminate Dotur and should be in the following format: " << "\n";
cout << "quit()" << "\n" << "\n";
commands["read.otu"] = "read.otu";
commands["read.tree"] = "read.tree";
commands["bin.seqs"] = "bin.seqs";
+ commands["get.oturep"] = "get.oturep";
commands["cluster"] = "cluster";
commands["deconvolute"] = "deconvolute";
commands["collect.single"] = "collect.single";
string binseqsArray[] = {"fasta","line","label","name"};
commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string));
+ string getOTURepArray[] = {"fasta","list","line","label","name"};
+ commandParameters["get.oturep"] = addParameters(getOTURepArray, sizeof(getOTURepArray)/sizeof(string));
+
string quitArray[] = {};
commandParameters["quit"] = addParameters(quitArray, sizeof(quitArray)/sizeof(string));