]> git.donarmstrong.com Git - mothur.git/commitdiff
added get.oturep command
authorwestcott <westcott>
Mon, 6 Apr 2009 18:21:45 +0000 (18:21 +0000)
committerwestcott <westcott>
Mon, 6 Apr 2009 18:21:45 +0000 (18:21 +0000)
Mothur.xcodeproj/project.pbxproj
binsequencecommand.cpp
commandfactory.cpp
errorchecking.cpp
getoturepcommand.cpp [new file with mode: 0644]
getoturepcommand.h [new file with mode: 0644]
helpcommand.cpp
validcommands.cpp
validparameter.cpp

index 619737b080a84c6175b6d9ac7628ee467495b175..b63f1088509f56feac3399ebf925a6f095d69af3 100644 (file)
@@ -7,6 +7,7 @@
        objects = {
 
 /* Begin PBXBuildFile section */
+               370B88070F8A4EE4005AB382 /* getoturepcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */; };
                372E12700F26365B0095CF7E /* readotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E126F0F26365B0095CF7E /* readotucommand.cpp */; };
                372E12960F263D5A0095CF7E /* readdistcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12950F263D5A0095CF7E /* readdistcommand.cpp */; };
                372E12ED0F264D320095CF7E /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 372E12EC0F264D320095CF7E /* commandfactory.cpp */; };
 /* End PBXCopyFilesBuildPhase section */
 
 /* Begin PBXFileReference section */
+               370B88050F8A4EE4005AB382 /* getoturepcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getoturepcommand.h; sourceTree = "<group>"; };
+               370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getoturepcommand.cpp; sourceTree = "<group>"; };
                372E126E0F26365B0095CF7E /* readotucommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readotucommand.h; sourceTree = "<group>"; };
                372E126F0F26365B0095CF7E /* readotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readotucommand.cpp; sourceTree = "<group>"; };
                372E12940F263D5A0095CF7E /* readdistcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readdistcommand.h; sourceTree = "<group>"; };
                                A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */,
                                A70B53A90F4CD7AD0064797E /* getlinecommand.h */,
                                A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */,
+                               370B88050F8A4EE4005AB382 /* getoturepcommand.h */,
+                               370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */,
                                375873F10F7D64800040F377 /* heatmapcommand.h */,
                                375873F00F7D64800040F377 /* heatmapcommand.cpp */,
                                37D927E40F21331F001D4494 /* helpcommand.h */,
                                37519AA10F810D0200FED5E8 /* venncommand.cpp in Sources */,
                                37519AB50F810FAE00FED5E8 /* venn.cpp in Sources */,
                                37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */,
+                               370B88070F8A4EE4005AB382 /* getoturepcommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 4db41bf4c1ef91d8fd6a2c11d349aa167bef9bcb..5db29329cb28a569542581bc4a47b1cb38b6a7bf 100644 (file)
@@ -153,12 +153,15 @@ void BinSeqCommand::readNamesFile() {
 
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
        catch(...) {
-               cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }       
 }
 //**********************************************************************************************************************
+
+
+
index 354b5a3ba4de65d6576e408ff880930a11c0c1e0..5155095526d1a3d08f4b72ec5236c882400ba5ec 100644 (file)
@@ -35,6 +35,7 @@
 #include "mothur.h"
 #include "nocommands.h"
 #include "binsequencecommand.h"
+#include "getoturepcommand.h"
 
 
 /***********************************************************/
@@ -81,6 +82,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "heatmap")                               {   command = new HeatMapCommand();                             }
                else if(commandName == "venn")                                  {   command = new VennCommand();                                }
                else if(commandName == "bin.seqs")                              {   command = new BinSeqCommand();                              }
+               else if(commandName == "get.oturep")                    {   command = new GetOTURepCommand();                   }
                else                                                                                    {       command = new NoCommand();                                      }
 
                return command;
index da1d1bbb614ffd78f03484c3ca956e3f5aba21ac..abd2a9ffeca07f4f11e9d69f283f09afc535a056 100644 (file)
@@ -233,6 +233,16 @@ bool ErrorCheck::checkInput(string input) {
                        if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the bin.seqs command." << endl; return false; }
                        validateBinFiles();
                }
+               
+               if ((commandName == "get.oturep")) { 
+                       if ((globaldata->gSparseMatrix == NULL) || (globaldata->gListVector == NULL)) {
+                               cout << "Before you use the get.oturep command, you first need to read in a distance matrix." << endl; 
+                               errorFree = false;
+                       }
+                       if (listfile == "") { cout << "list is a required parameter for the get.oturep command." << endl; errorFree = false; }
+                       validateBinFiles();
+               } 
+
 
                return errorFree;
 }
@@ -484,13 +494,19 @@ void ErrorCheck::validateBinFiles() {
                int ableToOpen;
                
                if (fastafile == "") {
-                               cout << "fasta is a required parameter for bin.seqs." << endl; errorFree = false; 
+                               cout << "fasta is a required parameter for bin.seqs and get.oturep commands." << endl; errorFree = false; 
                }else if (fastafile != "") {
                        //is it a valid filename'
                        ableToOpen = openInputFile(fastafile, filehandle);
                        filehandle.close();
                        //unable to open
                        if (ableToOpen == 1) {  errorFree = false; }
+               }else if (listfile != "") {
+                       //is it a valid filename'
+                       ableToOpen = openInputFile(listfile, filehandle);
+                       filehandle.close();
+                       //unable to open
+                       if (ableToOpen == 1) {  errorFree = false; }
                }else if (globaldata->getNameFile() != "") {
                        //is it a valid filename'
                        ifstream filehandle;
diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp
new file mode 100644 (file)
index 0000000..ea53f25
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ *  getoturepcommand.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 4/6/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "getoturepcommand.h"
+
+//**********************************************************************************************************************
+GetOTURepCommand::GetOTURepCommand(){
+       try{
+               globaldata = GlobalData::getInstance();
+       
+               if(globaldata->gSparseMatrix != NULL)   {       matrix = new SparseMatrix(*globaldata->gSparseMatrix);          }
+               
+               //listOfNames bin 0 = first name read in distance matrix, listOfNames bin 1 = second name read in distance matrix
+               if(globaldata->gListVector != NULL)             {       
+                       listOfNames = new ListVector(*globaldata->gListVector); 
+                       
+                       //map names to rows in sparsematrix
+                       for (int i = 0; i < listOfNames->size(); i++) {
+                               nameToIndex[listOfNames->get(i)] = i;
+                       }
+               }else { cout << "error" << endl; }
+
+               
+               fastafile = globaldata->getFastaFile();
+               namesfile = globaldata->getNameFile();
+               openInputFile(fastafile, in);
+               
+               fasta = new FastaMap();
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the GetOTURepCommand class function GetOTURepCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+GetOTURepCommand::~GetOTURepCommand(){
+       delete matrix;
+       delete list;
+       delete input;
+       delete read;
+       delete fasta;
+}
+
+//**********************************************************************************************************************
+
+int GetOTURepCommand::execute(){
+       try {
+               int count = 1;
+               string nameRep, name, sequence;
+               
+               //read fastafile
+               fasta->readFastaFile(in);
+               
+               //set format to list so input can get listvector
+               globaldata->setFormat("list");
+               
+               //if user gave a namesfile then use it
+               if (namesfile != "") {
+                       readNamesFile();
+               }
+               
+               //read list file
+               read = new ReadPhilFile(globaldata->getListFile());     
+               read->read(&*globaldata); 
+               
+               input = globaldata->ginput;
+               list = globaldata->gListVector;
+               
+               while(list != NULL){
+                       
+                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
+                               
+                               //create output file
+                               string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fastarep";
+                               openOutputFile(outputFileName, out);
+
+                               cout << list->getLabel() << '\t' << count << endl;
+                               
+                               //for each bin in the list vector
+                               for (int i = 0; i < list->size(); i++) {
+                                       nameRep = FindRep(i);
+                                       
+                                       //print out name and sequence for that bin
+                                       sequence = fasta->getSequence(nameRep);
+
+                                       if (sequence != "not found") {
+                                               nameRep = nameRep + "bin" + toString(i+1);
+                                               out << ">" << nameRep << endl;
+                                               out << sequence << endl;
+                                       }else { 
+                                               cout << nameRep << " is missing from your fasta or name file. Please correct. " << endl; 
+                                               remove(outputFileName.c_str());
+                                               return 0;
+                                       }
+                               }
+                       }
+                       
+                       list = input->getListVector();
+                       count++;
+               }
+
+               
+               return 0;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the GetOTURepCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+
+}
+
+//**********************************************************************************************************************
+void GetOTURepCommand::readNamesFile() {
+       try {
+               vector<string> dupNames;
+               openInputFile(namesfile, inNames);
+               
+               string name, names, sequence;
+       
+               while(inNames){
+                       inNames >> name;                        //read from first column  A
+                       inNames >> names;               //read from second column  A,B,C,D
+                       
+                       dupNames.clear();
+                       
+                       //parse names into vector
+                       splitAtComma(names, dupNames);
+                       
+                       //store names in fasta map
+                       sequence = fasta->getSequence(name);
+                       for (int i = 0; i < dupNames.size(); i++) {
+                               fasta->push_back(dupNames[i], sequence);
+                       }
+               
+                       gobble(inNames);
+               }
+               inNames.close();
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the GetOTURepCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+//**********************************************************************************************************************
+string GetOTURepCommand::FindRep(int bin) {
+       try{
+               vector<string> names;
+               map<string, float> sums;
+               map<string, float>::iterator it4;
+               map<int, string> binMap; //subset of namesToIndex - just member of this bin
+               string binnames;
+               float min = 10000;
+               string minName;
+               
+               binnames = list->get(bin);
+               
+               //parse names into vector
+               splitAtComma(binnames, names);
+               
+               //if only 1 sequence in bin then that's the rep
+               if (names.size() == 1) { return names[0]; }
+               else {
+                       //fill binMap
+                       for (int i = 0; i < names.size(); i++) {
+                               for (it3 = nameToIndex.begin(); it3 != nameToIndex.end(); it3++) {
+                                       if (it3->first == names[i]) {  
+                                               binMap[it3->second] = it3->first;
+
+                                               //initialize sums map
+                                               sums[it3->first] = 0.0;
+                                               break;
+                                       }
+                               }
+                       }
+                       
+                       //go through each cell in the sparsematrix
+                       for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
+                               //is this a distance between 2 members of this bin
+                               it = binMap.find(currentCell->row);
+                               it2 = binMap.find(currentCell->column);
+                               
+                               //sum the distance of the sequences in the bin to eachother
+                               if ((it != binMap.end()) && (it2 != binMap.end())) {
+                                       //this is a cell that repesents the distance between to of this bins members
+                                       sums[it->second] += currentCell->dist;
+                                       sums[it2->second] += currentCell->dist;
+                               }
+                       }
+                       
+                       //smallest sum is the representative
+                       for (it4 = sums.begin(); it4 != sums.end(); it4++) {
+                               if (it4->second < min) {
+                                       min = it4->second;
+                                       minName = it4->first;
+                               }
+
+                       }
+                       
+                       return minName;
+               }
+       
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the GetOTURepCommand class Function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the GetOTURepCommand class function FindRep. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+
+
+
+
+
diff --git a/getoturepcommand.h b/getoturepcommand.h
new file mode 100644 (file)
index 0000000..d0343da
--- /dev/null
@@ -0,0 +1,56 @@
+#ifndef GETOTUREPCOMMAND_H
+#define GETOTUREPCOMMAND_H
+/*
+ *  getoturepcommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 4/6/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "globaldata.hpp"
+#include "sparsematrix.hpp"
+#include "listvector.hpp"
+#include "inputdata.h"
+#include "readmatrix.hpp"
+#include "fastamap.h"
+
+
+class GlobalData;
+
+typedef list<PCell>::iterator MatData;
+
+class GetOTURepCommand : public Command {
+       
+public:
+       GetOTURepCommand();     
+       ~GetOTURepCommand();
+       int execute();  
+       
+private:
+       GlobalData* globaldata;
+       SparseMatrix* matrix;
+       ListVector* list;
+       ListVector* listOfNames;
+       ReadMatrix* read;
+       InputData* input;
+       FastaMap* fasta;
+       string filename, fastafile, namesfile;
+       ofstream out;
+       ifstream in, inNames;
+       
+        
+       map<string, int> nameToIndex;  //maps sequence name to index in sparsematrix
+       map<int, string>::iterator it;
+       map<int, string>::iterator it2;
+       map<string, int>::iterator it3;
+       
+       void readNamesFile();
+       string FindRep(int); // returns name of "representative" sequence of given bin.
+
+};
+
+#endif
+
index 9f5b4150d7e9ed1c9446321a8c747e69216cfa3f..8e28cdeb1b1c41c9a9b353f3d8b94439ef3c5c86 100644 (file)
@@ -218,6 +218,15 @@ int HelpCommand::execute(){
                cout << "The default value for line and label are all lines in your inputfile." << "\n";
                cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
+       }else if (globaldata->helpRequest == "get.oturep") { 
+               cout << "The get.oturep command can only be executed after a successful read.dist command." << "\n";
+               cout << "The get.oturep command parameters are list, fasta, name, line and label.  The fasta and list parameters are required, and you may not use line and label at the same time." << "\n";
+               cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n";
+               cout << "The get.oturep command should be in the following format: get.oturep(fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n";
+               cout << "Example get.oturep(fasta=amazon.fasta, list=amazon.fn.list, line=1-3-5, name=amazon.names)." << "\n";
+               cout << "The default value for line and label are all lines in your inputfile." << "\n";
+               cout << "The get.oturep command outputs a .fastarep file for each distance you specify, selecting one OTU representative for each bin." << "\n";
+               cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "quit") {
                cout << "The quit command will terminate Dotur and should be in the following format: " << "\n";
                cout << "quit()" << "\n" << "\n";
index 2adbc0ee298ccc7b177d2c87be0c11cfb7df4f77..fed10caf2f62808bc0747a272dcf818989c5fb38 100644 (file)
@@ -18,6 +18,7 @@ ValidCommands::ValidCommands() {
                commands["read.otu"]                    = "read.otu";
                commands["read.tree"]                   = "read.tree"; 
                commands["bin.seqs"]                    = "bin.seqs"; 
+               commands["get.oturep"]                  = "get.oturep";
                commands["cluster"]                             = "cluster"; 
                commands["deconvolute"]                 = "deconvolute"; 
                commands["collect.single"]              = "collect.single"; 
index 5278d51fa3ef2da9429f12c89f4e12d4923d67fe..cb6db162b5ea52262fa51fc520ed6a41f99cb30b 100644 (file)
@@ -292,6 +292,9 @@ void ValidParameters::initCommandParameters() {
                string binseqsArray[] =  {"fasta","line","label","name"};
                commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string));
                
+               string getOTURepArray[] =  {"fasta","list","line","label","name"};
+               commandParameters["get.oturep"] = addParameters(getOTURepArray, sizeof(getOTURepArray)/sizeof(string));
+               
                string quitArray[] = {};
                commandParameters["quit"] = addParameters(quitArray, sizeof(quitArray)/sizeof(string));