]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed memory leak in parsimony calculator and added progress bars to parsimony and...
authorwestcott <westcott>
Mon, 6 Apr 2009 14:48:16 +0000 (14:48 +0000)
committerwestcott <westcott>
Mon, 6 Apr 2009 14:48:16 +0000 (14:48 +0000)
28 files changed:
Mothur.xcodeproj/project.pbxproj
binsequencecommand.cpp [new file with mode: 0644]
binsequencecommand.h [new file with mode: 0644]
collectcommand.cpp
collectcommand.h
commandfactory.cpp
deconvolutecommand.cpp
errorchecking.cpp
errorchecking.h
fastamap.cpp
fastamap.h
helpcommand.cpp
inputdata.cpp
parsimony.cpp
parsimonycommand.cpp
parsimonycommand.h
readmatrix.cpp
readmatrix.hpp
tree.cpp
tree.h
treenode.h
unifracunweightedcommand.cpp
unifracunweightedcommand.h
unifracweightedcommand.cpp
unifracweightedcommand.h
utilities.hpp
validcommands.cpp
validparameter.cpp

index 17bf8aedee96bbcddc1e10729a9a8a30a68ee2a4..619737b080a84c6175b6d9ac7628ee467495b175 100644 (file)
@@ -37,6 +37,7 @@
                37AD4DCA0F28F3DD00AA2D49 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */; };
                37AFC71F0F445386005F492D /* sharedsobscollectsummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */; };
                37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; };
+               37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */; };
                37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; };
                37D928560F21331F001D4494 /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BA0F21331F001D4494 /* averagelinkage.cpp */; };
                37D928570F21331F001D4494 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BB0F21331F001D4494 /* bootstrap.cpp */; };
                37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedsobscollectsummary.cpp; sourceTree = "<group>"; };
                37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = "<group>"; };
                37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = "<group>"; };
+               37C1D9710F86506E0059E3F0 /* binsequencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = binsequencecommand.h; sourceTree = "<group>"; };
+               37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = binsequencecommand.cpp; sourceTree = "<group>"; };
                37D927B80F21331F001D4494 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
                37D927B90F21331F001D4494 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
                37D927BA0F21331F001D4494 /* averagelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = averagelinkage.cpp; sourceTree = "<group>"; };
                                37D928210F21331F001D4494 /* shared.h */,
                                37D928200F21331F001D4494 /* shared.cpp */,
                                37D928420F21331F001D4494 /* singlelinkage.cpp */,
-                               37D928450F21331F001D4494 /* sparsematrix.hpp */,
-                               37D928440F21331F001D4494 /* sparsematrix.cpp */,
                                37D928480F21331F001D4494 /* summarydata.h */,
                                37D928490F21331F001D4494 /* summarydisplay.h */,
                                37D9284C0F21331F001D4494 /* utilities.hpp */,
                        isa = PBXGroup;
                        children = (
                                37D927CD0F21331F001D4494 /* command.hpp */,
+                               37C1D9710F86506E0059E3F0 /* binsequencecommand.h */,
+                               37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */,
                                37D927C40F21331F001D4494 /* clustercommand.h */,
                                37D927C30F21331F001D4494 /* clustercommand.cpp */,
                                37D927C80F21331F001D4494 /* collectcommand.h */,
                                375873F00F7D64800040F377 /* heatmapcommand.cpp */,
                                37D927E40F21331F001D4494 /* helpcommand.h */,
                                37D927E30F21331F001D4494 /* helpcommand.cpp */,
-                               375873F30F7D648F0040F377 /* libshuffcommand.cpp */,
                                375873F40F7D648F0040F377 /* libshuffcommand.h */,
+                               375873F30F7D648F0040F377 /* libshuffcommand.cpp */,
                                375873F60F7D649C0040F377 /* nocommands.cpp */,
                                375873F70F7D649C0040F377 /* nocommands.h */,
                                37D927FA0F21331F001D4494 /* parselistcommand.h */,
                                37D928300F21331F001D4494 /* sharedrabundvector.cpp */,
                                37D928330F21331F001D4494 /* sharedsabundvector.h */,
                                37D928320F21331F001D4494 /* sharedsabundvector.cpp */,
+                               37D928450F21331F001D4494 /* sparsematrix.hpp */,
+                               37D928440F21331F001D4494 /* sparsematrix.cpp */,
                                37AD4DB90F28E2FE00AA2D49 /* tree.h */,
                                37AD4DBA0F28E2FE00AA2D49 /* tree.cpp */,
                                379293C10F2DE73400B9034A /* treemap.h */,
                                37519A6B0F80E6EB00FED5E8 /* sharedanderbergs.cpp in Sources */,
                                37519AA10F810D0200FED5E8 /* venncommand.cpp in Sources */,
                                37519AB50F810FAE00FED5E8 /* venn.cpp in Sources */,
+                               37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp
new file mode 100644 (file)
index 0000000..4db41bf
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ *  binsequencecommand.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 4/3/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "binsequencecommand.h"
+
+//**********************************************************************************************************************
+BinSeqCommand::BinSeqCommand(){
+       try {
+               globaldata = GlobalData::getInstance();
+               fastafile = globaldata->getFastaFile();
+               namesfile = globaldata->getNameFile();
+               openInputFile(fastafile, in);
+               
+               fasta = new FastaMap();
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the BinSeqCommand class function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+
+//**********************************************************************************************************************
+
+BinSeqCommand::~BinSeqCommand(){       //frees the objects this command holds pointers to
+       delete input;   //assigned in execute() from globaldata->ginput -- NOTE(review): confirm GlobalData does not free it as well
+       delete read;    //the ReadPhilFile allocated in execute()
+       delete fasta;   //the FastaMap allocated in the constructor
+       delete list;    //last value assigned in execute(); NULL when its loop ran to completion
+}
+
+//**********************************************************************************************************************
+
+int BinSeqCommand::execute(){
+       try {
+               int count = 1;
+               string binnames, name, sequence;
+               
+               //read fastafile
+               fasta->readFastaFile(in);
+               
+               //set format to list so input can get listvector
+               globaldata->setFormat("list");
+               
+               //if user gave a namesfile then use it
+               if (namesfile != "") {
+                       readNamesFile();
+               }
+               
+               //read list file
+               read = new ReadPhilFile(globaldata->getListFile());     
+               read->read(&*globaldata); 
+               
+               input = globaldata->ginput;
+               list = globaldata->gListVector;
+                               
+               while(list != NULL){
+                       
+                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
+                               
+                               //create output file
+                               string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
+                               openOutputFile(outputFileName, out);
+
+                               cout << list->getLabel() << '\t' << count << endl;
+                               
+                               //for each bin in the list vector
+                               for (int i = 0; i < list->size(); i++) {
+                                       binnames = list->get(i);
+                                       while (binnames.find_first_of(',') != -1) { 
+                                               name = binnames.substr(0,binnames.find_first_of(','));
+                                               binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
+                                               
+                                               //do work for that name
+                                               sequence = fasta->getSequence(name);
+                                               if (sequence != "not found") {
+                                                       name = name + "bin" + toString(i+1);
+                                                       out << ">" << name << endl;
+                                                       out << sequence << endl;
+                                               }else { 
+                                                       cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
+                                                       remove(outputFileName.c_str());
+                                                       return 0;
+                                               }
+                                               
+                                       }
+                                       
+                                       //get last name
+                                       sequence = fasta->getSequence(binnames);
+                                       if (sequence != "not found") {
+                                               name = binnames + "bin" + toString(i+1);
+                                               out << ">" << name << endl;
+                                               out << sequence << endl;
+                                       }else { 
+                                               cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
+                                               remove(outputFileName.c_str());
+                                               return 0;
+                                       }
+                               }
+                       }
+                       
+                       list = input->getListVector();
+                       count++;
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+
+//**********************************************************************************************************************
+void BinSeqCommand::readNamesFile() {
+       try {
+               vector<string> dupNames;
+               openInputFile(namesfile, inNames);
+               
+               string name, names, sequence;
+       
+               while(inNames){
+                       inNames >> name;                        //read from first column  A
+                       inNames >> names;               //read from second column  A,B,C,D
+                       
+                       dupNames.clear();
+                       
+                       //parse names into vector
+                       splitAtComma(names, dupNames);
+                       
+                       //store names in fasta map
+                       sequence = fasta->getSequence(name);
+                       for (int i = 0; i < dupNames.size(); i++) {
+                               fasta->push_back(dupNames[i], sequence);
+                       }
+               
+                       gobble(inNames);
+               }
+               inNames.close();
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+//**********************************************************************************************************************
diff --git a/binsequencecommand.h b/binsequencecommand.h
new file mode 100644 (file)
index 0000000..22847dc
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef BINSEQCOMMAND_H
+#define BINSEQCOMMAND_H
+/*
+ *  binsequencecommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 4/3/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "listvector.hpp"
+#include "readmatrix.hpp"
+#include "fastamap.h"
+
+
+class GlobalData;
+
+class BinSeqCommand : public Command { //the bin.seqs command: writes a fasta file per list label with each name tagged by its bin number
+       
+public:
+       BinSeqCommand();        //opens the fasta file and creates the FastaMap
+       ~BinSeqCommand();
+       int execute();  //runs the command; returns 0
+       
+private:
+       GlobalData* globaldata; //instance returned by GlobalData::getInstance()
+       ListVector* list;       //list vector currently being processed
+       ReadMatrix* read;       //reader created in execute() for the list file
+       InputData* input;       //supplies the next list vector (taken from globaldata->ginput)
+       FastaMap* fasta;        //name to sequence lookup filled from the fasta file
+       string filename, fastafile, namesfile;  //input file names; filename is not used in binsequencecommand.cpp
+       ofstream out;   //the fasta file being written
+       ifstream in, inNames;   //the fasta file and the names file
+       
+       void readNamesFile();   //adds the names listed in the names file to the FastaMap
+};
+
+#endif
index b3e8d8fbfa817900f6f1802310d9c18680bc1c15..5994af594ae182f63087cd9c08ca6f96ec4a1187 100644 (file)
@@ -25,8 +25,6 @@
 
 
 //**********************************************************************************************************************
-
-
 CollectCommand::CollectCommand(){
        try {
                globaldata = GlobalData::getInstance();
index 93b56073dee7955dc60d410347da82b4bf6d2dc2..9c24339d3d3745a1b2a6591af023c0fbb7ab3901 100644 (file)
@@ -12,7 +12,7 @@
 #include "command.hpp"
 #include "ordervector.hpp"
 #include "inputdata.h"
-#include "groupmap.h"
+//#include "groupmap.h"
 #include "collect.h"
 #include "display.h"
 #include "readmatrix.hpp"
index 732fc8576b791b35bb8c5ee50a75c1cc19992de1..354b5a3ba4de65d6576e408ff880930a11c0c1e0 100644 (file)
@@ -34,6 +34,7 @@
 #include "venncommand.h"
 #include "mothur.h"
 #include "nocommands.h"
+#include "binsequencecommand.h"
 
 
 /***********************************************************/
@@ -79,6 +80,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "libshuff")              {   command = new LibShuffCommand();                    }
                else if(commandName == "heatmap")                               {   command = new HeatMapCommand();                             }
                else if(commandName == "venn")                                  {   command = new VennCommand();                                }
+               else if(commandName == "bin.seqs")                              {   command = new BinSeqCommand();                              }
                else                                                                                    {       command = new NoCommand();                                      }
 
                return command;
index cf28221b7c61576954a73643d039f3055002a853..36cc7766f03097235fa274f12330611d616cdacb 100644 (file)
@@ -32,7 +32,7 @@ int DeconvoluteCommand::execute() {
                //print out new names file 
                //file contains 2 columns separated by tabs.  the first column is the groupname(name of first sequence found.
                //the second column is the list of names of identical sequences separated by ','.
-               fastamap->print(out);
+               fastamap->printNamesFile(out);
                fastamap->printCondensedFasta(outFasta);
        
                return 0;
index 59cfb66fcc3d0f3e7136eabfebeacb4cd419d76c..da1d1bbb614ffd78f03484c3ca956e3f5aba21ac 100644 (file)
@@ -228,6 +228,11 @@ bool ErrorCheck::checkInput(string input) {
                                 cout << "You must read a list, or a list and a group, or a shared before you can use the heatmap or venn commands." << endl; return false; 
                        }
                }
+               
+               if ((commandName == "bin.seqs")) { 
+                       if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the bin.seqs command." << endl; return false; }
+                       validateBinFiles();
+               }
 
                return errorFree;
 }
@@ -470,6 +475,52 @@ void ErrorCheck::validateReadPhil() {
 }
 /*******************************************************/
 
+/******************************************************/
+//This function checks to make sure the user entered appropriate
+// format parameters on a bin.seq command
+void ErrorCheck::validateBinFiles() {
+       try {
+               ifstream filehandle;
+               int ableToOpen;
+               
+               if (fastafile == "") {
+                               cout << "fasta is a required parameter for bin.seqs." << endl; errorFree = false; 
+               }else if (fastafile != "") {
+                       //is it a valid filename'
+                       ableToOpen = openInputFile(fastafile, filehandle);
+                       filehandle.close();
+                       //unable to open
+                       if (ableToOpen == 1) {  errorFree = false; }
+               }else if (globaldata->getNameFile() != "") {
+                       //is it a valid filename'
+                       ifstream filehandle;
+                       int ableToOpen = openInputFile(globaldata->getNameFile(), filehandle);
+                       filehandle.close();
+                       //unable to open
+                       if (ableToOpen == 1) {  errorFree = false; }
+               }else if (namefile != "") {
+                       //is it a valid filename'
+                       ifstream filehandle;
+                       int ableToOpen = openInputFile(namefile, filehandle);
+                       filehandle.close();
+                       //unable to open
+                       if (ableToOpen == 1) {  errorFree = false; }
+               }
+
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the ErrorCheck class function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+
+/*******************************************************/
+
 /******************************************************/
 
 void ErrorCheck::clear() {
index 42188c06f18f2eaeec08fbc4fc3f855e95d87cb2..0d6da746edaf0d7d5aad0158aaf8fd41c7656572 100644 (file)
@@ -30,6 +30,7 @@ class ErrorCheck {
                void validateReadPhil();
                void validateParseFiles();
                void validateTreeFiles();
+               void validateBinFiles();
                void clear();
                void refresh();
                string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, cutoff, format; 
index 1fefdd15c144edad253892a56be7917b0ea1320b..97c090d34564f92363da4ef1340853b7a204cdee 100644 (file)
@@ -26,6 +26,7 @@ void FastaMap::readFastaFile(ifstream& in) {
                                }
                                else{
                                //input sequence info into map
+                                       seqmap[name] = sequence;  
                                        it = data.find(sequence);
                                        if (it == data.end()) {         //it's unique.
                                                data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
@@ -42,6 +43,7 @@ void FastaMap::readFastaFile(ifstream& in) {
                }
        
                //store last sequence and name info.
+               seqmap[name] = sequence;
                it = data.find(sequence);
                if (it == data.end()) {         //it's unique.
                        data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
@@ -50,7 +52,8 @@ void FastaMap::readFastaFile(ifstream& in) {
                }else { // its a duplicate.
                        data[sequence].names += "," + name;
                        data[sequence].groupnumber++;
-               }       
+               }
+                       
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -74,25 +77,34 @@ int FastaMap::getGroupNumber(string seq) {  //pass a sequence get the number of i
        return data[seq].groupnumber;
 }
 /*******************************************************************************/
-void FastaMap::push_back(string seq, string Name) {//sequencename, name
-       data[seq].groupname = Name;
-       data[seq].names = Name;
-}
-/*******************************************************************************/
-void FastaMap::set(string seq, string groupName, string Names) {
-       data[seq].groupname = groupName;
-       data[seq].names = Names;
-}
+string FastaMap::getSequence(string name) {    //pass a sequence name, get its sequence or "not found"
+       it2 = seqmap.find(name);
+       
+       //a name that was never stored is reported with the "not found" marker
+       return (it2 != seqmap.end()) ? it2->second : string("not found");
+}
 /*******************************************************************************/
-void FastaMap::clear() { //clears out data
-       data.clear();
+void FastaMap::push_back(string name, string seq) {
+       it = data.find(seq);
+       if (it == data.end()) {         //it's unique.
+               data[seq].groupname = name;  //group name will be the name of the first duplicate sequence found.
+               data[seq].groupnumber = 1;
+               data[seq].names = name;
+       }else { // its a duplicate.
+               data[seq].names += "," + name;
+               data[seq].groupnumber++;
+       }
+       
+       seqmap[name] = seq;
 }
 /*******************************************************************************/
-int FastaMap::size(){ //returns datas size which is the number of unique sequences
+int FastaMap::sizeUnique(){ //returns the size of data, which is the number of unique sequences
        return data.size();
 }
 /*******************************************************************************/
-void FastaMap::print(ostream& out){ //prints data
+void FastaMap::printNamesFile(ostream& out){ //prints data
        try {
                // two column file created with groupname and them list of identical sequence names
                for (it = data.begin(); it != data.end(); it++) {
index 169974b27f107a00ba2a0f7738d7551160a37c62..6dd4cba381c08e4f0717f996beeb762ee146aaa4 100644 (file)
@@ -30,13 +30,12 @@ public:
        string getGroupName(string);  //pass a sequence name get its group
        int getGroupNumber(string);  //pass a sequence name get number of sequence in its group
        string getNames(string);        //pass a sequence get the string of names in the group separated by ','s.
-       void push_back(string, string); //sequencename, groupname
-       void set(string, string, string); //sequencename, groupname, groupnumber, names.
-       void clear();
-       int size();                                     //returns number of unique sequences
-       void print(ostream&);           //produces a 2 column file with the groupname in the first column and the names in the second column.
+       void push_back(string, string); //sequencename, sequence
+       int sizeUnique();                                       //returns number of unique sequences
+       void printNamesFile(ostream&);          //produces a 2 column file with the groupname in the first column and the names in the second column - a names file.
        void printCondensedFasta(ostream&);             //produces a fasta file.
        void readFastaFile(ifstream&);
+       string getSequence(string);             //pass it a name of a sequence, it returns the sequence.
 
 private:
        struct group {
@@ -45,8 +44,10 @@ private:
                string names;                                           //the names of the sequence separated by ','.
        };
 
-       map<string, group>  data;  //sequence, groupinfo
+       map<string, group>  data;  //sequence, groupinfo        - condensed representation of file
+       map<string, string>  seqmap;  //name, sequence  -  uncondensed representation of file
        map<string, group>::iterator it;
+       map<string, string>::iterator it2;
 };
 
 #endif
index d69577311ec2a091e5ff46673e92ba13119db252..9f5b4150d7e9ed1c9446321a8c747e69216cfa3f 100644 (file)
@@ -209,6 +209,15 @@ int HelpCommand::execute(){
                cout << "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups." << "\n";
                cout << "The venn command outputs a .svg file for each calculator you specify at each distance you choose." << "\n";
                cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
+       }else if (globaldata->helpRequest == "bin.seqs") { 
+               cout << "The bin.seqs command can only be executed after a successful read.otu command of a list file." << "\n";
+               cout << "The bin.seqs command parameters are fasta, name, line and label.  The fasta parameter is required, and you may not use line and label at the same time." << "\n";
+               cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n";
+               cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n";
+               cout << "Example bin.seqs(fasta=amazon.fasta, line=1-3-5, name=amazon.names)." << "\n";
+               cout << "The default value for line and label are all lines in your inputfile." << "\n";
+               cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n";
+               cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "quit") {
                cout << "The quit command will terminate Dotur and should be in the following format: " << "\n";
                cout << "quit()" << "\n" << "\n";
index 95e9d34974e005b50629d48502257fd7579f08c9..592367a5fbf0c07f0587a577551654173eb5e15a 100644 (file)
@@ -33,6 +33,7 @@ InputData::~InputData(){
 
 InputData::InputData(string fName, string orderFileName, string f) : format(f){
        try {
+               
                ifstream ofHandle;
                openInputFile(orderFileName, ofHandle);
                string name;
index fc1a8b6f48821a3d66aa1fb9c70799260c56c241..7d08adf8ad3b0a0ad5a25bc98bbf74d1bacda3b9 100644 (file)
@@ -126,6 +126,8 @@ EstOutput Parsimony::getValues(Tree* t) {
 //                     cin >> hold;
                }
                
+               delete copyTree;
+               
                return data;
        }
        catch(exception& e) {
index 493af9b867e5542ee4c4c93acfa6c40a9c95fc5a..190b7d6ce1b913c733e52b974451cb9b3d81664b 100644 (file)
@@ -51,7 +51,9 @@ ParsimonyCommand::ParsimonyCommand() {
 /***********************************************************/
 int ParsimonyCommand::execute() {
        try {
-       
+               Progress* reading;
+               reading = new Progress("Comparing to random:", iters);
+               
                //get pscore for users tree
                userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
                randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
@@ -66,9 +68,10 @@ int ParsimonyCommand::execute() {
                        //get pscores for users trees
                        for (int i = 0; i < T.size(); i++) {
                                userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
-                               
+
                                //output scores for each combination
                                for(int k = 0; k < numComp; k++) {
+
                                        //update uscoreFreq
                                        it = uscoreFreq[k].find(userData[k]);
                                        if (it == uscoreFreq[k].end()) {//new score
@@ -87,11 +90,13 @@ int ParsimonyCommand::execute() {
                        for (int j = 0; j < iters; j++) {
                                //create new tree with same num nodes and leaves as users
                                randT = new Tree();
+
                                //create random relationships between nodes
                                randT->assembleRandomTree();
+
                                //get pscore of random tree
                                randomData = pars->getValues(randT);
-                               
+                                       
                                for(int r = 0; r < numComp; r++) {
                                        //add trees pscore to map of scores
                                        it2 = rscoreFreq[r].find(randomData[r]);
@@ -105,18 +110,24 @@ int ParsimonyCommand::execute() {
                                        validScores[randomData[r]] = randomData[r];
                                }
                                
+                               //update progress bar
+                               reading->update(j);
+                               
                                delete randT;
                        }
+
                }else {
                        //get pscores for random trees
                        for (int j = 0; j < iters; j++) {
                                //create new tree with same num nodes and leaves as users
                                randT = new Tree();
                                //create random relationships between nodes
+
                                randT->assembleRandomTree();
+
                                //get pscore of random tree
                                randomData = pars->getValues(randT);
-                               
+                       
                                for(int r = 0; r < numComp; r++) {
                                        //add trees pscore to map of scores
                                        it2 = rscoreFreq[r].find(randomData[r]);
@@ -130,10 +141,13 @@ int ParsimonyCommand::execute() {
                                        validScores[randomData[r]] = randomData[r];
                                }
                                
+                               //update progress bar
+                               reading->update(j);
+                               
                                delete randT;
                        }
                }
-               
+
                for(int a = 0; a < numComp; a++) {
                        float rcumul = 0.0000;
                        float ucumul = 0.0000;
@@ -162,6 +176,11 @@ int ParsimonyCommand::execute() {
                        }
                }
                
+               //finish progress bar
+               reading->finish();
+               delete reading;
+
+               
                printParsimonyFile();
                if (randomtree == "") { printUSummaryFile(); }
                
index 8907e2165c82ac291a902fe04ca3dd00638bd3db..e093ceb2de8efbbacf2d58e4d46257563b0797c3 100644 (file)
@@ -12,6 +12,7 @@
 #include "command.hpp"
 #include "parsimony.h"
 #include "treemap.h"
+#include "progress.hpp"
 
 using namespace std;
 
index 529eef5c7cf8b70e12f30dde6b4c2b661a01207f..70774d7e06819705a77b51bd38dedc69815ab240 100644 (file)
@@ -342,6 +342,7 @@ void ReadPhilFile::read(GlobalData* globaldata){
                        //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file.  
                        //So when you run the collect or summary commands you miss a line.
                        input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
+                       inputList = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
                        inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared.
                }else {//there is an orderfile
                        input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat());
@@ -358,6 +359,8 @@ void ReadPhilFile::read(GlobalData* globaldata){
                        globaldata->gorder = order;     //saving to be used by collect and rarefact commands.
                        sabund = inputSabund->getSAbundVector(); 
                        globaldata->sabund = sabund; //saving to be used by summary command.
+                       list = inputList->getListVector();
+                       globaldata->gListVector = list;
                }else if (globaldata->getFormat() == "shared") {
                        SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands.
                        //memory leak prevention
index 44e7bf6a61932122b8e6212f01cb1773b91de9ed..4e26881d1c883d34b4262bb1c38935525cc6f565 100644 (file)
@@ -82,6 +82,7 @@ private:
        string philFile;
        InputData* input;
        InputData* inputSabund;
+       InputData* inputList;
        ListVector* list;
        SharedListVector* SharedList;
        OrderVector* order;
index 1e115bddad1a451c181c4c26b90b758788df1155..22892e2cbd7c9738261d5632492eafe674e7471b 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -49,6 +49,8 @@ Tree::Tree() {
        }               
 }
 
+/*****************************************************************/
+Tree::~Tree() {} //out-of-line destructor definition; the body is empty
 /*****************************************************************/
 int Tree::getIndex(string searchName) {
        try {
diff --git a/tree.h b/tree.h
index d6a3b556f09ca251a2cbad64e07c0187dd50e6b4..ac57a4a8168701e5d9f285537a8550de3f946bfa 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -20,7 +20,7 @@ using namespace std;
 class Tree {
 public: 
        Tree();         //to generate a tree from a file
-       ~Tree() {};
+       ~Tree();
        
        
        void getCopy(Tree*);  //makes tree a copy of the one passed in.
index a5c39167ca6bda64c8058e99b16a02ca4e07c748..a83bbca615d6300e113e4fdf402e64934adb189c 100644 (file)
@@ -20,7 +20,7 @@ using namespace std;
 class Node  {
        public:
                Node();  //pass it the sequence name
-               ~Node() {};
+               ~Node() { pGroups.clear(); pcount.clear(); };
                
                void setName(string);
                void setGroup(string);  
index 75ab99682974b3152d3c5020130855788ef5f151..65f30ebc949f4945023fd295224d00d52827d82b 100644 (file)
@@ -36,7 +36,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() {
 /***********************************************************/
 int UnifracUnweightedCommand::execute() {
        try {
-       
+
                userData.resize(numComp,0);  //data[0] = unweightedscore 
                randomData.resize(numComp,0); //data[0] = unweightedscore
                //create new tree with same num nodes and leaves as users
@@ -85,6 +85,7 @@ int UnifracUnweightedCommand::execute() {
                                        //add randoms score to validscores
                                        validScores[randomData[k]] = randomData[k];
                                }
+                               
                        }
                
                for(int a = 0; a < numComp; a++) {
@@ -101,6 +102,8 @@ int UnifracUnweightedCommand::execute() {
                        UWScoreSig[a].push_back(rCumul[a][userData[a]]);
                }
                
+               
+               
                printUnweightedFile();
                printUWSummaryFile();
                
index 351e05212437174a9efeb09ecbecc9a7da53f449..3c250db030b04740e59df7ce24ba6b2752dc2023 100644 (file)
@@ -14,6 +14,7 @@
 #include "unweighted.h"
 #include "treemap.h"
 
+
 using namespace std;
 
 class GlobalData;
index 7cb8f058e8a36cea95dcefd32196b3acab024b5e..0a4c7facec93938af068ebad350307ab6f5d4c0d 100644 (file)
@@ -36,6 +36,8 @@ UnifracWeightedCommand::UnifracWeightedCommand() {
 /***********************************************************/
 int UnifracWeightedCommand::execute() {
        try {
+               Progress* reading;
+               reading = new Progress("Comparing to random:", iters);
                
                //get weighted for users tree
                userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
@@ -81,6 +83,10 @@ int UnifracWeightedCommand::execute() {
                                                count++;
                                        }
                                }
+                               
+                               //update progress bar
+                               reading->update(j);
+
                        }
 
                        //removeValidScoresDuplicates(); 
@@ -109,6 +115,10 @@ int UnifracWeightedCommand::execute() {
                        validScores.clear();
                }
                
+               //finish progress bar
+               reading->finish();
+               delete reading;
+               
                printWSummaryFile();
                
                //clear out users groups
index e10aa44e8462d4690e095e4ab1ef37ea8542e9cd..ea2c41814ac1940a61bd8c48995ecebcff30f324 100644 (file)
@@ -13,6 +13,7 @@
 #include "command.hpp"
 #include "weighted.h"
 #include "treemap.h"
+#include "progress.hpp"
 
 using namespace std;
 
index d1b32490beca985771529fb0665842109272329b..eea96bffafd5408b95ec87c2ed7630a16ad36167 100644 (file)
@@ -297,6 +297,31 @@ inline void splitAtDash(string& estim, set<int>& container) {
 
 }
 /***********************************************************************/
+//This function splits estim at each comma and appends the pieces, in order, to container. estim is passed by reference and is consumed: on return it holds only the text after the last comma.
+inline void splitAtComma(string& estim, vector<string>& container) {
+       try {
+               string individual;
+               
+               while (estim.find_first_of(',') != -1) { //-1 converts to string::npos, so this loops while a comma remains
+                       individual = estim.substr(0,estim.find_first_of(','));
+                       if ((estim.find_first_of(',')+1) <= estim.length()) { //meant to guard against a comma at the end of the string. NOTE(review): a found comma's index+1 is never greater than length(), so this is always true and a trailing comma leaves an empty final piece
+                               estim = estim.substr(estim.find_first_of(',')+1, estim.length());
+                               container.push_back(individual);
+                       }
+               }
+               //whatever remains after the last comma (the whole string when there was no comma) is the final piece
+               container.push_back(estim);
+       }
+       catch(exception& e) { //report the error on cout and terminate the program via exit(1)
+               cout << "Standard Error: " << e.what() << " has occurred in the utilities class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) { //same handling for anything that is not a std exception
+               cout << "An unknown error has occurred in the utilities class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+/***********************************************************************/
 
 //This function splits up the various option parameters
 inline void splitAtComma(string& prefix, string& suffix){
index 20dd588bf85b14fe15d3534471200bd76b3a231e..2adbc0ee298ccc7b177d2c87be0c11cfb7df4f77 100644 (file)
@@ -17,6 +17,7 @@ ValidCommands::ValidCommands() {
                commands["read.dist"]                   = "read.dist"; 
                commands["read.otu"]                    = "read.otu";
                commands["read.tree"]                   = "read.tree"; 
+               commands["bin.seqs"]                    = "bin.seqs"; 
                commands["cluster"]                             = "cluster"; 
                commands["deconvolute"]                 = "deconvolute"; 
                commands["collect.single"]              = "collect.single"; 
index 3806f5054b9c2a0faad7787c9ff9549b13c1fda6..5278d51fa3ef2da9429f12c89f4e12d4923d67fe 100644 (file)
@@ -289,6 +289,9 @@ void ValidParameters::initCommandParameters() {
                string vennArray[] =  {"groups","line","label","calc"};
                commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string));
                
+               string binseqsArray[] =  {"fasta","line","label","name"};
+               commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string));
+               
                string quitArray[] = {};
                commandParameters["quit"] = addParameters(quitArray, sizeof(quitArray)/sizeof(string));