From f18d75f9c996cb619a5051a7664620c0c5ae05fb Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 6 Apr 2009 14:48:16 +0000 Subject: [PATCH] fixed memory leak in parsimony calculator and added progress bars to parsimony and unifrac.weighted commands. also added bin.seqs command. --- Mothur.xcodeproj/project.pbxproj | 12 ++- binsequencecommand.cpp | 164 +++++++++++++++++++++++++++++++ binsequencecommand.h | 42 ++++++++ collectcommand.cpp | 2 - collectcommand.h | 2 +- commandfactory.cpp | 2 + deconvolutecommand.cpp | 2 +- errorchecking.cpp | 51 ++++++++++ errorchecking.h | 1 + fastamap.cpp | 40 +++++--- fastamap.h | 13 +-- helpcommand.cpp | 9 ++ inputdata.cpp | 1 + parsimony.cpp | 2 + parsimonycommand.cpp | 29 +++++- parsimonycommand.h | 1 + readmatrix.cpp | 3 + readmatrix.hpp | 1 + tree.cpp | 2 + tree.h | 2 +- treenode.h | 2 +- unifracunweightedcommand.cpp | 5 +- unifracunweightedcommand.h | 1 + unifracweightedcommand.cpp | 10 ++ unifracweightedcommand.h | 1 + utilities.hpp | 25 +++++ validcommands.cpp | 1 + validparameter.cpp | 3 + 28 files changed, 394 insertions(+), 35 deletions(-) create mode 100644 binsequencecommand.cpp create mode 100644 binsequencecommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 17bf8ae..619737b 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -37,6 +37,7 @@ 37AD4DCA0F28F3DD00AA2D49 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */; }; 37AFC71F0F445386005F492D /* sharedsobscollectsummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */; }; 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; }; + 37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */; }; 37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; }; 37D928560F21331F001D4494 /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BA0F21331F001D4494 /* averagelinkage.cpp */; }; 37D928570F21331F001D4494 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927BB0F21331F001D4494 /* bootstrap.cpp */; }; @@ -195,6 +196,8 @@ 37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedsobscollectsummary.cpp; sourceTree = ""; }; 37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = ""; }; 37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = ""; }; + 37C1D9710F86506E0059E3F0 /* binsequencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = binsequencecommand.h; sourceTree = ""; }; + 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = binsequencecommand.cpp; sourceTree = ""; }; 37D927B80F21331F001D4494 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; 37D927B90F21331F001D4494 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = ""; }; 37D927BA0F21331F001D4494 /* averagelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = averagelinkage.cpp; sourceTree = ""; }; @@ -444,8 +447,6 @@ 37D928210F21331F001D4494 /* shared.h */, 37D928200F21331F001D4494 /* shared.cpp */, 37D928420F21331F001D4494 /* singlelinkage.cpp */, - 37D928450F21331F001D4494 /* sparsematrix.hpp */, - 37D928440F21331F001D4494 /* sparsematrix.cpp */, 37D928480F21331F001D4494 /* summarydata.h */, 37D928490F21331F001D4494 /* summarydisplay.h */, 37D9284C0F21331F001D4494 /* utilities.hpp */, @@ -558,6 +559,8 @@ isa = PBXGroup; children = ( 37D927CD0F21331F001D4494 /* command.hpp */, + 37C1D9710F86506E0059E3F0 /* binsequencecommand.h */, + 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */, 37D927C40F21331F001D4494 /* clustercommand.h */, 37D927C30F21331F001D4494 /* clustercommand.cpp */, 37D927C80F21331F001D4494 /* collectcommand.h */, @@ -576,8 +579,8 @@ 375873F00F7D64800040F377 /* heatmapcommand.cpp */, 37D927E40F21331F001D4494 /* helpcommand.h */, 37D927E30F21331F001D4494 /* helpcommand.cpp */, - 375873F30F7D648F0040F377 /* libshuffcommand.cpp */, 375873F40F7D648F0040F377 /* libshuffcommand.h */, + 375873F30F7D648F0040F377 /* libshuffcommand.cpp */, 375873F60F7D649C0040F377 /* nocommands.cpp */, 375873F70F7D649C0040F377 /* nocommands.h */, 37D927FA0F21331F001D4494 /* parselistcommand.h */, @@ -638,6 +641,8 @@ 37D928300F21331F001D4494 /* sharedrabundvector.cpp */, 37D928330F21331F001D4494 /* sharedsabundvector.h */, 37D928320F21331F001D4494 /* sharedsabundvector.cpp */, + 37D928450F21331F001D4494 /* sparsematrix.hpp */, + 37D928440F21331F001D4494 /* sparsematrix.cpp */, 37AD4DB90F28E2FE00AA2D49 /* tree.h */, 37AD4DBA0F28E2FE00AA2D49 /* tree.cpp */, 379293C10F2DE73400B9034A /* treemap.h */, @@ -823,6 +828,7 @@ 37519A6B0F80E6EB00FED5E8 /* sharedanderbergs.cpp in Sources */, 37519AA10F810D0200FED5E8 /* venncommand.cpp in Sources */, 37519AB50F810FAE00FED5E8 /* venn.cpp in Sources */, + 37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp new file mode 100644 index 0000000..4db41bf --- /dev/null +++ b/binsequencecommand.cpp @@ -0,0 +1,164 @@ +/* + * binsequencecommand.cpp + * Mothur + * + * Created by Sarah Westcott on 4/3/09. + * Copyright 2009 Schloss Lab UMASS Amhers. All rights reserved. + * + */ + +#include "binsequencecommand.h" + +//********************************************************************************************************************** +BinSeqCommand::BinSeqCommand(){ + try { + globaldata = GlobalData::getInstance(); + fastafile = globaldata->getFastaFile(); + namesfile = globaldata->getNameFile(); + openInputFile(fastafile, in); + + fasta = new FastaMap(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BinSeqCommand class function BinSeqCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + +BinSeqCommand::~BinSeqCommand(){ + delete input; + delete read; + delete fasta; + delete list; +} + +//********************************************************************************************************************** + +int BinSeqCommand::execute(){ + try { + int count = 1; + string binnames, name, sequence; + + //read fastafile + fasta->readFastaFile(in); + + //set format to list so input can get listvector + globaldata->setFormat("list"); + + //if user gave a namesfile then use it + if (namesfile != "") { + readNamesFile(); + } + + //read list file + read = new ReadPhilFile(globaldata->getListFile()); + read->read(&*globaldata); + + input = globaldata->ginput; + list = globaldata->gListVector; + + while(list != NULL){ + + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){ + + //create output file + string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta"; + openOutputFile(outputFileName, out); + + cout << list->getLabel() << '\t' << count << endl; + + //for each bin in the list vector + for (int i = 0; i < list->size(); i++) { + binnames = list->get(i); + while (binnames.find_first_of(',') != -1) { + name = binnames.substr(0,binnames.find_first_of(',')); + binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); + + //do work for that name + sequence = fasta->getSequence(name); + if (sequence != "not found") { + name = name + "bin" + toString(i+1); + out << ">" << name << endl; + out << sequence << endl; + }else { + cout << name << " is missing from your fasta or name file. Please correct. " << endl; + remove(outputFileName.c_str()); + return 0; + } + + } + + //get last name + sequence = fasta->getSequence(binnames); + if (sequence != "not found") { + name = binnames + "bin" + toString(i+1); + out << ">" << name << endl; + out << sequence << endl; + }else { + cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; + remove(outputFileName.c_str()); + return 0; + } + } + } + + list = input->getListVector(); + count++; + } + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** +void BinSeqCommand::readNamesFile() { + try { + vector dupNames; + openInputFile(namesfile, inNames); + + string name, names, sequence; + + while(inNames){ + inNames >> name; //read from first column A + inNames >> names; //read from second column A,B,C,D + + dupNames.clear(); + + //parse names into vector + splitAtComma(names, dupNames); + + //store names in fasta map + sequence = fasta->getSequence(name); + for (int i = 0; i < dupNames.size(); i++) { + fasta->push_back(dupNames[i], sequence); + } + + gobble(inNames); + } + inNames.close(); + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** diff --git a/binsequencecommand.h b/binsequencecommand.h new file mode 100644 index 0000000..22847dc --- /dev/null +++ b/binsequencecommand.h @@ -0,0 +1,42 @@ +#ifndef BINSEQCOMMAND_H +#define BINSEQCOMMAND_H +/* + * binsequencecommand.h + * Mothur + * + * Created by Sarah Westcott on 4/3/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + + +#include "command.hpp" +#include "inputdata.h" +#include "listvector.hpp" +#include "readmatrix.hpp" +#include "fastamap.h" + + +class GlobalData; + +class BinSeqCommand : public Command { + +public: + BinSeqCommand(); + ~BinSeqCommand(); + int execute(); + +private: + GlobalData* globaldata; + ListVector* list; + ReadMatrix* read; + InputData* input; + FastaMap* fasta; + string filename, fastafile, namesfile; + ofstream out; + ifstream in, inNames; + + void readNamesFile(); +}; + +#endif diff --git a/collectcommand.cpp b/collectcommand.cpp index b3e8d8f..5994af5 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -25,8 +25,6 @@ //********************************************************************************************************************** - - CollectCommand::CollectCommand(){ try { globaldata = GlobalData::getInstance(); diff --git a/collectcommand.h b/collectcommand.h index 93b5607..9c24339 100644 --- a/collectcommand.h +++ b/collectcommand.h @@ -12,7 +12,7 @@ #include "command.hpp" #include "ordervector.hpp" #include "inputdata.h" -#include "groupmap.h" +//#include "groupmap.h" #include "collect.h" #include "display.h" #include "readmatrix.hpp" diff --git a/commandfactory.cpp b/commandfactory.cpp index 732fc85..354b5a3 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -34,6 +34,7 @@ #include "venncommand.h" #include "mothur.h" #include "nocommands.h" +#include "binsequencecommand.h" /***********************************************************/ @@ -79,6 +80,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "libshuff") { command = new LibShuffCommand(); } else if(commandName == "heatmap") { command = new HeatMapCommand(); } else if(commandName == "venn") { command = new VennCommand(); } + else if(commandName == "bin.seqs") { command = new BinSeqCommand(); } else { command = new NoCommand(); } return command; diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp index cf28221..36cc776 100644 --- a/deconvolutecommand.cpp +++ b/deconvolutecommand.cpp @@ -32,7 +32,7 @@ int DeconvoluteCommand::execute() { //print out new names file //file contains 2 columns separated by tabs. the first column is the groupname(name of first sequence found. //the second column is the list of names of identical sequences separated by ','. - fastamap->print(out); + fastamap->printNamesFile(out); fastamap->printCondensedFasta(outFasta); return 0; diff --git a/errorchecking.cpp b/errorchecking.cpp index 59cfb66..da1d1bb 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -228,6 +228,11 @@ bool ErrorCheck::checkInput(string input) { cout << "You must read a list, or a list and a group, or a shared before you can use the heatmap or venn commands." << endl; return false; } } + + if ((commandName == "bin.seqs")) { + if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the bin.seqs command." << endl; return false; } + validateBinFiles(); + } return errorFree; } @@ -470,6 +475,52 @@ void ErrorCheck::validateReadPhil() { } /*******************************************************/ +/******************************************************/ +//This function checks to make sure the user entered appropriate +// format parameters on a bin.seq command +void ErrorCheck::validateBinFiles() { + try { + ifstream filehandle; + int ableToOpen; + + if (fastafile == "") { + cout << "fasta is a required parameter for bin.seqs." << endl; errorFree = false; + }else if (fastafile != "") { + //is it a valid filename' + ableToOpen = openInputFile(fastafile, filehandle); + filehandle.close(); + //unable to open + if (ableToOpen == 1) { errorFree = false; } + }else if (globaldata->getNameFile() != "") { + //is it a valid filename' + ifstream filehandle; + int ableToOpen = openInputFile(globaldata->getNameFile(), filehandle); + filehandle.close(); + //unable to open + if (ableToOpen == 1) { errorFree = false; } + }else if (namefile != "") { + //is it a valid filename' + ifstream filehandle; + int ableToOpen = openInputFile(namefile, filehandle); + filehandle.close(); + //unable to open + if (ableToOpen == 1) { errorFree = false; } + } + + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ErrorCheck class Function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ErrorCheck class function validateBinFiles. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/*******************************************************/ + /******************************************************/ void ErrorCheck::clear() { diff --git a/errorchecking.h b/errorchecking.h index 42188c0..0d6da74 100644 --- a/errorchecking.h +++ b/errorchecking.h @@ -30,6 +30,7 @@ class ErrorCheck { void validateReadPhil(); void validateParseFiles(); void validateTreeFiles(); + void validateBinFiles(); void clear(); void refresh(); string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, cutoff, format; diff --git a/fastamap.cpp b/fastamap.cpp index 1fefdd1..97c090d 100644 --- a/fastamap.cpp +++ b/fastamap.cpp @@ -26,6 +26,7 @@ void FastaMap::readFastaFile(ifstream& in) { } else{ //input sequence info into map + seqmap[name] = sequence; it = data.find(sequence); if (it == data.end()) { //it's unique. data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. @@ -42,6 +43,7 @@ void FastaMap::readFastaFile(ifstream& in) { } //store last sequence and name info. + seqmap[name] = sequence; it = data.find(sequence); if (it == data.end()) { //it's unique. data[sequence].groupname = name; //group name will be the name of the first duplicate sequence found. @@ -50,7 +52,8 @@ void FastaMap::readFastaFile(ifstream& in) { }else { // its a duplicate. data[sequence].names += "," + name; data[sequence].groupnumber++; - } + } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readFastaFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -74,25 +77,34 @@ int FastaMap::getGroupNumber(string seq) { //pass a sequence get the number of i return data[seq].groupnumber; } /*******************************************************************************/ -void FastaMap::push_back(string seq, string Name) {//sequencename, name - data[seq].groupname = Name; - data[seq].names = Name; -} -/*******************************************************************************/ -void FastaMap::set(string seq, string groupName, string Names) { - data[seq].groupname = groupName; - data[seq].names = Names; -} +string FastaMap::getSequence(string name) { + it2 = seqmap.find(name); + if (it2 == seqmap.end()) { //it's not found + return "not found"; + }else { // found it + return it2->second; + } +} /*******************************************************************************/ -void FastaMap::clear() { //clears out data - data.clear(); +void FastaMap::push_back(string name, string seq) { + it = data.find(seq); + if (it == data.end()) { //it's unique. + data[seq].groupname = name; //group name will be the name of the first duplicate sequence found. + data[seq].groupnumber = 1; + data[seq].names = name; + }else { // its a duplicate. + data[seq].names += "," + name; + data[seq].groupnumber++; + } + + seqmap[name] = seq; } /*******************************************************************************/ -int FastaMap::size(){ //returns datas size which is the number of unique sequences +int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences return data.size(); } /*******************************************************************************/ -void FastaMap::print(ostream& out){ //prints data +void FastaMap::printNamesFile(ostream& out){ //prints data try { // two column file created with groupname and them list of identical sequence names for (it = data.begin(); it != data.end(); it++) { diff --git a/fastamap.h b/fastamap.h index 169974b..6dd4cba 100644 --- a/fastamap.h +++ b/fastamap.h @@ -30,13 +30,12 @@ public: string getGroupName(string); //pass a sequence name get its group int getGroupNumber(string); //pass a sequence name get number of sequence in its group string getNames(string); //pass a sequence get the string of names in the group separated by ','s. - void push_back(string, string); //sequencename, groupname - void set(string, string, string); //sequencename, groupname, groupnumber, names. - void clear(); - int size(); //returns number of unique sequences - void print(ostream&); //produces a 2 column file with the groupname in the first column and the names in the second column. + void push_back(string, string); //sequencename, sequence + int sizeUnique(); //returns number of unique sequences + void printNamesFile(ostream&); //produces a 2 column file with the groupname in the first column and the names in the second column - a names file. void printCondensedFasta(ostream&); //produces a fasta file. void readFastaFile(ifstream&); + string getSequence(string); //pass it a name of a sequence, it returns the sequence. private: struct group { @@ -45,8 +44,10 @@ private: string names; //the names of the sequence separated by ','. }; - map data; //sequence, groupinfo + map data; //sequence, groupinfo - condensed representation of file + map seqmap; //name, sequence - uncondensed representation of file map::iterator it; + map::iterator it2; }; #endif diff --git a/helpcommand.cpp b/helpcommand.cpp index d695773..9f5b415 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -209,6 +209,15 @@ int HelpCommand::execute(){ cout << "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups." << "\n"; cout << "The venn command outputs a .svg file for each calculator you specify at each distance you choose." << "\n"; cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + }else if (globaldata->helpRequest == "bin.seqs") { + cout << "The bin.seqs command can only be executed after a successful read.otu command of a list file." << "\n"; + cout << "The bin.seqs command parameters are fasta, name, line and label. The fasta parameter is required, and you may not use line and label at the same time." << "\n"; + cout << "The line and label allow you to select what distance levels you would like a output files created for, and are separated by dashes." << "\n"; + cout << "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, line=yourLines, label=yourLabels)." << "\n"; + cout << "Example bin.seqs(fasta=amazon.fasta, line=1-3-5, name=amazon.names)." << "\n"; + cout << "The default value for line and label are all lines in your inputfile." << "\n"; + cout << "The bin.seqs command outputs a .fasta file for each distance you specify appending the OTU number to each name." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n"; }else if (globaldata->helpRequest == "quit") { cout << "The quit command will terminate Dotur and should be in the following format: " << "\n"; cout << "quit()" << "\n" << "\n"; diff --git a/inputdata.cpp b/inputdata.cpp index 95e9d34..592367a 100644 --- a/inputdata.cpp +++ b/inputdata.cpp @@ -33,6 +33,7 @@ InputData::~InputData(){ InputData::InputData(string fName, string orderFileName, string f) : format(f){ try { + ifstream ofHandle; openInputFile(orderFileName, ofHandle); string name; diff --git a/parsimony.cpp b/parsimony.cpp index fc1a8b6..7d08adf 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -126,6 +126,8 @@ EstOutput Parsimony::getValues(Tree* t) { // cin >> hold; } + delete copyTree; + return data; } catch(exception& e) { diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 493af9b..190b7d6 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -51,7 +51,9 @@ ParsimonyCommand::ParsimonyCommand() { /***********************************************************/ int ParsimonyCommand::execute() { try { - + Progress* reading; + reading = new Progress("Comparing to random:", iters); + //get pscore for users tree userData.resize(numComp,0); //data = AB, AC, BC, ABC. randomData.resize(numComp,0); //data = AB, AC, BC, ABC. @@ -66,9 +68,10 @@ int ParsimonyCommand::execute() { //get pscores for users trees for (int i = 0; i < T.size(); i++) { userData = pars->getValues(T[i]); //data = AB, AC, BC, ABC. - + //output scores for each combination for(int k = 0; k < numComp; k++) { + //update uscoreFreq it = uscoreFreq[k].find(userData[k]); if (it == uscoreFreq[k].end()) {//new score @@ -87,11 +90,13 @@ int ParsimonyCommand::execute() { for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users randT = new Tree(); + //create random relationships between nodes randT->assembleRandomTree(); + //get pscore of random tree randomData = pars->getValues(randT); - + for(int r = 0; r < numComp; r++) { //add trees pscore to map of scores it2 = rscoreFreq[r].find(randomData[r]); @@ -105,18 +110,24 @@ int ParsimonyCommand::execute() { validScores[randomData[r]] = randomData[r]; } + //update progress bar + reading->update(j); + delete randT; } + }else { //get pscores for random trees for (int j = 0; j < iters; j++) { //create new tree with same num nodes and leaves as users randT = new Tree(); //create random relationships between nodes + randT->assembleRandomTree(); + //get pscore of random tree randomData = pars->getValues(randT); - + for(int r = 0; r < numComp; r++) { //add trees pscore to map of scores it2 = rscoreFreq[r].find(randomData[r]); @@ -130,10 +141,13 @@ int ParsimonyCommand::execute() { validScores[randomData[r]] = randomData[r]; } + //update progress bar + reading->update(j); + delete randT; } } - + for(int a = 0; a < numComp; a++) { float rcumul = 0.0000; float ucumul = 0.0000; @@ -162,6 +176,11 @@ int ParsimonyCommand::execute() { } } + //finish progress bar + reading->finish(); + delete reading; + + printParsimonyFile(); if (randomtree == "") { printUSummaryFile(); } diff --git a/parsimonycommand.h b/parsimonycommand.h index 8907e21..e093ceb 100644 --- a/parsimonycommand.h +++ b/parsimonycommand.h @@ -12,6 +12,7 @@ #include "command.hpp" #include "parsimony.h" #include "treemap.h" +#include "progress.hpp" using namespace std; diff --git a/readmatrix.cpp b/readmatrix.cpp index 529eef5..70774d7 100644 --- a/readmatrix.cpp +++ b/readmatrix.cpp @@ -342,6 +342,7 @@ void ReadPhilFile::read(GlobalData* globaldata){ //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file. //So when you run the collect or summary commands you miss a line. input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund. + inputList = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund. inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared. }else {//there is an orderfile input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat()); @@ -358,6 +359,8 @@ void ReadPhilFile::read(GlobalData* globaldata){ globaldata->gorder = order; //saving to be used by collect and rarefact commands. sabund = inputSabund->getSAbundVector(); globaldata->sabund = sabund; //saving to be used by summary command. + list = inputList->getListVector(); + globaldata->gListVector = list; }else if (globaldata->getFormat() == "shared") { SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands. //memory leak prevention diff --git a/readmatrix.hpp b/readmatrix.hpp index 44e7bf6..4e26881 100644 --- a/readmatrix.hpp +++ b/readmatrix.hpp @@ -82,6 +82,7 @@ private: string philFile; InputData* input; InputData* inputSabund; + InputData* inputList; ListVector* list; SharedListVector* SharedList; OrderVector* order; diff --git a/tree.cpp b/tree.cpp index 1e115bd..22892e2 100644 --- a/tree.cpp +++ b/tree.cpp @@ -49,6 +49,8 @@ Tree::Tree() { } } +/*****************************************************************/ +Tree::~Tree() {} /*****************************************************************/ int Tree::getIndex(string searchName) { try { diff --git a/tree.h b/tree.h index d6a3b55..ac57a4a 100644 --- a/tree.h +++ b/tree.h @@ -20,7 +20,7 @@ using namespace std; class Tree { public: Tree(); //to generate a tree from a file - ~Tree() {}; + ~Tree(); void getCopy(Tree*); //makes tree a copy of the one passed in. diff --git a/treenode.h b/treenode.h index a5c3916..a83bbca 100644 --- a/treenode.h +++ b/treenode.h @@ -20,7 +20,7 @@ using namespace std; class Node { public: Node(); //pass it the sequence name - ~Node() {}; + ~Node() { pGroups.clear(); pcount.clear(); }; void setName(string); void setGroup(string); diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 75ab996..65f30eb 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -36,7 +36,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { /***********************************************************/ int UnifracUnweightedCommand::execute() { try { - + userData.resize(numComp,0); //data[0] = unweightedscore randomData.resize(numComp,0); //data[0] = unweightedscore //create new tree with same num nodes and leaves as users @@ -85,6 +85,7 @@ int UnifracUnweightedCommand::execute() { //add randoms score to validscores validScores[randomData[k]] = randomData[k]; } + } for(int a = 0; a < numComp; a++) { @@ -101,6 +102,8 @@ int UnifracUnweightedCommand::execute() { UWScoreSig[a].push_back(rCumul[a][userData[a]]); } + + printUnweightedFile(); printUWSummaryFile(); diff --git a/unifracunweightedcommand.h b/unifracunweightedcommand.h index 351e052..3c250db 100644 --- a/unifracunweightedcommand.h +++ b/unifracunweightedcommand.h @@ -14,6 +14,7 @@ #include "unweighted.h" #include "treemap.h" + using namespace std; class GlobalData; diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 7cb8f05..0a4c7fa 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -36,6 +36,8 @@ UnifracWeightedCommand::UnifracWeightedCommand() { /***********************************************************/ int UnifracWeightedCommand::execute() { try { + Progress* reading; + reading = new Progress("Comparing to random:", iters); //get weighted for users tree userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC... @@ -81,6 +83,10 @@ int UnifracWeightedCommand::execute() { count++; } } + + //update progress bar + reading->update(j); + } //removeValidScoresDuplicates(); @@ -109,6 +115,10 @@ int UnifracWeightedCommand::execute() { validScores.clear(); } + //finish progress bar + reading->finish(); + delete reading; + printWSummaryFile(); //clear out users groups diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h index e10aa44..ea2c418 100644 --- a/unifracweightedcommand.h +++ b/unifracweightedcommand.h @@ -13,6 +13,7 @@ #include "command.hpp" #include "weighted.h" #include "treemap.h" +#include "progress.hpp" using namespace std; diff --git a/utilities.hpp b/utilities.hpp index d1b3249..eea96bf 100644 --- a/utilities.hpp +++ b/utilities.hpp @@ -297,6 +297,31 @@ inline void splitAtDash(string& estim, set& container) { } /***********************************************************************/ +//This function parses the a string and puts peices in a vector +inline void splitAtComma(string& estim, vector& container) { + try { + string individual; + + while (estim.find_first_of(',') != -1) { + individual = estim.substr(0,estim.find_first_of(',')); + if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string + estim = estim.substr(estim.find_first_of(',')+1, estim.length()); + container.push_back(individual); + } + } + //get last one + container.push_back(estim); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the utilities class Function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the utilities class function splitAtComma. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/***********************************************************************/ //This function splits up the various option parameters inline void splitAtComma(string& prefix, string& suffix){ diff --git a/validcommands.cpp b/validcommands.cpp index 20dd588..2adbc0e 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -17,6 +17,7 @@ ValidCommands::ValidCommands() { commands["read.dist"] = "read.dist"; commands["read.otu"] = "read.otu"; commands["read.tree"] = "read.tree"; + commands["bin.seqs"] = "bin.seqs"; commands["cluster"] = "cluster"; commands["deconvolute"] = "deconvolute"; commands["collect.single"] = "collect.single"; diff --git a/validparameter.cpp b/validparameter.cpp index 3806f50..5278d51 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -289,6 +289,9 @@ void ValidParameters::initCommandParameters() { string vennArray[] = {"groups","line","label","calc"}; commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string)); + string binseqsArray[] = {"fasta","line","label","name"}; + commandParameters["bin.seqs"] = addParameters(binseqsArray, sizeof(binseqsArray)/sizeof(string)); + string quitArray[] = {}; commandParameters["quit"] = addParameters(quitArray, sizeof(quitArray)/sizeof(string)); -- 2.39.2