From 37a229b590342d94cd22bb1f7f4127e85197a27f Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 13 May 2009 15:19:07 +0000 Subject: [PATCH] fixed bug with getting sharedrabunds for venn, heatmap and tree.shared commands, speeding them up a bit. --- distancecommand.cpp | 12 +---- helpcommand.cpp | 12 +++++ inputdata.cpp | 9 ++-- readdistcommand.cpp | 3 ++ readotucommand.cpp | 4 +- sharedlistvector.cpp | 10 +++- sharedordervector.cpp | 14 ++--- sharedrabundvector.cpp | 120 +++++++++++++++++++++++++++++++++++------ sharedrabundvector.h | 9 +++- sharedutilities.cpp | 2 +- sharedutilities.h | 3 +- treegroupscommand.cpp | 30 ++++------- 12 files changed, 161 insertions(+), 67 deletions(-) diff --git a/distancecommand.cpp b/distancecommand.cpp index 0714ab3..90ffb8d 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -52,8 +52,6 @@ DistanceCommand::DistanceCommand(){ //reset calc for next command globaldata->setCalc(""); - - } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -138,16 +136,12 @@ int DistanceCommand::execute(){ int pid2 = fork(); if(pid2 > 0){ driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff); - //system(("cat " + distFile + "tempa" + " >> " + distFile).c_str()); appendFiles(distFile+"tempa", distFile); - //system(("rm " + distFile + "tempa").c_str()); remove((distFile + "tempa").c_str()); } else{ driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff); - //system(("cat " + distFile + "tempb" + " >> " + distFile).c_str()); appendFiles(distFile+"tempb", distFile); - //system(("rm " + distFile + "tempb").c_str()); remove((distFile + "tempb").c_str()); } wait(NULL); @@ -156,16 +150,12 @@ int DistanceCommand::execute(){ int pid3 = fork(); if(pid3 > 0){ driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff); - //system(("cat " + distFile + "tempc" + " >> " + distFile).c_str()); appendFiles(distFile+"tempc", distFile); - //system(("rm " + distFile + "tempc").c_str()); remove((distFile + "tempc").c_str()); } else{ driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff); - //system(("cat " + distFile + "tempd" + " >> " + distFile).c_str()); appendFiles(distFile+"tempd", distFile); - //system(("rm " + distFile + "tempd").c_str()); remove((distFile + "tempd").c_str()); } wait(NULL); @@ -207,7 +197,6 @@ int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLi if(dist <= cutoff){ distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl; -//cout << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl; } } @@ -264,3 +253,4 @@ void DistanceCommand::appendFiles(string temp, string filename) { exit(1); } } +/**************************************************************************************************/ \ No newline at end of file diff --git a/helpcommand.cpp b/helpcommand.cpp index 8060d5f..5ea5158 100644 --- a/helpcommand.cpp +++ b/helpcommand.cpp @@ -70,6 +70,18 @@ int HelpCommand::execute(){ cout << "The deconvolute command parameter is fasta and it is required." << "\n"; cout << "The deconvolute command should be in the following format: " << "\n"; cout << "deconvolute(fasta=yourFastaFile) " << "\n"; + }else if (globaldata->helpRequest == "distance") { + cout << "The distance command reads a file containing sequences and creates a distance file." << "\n"; + cout << "The distance command parameters are fasta, phylip, clustal, nexus, calc, ends, cutoff and processors. " << "\n"; + cout << "You must use one of the following parameters for your filename: fasta, phylip, clustal or nexus. " << "\n"; + cout << "The calc parameter allows you to specify the method of calculating the distances. Your options are: nogaps, onegap or eachgap. The default is onegap." << "\n"; + cout << "The ends parameter allows you to specify whether to include terminal gaps in distance. Your options are: T or F. The default is T." << "\n"; + cout << "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0." << "\n"; + cout << "The processors parameter allows you to specify number of processors to use. The default is 1, but you can use up to 4 processors." << "\n"; + cout << "The distance command should be in the following format: " << "\n"; + cout << "distance(fasta=yourFastaFile, calc=yourCalc, ends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) " << "\n"; + cout << "Example distance(fasta=amazon.fasta, calc=eachgap, ends=F, cutoff= 2.0, processors=3)." << "\n"; + cout << "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc)." << "\n" << "\n"; }else if (globaldata->helpRequest == "collect.single") { cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n"; cout << "The collect.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster." << "\n"; diff --git a/inputdata.cpp b/inputdata.cpp index 2c249e6..05eaaca 100644 --- a/inputdata.cpp +++ b/inputdata.cpp @@ -183,14 +183,14 @@ OrderVector* InputData::getOrderVector(){ } } /***********************************************************************/ - +//this is used when you don't need the order vector vector InputData::getSharedRAbundVectors(){ try { if(fileHandle){ if (format == "sharedfile") { - SharedOrder = new SharedOrderVector(fileHandle); - if (SharedOrder != NULL) { - return SharedOrder->getSharedRAbundVector(); + SharedRAbundVector* SharedRAbund = new SharedRAbundVector(fileHandle); + if (SharedRAbund != NULL) { + return SharedRAbund->getSharedRAbundVectors(); } }else if (format == "shared") { SharedList = new SharedListVector(fileHandle); @@ -216,6 +216,7 @@ vector InputData::getSharedRAbundVectors(){ } } + /***********************************************************************/ SAbundVector* InputData::getSAbundVector(){ diff --git a/readdistcommand.cpp b/readdistcommand.cpp index 0e45ad5..8de0c34 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -78,7 +78,10 @@ int ReadDistCommand::execute(){ globaldata->gMatrix = matrix; //save matrix for coverage commands }else { read->read(nameMap); + //to prevent memory leak + if (globaldata->gListVector != NULL) { delete globaldata->gListVector; } globaldata->gListVector = read->getListVector(); + if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; } globaldata->gSparseMatrix = read->getMatrix(); } return 0; diff --git a/readotucommand.cpp b/readotucommand.cpp index 9f27057..1b2ec9e 100644 --- a/readotucommand.cpp +++ b/readotucommand.cpp @@ -45,10 +45,10 @@ int ReadOtuCommand::execute(){ groupMap->readMap(); //if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } - globaldata->gGroupmap = groupMap; - + globaldata->gGroupmap = groupMap; shared = new SharedCommand(); shared->execute(); + parselist = new ParseListCommand(); parselist->execute(); } diff --git a/sharedlistvector.cpp b/sharedlistvector.cpp index b562450..02c012e 100644 --- a/sharedlistvector.cpp +++ b/sharedlistvector.cpp @@ -234,6 +234,7 @@ SharedOrderVector* SharedListVector::getSharedOrderVector(){ groupName = groupmap->getGroup(names); order->push_back(i, binSize, groupName); } + random_shuffle(order->begin(), order->end()); return order; } @@ -293,8 +294,13 @@ vector SharedListVector::getSharedRAbundVector() { vector lookup; util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups); - util->getSharedVectors(globaldata->Groups, lookup, this->getSharedOrderVector()); - + + for (int i = 0; i < globaldata->Groups.size(); i++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + *temp = getSharedRAbundVector(globaldata->Groups[i]); + lookup.push_back(temp); + } + return lookup; } catch(exception& e) { diff --git a/sharedordervector.cpp b/sharedordervector.cpp index 2830cec..973f695 100644 --- a/sharedordervector.cpp +++ b/sharedordervector.cpp @@ -355,18 +355,18 @@ void SharedOrderVector::updateStats(){ maxRank = 0; for(int i=0;i hold(numSeqs, 0); - vector hold(numSeqs); - - for(int i=0;i numBins) { numBins = data[i].bin; } diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 9634613..1b8023d 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -13,6 +13,7 @@ using namespace std; #include "sharedrabundvector.h" #include "sabundvector.hpp" #include "ordervector.hpp" +#include "sharedutilities.h" /***********************************************************************/ @@ -56,30 +57,87 @@ SharedRAbundVector::SharedRAbundVector(string id, vector rav) : Data } -/*********************************************************************** - - +/***********************************************************************/ +//reads a shared file SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { - int i, num; - string holdLabel, group - individual newGuy; + globaldata = GlobalData::getInstance(); - f >> label >> group >> num; + if (globaldata->gGroupmap == NULL) { groupmap = new GroupMap(); } - //initialize data - for (i=0; i> label >> groupN >> num; + holdLabel = label; + + //add new vector to lookup + lookup.push_back(new SharedRAbundVector(num)); + lookup[0]->setLabel(label); + lookup[0]->setGroup(groupN); + + if (globaldata->gGroupmap == NULL) { + //save group in groupmap + groupmap->namesOfGroups.push_back(groupN); + groupmap->groupIndex[groupN] = 0; } - int inputData; - - for(int i=0;i> inputData; - set(i, inputData); + + lookup[0]->push_back(inputData, i, groupN); //abundance, bin, group + push_back(inputData, i, groupN); + numSeqs += inputData; + numBins++; + if (inputData > maxRank) { maxRank = inputData; } + } + + //save position in file in case next line is a new label. + pos = f.tellg(); + + if (f.eof() != true) { f >> nextLabel; } + + //read the rest of the groups info in + while ((nextLabel == holdLabel) && (f.eof() != true)) { + f >> groupN >> num; + count++; + + if (globaldata->gGroupmap == NULL) { + //save group in groupmap + groupmap->namesOfGroups.push_back(groupN); + groupmap->groupIndex[groupN] = count; + } + + //add new vector to lookup + lookup.push_back(new SharedRAbundVector(num)); + lookup[count]->setLabel(label); + lookup[count]->setGroup(groupN); + + //fill vector. + for(int i=0;i> inputData; + lookup[count]->push_back(inputData, i, groupN); //abundance, bin, group + } + + //save position in file in case next line is a new label. + pos = f.tellg(); + + if (f.eof() != true) { f >> nextLabel; } + } + + //put file pointer back since you are now at a new distance label + f.seekg(pos, ios::beg); + + if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -325,6 +383,34 @@ SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){ return *this; } /***********************************************************************/ +vector SharedRAbundVector::getSharedRAbundVectors(){ + try { + SharedUtil* util; + util = new SharedUtil(); + + util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups); + + for (int i = 0; i < lookup.size(); i++) { + //if this sharedrabund is not from a group the user wants then delete it. + if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { + delete lookup[i]; + lookup.erase(lookup.begin()+i); + i--; + } + } + + return lookup; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedRAbundVector class function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/***********************************************************************/ RAbundVector SharedRAbundVector::getRAbundVector() { try { diff --git a/sharedrabundvector.h b/sharedrabundvector.h index 5cc92a3..0c71394 100644 --- a/sharedrabundvector.h +++ b/sharedrabundvector.h @@ -14,6 +14,7 @@ #include "sharedordervector.h" #include "sharedsabundvector.h" #include "rabundvector.hpp" +#include "groupmap.h" /* This class is a child to datavector. It represents OTU information at a certain distance. It is similiar to an rabundvector except each member of data knows which group it belongs to. @@ -21,7 +22,7 @@ An individual which knows the OTU from which it came, the group it is in and its abundance. */ - +class GlobalData; class SharedRAbundVector : public DataVector { @@ -30,7 +31,7 @@ public: SharedRAbundVector(int); //SharedRAbundVector(string, vector); SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs){}; - //SharedRAbundVector(ifstream&); + SharedRAbundVector(ifstream&); ~SharedRAbundVector(); int getNumBins(); @@ -66,9 +67,13 @@ public: SharedOrderVector getSharedOrderVector(); SharedSAbundVector getSharedSAbundVector(); SharedRAbundVector getSharedRAbundVector(); + vector getSharedRAbundVectors(); private: vector data; + vector lookup; + GlobalData* globaldata; + GroupMap* groupmap; int maxRank; int numBins; int numSeqs; diff --git a/sharedutilities.cpp b/sharedutilities.cpp index fc1b335..fd0595d 100644 --- a/sharedutilities.cpp +++ b/sharedutilities.cpp @@ -125,7 +125,7 @@ void SharedUtil::setGroups(vector& userGroups, vector& allGroups //if the user only entered invalid groups if (userGroups.size() == 0) { - cout << "When using the groups parameter you must have at least 1 valid groups. I will run the command using all the groups in your groupfile." << endl; + cout << "You provided no valid groups. I will run the command using all the groups in your groupfile." << endl; for (int i = 0; i < allGroups.size(); i++) { userGroups.push_back(allGroups[i]); } diff --git a/sharedutilities.h b/sharedutilities.h index fa00933..75ecf23 100644 --- a/sharedutilities.h +++ b/sharedutilities.h @@ -28,10 +28,11 @@ class SharedUtil { void setGroups(vector&, vector&, string&, int&, string); //globaldata->Groups, your tree or group map, allgroups, numGroups, mode void getCombos(vector&, vector, int&); //groupcomb, globaldata->Groups, numcomb void updateGroupIndex(vector&, map&); //globaldata->Groups, groupmap->groupIndex + bool isValidGroup(string, vector); private: - bool isValidGroup(string, vector); + }; /**************************************************************************************************/ diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index de30d3c..047ea61 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -89,11 +89,12 @@ int TreeGroupCommand::execute(){ if (treeCalculators.size() == 0) { return 0; } if (format == "sharedfile") { + //you have groups read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); input = globaldata->ginput; - order = input->getSharedOrderVector(); + lookup = input->getSharedRAbundVectors(); }else { //you are using a list and a groupfile read = new ReadOTUFile(globaldata->inputFileName); @@ -101,11 +102,11 @@ int TreeGroupCommand::execute(){ input = globaldata->ginput; SharedList = globaldata->gSharedList; - order = SharedList->getSharedOrderVector(); + lookup = SharedList->getSharedRAbundVector(); } - //set users groups - util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup"); + if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; } + numGroups = globaldata->Groups.size(); groupNames = ""; for (int i = 0; i < numGroups; i++) { groupNames += globaldata->Groups[i]; } @@ -121,12 +122,11 @@ int TreeGroupCommand::execute(){ tmap->makeSim(globaldata->gGroupmap); globaldata->gTreemap = tmap; - while(order != NULL){ + while(lookup[0] != NULL){ - if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){ + if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){ - cout << order->getLabel() << '\t' << count << endl; - util->getSharedVectors(globaldata->Groups, lookup, order); //fills group vectors from order vector. + cout << lookup[0]->getLabel() << '\t' << count << endl; //for each calculator for(int i = 0 ; i < treeCalculators.size(); i++) { @@ -145,7 +145,7 @@ int TreeGroupCommand::execute(){ for (int g = 0; g < numGroups; g++) { index[g] = g; } //create a new filename - outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + order->getLabel() + ".tre"; + outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre"; for (int k = 0; k < lookup.size(); k++) { for (int l = k; l < lookup.size(); l++) { @@ -170,17 +170,7 @@ int TreeGroupCommand::execute(){ } //get next line to process - if (format == "sharedfile") { - order = input->getSharedOrderVector(); - }else { - //you are using a list and a groupfile - SharedList = input->getSharedListVector(); //get new list vector to process - if (SharedList != NULL) { - order = SharedList->getSharedOrderVector(); //gets new order vector with group info. - }else { - break; - } - } + lookup = input->getSharedRAbundVectors(); count++; } -- 2.39.2