]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed bug with getting sharedrabunds for venn, heatmap and tree.shared commands,...
authorwestcott <westcott>
Wed, 13 May 2009 15:19:07 +0000 (15:19 +0000)
committerwestcott <westcott>
Wed, 13 May 2009 15:19:07 +0000 (15:19 +0000)
12 files changed:
distancecommand.cpp
helpcommand.cpp
inputdata.cpp
readdistcommand.cpp
readotucommand.cpp
sharedlistvector.cpp
sharedordervector.cpp
sharedrabundvector.cpp
sharedrabundvector.h
sharedutilities.cpp
sharedutilities.h
treegroupscommand.cpp

index 0714ab3623181a86d5e0485b5a4e6613144a6b66..90ffb8df31d5aff2a47ee0f29ce9df33397d39b7 100644 (file)
@@ -52,8 +52,6 @@ DistanceCommand::DistanceCommand(){
                                
                //reset calc for next command
                globaldata->setCalc("");
-
-               
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the DistanceCommand class Function DistanceCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -138,16 +136,12 @@ int DistanceCommand::execute(){
                                int pid2 = fork();
                                if(pid2 > 0){
                                        driver(distCalculator, seqDB, 0, numSeqs / 2, distFile + "tempa", cutoff);      
-                                       //system(("cat " + distFile + "tempa" + " >> " + distFile).c_str());
                                        appendFiles(distFile+"tempa", distFile);
-                                       //system(("rm " + distFile + "tempa").c_str()); 
                                        remove((distFile + "tempa").c_str());                   
                                }
                                else{
                                        driver(distCalculator, seqDB, numSeqs / 2, (numSeqs/sqrt(2)), distFile + "tempb", cutoff);      
-                                       //system(("cat " + distFile + "tempb" + " >> " + distFile).c_str());
                                        appendFiles(distFile+"tempb", distFile);
-                                       //system(("rm " + distFile + "tempb").c_str());
                                        remove((distFile + "tempb").c_str());                           
                                }
                                wait(NULL);
@@ -156,16 +150,12 @@ int DistanceCommand::execute(){
                                int pid3 = fork();
                                if(pid3 > 0){
                                        driver(distCalculator, seqDB, (numSeqs/sqrt(2)), (sqrt(3) * numSeqs / 2), distFile + "tempc", cutoff);  
-                                       //system(("cat " + distFile + "tempc" + " >> " + distFile).c_str());
                                        appendFiles(distFile+"tempc", distFile);
-                                       //system(("rm " + distFile + "tempc").c_str());
                                        remove((distFile + "tempc").c_str());                           
                                }
                                else{
                                        driver(distCalculator, seqDB, (sqrt(3) * numSeqs / 2), numSeqs, distFile + "tempd", cutoff);    
-                                       //system(("cat " + distFile + "tempd" + " >> " + distFile).c_str());
                                        appendFiles(distFile+"tempd", distFile);
-                                       //system(("rm " + distFile + "tempd").c_str());
                                        remove((distFile + "tempd").c_str());                           
                                }
                                wait(NULL);
@@ -207,7 +197,6 @@ int DistanceCommand::driver(Dist* distCalculator, SequenceDB* align, int startLi
 
                                if(dist <= cutoff){
                                        distFile << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
-//cout << align->get(i).getName() << ' ' << align->get(j).getName() << ' ' << dist << endl;
                                }
                        
                        }
@@ -264,3 +253,4 @@ void DistanceCommand::appendFiles(string temp, string filename) {
                exit(1);
        }       
 }
+/**************************************************************************************************/
\ No newline at end of file
index 8060d5fc132a7920bd800ecb6dbc3e6c514312b3..5ea5158bf2e3b00456fe1b752a7623a8875bdae6 100644 (file)
@@ -70,6 +70,18 @@ int HelpCommand::execute(){
                cout << "The deconvolute command parameter is fasta and it is required." << "\n";
                cout << "The deconvolute command should be in the following format: " << "\n";
                cout << "deconvolute(fasta=yourFastaFile) " << "\n";
+       }else if (globaldata->helpRequest == "distance") {
+               cout << "The distance command reads a file containing sequences and creates a distance file." << "\n";
+               cout << "The distance command parameters are fasta, phylip, clustal, nexus, calc, ends, cutoff and processors.  " << "\n";
+               cout << "You must use one of the following parameters for your filename: fasta, phylip, clustal or nexus. " << "\n";
+               cout << "The calc parameter allows you to specify the method of calculating the distances.  Your options are: nogaps, onegap or eachgap. The default is onegap." << "\n";
+               cout << "The ends parameter allows you to specify whether to include terminal gaps in distance.  Your options are: T or F. The default is T." << "\n";
+               cout << "The cutoff parameter allows you to specify maximum distance to keep. The default is 1.0." << "\n";
+               cout << "The processors parameter allows you to specify number of processors to use.  The default is 1, but you can use up to 4 processors." << "\n";
+               cout << "The distance command should be in the following format: " << "\n";
+               cout << "distance(fasta=yourFastaFile, calc=yourCalc, ends=yourEnds, cutoff= yourCutOff, processors=yourProcessors) " << "\n";
+               cout << "Example distance(fasta=amazon.fasta, calc=eachgap, ends=F, cutoff= 2.0, processors=3)." << "\n";
+               cout << "Note: No spaces between parameter labels (i.e. calc), '=' and parameters (i.e.yourCalc)." << "\n" << "\n";
        }else if (globaldata->helpRequest == "collect.single") {
                cout << "The collect.single command can only be executed after a successful read.otu command. WITH ONE EXECEPTION. " << "\n";
                cout << "The collect.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster." << "\n";
index 2c249e6a4a42b89a93b48a4252c60613f40fae9f..05eaaca210ce4fe1fc5413a6feb9a9347bb7206b 100644 (file)
@@ -183,14 +183,14 @@ OrderVector* InputData::getOrderVector(){
        }       
 }
 /***********************************************************************/
-
+//this is used when you don't need the order vector
 vector<SharedRAbundVector*> InputData::getSharedRAbundVectors(){
        try {
                if(fileHandle){
                        if (format == "sharedfile")  {
-                               SharedOrder = new SharedOrderVector(fileHandle);
-                               if (SharedOrder != NULL) {
-                                       return SharedOrder->getSharedRAbundVector();
+                               SharedRAbundVector* SharedRAbund = new SharedRAbundVector(fileHandle);
+                               if (SharedRAbund != NULL) {
+                                       return SharedRAbund->getSharedRAbundVectors();
                                }
                        }else if (format == "shared") {
                                SharedList = new SharedListVector(fileHandle);
@@ -216,6 +216,7 @@ vector<SharedRAbundVector*> InputData::getSharedRAbundVectors(){
        }       
 }
 
+
 /***********************************************************************/
 
 SAbundVector* InputData::getSAbundVector(){
index 0e45ad51f3ebac61c74542076501e775a20498cd..8de0c34ee039e3c79d539fb7ed531fb2351f3400 100644 (file)
@@ -78,7 +78,10 @@ int ReadDistCommand::execute(){
                        globaldata->gMatrix = matrix; //save matrix for coverage commands
                }else {
                        read->read(nameMap);
+                       //to prevent memory leak
+                       if (globaldata->gListVector != NULL) { delete globaldata->gListVector;  }
                        globaldata->gListVector = read->getListVector();
+                       if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix;  }
                        globaldata->gSparseMatrix = read->getMatrix();
                }
                return 0;
index 9f27057ece85672069c9ebf219c08b459b648efc..1b2ec9e750445f7b256b978fec0298d42b3f825c 100644 (file)
@@ -45,10 +45,10 @@ int ReadOtuCommand::execute(){
                        groupMap->readMap();
                        
                        //if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap;  }
-                       globaldata->gGroupmap = groupMap;
-               
+                       globaldata->gGroupmap = groupMap;               
                        shared = new SharedCommand();
                        shared->execute();
+
                        parselist = new ParseListCommand();
                        parselist->execute();
                }
index b5624508ac792aa82d367ed04ed1fbaf571d4cd6..02c012e797e47efe6e509c7bdd62bd79fa3e390d 100644 (file)
@@ -234,6 +234,7 @@ SharedOrderVector* SharedListVector::getSharedOrderVector(){
                        groupName = groupmap->getGroup(names);
                        order->push_back(i, binSize, groupName);
                }
+
                random_shuffle(order->begin(), order->end());
                return order;
        }
@@ -293,8 +294,13 @@ vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
                vector<SharedRAbundVector*> lookup;
                
                util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
-               util->getSharedVectors(globaldata->Groups, lookup, this->getSharedOrderVector());
-               
+
+               for (int i = 0; i < globaldata->Groups.size(); i++) {
+                       SharedRAbundVector* temp = new SharedRAbundVector();
+                       *temp = getSharedRAbundVector(globaldata->Groups[i]);
+                       lookup.push_back(temp);
+               }
+
                return lookup;
        }
        catch(exception& e) {
index 2830cec3011c4ff713172d198ae03fd93955c0a6..973f69553a476c4e67f72c4b9f10cecae0bc32b6 100644 (file)
@@ -355,18 +355,18 @@ void SharedOrderVector::updateStats(){
                maxRank = 0;
        
                for(int i=0;i<data.size();i++){
+
                        if(data[i].bin != -1){
                                numSeqs++;
                        }
                }
+               //vector<individual> hold(numSeqs, 0);
        
-               vector<individual> hold(numSeqs);
-       
-               for(int i=0;i<numSeqs;i++){
-                       if(data[i].bin != -1){
-                               hold[data[i].bin].bin = hold[data[i].bin].bin+1;
-                       }
-               }       
+               //for(int i=0;i<numSeqs;i++){
+                       //if(data[i].bin != -1){
+                               //hold[data[i].bin].bin = hold[data[i].bin].bin+1;
+                       //}
+               //}     
 
                for(int i=0;i<numSeqs;i++){
                        if(data[i].bin > numBins) { numBins = data[i].bin;      } 
index 9634613ded396633062188934cab9f1f4a40e9dc..1b8023d8ea190bcde9b64dae1ca815fea84c8136 100644 (file)
@@ -13,6 +13,7 @@ using namespace std;
 #include "sharedrabundvector.h" 
 #include "sabundvector.hpp"
 #include "ordervector.hpp"
+#include "sharedutilities.h"
 
 
 /***********************************************************************/
@@ -56,30 +57,87 @@ SharedRAbundVector::SharedRAbundVector(string id, vector<individual> rav) : Data
 }
 
 
-/***********************************************************************
-
-
+/***********************************************************************/
+//reads a shared file
 SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
        try {
-               int i, num;
-               string holdLabel, group
-               individual newGuy;
+               globaldata = GlobalData::getInstance();
                
-               f >> label >> group >> num;
+               if (globaldata->gGroupmap == NULL) {  groupmap = new GroupMap(); }
                
-               //initialize data
-               for (i=0; i<hold; i++) {
-                       newGuy = new individual;
-                       newGuy.abundance = 0;
-                       newGuy.bin = i;
-                       data.push_back(newGuy);
+               int num, inputData, pos, count;
+               count = 0;  
+               string holdLabel, nextLabel, groupN;
+               individual newguy;
+               
+               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+               lookup.clear();
+               
+               //read in first row since you know there is at least 1 group.
+               f >> label >> groupN >> num;
+               holdLabel = label;
+               
+               //add new vector to lookup
+               lookup.push_back(new SharedRAbundVector(num));
+               lookup[0]->setLabel(label);
+               lookup[0]->setGroup(groupN);
+               
+               if (globaldata->gGroupmap == NULL) { 
+                       //save group in groupmap
+                       groupmap->namesOfGroups.push_back(groupN);
+                       groupmap->groupIndex[groupN] = 0;
                }
-               int inputData;
-       
-               for(int i=0;i<hold;i++){
+               
+               //fill vector.  data = first sharedrabund in file
+               for(int i=0;i<num;i++){
                        f >> inputData;
-                       set(i, inputData);
+                       
+                       lookup[0]->push_back(inputData, i, groupN); //abundance, bin, group
+                       push_back(inputData, i, groupN);
+                       numSeqs += inputData;
+                       numBins++;
+                       if (inputData > maxRank) { maxRank = inputData; }
+                       
                }
+               
+               //save position in file in case next line is a new label.
+               pos = f.tellg();
+               
+               if (f.eof() != true) { f >> nextLabel; }
+               
+               //read the rest of the groups info in
+               while ((nextLabel == holdLabel) && (f.eof() != true)) {
+                       f >> groupN >> num;
+                       count++;
+                       
+                       if (globaldata->gGroupmap == NULL) { 
+                               //save group in groupmap
+                               groupmap->namesOfGroups.push_back(groupN);
+                               groupmap->groupIndex[groupN] = count;
+                       }
+                       
+                       //add new vector to lookup
+                       lookup.push_back(new SharedRAbundVector(num));
+                       lookup[count]->setLabel(label);
+                       lookup[count]->setGroup(groupN);
+
+                       //fill vector.  
+                       for(int i=0;i<num;i++){
+                               f >> inputData;
+                               lookup[count]->push_back(inputData, i, groupN); //abundance, bin, group
+                       }
+                       
+                       //save position in file in case next line is a new label.
+                       pos = f.tellg();
+       
+                       if (f.eof() != true) { f >> nextLabel; }
+               }
+               
+               //put file pointer back since you are now at a new distance label
+               f.seekg(pos, ios::beg);
+       
+               if (globaldata->gGroupmap == NULL) { globaldata->gGroupmap = groupmap; }
+               
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function SharedRAbundVector. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -325,6 +383,34 @@ SharedRAbundVector SharedRAbundVector::getSharedRAbundVector(){
        return *this;                   
 }
 /***********************************************************************/
+vector<SharedRAbundVector*> SharedRAbundVector::getSharedRAbundVectors(){
+       try {
+               SharedUtil* util;
+               util = new SharedUtil();
+               
+               util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups);
+
+               for (int i = 0; i < lookup.size(); i++) {
+                       //if this sharedrabund is not from a group the user wants then delete it.
+                       if (util->isValidGroup(lookup[i]->getGroup(), globaldata->Groups) == false) { 
+                               delete lookup[i]; 
+                               lookup.erase(lookup.begin()+i); 
+                               i--; 
+                       }
+               }
+
+               return lookup;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the SharedRAbundVector class Function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the SharedRAbundVector class function getSharedRAbundVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+/***********************************************************************/
 
 RAbundVector SharedRAbundVector::getRAbundVector() {
        try {
index 5cc92a3b0904d7e685d89b8160c396f249164d1e..0c71394689e8aad4d522e5f1ba6948466b218f56 100644 (file)
@@ -14,6 +14,7 @@
 #include "sharedordervector.h"
 #include "sharedsabundvector.h"
 #include "rabundvector.hpp"
+#include "groupmap.h"
 
 /* This class is a child to datavector.  It represents OTU information at a certain distance. 
        It is similiar to an rabundvector except each member of data knows which group it belongs to.
@@ -21,7 +22,7 @@
        An individual which knows the OTU from which it came, 
        the group it is in and its abundance.  */
 
-
+class GlobalData;
 
 class SharedRAbundVector : public DataVector {
        
@@ -30,7 +31,7 @@ public:
        SharedRAbundVector(int);
        //SharedRAbundVector(string, vector<int>);
        SharedRAbundVector(const SharedRAbundVector& bv) : DataVector(bv), data(bv.data), maxRank(bv.maxRank), numBins(bv.numBins), numSeqs(bv.numSeqs){};
-    //SharedRAbundVector(ifstream&);
+    SharedRAbundVector(ifstream&);
        ~SharedRAbundVector();
 
        int getNumBins();               
@@ -66,9 +67,13 @@ public:
        SharedOrderVector getSharedOrderVector();
        SharedSAbundVector getSharedSAbundVector();
        SharedRAbundVector getSharedRAbundVector();
+       vector<SharedRAbundVector*> getSharedRAbundVectors();
        
 private:
        vector<individual>  data; 
+       vector<SharedRAbundVector*> lookup;
+       GlobalData* globaldata;
+       GroupMap* groupmap;
        int maxRank;
        int numBins;
        int numSeqs;
index fc1b3350599ebb6db17da3d6f02f977a1f793bb1..fd0595de079b588a53e5b11c23d89c5a56687d78 100644 (file)
@@ -125,7 +125,7 @@ void SharedUtil::setGroups(vector<string>& userGroups, vector<string>& allGroups
                                
                                //if the user only entered invalid groups
                                if (userGroups.size() == 0) { 
-                                       cout << "When using the groups parameter you must have at least 1 valid groups. I will run the command using all the groups in your groupfile." << endl; 
+                                       cout << "You provided no valid groups. I will run the command using all the groups in your groupfile." << endl; 
                                        for (int i = 0; i < allGroups.size(); i++) {
                                                userGroups.push_back(allGroups[i]);
                                        }
index fa00933f197dee90428a62011903c1be84989c1a..75ecf232bf422d062b5ac8d06560e6ed4448612f 100644 (file)
@@ -28,10 +28,11 @@ class SharedUtil {
                void setGroups(vector<string>&, vector<string>&, string&, int&, string);  //globaldata->Groups, your tree or group map, allgroups, numGroups, mode
                void getCombos(vector<string>&, vector<string>, int&); //groupcomb, globaldata->Groups, numcomb
                void updateGroupIndex(vector<string>&, map<string, int>&); //globaldata->Groups, groupmap->groupIndex
+               bool isValidGroup(string, vector<string>);
                
        private:
        
-               bool isValidGroup(string, vector<string>);
+               
 };
 
 /**************************************************************************************************/
index de30d3c5ac2e30ab6e44cb0f84cdb9e47c1eaf73..047ea6173103b5c2ad82c19c6213ab5bab9ed978 100644 (file)
@@ -89,11 +89,12 @@ int TreeGroupCommand::execute(){
                if (treeCalculators.size() == 0) { return 0; }
 
                if (format == "sharedfile") {
+                       //you have groups
                        read = new ReadOTUFile(globaldata->inputFileName);      
                        read->read(&*globaldata); 
                        
                        input = globaldata->ginput;
-                       order = input->getSharedOrderVector();
+                       lookup = input->getSharedRAbundVectors();
                }else {
                        //you are using a list and a groupfile
                        read = new ReadOTUFile(globaldata->inputFileName);      
@@ -101,11 +102,11 @@ int TreeGroupCommand::execute(){
                
                        input = globaldata->ginput;
                        SharedList = globaldata->gSharedList;
-                       order = SharedList->getSharedOrderVector();
+                       lookup = SharedList->getSharedRAbundVector();
                }
                
-               //set users groups
-               util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "treegroup");
+               if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; }
+               
                numGroups = globaldata->Groups.size();
                groupNames = "";
                for (int i = 0; i < numGroups; i++) { groupNames += globaldata->Groups[i]; }
@@ -121,12 +122,11 @@ int TreeGroupCommand::execute(){
                tmap->makeSim(globaldata->gGroupmap);
                globaldata->gTreemap = tmap;
                        
-               while(order != NULL){
+               while(lookup[0] != NULL){
                
-                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){                       
+                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){                   
                                
-                               cout << order->getLabel() << '\t' << count << endl;
-                               util->getSharedVectors(globaldata->Groups, lookup, order);  //fills group vectors from order vector.
+                               cout << lookup[0]->getLabel() << '\t' << count << endl;
                                
                                //for each calculator                                                                                           
                                for(int i = 0 ; i < treeCalculators.size(); i++) {
@@ -145,7 +145,7 @@ int TreeGroupCommand::execute(){
                                        for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
                
                                        //create a new filename
-                                       outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + order->getLabel() + ".tre";
+                                       outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";
                                                        
                                        for (int k = 0; k < lookup.size(); k++) { 
                                                for (int l = k; l < lookup.size(); l++) {
@@ -170,17 +170,7 @@ int TreeGroupCommand::execute(){
                        }
                
                        //get next line to process
-                       if (format == "sharedfile") {
-                               order = input->getSharedOrderVector();
-                       }else {
-                               //you are using a list and a groupfile
-                               SharedList = input->getSharedListVector(); //get new list vector to process
-                               if (SharedList != NULL) {
-                                       order = SharedList->getSharedOrderVector(); //gets new order vector with group info.
-                               }else {
-                                       break;
-                               }
-                       }
+                       lookup = input->getSharedRAbundVectors();
                        count++;
                }