git.donarmstrong.com Git - mothur.git/commitdiff
added distance option to summary.shared
author westcott <westcott>
Tue, 28 Sep 2010 16:31:21 +0000 (16:31 +0000)
committer westcott <westcott>
Tue, 28 Sep 2010 16:31:21 +0000 (16:31 +0000)
clustersplitcommand.cpp
makefile
mothur
splitmatrix.cpp
summarysharedcommand.cpp
summarysharedcommand.h
unweighted.cpp

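diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp
index 050a615253767622c04ac26ff1b747d1e50eed28..10579a3b209c3f5b15b1590d44d2ab99f90326b1 100644 (file)
--- a/clustersplitcommand.cpp
+++ b/clustersplitcommand.cpp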
@@ -206,7 +206,7 @@ void ClusterSplitCommand::help(){
                m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
                m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
                m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
-               m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1. \n");
+               m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n");
                m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
                #ifdef USE_MPI
                m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
diff --git a/makefile b/makefile
index 92c20134b7911b1cb2aa570da7c2e95cdc1b119b..773b52f3db800aafb94a1da4dbabbe1967134759 100644 (file)
--- a/makefile
+++ b/makefile
@@ -34,11 +34,15 @@ endif
 64BIT_VERSION ?= yes
 
 ifeq  ($(strip $(64BIT_VERSION)),yes)
-    TARGET_ARCH += -arch x86_64
         CXXFLAGS += -DBIT_VERSION
        
        #if you are using centos uncomment the following lines
        #CXX = g++44
+       
+       #if you are a mac user use the following line
+       TARGET_ARCH += -arch x86_64
+       
+       #if you are a linux user use the following line
        #CXXFLAGS += -mtune=native -march=native -m64
 endif
 
diff --git a/mothur b/mothur
index cd70f8b7a0ee201efce0d4ded8cfa3fa506ebb76..d4e222cb44721ea3b22a5231de06064a006e3100 100755 (executable)
Binary files a/mothur and b/mothur differ
diff --git a/splitmatrix.cpp b/splitmatrix.cpp
index a4e1f98581b9ce947dd53ad016dc347c71c99ffe..b7a3b49362dbd7c67f54649ba1b846a5c7cf6c79 100644 (file)
--- a/splitmatrix.cpp
+++ b/splitmatrix.cpp
@@ -222,6 +222,7 @@ int SplitMatrix::createDistanceFilesFromTax(map<string, int>& seqGroup, int numG
                
                for(int i=0;i<numGroups;i++){
                        string tempNameFile = namefile + "." + toString(i) + ".temp";
+                       if (outputDir == "") { outputDir = m->hasPath(fastafile); }
                        string tempDistFile = outputDir + m->getRootName(m->getSimpleName((fastafile + "." + toString(i) + ".temp"))) + "dist";
 
                        //if there are valid distances
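diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp
index e6472a8af60b281eb84c021516dc1a88f6c01cb3..3069a6ffbd8a43f1a0d3c956d3d3190769404a07 100644 (file)
--- a/summarysharedcommand.cpp
+++ b/summarysharedcommand.cpp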
@@ -48,7 +48,7 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"label","calc","groups","all","outputdir","inputdir", "processors"};
+                       string Array[] =  {"label","calc","groups","all","outputdir","distance","inputdir", "processors"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -104,6 +104,9 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
                        all = m->isTrue(temp);
                        
+                       temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
+                       createPhylip = m->isTrue(temp);
+                       
                        temp = validParameter.validFile(parameters, "processors", false);       if(temp == "not found"){        temp = "1"; }
                        convert(temp, processors); 
                        
@@ -396,6 +399,7 @@ int SummarySharedCommand::execute(){
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                m->mothurOut(outputFileName); m->mothurOutEndLine();    
                if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        }
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                m->mothurOutEndLine();
 
                return 0;
@@ -409,10 +413,12 @@ int SummarySharedCommand::execute(){
 /***********************************************************/
 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
        try {
+                       vector< vector<seqDist> > calcDists;  //vector containing vectors that contains the summary results for each group compare
+                       calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
                                
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp");
+                                       driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
                                        m->appendFiles((sumFileName + ".temp"), sumFileName);
                                        remove((sumFileName + ".temp").c_str());
                                        if (mult) {
@@ -420,7 +426,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                                remove((sumAllFileName + ".temp").c_str());
                                        }
                                }else{
-                                       int process = 0;
+                                       int process = 1;
                                        vector<int> processIDS;
                
                                        //loop through and create all the processes you want
@@ -431,11 +437,34 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                                        processIDS.push_back(pid); 
                                                        process++;
                                                }else if (pid == 0){
-                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp");   
+                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                                                       
+                                                       //only do this if you want a distance file
+                                                       if (createPhylip) {
+                                                               string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+                                                               ofstream outtemp;
+                                                               m->openOutputFile(tempdistFileName, outtemp);
+                                                               
+                                                               for (int i = 0; i < calcDists.size(); i++) {
+                                                                       outtemp << calcDists[i].size() << endl;
+                                                                       
+                                                                       for (int j = 0; j < calcDists[i].size(); j++) {
+                                                                               outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                                                                       }
+                                                               }
+                                                               outtemp.close();
+                                                       }
+                                                       
                                                        exit(0);
                                                }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                                        }
-                               
+                                       
+                                       //parent do your part
+                                       driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                                       m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+                                       remove((sumFileName + toString(getpid()) + ".temp").c_str());
+                                       if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
+                                               
                                        //force parent to wait until all the processes are done
                                        for (int i = 0; i < processIDS.size(); i++) {
                                                int temp = processIDS[i];
@@ -445,15 +474,36 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                        for (int i = 0; i < processIDS.size(); i++) {
                                                m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
                                                remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
-                                               if (mult) {
-                                                       if (i == 0) {  m->appendFiles((sumAllFileName + toString(processIDS[i]) + ".temp"), sumAllFileName);  }
-                                                       remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               if (mult) {     remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());   }
+                                               
+                                               if (createPhylip) {
+                                                       string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
+                                                       ifstream intemp;
+                                                       m->openInputFile(tempdistFileName, intemp);
+                                                       
+                                                       for (int i = 0; i < calcDists.size(); i++) {
+                                                               int size = 0;
+                                                               intemp >> size; m->gobble(intemp);
+                                                                       
+                                                               for (int j = 0; j < size; j++) {
+                                                                       int seq1 = 0;
+                                                                       int seq2 = 0;
+                                                                       float dist = 1.0;
+                                                                       
+                                                                       intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
+                                                                       
+                                                                       seqDist tempDist(seq1, seq2, dist);
+                                                                       calcDists[i].push_back(tempDist);
+                                                               }
+                                                       }
+                                                       intemp.close();
+                                                       remove(tempdistFileName.c_str());
                                                }
                                        }
 
                                }
                        #else
-                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"));
+                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists);
                                m->appendFiles((sumFileName + ".temp"), sumFileName);
                                remove((sumFileName + ".temp").c_str());
                                if (mult) {
@@ -461,6 +511,50 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                        remove((sumAllFileName + ".temp").c_str());
                                }
                        #endif
+                       
+                       if (createPhylip) {
+                               for (int i = 0; i < calcDists.size(); i++) {
+                                       if (m->control_pressed) { break; }
+                               
+                                       string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+                                       outputNames.push_back(distFileName);
+                                       ofstream outDist;
+                                       m->openOutputFile(distFileName, outDist);
+                                       outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+                                       
+                                       //initialize matrix
+                                       vector< vector<float> > matrix; //square matrix to represent the distance
+                                       matrix.resize(thisLookup.size());
+                                       for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                                       
+                                       
+                                       for (int j = 0; j < calcDists[i].size(); j++) {
+                                               int row = calcDists[i][j].seq1;
+                                               int column = calcDists[i][j].seq2;
+                                               float dist = calcDists[i][j].dist;
+                                               
+                                               matrix[row][column] = dist;
+                                               matrix[column][row] = dist;
+                                       }
+                                       
+                                       //output to file
+                                       outDist << thisLookup.size() << endl;
+                                       for (int r=0; r<thisLookup.size(); r++) { 
+                                               //output name
+                                               string name = thisLookup[r]->getGroup();
+                                               if (name.length() < 10) { //pad with spaces to make compatible
+                                                       while (name.length() < 10) {  name += " ";  }
+                                               }
+                                               outDist << name << '\t';
+                                       
+                                               //output distances
+                                               for (int l = 0; l < r; l++) {   outDist  << matrix[r][l] << '\t';  }
+                                               outDist << endl;
+                                       }
+                                       
+                                       outDist.close();
+                               }
+                       }
        }
        catch(exception& e) {
                m->errorOut(e, "SummarySharedCommand", "process");
@@ -468,7 +562,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
        }
 }
 /**************************************************************************************************/
-int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile) { 
+int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
        try {
                
                //loop through calculators and add to file all for all calcs that can do mutiple groups
@@ -524,12 +618,15 @@ int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int sta
                                
                                for(int i=0;i<sumCalculators.size();i++) {
 
-                                       sumCalculators[i]->getValues(subset); //saves the calculator outputs
+                                       vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
                                        
                                        if (m->control_pressed) { outputFileHandle.close(); return 1; }
                                        
                                        outputFileHandle << '\t';
                                        sumCalculators[i]->print(outputFileHandle);
+                                       
+                                       seqDist temp(l, k, tempdata[0]);
+                                       calcDists[i].push_back(temp);
                                }
                                outputFileHandle << endl;
                        }
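diff --git a/summarysharedcommand.h b/summarysharedcommand.h
index b99e0f037206496f788635fa7d8a6ce21958d56b..c0172645387d4846aca623d4bfbe7827ca08cd74 100644 (file)
--- a/summarysharedcommand.h
+++ b/summarysharedcommand.h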
@@ -39,7 +39,7 @@ private:
        InputData* input;
        ValidCalculators* validCalculator;
        
-       bool abort, allLines, mult, all;
+       bool abort, allLines, mult, all, createPhylip;
        set<string> labels; //holds labels to be used
        string label, calc, groups;
        vector<string>  Estimators, Groups, outputNames;
@@ -47,7 +47,7 @@ private:
        string format, outputDir;
        int numGroups, processors;
        int process(vector<SharedRAbundVector*>, string, string);
-       int driver(vector<SharedRAbundVector*>, int, int, string, string);
+       int driver(vector<SharedRAbundVector*>, int, int, string, string, vector< vector<seqDist> >&);
 
 };
 
diff --git a/unweighted.cpp b/unweighted.cpp
index 8103688fb04d34542ee2174728d3399055a5d452..2afc4a71486617b61308b651fef413895ba05b54 100644 (file)
--- a/unweighted.cpp
+++ b/unweighted.cpp
@@ -401,7 +401,7 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                        double totalBL = 0.00;  //all branch lengths
                        double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
                                
-                       for(int i=0;i<t->getNumNodes();i++){
+                       for(int i=0;i<copyTree->getNumNodes();i++){
                        
                                if (m->control_pressed) {  return data; }
                                
@@ -411,15 +411,15 @@ EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombo
                                
                                int pcountSize = 0;
                                for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
-                                       map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
-                                       if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
+                                       map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(namesOfGroupCombos[h][j]);
+                                       if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
                                }
                                
                                if (pcountSize == 0) { }
-                               else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
+                               else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
                                        
-                               if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
-                                       totalBL += abs(t->tree[i].getBranchLength()); 
+                               if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
+                                       totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                }
                        }