]> git.donarmstrong.com Git - mothur.git/blobdiff - unifracunweightedcommand.cpp
working on adding count file to mgcluster
[mothur.git] / unifracunweightedcommand.cpp
index dbdee2ad942db51860cb0a7f403662a895def1bf..0749cb79ff41e03c55ca2a9defeb1b70e44645d8 100644 (file)
@@ -22,7 +22,7 @@ vector<string> UnifracUnweightedCommand::setParameters(){
                CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
-               CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
+               CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
@@ -63,6 +63,31 @@ string UnifracUnweightedCommand::getHelpString(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){       
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "unweighted")            {   outputFileName =  "unweighted";   }
+            else if (type == "uwsummary")        {   outputFileName =  "uwsummary";   }
+            else if (type == "phylip")           {   outputFileName =  "dist";   }
+            else if (type == "column")           {   outputFileName =  "dist";   }
+            else if (type == "tree")             {   outputFileName =  "tre";   }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
+
 //**********************************************************************************************************************
 UnifracUnweightedCommand::UnifracUnweightedCommand(){  
        try {
@@ -179,6 +204,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "distance", false);                  
                        if (temp == "not found") { phylip = false; outputForm = ""; }
                        else{
+                if (temp=="phylip") { temp = "lt"; }
                                if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
                                else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
                        }
@@ -245,9 +271,10 @@ int UnifracUnweightedCommand::execute() {
         T = reader->getTrees();
         tmap = T[0]->getTreeMap();
         map<string, string> nameMap = reader->getNames();
+        map<string, string> unique2Dup = reader->getNameMap();
         delete reader; 
         
-               sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
+               sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
                outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
                m->openOutputFile(sumFile, outSum);
                
@@ -281,7 +308,7 @@ int UnifracUnweightedCommand::execute() {
                     int thisSize = thisGroupsSeqs.size();
                     
                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]);        }
-                    else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
+                    else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
                 } 
                 m->setGroups(Groups);
             }
@@ -305,12 +332,12 @@ int UnifracUnweightedCommand::execute() {
                for (int i = 0; i < T.size(); i++) {
                        if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
                        
-                       counter = 0;
+            counter = 0;
                        
                        if (random)  {  
-                               output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted", itersString);
-                               outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
-                               outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
+                               output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
+                               outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
+                               outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
                        }
                        
                        
@@ -341,18 +368,23 @@ int UnifracUnweightedCommand::execute() {
                        
                        if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0;  }
             
+            int startSubsample = time(NULL);
+            
             //subsample loop
             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
-                
                 if (m->control_pressed) { break; }
                 
                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
                 TreeMap* newTmap = new TreeMap();
-                newTmap->getCopy(*tmap);
+                //newTmap->getCopy(*tmap);
                 
+                //SubSample sample;
+                //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
+                
+                //uses method of setting groups to doNotIncludeMe
                 SubSample sample;
-                Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
+                Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
                 
                 //call new weighted function
                 vector<double> iterData; iterData.resize(numComp,0);
@@ -367,6 +399,7 @@ int UnifracUnweightedCommand::execute() {
                 
                 if((thisIter+1) % 100 == 0){   m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
             }
+            m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
             
             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0;  }
 
@@ -473,15 +506,15 @@ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& di
             }
         }
         
-        string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave.dist";
-        outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); 
-        
+        string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
+        if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
+        else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
         ofstream out;
         m->openOutputFile(aveFileName, out);
         
-        string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std.dist";
-        outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); 
-        
+        string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
+        if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
+        else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
         ofstream outStd;
         m->openOutputFile(stdFileName, outStd);
         
@@ -562,7 +595,7 @@ int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists,
         Tree* conTree = con.getTree(newTrees);
         
         //create a new filename
-        string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";                                
+        string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");                            
         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
         ofstream outTree;
         m->openOutputFile(conFile, outTree);
@@ -586,7 +619,7 @@ vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dis
         vector<Tree*> trees;
         
         //create a new filename
-        string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";                              
+        string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");                          
         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
         
         ofstream outAll;
@@ -754,10 +787,10 @@ void UnifracUnweightedCommand::createPhylipFile(int i) {
        try {
                string phylipFileName;
                if ((outputForm == "lt") || (outputForm == "square")) {
-                       phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip.dist";
+                       phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
                        outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
                }else { //column
-                       phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column.dist";
+                       phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
                        outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
                }