Merge remote-tracking branch 'origin'

[mothur.git] / unifracweightedcommand.cpp
diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp

index e596db230ee24d80597cfa903a49ed9ee6a7f25b..3b0c53b7437111cfe5045663a8c78370274b939e 100644 (file)
--- a/unifracweightedcommand.cpp
+++ b/unifracweightedcommand.cpp
@@ -10,23 +10,25 @@
  #include "unifracweightedcommand.h"
  #include "consensus.h"
  #include "subsample.h"
+#include "treereader.h"
  
  //**********************************************************************************************************************
  vector<string> UnifracWeightedCommand::setParameters(){        
         try {
-               CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-        CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
-        CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
-        CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
-               CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
-               CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+        CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
+        CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
+               CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
+               CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
                 vector<string> myArray;
                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -41,7 +43,7 @@ vector<string> UnifracWeightedCommand::setParameters(){
  string UnifracWeightedCommand::getHelpString(){        
         try {
                 string helpString = "";
-               helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
+               helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
@@ -63,6 +65,24 @@ string UnifracWeightedCommand::getHelpString(){
         }
  }
  //**********************************************************************************************************************
+string UnifracWeightedCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        if (type == "weighted")            {  pattern = "[filename],weighted-[filename],[tag],weighted";   }
+        else if (type == "wsummary")        {  pattern = "[filename],wsummary";   }
+        else if (type == "phylip")           {  pattern = "[filename],[tag],[tag2],dist";   }
+        else if (type == "column")           {  pattern = "[filename],[tag],[tag2],dist";   }
+        else if (type == "tree")             {  pattern = "[filename],[tag],[tag2],tre";   }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
  UnifracWeightedCommand::UnifracWeightedCommand(){      
         try {
                 abort = true; calledHelp = true; 
@@ -139,14 +159,16 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
                         
-                       m->runParse = true;
-                       m->clearGroups();
-                       m->clearAllGroups();
-                       m->Treenames.clear();
-                       m->names.clear();
-                       
                         //check for required parameters
                         treefile = validParameter.validFile(parameters, "tree", true);
                         if (treefile == "not open") { treefile = ""; abort = true; }
@@ -167,6 +189,19 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
                         
                                                                                                                                         
@@ -185,6 +220,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         string temp = validParameter.validFile(parameters, "distance", false);                  
                         if (temp == "not found") { phylip = false; outputForm = ""; }
                         else{
+                if (temp=="phylip") { temp = "lt"; }
                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
                         }
@@ -213,14 +249,22 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         consensus = m->isTrue(temp);
              
                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
-                       if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
+                       if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
+            else {  
+                CountTable testCt; 
+                if ((!testCt.testGroups(countfile)) && (subsample)) {
+                    m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
+                }
+            }
              if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
              if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
              
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(treefile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile=="") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(treefile);
+                    parser.getNameFile(files);
+                } 
+            }
                 }
                 
                 
@@ -238,22 +282,32 @@ int UnifracWeightedCommand::execute() {
                 
                 m->setTreeFile(treefile);
                 
-        readTrees();  if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
-                               
-               sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
+        TreeReader* reader;
+        if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+        else { reader = new TreeReader(treefile, countfile); }
+        T = reader->getTrees();
+        ct = T[0]->getCountTable();
+        delete reader;
+        
+        if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+               
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+               sumFile = getOutputFileName("wsummary",variables);
                 m->openOutputFile(sumFile, outSum);
                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
                 
          SharedUtil util;
                 string s; //to make work with setgroups
                 Groups = m->getGroups();
-               vector<string> nameGroups = tmap->getNamesOfGroups();
-               util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
+               vector<string> nameGroups = ct->getNamesOfGroups();
+        if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+               util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
                 m->setGroups(Groups);
                 
-        if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+        if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
          
-               Weighted weighted(tmap, includeRoot);
+               Weighted weighted(includeRoot);
                         
                 int start = time(NULL);
              
@@ -262,10 +316,9 @@ int UnifracWeightedCommand::execute() {
              //user has not set size, set size = smallest samples size
              if (subsampleSize == -1) { 
                  vector<string> temp; temp.push_back(Groups[0]);
-                subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
+                subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
                  for (int i = 1; i < Groups.size(); i++) {
-                    temp.clear(); temp.push_back(Groups[i]);
-                    int thisSize = (tmap->getNamesSeqs(temp)).size();
+                    int thisSize = ct->getGroupCount(Groups[i]);
                      if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
                  }
                  m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
@@ -273,12 +326,10 @@ int UnifracWeightedCommand::execute() {
                  vector<string> newGroups = Groups;
                  Groups.clear();
                  for (int i = 0; i < newGroups.size(); i++) {
-                    vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
-                    vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
-                    int thisSize = thisGroupsSeqs.size();
+                    int thisSize = ct->getGroupCount(newGroups[i]);
                      
                      if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]);        }
-                    else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
+                    else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
                  } 
                  m->setGroups(Groups);
              }
@@ -294,7 +345,7 @@ int UnifracWeightedCommand::execute() {
          //get weighted scores for users trees
          for (int i = 0; i < T.size(); i++) {
              
-            if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
              
              counter = 0;
              rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
@@ -304,13 +355,15 @@ int UnifracWeightedCommand::execute() {
              vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
              
              if (random) {  
-                output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
-                outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
-                outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
+                variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+                variables["[tag]"] = toString(i+1);
+                string wFileName = getOutputFileName("weighted", variables);
+                output = new ColumnFile(wFileName, itersString);
+                               outputNames.push_back(wFileName); outputTypes["wweighted"].push_back(wFileName);
              } 
              
              userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
-            if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
              
              //save users score
              for (int s=0; s<numComp; s++) {
@@ -330,36 +383,42 @@ int UnifracWeightedCommand::execute() {
              //subsample loop
              vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
              for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
-                
                  if (m->control_pressed) { break; }
                  
-                //copy to preserve old one - would do this in subsample but tree needs it and memory cleanup becomes messy.
-                TreeMap* newTmap = new TreeMap();
-                newTmap->getCopy(tmap);
+                //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
+                CountTable* newCt = new CountTable();
+                
+                int sampleTime = 0;
+                if (m->debug) { sampleTime = time(NULL); }
                  
+                //uses method of setting groups to doNotIncludeMe
                  SubSample sample;
-                Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
-                   
+                Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
+                
                  //call new weighted function
                  vector<double> iterData; iterData.resize(numComp,0);
-                Weighted thisWeighted(newTmap, includeRoot);
+                Weighted thisWeighted(includeRoot);
                  iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
                  
                  //save data to make ave dist, std dist
                  calcDistsTotals.push_back(iterData);
                  
-                delete newTmap;
+                delete newCt;
                  delete subSampleTree;
+                
+                if((thisIter+1) % 100 == 0){   m->mothurOutJustToScreen(toString(thisIter+1)+"\n");    }
              }
              
-            if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
              
              if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
              if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
          }
          
                 
-               if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0;  }
+               if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0;  }
                 
          if (phylip) {  createPhylipFile();             }
      
@@ -367,7 +426,7 @@ int UnifracWeightedCommand::execute() {
                 
                 //clear out users groups
                 m->clearGroups();
-               delete tmap; 
+               delete ct; 
                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
                 
                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
@@ -406,40 +465,28 @@ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dist
          //we need to find the average distance and standard deviation for each groups distance
          
          //finds sum
-        vector<double> averages; averages.resize(numComp, 0); 
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
-            for (int i = 0; i < dists[thisIter].size(); i++) {  
-                averages[i] += dists[thisIter][i];
-            }
-        }
-        
-        //finds average.
-        for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
+        vector<double> averages = m->getAverages(dists);        
          
          //find standard deviation
-        vector<double> stdDev; stdDev.resize(numComp, 0);
-                
-        for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-            for (int j = 0; j < dists[thisIter].size(); j++) {
-                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
-            }
-        }
-        for (int i = 0; i < stdDev.size(); i++) {  
-            stdDev[i] /= (float) subsampleIters; 
-            stdDev[i] = sqrt(stdDev[i]);
-        }
+        vector<double> stdDev = m->getStandardDeviation(dists, averages);
          
          //make matrix with scores in it
-        vector< vector<double> > avedists;     avedists.resize(m->getNumGroups());
+        vector< vector<double> > avedists;     //avedists.resize(m->getNumGroups());
          for (int i = 0; i < m->getNumGroups(); i++) {
-            avedists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            avedists.push_back(temp);
          }
          
          //make matrix with scores in it
-        vector< vector<double> > stddists;     stddists.resize(m->getNumGroups());
+        vector< vector<double> > stddists;     //stddists.resize(m->getNumGroups());
          for (int i = 0; i < m->getNumGroups(); i++) {
-            stddists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            //stddists[i].resize(m->getNumGroups(), 0.0);
+            stddists.push_back(temp);
          }
+
          
          //flip it so you can print it
          int count = 0;
@@ -453,15 +500,20 @@ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dist
              }
          }
          
-        string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.ave.dist";
-        outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); 
-        
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+        variables["[tag]"] = toString(treeNum+1);
+        variables["[tag2]"] = "weighted.ave";
+        string aveFileName = getOutputFileName("phylip",variables);
+        if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);  }
+        else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName);  }
          ofstream out;
          m->openOutputFile(aveFileName, out);
          
-       string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".weighted.std.dist";
-       outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); 
-        
+        variables["[tag2]"] = "weighted.std";
+        string stdFileName = getOutputFileName("phylip",variables);
+        if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
+        else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }        
          ofstream outStd;
          m->openOutputFile(stdFileName, outStd);
          
@@ -524,9 +576,17 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, i
          //used in tree constructor 
          m->runParse = false;
          
-        //create treemap class from groupmap for tree class to use
-        TreeMap* newTmap = new TreeMap();
-        newTmap->makeSim(m->getGroups());
+        ///create treemap class from groupmap for tree class to use
+        CountTable newCt;
+        set<string> nameMap;
+        map<string, string> groupMap;
+        set<string> gps;
+        for (int i = 0; i < m->getGroups().size(); i++) { 
+            nameMap.insert(m->getGroups()[i]); 
+            gps.insert(m->getGroups()[i]); 
+            groupMap[m->getGroups()[i]] = m->getGroups()[i];
+        }
+        newCt.createTable(nameMap, groupMap, gps);
          
          //clear  old tree names if any
          m->Treenames.clear();
@@ -534,22 +594,25 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, i
          //fills globaldatas tree names
          m->Treenames = m->getGroups();
          
-        vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
+        vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
          
-        if (m->control_pressed) { delete newTmap; return 0; }
+        if (m->control_pressed) { return 0; }
          
          Consensus con;
-        Tree* conTree = con.getTree(newTrees, newTmap);
+        Tree* conTree = con.getTree(newTrees);
          
          //create a new filename
-        string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";                          
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
+        variables["[tag]"] = toString(treeNum+1);
+        variables["[tag2]"] = "weighted.cons";
+        string conFile = getOutputFileName("tree",variables);                                                  
          outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
          ofstream outTree;
          m->openOutputFile(conFile, outTree);
          
          if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
          outTree.close();
-        delete newTmap;
          
          return 0;
  
@@ -561,13 +624,17 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, i
  }
  /**************************************************************************************************/
  
-vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap* mytmap) {
+vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
         try {
          
          vector<Tree*> trees;
          
          //create a new filename
-        string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre";                                
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
+        variables["[tag]"] = toString(treeNum+1);
+        variables["[tag2]"] = "weighted.all";
+        string outputFile = getOutputFileName("tree",variables);                               
          outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
          
          ofstream outAll;
@@ -595,7 +662,7 @@ vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists
                         }
  
              //create tree
-            Tree* tempTree = new Tree(mytmap, sims);
+            Tree* tempTree = new Tree(&myct, sims);
              tempTree->assembleTree();
              
              trees.push_back(tempTree);
@@ -617,80 +684,6 @@ vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists
  }
  /**************************************************************************************************/
  
-int UnifracWeightedCommand::readTrees() {
-       try {
-        
-        if (groupfile != "") {
-                       //read in group map info.
-                       tmap = new TreeMap(groupfile);
-                       tmap->readMap();
-               }else{ //fake out by putting everyone in one group
-                       Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
-                       tmap = new TreeMap();
-                       
-                       for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
-               }
-               
-               if (namefile != "") { readNamesFile(); }
-               
-               read = new ReadNewickTree(treefile);
-               int readOk = read->read(tmap); 
-               
-               if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-               
-               read->AssembleTrees();
-               T = read->getTrees();
-               delete read;
-               
-               //make sure all files match
-               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
-               int numNamesInTree;
-               if (namefile != "")  {  
-                       if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
-                       else {   numNamesInTree = m->Treenames.size();  }
-               }else {  numNamesInTree = m->Treenames.size();  }
-               
-               
-               //output any names that are in group file but not in tree
-               if (numNamesInTree < tmap->getNumSeqs()) {
-                       for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
-                               //is that name in the tree?
-                               int count = 0;
-                               for (int j = 0; j < m->Treenames.size(); j++) {
-                                       if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
-                                       count++;
-                               }
-                               
-                               if (m->control_pressed) { 
-                                       delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
-                                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
-                                       m->clearGroups();
-                                       return 0;
-                               }
-                               
-                               //then you did not find it so report it 
-                               if (count == m->Treenames.size()) { 
-                                       //if it is in your namefile then don't remove
-                                       map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-                                       
-                                       if (it == nameMap.end()) {
-                                               m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-                                               tmap->removeSeq(tmap->namesOfSeqs[i]);
-                                               i--; //need this because removeSeq removes name from namesOfSeqs
-                                       }
-                               }
-                       }
-               }
-
-        return 0;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "UnifracWeightedCommand", "readTrees");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-
  int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
         try {
          
@@ -705,39 +698,32 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersS
          
          lines.clear();
          
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-        if(processors != 1){
-            int numPairs = namesOfGroupCombos.size();
-            int numPairsPerProcessor = numPairs / processors;
+        //breakdown work between processors
+        int numPairs = namesOfGroupCombos.size();
+        int numPairsPerProcessor = numPairs / processors;
              
-            for (int i = 0; i < processors; i++) {
-                int startPos = i * numPairsPerProcessor;
-                if(i == processors - 1){
-                    numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                }
-                lines.push_back(linePair(startPos, numPairsPerProcessor));
-            }
+        for (int i = 0; i < processors; i++) {
+            int startPos = i * numPairsPerProcessor;
+            if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+            lines.push_back(linePair(startPos, numPairsPerProcessor));
          }
-#endif
          
          
          //get scores for random trees
          for (int j = 0; j < iters; j++) {
-            
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-            if(processors == 1){
-                driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
-            }else{
+//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            //if(processors == 1){
+              //  driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
+           // }else{
                  createProcesses(thisTree,  namesOfGroupCombos, rScores);
-            }
-#else
-            driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
-#endif
+           // }
+//#else
+            //driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
+//#endif
+ 
              
-            if (m->control_pressed) { delete tmap;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
              
-            //report progress
-            //                                 m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
          }
          lines.clear();
          
@@ -773,12 +759,11 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersS
  
  int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
         try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               int process = 1;
+        int process = 1;
                 vector<int> processIDS;
-               
                 EstOutput results;
-               
+        
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                 //loop through and create all the processes you want
                 while (process != processors) {
                         int pid = fork();
@@ -824,9 +809,53 @@ int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > na
                         in.close();
                         m->mothurRemove(s);
                 }
+#else
+        //fill in functions
+        vector<weightedRandomData*> pDataArray;
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1];
+        vector<CountTable*> cts;
+        vector<Tree*> trees;
                 
-               return 0;
-#endif         
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+            CountTable* copyCount = new CountTable();
+            copyCount->copy(ct);
+            Tree* copyTree = new Tree(copyCount);
+            copyTree->getCopy(t);
+            
+            cts.push_back(copyCount);
+            trees.push_back(copyTree);
+            
+            vector< vector<double> > copyScores = rScores;
+            
+            weightedRandomData* tempweighted = new weightedRandomData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot, copyScores);
+                       pDataArray.push_back(tempweighted);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+               }
+               
+               driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (int j = pDataArray[i]->start; j < (pDataArray[i]->start+pDataArray[i]->num); j++) {
+                scores[j].push_back(pDataArray[i]->scores[j][pDataArray[i]->scores[j].size()-1]);
+            }
+                       delete cts[i];
+            delete trees[i];
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+
+               
+#endif 
+        
+        return 0;
         }
         catch(exception& e) {
                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
@@ -837,9 +866,9 @@ int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > na
  /**************************************************************************************************/
  int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
   try {
-               Tree* randT = new Tree(tmap);
+               Tree* randT = new Tree(ct);
       
-        Weighted weighted(tmap, includeRoot);
+        Weighted weighted(includeRoot);
       
                 for (int h = start; h < (start+num); h++) {
         
@@ -949,14 +978,20 @@ void UnifracWeightedCommand::createPhylipFile() {
                 //for each tree
                 for (int i = 0; i < T.size(); i++) { 
                 
-                       string phylipFileName;
-                       if ((outputForm == "lt") || (outputForm == "square")) {
-                               phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
-                               outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
-                       }else { //column
-                               phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
-                               outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
-                       }
+            string phylipFileName;
+                       map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+            variables["[tag]"] = toString(i+1);
+            if ((outputForm == "lt") || (outputForm == "square")) {
+                variables["[tag2]"] = "weighted.phylip";
+                phylipFileName = getOutputFileName("phylip",variables);
+                outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
+            }else { //column
+                variables["[tag2]"] = "weighted.column";
+                phylipFileName = getOutputFileName("column",variables);
+                outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
+            }
+
                         
                         ofstream out;
                         m->openOutputFile(phylipFileName, out);
@@ -1079,46 +1114,6 @@ void UnifracWeightedCommand::calculateFreqsCumuls() {
                 exit(1);
         }
  }
-/*****************************************************************/
-int UnifracWeightedCommand::readNamesFile() {
-       try {
-               m->names.clear();
-               numUniquesInName = 0;
-               
-               ifstream in;
-               m->openInputFile(namefile, in);
-               
-               string first, second;
-               map<string, string>::iterator itNames;
-               
-               while(!in.eof()) {
-                       in >> first >> second; m->gobble(in);
-                       
-                       numUniquesInName++;
-                       
-                       itNames = m->names.find(first);
-                       if (itNames == m->names.end()) {  
-                               m->names[first] = second; 
-                               
-                               //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
-                               vector<string> dupNames;
-                               m->splitAtComma(second, dupNames);
-                               
-                               for (int i = 0; i < dupNames.size(); i++) {     
-                                       nameMap[dupNames[i]] = first; 
-                                       if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
-                               }
-                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
-               }
-               in.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
-               exit(1);
-       }
-}
  /***********************************************************/