#include "unifracweightedcommand.h"
#include "consensus.h"
#include "subsample.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> UnifracWeightedCommand::setParameters(){
try {
- CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
- CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
- CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
- CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
- CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
- CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+ CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
+ CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
+ CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
+ CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
string UnifracWeightedCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
+ helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
}
}
//**********************************************************************************************************************
+string UnifracWeightedCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+ if (type == "weighted") { pattern = "[filename],weighted-[filename],[tag],weighted"; }
+ else if (type == "wsummary") { pattern = "[filename],wsummary"; }
+ else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; }
+ else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; }
+ else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
UnifracWeightedCommand::UnifracWeightedCommand(){
try {
abort = true; calledHelp = true;
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
- m->runParse = true;
- m->clearGroups();
- m->clearAllGroups();
- m->Treenames.clear();
- m->names.clear();
-
//check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
if (treefile == "not open") { treefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
string temp = validParameter.validFile(parameters, "distance", false);
if (temp == "not found") { phylip = false; outputForm = ""; }
else{
+ if (temp=="phylip") { temp = "lt"; }
if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
}
consensus = m->isTrue(temp);
if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
- if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
+ if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
+ else {
+ CountTable testCt;
+ if ((!testCt.testGroups(countfile)) && (subsample)) {
+ m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
+ }
+ }
if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
- if (namefile == "") {
- vector<string> files; files.push_back(treefile);
- parser.getNameFile(files);
- }
+ if (countfile=="") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
+ }
}
m->setTreeFile(treefile);
- readTrees(); if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
-
- sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
+ TreeReader* reader;
+ if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+ else { reader = new TreeReader(treefile, countfile); }
+ T = reader->getTrees();
+ ct = T[0]->getCountTable();
+ delete reader;
+
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+ sumFile = getOutputFileName("wsummary",variables);
m->openOutputFile(sumFile, outSum);
outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
SharedUtil util;
string s; //to make work with setgroups
Groups = m->getGroups();
- vector<string> nameGroups = tmap->getNamesOfGroups();
- util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
+ vector<string> nameGroups = ct->getNamesOfGroups();
+ if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+ util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
m->setGroups(Groups);
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
- Weighted weighted(tmap, includeRoot);
+ Weighted weighted(includeRoot);
int start = time(NULL);
//user has not set size, set size = smallest samples size
if (subsampleSize == -1) {
vector<string> temp; temp.push_back(Groups[0]);
- subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
+ subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
for (int i = 1; i < Groups.size(); i++) {
- temp.clear(); temp.push_back(Groups[i]);
- int thisSize = (tmap->getNamesSeqs(temp)).size();
+ int thisSize = ct->getGroupCount(Groups[i]);
if (thisSize < subsampleSize) { subsampleSize = thisSize; }
}
m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
vector<string> newGroups = Groups;
Groups.clear();
for (int i = 0; i < newGroups.size(); i++) {
- vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
- vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
- int thisSize = thisGroupsSeqs.size();
+ int thisSize = ct->getGroupCount(newGroups[i]);
if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
- else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
+ else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
}
m->setGroups(Groups);
}
//get weighted scores for users trees
for (int i = 0; i < T.size(); i++) {
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
counter = 0;
rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
if (random) {
- output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
- outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
- outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
+ variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+ variables["[tag]"] = toString(i+1);
+ string wFileName = getOutputFileName("weighted", variables);
+ output = new ColumnFile(wFileName, itersString);
+ outputNames.push_back(wFileName); outputTypes["wweighted"].push_back(wFileName);
}
userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//save users score
for (int s=0; s<numComp; s++) {
//subsample loop
vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
-
if (m->control_pressed) { break; }
- //copy to preserve old one - would do this in subsample but tree needs it and memory cleanup becomes messy.
- TreeMap* newTmap = new TreeMap();
- newTmap->getCopy(tmap);
+ //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
+ CountTable* newCt = new CountTable();
+
+ int sampleTime = 0;
+ if (m->debug) { sampleTime = time(NULL); }
+ //uses method of setting groups to doNotIncludeMe
SubSample sample;
- Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
-
+ Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
+
//call new weighted function
vector<double> iterData; iterData.resize(numComp,0);
- Weighted thisWeighted(newTmap, includeRoot);
+ Weighted thisWeighted(includeRoot);
iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
//save data to make ave dist, std dist
calcDistsTotals.push_back(iterData);
- delete newTmap;
+ delete newCt;
delete subSampleTree;
+
+ if((thisIter+1) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); }
}
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
if (consensus) { getConsensusTrees(calcDistsTotals, i); }
}
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (phylip) { createPhylipFile(); }
//clear out users groups
m->clearGroups();
- delete tmap;
+ delete ct;
for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//we need to find the average distance and standard deviation for each groups distance
//finds sum
- vector<double> averages; averages.resize(numComp, 0);
- for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
- for (int i = 0; i < dists[thisIter].size(); i++) {
- averages[i] += dists[thisIter][i];
- }
- }
-
- //finds average.
- for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
+ vector<double> averages = m->getAverages(dists);
//find standard deviation
- vector<double> stdDev; stdDev.resize(numComp, 0);
-
- for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
- for (int j = 0; j < dists[thisIter].size(); j++) {
- stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
- }
- }
- for (int i = 0; i < stdDev.size(); i++) {
- stdDev[i] /= (float) subsampleIters;
- stdDev[i] = sqrt(stdDev[i]);
- }
+ vector<double> stdDev = m->getStandardDeviation(dists, averages);
//make matrix with scores in it
- vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
+ vector< vector<double> > avedists; //avedists.resize(m->getNumGroups());
for (int i = 0; i < m->getNumGroups(); i++) {
- avedists[i].resize(m->getNumGroups(), 0.0);
+ vector<double> temp;
+ for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+ avedists.push_back(temp);
}
//make matrix with scores in it
- vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
+ vector< vector<double> > stddists; //stddists.resize(m->getNumGroups());
for (int i = 0; i < m->getNumGroups(); i++) {
- stddists[i].resize(m->getNumGroups(), 0.0);
+ vector<double> temp;
+ for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+ //stddists[i].resize(m->getNumGroups(), 0.0);
+ stddists.push_back(temp);
}
+
//flip it so you can print it
int count = 0;
}
}
- string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave.dist";
- outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
-
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+ variables["[tag]"] = toString(treeNum+1);
+ variables["[tag2]"] = "weighted.ave";
+ string aveFileName = getOutputFileName("phylip",variables);
+ if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
+ else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
ofstream out;
m->openOutputFile(aveFileName, out);
- string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std.dist";
- outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
-
+ variables["[tag2]"] = "weighted.std";
+ string stdFileName = getOutputFileName("phylip",variables);
+ if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
+ else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
ofstream outStd;
m->openOutputFile(stdFileName, outStd);
//used in tree constructor
m->runParse = false;
- //create treemap class from groupmap for tree class to use
- TreeMap* newTmap = new TreeMap();
- newTmap->makeSim(m->getGroups());
+ ///create treemap class from groupmap for tree class to use
+ CountTable newCt;
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+ for (int i = 0; i < m->getGroups().size(); i++) {
+ nameMap.insert(m->getGroups()[i]);
+ gps.insert(m->getGroups()[i]);
+ groupMap[m->getGroups()[i]] = m->getGroups()[i];
+ }
+ newCt.createTable(nameMap, groupMap, gps);
//clear old tree names if any
m->Treenames.clear();
//fills globaldatas tree names
m->Treenames = m->getGroups();
- vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
+ vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
- if (m->control_pressed) { delete newTmap; return 0; }
+ if (m->control_pressed) { return 0; }
Consensus con;
- Tree* conTree = con.getTree(newTrees, newTmap);
+ Tree* conTree = con.getTree(newTrees);
//create a new filename
- string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
+ variables["[tag]"] = toString(treeNum+1);
+ variables["[tag2]"] = "weighted.cons";
+ string conFile = getOutputFileName("tree",variables);
outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
ofstream outTree;
m->openOutputFile(conFile, outTree);
if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
outTree.close();
- delete newTmap;
return 0;
}
/**************************************************************************************************/
-vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap* mytmap) {
+vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
try {
vector<Tree*> trees;
//create a new filename
- string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre";
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
+ variables["[tag]"] = toString(treeNum+1);
+ variables["[tag2]"] = "weighted.all";
+ string outputFile = getOutputFileName("tree",variables);
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
ofstream outAll;
}
//create tree
- Tree* tempTree = new Tree(mytmap, sims);
+ Tree* tempTree = new Tree(&myct, sims);
tempTree->assembleTree();
trees.push_back(tempTree);
}
/**************************************************************************************************/
-int UnifracWeightedCommand::readTrees() {
- try {
-
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- T = read->getTrees();
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- m->clearGroups();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "UnifracWeightedCommand", "readTrees");
- exit(1);
- }
-}
-/**************************************************************************************************/
-
int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
try {
lines.clear();
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- if(processors != 1){
- int numPairs = namesOfGroupCombos.size();
- int numPairsPerProcessor = numPairs / processors;
+ //breakdown work between processors
+ int numPairs = namesOfGroupCombos.size();
+ int numPairsPerProcessor = numPairs / processors;
- for (int i = 0; i < processors; i++) {
- int startPos = i * numPairsPerProcessor;
- if(i == processors - 1){
- numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
- }
- lines.push_back(linePair(startPos, numPairsPerProcessor));
- }
+ for (int i = 0; i < processors; i++) {
+ int startPos = i * numPairsPerProcessor;
+ if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+ lines.push_back(linePair(startPos, numPairsPerProcessor));
}
-#endif
//get scores for random trees
for (int j = 0; j < iters; j++) {
-
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- if(processors == 1){
- driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
- }else{
+//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ //if(processors == 1){
+ // driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
+ // }else{
createProcesses(thisTree, namesOfGroupCombos, rScores);
- }
-#else
- driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
-#endif
+ // }
+//#else
+ //driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
+//#endif
+
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
- //report progress
- // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
}
lines.clear();
int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- int process = 1;
+ int process = 1;
vector<int> processIDS;
-
EstOutput results;
-
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
in.close();
m->mothurRemove(s);
}
+#else
+ //fill in functions
+ vector<weightedRandomData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+ vector<CountTable*> cts;
+ vector<Tree*> trees;
- return 0;
-#endif
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+ CountTable* copyCount = new CountTable();
+ copyCount->copy(ct);
+ Tree* copyTree = new Tree(copyCount);
+ copyTree->getCopy(t);
+
+ cts.push_back(copyCount);
+ trees.push_back(copyTree);
+
+ vector< vector<double> > copyScores = rScores;
+
+ weightedRandomData* tempweighted = new weightedRandomData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot, copyScores);
+ pDataArray.push_back(tempweighted);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = pDataArray[i]->start; j < (pDataArray[i]->start+pDataArray[i]->num); j++) {
+ scores[j].push_back(pDataArray[i]->scores[j][pDataArray[i]->scores[j].size()-1]);
+ }
+ delete cts[i];
+ delete trees[i];
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+
+#endif
+
+ return 0;
}
catch(exception& e) {
m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
/**************************************************************************************************/
int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
try {
- Tree* randT = new Tree(tmap);
+ Tree* randT = new Tree(ct);
- Weighted weighted(tmap, includeRoot);
+ Weighted weighted(includeRoot);
for (int h = start; h < (start+num); h++) {
//for each tree
for (int i = 0; i < T.size(); i++) {
- string phylipFileName;
- if ((outputForm == "lt") || (outputForm == "square")) {
- phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
- outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
- }else { //column
- phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
- outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
- }
+ string phylipFileName;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getSimpleName(treefile);
+ variables["[tag]"] = toString(i+1);
+ if ((outputForm == "lt") || (outputForm == "square")) {
+ variables["[tag2]"] = "weighted.phylip";
+ phylipFileName = getOutputFileName("phylip",variables);
+ outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
+ }else { //column
+ variables["[tag2]"] = "weighted.column";
+ phylipFileName = getOutputFileName("column",variables);
+ outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
+ }
+
ofstream out;
m->openOutputFile(phylipFileName, out);
exit(1);
}
}
-/*****************************************************************/
-int UnifracWeightedCommand::readNamesFile() {
- try {
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == m->names.end()) {
- m->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = first;
- if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
- exit(1);
- }
-}
/***********************************************************/