From: Sarah Westcott Date: Thu, 5 Apr 2012 13:52:08 +0000 (-0400) Subject: added subsample, tiers and processors to tree.shared command. removed bootstrap.share... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=82723a54e6109e2d46d84c10e87727cebd5a18ea added subsample, tiers and processors to tree.shared command. removed bootstrap.shared command. converted consensus command to a class. fixed a few small things related to binLabels. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index b181f5b..0e170bc 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -77,7 +77,6 @@ A7E9B88C12D37EC400DA6239 /* blastdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66412D37EC400DA6239 /* blastdb.cpp */; }; A7E9B88D12D37EC400DA6239 /* boneh.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66612D37EC400DA6239 /* boneh.cpp */; }; A7E9B88E12D37EC400DA6239 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66812D37EC400DA6239 /* bootstrap.cpp */; }; - A7E9B88F12D37EC400DA6239 /* bootstrapsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66A12D37EC400DA6239 /* bootstrapsharedcommand.cpp */; }; A7E9B89012D37EC400DA6239 /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66C12D37EC400DA6239 /* bstick.cpp */; }; A7E9B89112D37EC400DA6239 /* calculator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66E12D37EC400DA6239 /* calculator.cpp */; }; A7E9B89212D37EC400DA6239 /* canberra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67012D37EC400DA6239 /* canberra.cpp */; }; @@ -112,7 +111,7 @@ A7E9B8B012D37EC400DA6239 /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6AF12D37EC400DA6239 /* commandfactory.cpp */; }; A7E9B8B112D37EC400DA6239 /* commandoptionparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */; }; A7E9B8B212D37EC400DA6239 /* completelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */; }; - A7E9B8B312D37EC400DA6239 /* consensuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */; }; + A7E9B8B312D37EC400DA6239 /* consensus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B512D37EC400DA6239 /* consensus.cpp */; }; A7E9B8B412D37EC400DA6239 /* consensusseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */; }; A7E9B8B512D37EC400DA6239 /* corraxescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */; }; A7E9B8B612D37EC400DA6239 /* coverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6BB12D37EC400DA6239 /* coverage.cpp */; }; @@ -527,8 +526,6 @@ A7E9B66712D37EC400DA6239 /* boneh.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = boneh.h; sourceTree = ""; }; A7E9B66812D37EC400DA6239 /* bootstrap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bootstrap.cpp; sourceTree = ""; }; A7E9B66912D37EC400DA6239 /* bootstrap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bootstrap.h; sourceTree = ""; }; - A7E9B66A12D37EC400DA6239 /* bootstrapsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bootstrapsharedcommand.cpp; sourceTree = ""; }; - A7E9B66B12D37EC400DA6239 /* bootstrapsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bootstrapsharedcommand.h; sourceTree = ""; }; A7E9B66C12D37EC400DA6239 /* bstick.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bstick.cpp; sourceTree = ""; }; A7E9B66D12D37EC400DA6239 /* bstick.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bstick.h; sourceTree = ""; }; A7E9B66E12D37EC400DA6239 /* calculator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = calculator.cpp; sourceTree = ""; }; @@ -600,8 +597,8 @@ A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = commandoptionparser.hpp; sourceTree = SOURCE_ROOT; }; A7E9B6B312D37EC400DA6239 /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = common.h; sourceTree = ""; }; A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = completelinkage.cpp; sourceTree = SOURCE_ROOT; }; - A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consensuscommand.cpp; sourceTree = ""; }; - A7E9B6B612D37EC400DA6239 /* consensuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = consensuscommand.h; sourceTree = ""; }; + A7E9B6B512D37EC400DA6239 /* consensus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consensus.cpp; sourceTree = ""; }; + A7E9B6B612D37EC400DA6239 /* consensus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = consensus.h; sourceTree = ""; }; A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consensusseqscommand.cpp; sourceTree = ""; }; A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = consensusseqscommand.h; sourceTree = ""; }; A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = corraxescommand.cpp; sourceTree = ""; }; @@ -1132,6 +1129,8 @@ A7DAAFA3133A254E003956EB /* commandparameter.h */, A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */, A7E9BA4212D3960D00DA6239 /* containers */, + A7E9B6B612D37EC400DA6239 /* consensus.h */, + A7E9B6B512D37EC400DA6239 /* consensus.cpp */, A7AACFBA132FE008003D6C4D /* currentfile.h */, A7E9B6C912D37EC400DA6239 /* display.h */, A7E9B6D112D37EC400DA6239 /* dlibshuff.cpp */, @@ -1308,8 +1307,6 @@ A71CB15E130B04A2001E7287 /* anosimcommand.cpp */, A7E9B66112D37EC300DA6239 /* binsequencecommand.h */, A7E9B66012D37EC300DA6239 /* binsequencecommand.cpp */, - A7E9B66B12D37EC400DA6239 /* bootstrapsharedcommand.h */, - A7E9B66A12D37EC400DA6239 /* bootstrapsharedcommand.cpp */, A7E9B67312D37EC400DA6239 /* catchallcommand.h */, A7E9B67212D37EC400DA6239 /* catchallcommand.cpp */, A7E9B67B12D37EC400DA6239 /* chimerabellerophoncommand.h */, @@ -1350,8 +1347,6 @@ A7E9B6A812D37EC400DA6239 /* collectcommand.cpp */, A7E9B6AD12D37EC400DA6239 /* collectsharedcommand.h */, A7E9B6AC12D37EC400DA6239 /* collectsharedcommand.cpp */, - A7E9B6B612D37EC400DA6239 /* consensuscommand.h */, - A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */, A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */, A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */, A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */, @@ -1979,7 +1974,6 @@ A7E9B88C12D37EC400DA6239 /* blastdb.cpp in Sources */, A7E9B88D12D37EC400DA6239 /* boneh.cpp in Sources */, A7E9B88E12D37EC400DA6239 /* bootstrap.cpp in Sources */, - A7E9B88F12D37EC400DA6239 /* bootstrapsharedcommand.cpp in Sources */, A7E9B89012D37EC400DA6239 /* bstick.cpp in Sources */, A7E9B89112D37EC400DA6239 /* calculator.cpp in Sources */, A7E9B89212D37EC400DA6239 /* canberra.cpp in Sources */, @@ -2014,7 +2008,7 @@ A7E9B8B012D37EC400DA6239 /* commandfactory.cpp in Sources */, A7E9B8B112D37EC400DA6239 /* commandoptionparser.cpp in Sources */, A7E9B8B212D37EC400DA6239 /* completelinkage.cpp in Sources */, - A7E9B8B312D37EC400DA6239 /* consensuscommand.cpp in Sources */, + A7E9B8B312D37EC400DA6239 /* consensus.cpp in Sources */, A7E9B8B412D37EC400DA6239 /* consensusseqscommand.cpp in Sources */, A7E9B8B512D37EC400DA6239 /* corraxescommand.cpp in Sources */, A7E9B8B612D37EC400DA6239 /* coverage.cpp in Sources */, diff --git a/bootstrapsharedcommand.cpp b/bootstrapsharedcommand.cpp deleted file mode 100644 index de96574..0000000 --- a/bootstrapsharedcommand.cpp +++ /dev/null @@ -1,560 +0,0 @@ -/* - * bootstrapsharedcommand.cpp - * Mothur - * - * Created by Sarah Westcott on 4/16/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "bootstrapsharedcommand.h" -#include "sharedjabund.h" -#include "sharedsorabund.h" -#include "sharedjclass.h" -#include "sharedsorclass.h" -#include "sharedjest.h" -#include "sharedsorest.h" -#include "sharedthetayc.h" -#include "sharedthetan.h" -#include "sharedmorisitahorn.h" -#include "sharedbraycurtis.h" - - -//********************************************************************************************************************** -vector BootSharedCommand::setParameters(){ - try { - CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pcalc("calc", "Multiple", "jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-morisitahorn-braycurtis", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); - - vector myArray; - for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } - return myArray; - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "setParameters"); - exit(1); - } -} -//********************************************************************************************************************** -string BootSharedCommand::getHelpString(){ - try { - string helpString = ""; - helpString += "The bootstrap.shared command parameters are shared, groups, calc, iters and label. shared is required.\n"; - helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n"; - helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like trees created for, and is also separated by dashes.\n"; - helpString += "The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels, iters=yourIters).\n"; - helpString += "Example bootstrap.shared(groups=A-B-C, calc=jabund-sorabund, iters=100).\n"; - helpString += "The default value for groups is all the groups in your groupfile.\n"; - helpString += "The default value for calc is jclass-thetayc. The default for iters is 1000.\n"; - return helpString; - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "getHelpString"); - exit(1); - } -} -//********************************************************************************************************************** -BootSharedCommand::BootSharedCommand(){ - try { - abort = true; calledHelp = true; - setParameters(); - vector tempOutNames; - outputTypes["tree"] = tempOutNames; - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "BootSharedCommand"); - exit(1); - } -} -//********************************************************************************************************************** -BootSharedCommand::BootSharedCommand(string option) { - try { - abort = false; calledHelp = false; - - //allow user to run help - if(option == "help") { help(); abort = true; calledHelp = true; } - else if(option == "citation") { citation(); abort = true; calledHelp = true;} - - else { - vector myArray = setParameters(); - - OptionParser parser(option); - map parameters = parser.getParameters(); - map::iterator it; - - ValidParameters validParameter; - - //check to make sure all parameters are valid for command - for (it = parameters.begin(); it != parameters.end(); it++) { - if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } - } - - //initialize outputTypes - vector tempOutNames; - outputTypes["tree"] = tempOutNames; - - //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); - if (inputDir == "not found"){ inputDir = ""; } - else { - string path; - it = parameters.find("shared"); - //user has given a template file - if(it != parameters.end()){ - path = m->hasPath(it->second); - //if the user has not given a path then, add inputdir. else leave path alone. - if (path == "") { parameters["shared"] = inputDir + it->second; } - } - } - - sharedfile = validParameter.validFile(parameters, "shared", true); - if (sharedfile == "not found") { - sharedfile = m->getSharedFile(); - if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current shared file and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } - } - else if (sharedfile == "not open") { sharedfile = ""; abort = true; } - else { m->setSharedFile(sharedfile); } - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ - outputDir = ""; - outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it - } - - //check for optional parameter and set defaults - // ...at some point should added some additional type checking... - label = validParameter.validFile(parameters, "label", false); - if (label == "not found") { label = ""; } - else { - if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } - else { allLines = 1; } - } - - groups = validParameter.validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; } - else { - m->splitAtDash(groups, Groups); - m->setGroups(Groups); - } - - calc = validParameter.validFile(parameters, "calc", false); - if (calc == "not found") { calc = "jclass-thetayc"; } - else { - if (calc == "default") { calc = "jclass-thetayc"; } - } - m->splitAtDash(calc, Estimators); - if (m->inUsersGroups("citation", Estimators)) { - ValidCalculators validCalc; validCalc.printCitations(Estimators); - //remove citation from list of calcs - for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } - } - - string temp; - temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } - m->mothurConvert(temp, iters); - - if (abort == false) { - - //used in tree constructor - m->runParse = false; - - validCalculator = new ValidCalculators(); - - - //NOTE: if you add a calc to this if statement you must add it to the setParameters function - //or it will not be visible in the gui - int i; - for (i=0; iisValidCalculator("boot", Estimators[i]) == true) { - if (Estimators[i] == "jabund") { - treeCalculators.push_back(new JAbund()); - }else if (Estimators[i] == "sorabund") { - treeCalculators.push_back(new SorAbund()); - }else if (Estimators[i] == "jclass") { - treeCalculators.push_back(new Jclass()); - }else if (Estimators[i] == "sorclass") { - treeCalculators.push_back(new SorClass()); - }else if (Estimators[i] == "jest") { - treeCalculators.push_back(new Jest()); - }else if (Estimators[i] == "sorest") { - treeCalculators.push_back(new SorEst()); - }else if (Estimators[i] == "thetayc") { - treeCalculators.push_back(new ThetaYC()); - }else if (Estimators[i] == "thetan") { - treeCalculators.push_back(new ThetaN()); - }else if (Estimators[i] == "morisitahorn") { - treeCalculators.push_back(new MorHorn()); - }else if (Estimators[i] == "braycurtis") { - treeCalculators.push_back(new BrayCurtis()); - } - } - } - - delete validCalculator; - - ofstream* tempo; - for (int i=0; i < treeCalculators.size(); i++) { - tempo = new ofstream; - out.push_back(tempo); - } - - //make a vector of tree* for each calculator - trees.resize(treeCalculators.size()); - } - } - - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "BootSharedCommand"); - exit(1); - } -} -//********************************************************************************************************************** -BootSharedCommand::~BootSharedCommand(){} -//********************************************************************************************************************** - -int BootSharedCommand::execute(){ - try { - - if (abort == true) { if (calledHelp) { return 0; } return 2; } - - m->mothurOut("bootstrap.shared command is no longer available."); m->mothurOutEndLine(); - /* - //read first line - input = new InputData(sharedfile, "sharedfile"); - order = input->getSharedOrderVector(); - string lastLabel = order->getLabel(); - - //if the users entered no valid calculators don't execute command - if (treeCalculators.size() == 0) { return 0; } - - //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. - set processedLabels; - set userLabels = labels; - - //set users groups - util = new SharedUtil(); - util->setGroups(m->Groups, m->namesOfGroups, "treegroup"); - - numGroups = m->Groups.size(); - - //clear globaldatas old tree names if any - globaldata->Treenames.clear(); - - //fills globaldatas tree names - globaldata->Treenames = m->Groups; - - //create treemap class from groupmap for tree class to use - tmap = new TreeMap(); - tmap->makeSim(globaldata->gGroupmap); - globaldata->gTreemap = tmap; - - while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input;delete util; return 0; } - - if(allLines == 1 || labels.count(order->getLabel()) == 1){ - - m->mothurOut(order->getLabel()); m->mothurOutEndLine(); - int error = process(order); - if (error == 1) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input;delete util; return 0; } - - processedLabels.insert(order->getLabel()); - userLabels.erase(order->getLabel()); - } - - //you have a label the user want that is smaller than this label and the last label has not already been processed - if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { - string saveLabel = order->getLabel(); - - delete order; - order = input->getSharedOrderVector(lastLabel); - m->mothurOut(order->getLabel()); m->mothurOutEndLine(); - int error = process(order); - if (error == 1) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input;delete util; return 0; } - - processedLabels.insert(order->getLabel()); - userLabels.erase(order->getLabel()); - - //restore real lastlabel to save below - order->setLabel(saveLabel); - } - - - lastLabel = order->getLabel(); - - //get next line to process - delete order; - order = input->getSharedOrderVector(); - } - - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input; delete util; return 0; } - - //output error messages about any remaining user labels - set::iterator it; - bool needToRun = false; - for (it = userLabels.begin(); it != userLabels.end(); it++) { - m->mothurOut("Your file does not include the label " + *it); - if (processedLabels.count(lastLabel) != 1) { - m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); - needToRun = true; - }else { - m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); - } - } - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input; delete util; return 0; } - - //run last line if you need to - if (needToRun == true) { - if (order != NULL) { delete order; } - order = input->getSharedOrderVector(lastLabel); - m->mothurOut(order->getLabel()); m->mothurOutEndLine(); - int error = process(order); - if (error == 1) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input; delete util; return 0; } - - delete order; - - } - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear();delete input; delete util; return 0; } - - //reset groups parameter - globaldata->Groups.clear(); - - //set first tree file as new current treefile - string currentTree = ""; - itTypes = outputTypes.find("tree"); - if (itTypes != outputTypes.end()) { - if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); } - } - - delete input; - delete util; - - m->mothurOutEndLine(); - m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } - m->mothurOutEndLine(); -*/ - - return 0; - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "execute"); - exit(1); - } -} -//********************************************************************************************************************** - -int BootSharedCommand::createTree(ostream* out, Tree* t){ - try { - /* - //do merges and create tree structure by setting parents and children - //there are numGroups - 1 merges to do - for (int i = 0; i < (numGroups - 1); i++) { - - if (m->control_pressed) { return 1; } - - float largest = -1000.0; - int row, column; - //find largest value in sims matrix by searching lower triangle - for (int j = 1; j < simMatrix.size(); j++) { - for (int k = 0; k < j; k++) { - if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; } - } - } - - //set non-leaf node info and update leaves to know their parents - //non-leaf - t->tree[numGroups + i].setChildren(index[row], index[column]); - - //parents - t->tree[index[row]].setParent(numGroups + i); - t->tree[index[column]].setParent(numGroups + i); - - //blength = distance / 2; - float blength = ((1.0 - largest) / 2); - - //branchlengths - t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves()); - t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves()); - - //set your length to leaves to your childs length plus branchlength - t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength()); - - - //update index - index[row] = numGroups+i; - index[column] = numGroups+i; - - //zero out highest value that caused the merge. - simMatrix[row][column] = -1000.0; - simMatrix[column][row] = -1000.0; - - //merge values in simsMatrix - for (int n = 0; n < simMatrix.size(); n++) { - //row becomes merge of 2 groups - simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2; - simMatrix[n][row] = simMatrix[row][n]; - //delete column - simMatrix[column][n] = -1000.0; - simMatrix[n][column] = -1000.0; - } - } - - //adjust tree to make sure root to tip length is .5 - int root = t->findRoot(); - t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves())); - - //assemble tree - t->assembleTree(); - - if (m->control_pressed) { return 1; } - - //print newick file - t->print(*out);*/ - - return 0; - - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "createTree"); - exit(1); - } -} -/***********************************************************/ -void BootSharedCommand::printSims() { - try { - m->mothurOut("simsMatrix"); m->mothurOutEndLine(); - for (int k = 0; k < simMatrix.size(); k++) { - for (int n = 0; n < simMatrix.size(); n++) { - m->mothurOut(toString(simMatrix[k][n])); m->mothurOut("\t"); - } - m->mothurOutEndLine(); - } - - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "printSims"); - exit(1); - } -} -/***********************************************************/ -int BootSharedCommand::process(SharedOrderVector* order) { - try{ - /* EstOutput data; - vector subset; - - //open an ostream for each calc to print to - for (int z = 0; z < treeCalculators.size(); z++) { - //create a new filename - outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre"; - m->openOutputFile(outputFile, *(out[z])); - outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); - } - - m->mothurOut("Generating bootstrap trees..."); cout.flush(); - - //create a file for each calculator with the 1000 trees in it. - for (int p = 0; p < iters; p++) { - - if (m->control_pressed) { return 1; } - - util->getSharedVectorswithReplacement(m->Groups, lookup, order); //fills group vectors from order vector. - - - //for each calculator - for(int i = 0 ; i < treeCalculators.size(); i++) { - - if (m->control_pressed) { return 1; } - - //initialize simMatrix - simMatrix.clear(); - simMatrix.resize(numGroups); - for (int o = 0; o < simMatrix.size(); o++) { - for (int j = 0; j < simMatrix.size(); j++) { - simMatrix[o].push_back(0.0); - } - } - - //initialize index - index.clear(); - for (int g = 0; g < numGroups; g++) { index[g] = g; } - - for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare - for (int l = k; l < lookup.size(); l++) { - if (k != l) { //we dont need to similiarity of a groups to itself - subset.clear(); //clear out old pair of sharedrabunds - //add new pair of sharedrabunds - subset.push_back(lookup[k]); subset.push_back(lookup[l]); - - //get estimated similarity between 2 groups - data = treeCalculators[i]->getValues(subset); //saves the calculator outputs - //save values in similarity matrix - simMatrix[k][l] = data[0]; - simMatrix[l][k] = data[0]; - } - } - } - - tempTree = new Tree(); - - if (m->control_pressed) { delete tempTree; return 1; } - - //creates tree from similarity matrix and write out file - createTree(out[i], tempTree); - - if (m->control_pressed) { delete tempTree; return 1; } - - //save trees for consensus command. - trees[i].push_back(tempTree); - } - } - - m->mothurOut("\tDone."); m->mothurOutEndLine(); - - - //create consensus trees for each bootstrapped tree set - for (int k = 0; k < trees.size(); k++) { - - m->mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); m->mothurOutEndLine(); - - if (m->control_pressed) { return 1; } - - //set global data to calc trees - globaldata->gTree = trees[k]; - - string filename = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + treeCalculators[k]->getName() + ".boot" + order->getLabel(); - consensus = new ConcensusCommand(filename); - consensus->execute(); - delete consensus; - - outputNames.push_back(filename + ".cons.pairs"); - outputNames.push_back(filename + ".cons.tre"); - - } - - - - //close ostream for each calc - for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); } - */ - return 0; - - } - catch(exception& e) { - m->errorOut(e, "BootSharedCommand", "process"); - exit(1); - } -} -/***********************************************************/ - - - diff --git a/bootstrapsharedcommand.h b/bootstrapsharedcommand.h deleted file mode 100644 index d5b48e8..0000000 --- a/bootstrapsharedcommand.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef BOOTSTRAPSHAREDCOMMAND_H -#define BOOTSTRAPSHAREDCOMMAND_H - -/* - * bootstrapsharedcommand.h - * Mothur - * - * Created by Sarah Westcott on 4/16/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "command.hpp" -#include "sharedordervector.h" -#include "inputdata.h" -#include "groupmap.h" -#include "validcalculator.h" -#include "tree.h" -#include "treemap.h" -#include "sharedutilities.h" -#include "consensuscommand.h" - -class BootSharedCommand : public Command { - -public: - BootSharedCommand(string); - BootSharedCommand(); - ~BootSharedCommand(); - - vector setParameters(); - string getCommandName() { return "bootstrap.shared"; } - string getCommandCategory() { return "Hidden"; } - string getHelpString(); - string getCitation() { return "no citation"; } - string getDescription() { return "bootstrap.shared"; } - - - int execute(); - void help() { m->mothurOut(getHelpString()); } - -private: - int createTree(ostream*, Tree*); - void printSims(); - int process(SharedOrderVector*); - - SharedUtil* util; - TreeMap* tmap; - Tree* t; - Tree* tempTree; - ConcensusCommand* consensus; - vector< vector > trees; //a vector of trees for each calculator chosen - vector treeCalculators; - vector out; - vector< vector > simMatrix; - map index; //maps row in simMatrix to vector index in the tree - InputData* input; - ValidCalculators* validCalculator; - SharedOrderVector* order; - vector lookup; - - bool abort, allLines; - set labels; //holds labels to be used - string outputFile, calc, groups, label, outputDir, sharedfile; - int numGroups, iters; - vector Estimators, Groups, outputNames; //holds estimators to be used -}; - - -#endif - - diff --git a/commandfactory.cpp b/commandfactory.cpp index b61c2d2..0c9504d 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -36,8 +36,6 @@ #include "binsequencecommand.h" #include "getoturepcommand.h" #include "treegroupscommand.h" -#include "bootstrapsharedcommand.h" -//#include "consensuscommand.h" #include "distancecommand.h" #include "aligncommand.h" #include "matrixoutputcommand.h" @@ -191,8 +189,6 @@ CommandFactory::CommandFactory(){ commands["get.label"] = "get.label"; commands["get.sabund"] = "get.sabund"; commands["get.rabund"] = "get.rabund"; - commands["bootstrap.shared"] = "bootstrap.shared"; - //commands["consensus"] = "consensus"; commands["help"] = "help"; commands["reverse.seqs"] = "reverse.seqs"; commands["trim.seqs"] = "trim.seqs"; @@ -359,8 +355,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "get.oturep") { command = new GetOTURepCommand(optionString); } else if(commandName == "tree.shared") { command = new TreeGroupCommand(optionString); } else if(commandName == "dist.shared") { command = new MatrixOutputCommand(optionString); } - else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(optionString); } - else if(commandName == "consensus") { command = new ConcensusCommand(optionString); } else if(commandName == "dist.seqs") { command = new DistanceCommand(optionString); } else if(commandName == "align.seqs") { command = new AlignCommand(optionString); } else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(optionString); } @@ -510,8 +504,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "get.oturep") { pipecommand = new GetOTURepCommand(optionString); } else if(commandName == "tree.shared") { pipecommand = new TreeGroupCommand(optionString); } else if(commandName == "dist.shared") { pipecommand = new MatrixOutputCommand(optionString); } - else if(commandName == "bootstrap.shared") { pipecommand = new BootSharedCommand(optionString); } - else if(commandName == "consensus") { pipecommand = new ConcensusCommand(optionString); } else if(commandName == "dist.seqs") { pipecommand = new DistanceCommand(optionString); } else if(commandName == "align.seqs") { pipecommand = new AlignCommand(optionString); } else if(commandName == "summary.seqs") { pipecommand = new SeqSummaryCommand(optionString); } @@ -648,8 +640,6 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "get.oturep") { shellcommand = new GetOTURepCommand(); } else if(commandName == "tree.shared") { shellcommand = new TreeGroupCommand(); } else if(commandName == "dist.shared") { shellcommand = new MatrixOutputCommand(); } - else if(commandName == "bootstrap.shared") { shellcommand = new BootSharedCommand(); } - else if(commandName == "consensus") { shellcommand = new ConcensusCommand(); } else if(commandName == "dist.seqs") { shellcommand = new DistanceCommand(); } else if(commandName == "align.seqs") { shellcommand = new AlignCommand(); } else if(commandName == "summary.seqs") { shellcommand = new SeqSummaryCommand(); } diff --git a/consensus.cpp b/consensus.cpp new file mode 100644 index 0000000..04671f8 --- /dev/null +++ b/consensus.cpp @@ -0,0 +1,434 @@ +/* + * consensuscommand.cpp + * Mothur + * + * Created by Sarah Westcott on 4/29/09. + * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. + * + */ + +#include "consensus.h" + +//********************************************************************************************************************** +Tree* Consensus::getTree(vector& t, TreeMap* tmap){ + try { + numNodes = t[0]->getNumNodes(); + numLeaves = t[0]->getNumLeaves(); + numTrees = t.size(); + + //get the possible pairings + getSets(t); + + if (m->control_pressed) { return 0; } + + consensusTree = new Tree(tmap); + + it2 = nodePairs.find(treeSet); + + nodePairsInTree[treeSet] = it2->second; + + //erase treeset because you are adding it + nodePairs.erase(treeSet); + + //set count to numLeaves; + count = numLeaves; + + buildConsensusTree(treeSet); + + if (m->control_pressed) { delete consensusTree; return 0; } + + consensusTree->assembleTree(); + + if (m->control_pressed) { delete consensusTree; return 0; } + + return consensusTree; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Consensus", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +int Consensus::printSetsInfo() { + try { + //open file for pairing not included in the tree + string notIncluded = "cons.pairs"; + ofstream out2; + m->openOutputFile(notIncluded, out2); + + //output species in order + out2 << "Species in Order: " << endl << endl; + for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; } + + //output sets included + out2 << endl << "Sets included in the consensus tree:" << endl << endl; + + if (m->control_pressed) { return 0; } + + vector temp; + for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) { + + if (m->control_pressed) { return 0; } + + //only output pairs not leaves + if (it2->first.size() > 1) { + temp.clear(); + //initialize temp to all "." + temp.resize(treeSet.size(), "."); + + //set the spot in temp that represents it2->first[i] to a "*" + for (int i = 0; i < it2->first.size(); i++) { + //find spot + int index = findSpot(it2->first[i]); + temp[index] = "*"; + //temp[index] = it2->first[i] + " "; + } + + //output temp + for (int j = 0; j < temp.size(); j++) { + out2 << temp[j]; + } + out2 << '\t' << it2->second << endl; + } + } + + //output sets not included + out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl; + for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { + + if (m->control_pressed) { return 0; } + + temp.clear(); + //initialize temp to all "." + temp.resize(treeSet.size(), "."); + + //set the spot in temp that represents it2->first[i] to a "*" + for (int i = 0; i < it2->first.size(); i++) { + //find spot + int index = findSpot(it2->first[i]); + temp[index] = "*"; + } + + //output temp + for (int j = 0; j < temp.size(); j++) { + out2 << temp[j]; + } + out2 << '\t' << it2->second << endl; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Consensus", "printSetsInfo"); + exit(1); + } +} +//********************************************************************************************************************** +int Consensus::buildConsensusTree(vector nodeSet) { + try { + vector leftChildSet; + vector rightChildSet; + + if (m->control_pressed) { return 1; } + + //if you are at a leaf + if (nodeSet.size() == 1) { + //return the vector index of the leaf you are at + return consensusTree->getIndex(nodeSet[0]); + //terminate recursion + }else if (count == numNodes) { return 0; } + else { + //finds best child pair + leftChildSet = getNextAvailableSet(nodeSet, rightChildSet); + int left = buildConsensusTree(leftChildSet); + int right = buildConsensusTree(rightChildSet); + consensusTree->tree[count].setChildren(left, right); + consensusTree->tree[count].setLabel((nodePairsInTree[nodeSet]/(float)numTrees)); + consensusTree->tree[left].setParent(count); + consensusTree->tree[right].setParent(count); + count++; + return (count-1); + } + + } + catch(exception& e) { + m->errorOut(e, "Consensus", "buildConcensusTree"); + exit(1); + } +} + +//********************************************************************************************************************** +int Consensus::getSets(vector& t) { + try { + vector temp; + treeSet.clear(); + + //for each tree add the possible pairs you find + for (int i = 0; i < t.size(); i++) { + + //for each non-leaf node get descendant info. + for (int j = numLeaves; j < numNodes; j++) { + + if (m->control_pressed) { return 1; } + + temp.clear(); + //go through pcounts and pull out descendants + for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) { + temp.push_back(it->first); + } + + //sort temp + sort(temp.begin(), temp.end()); + + it2 = nodePairs.find(temp); + if (it2 != nodePairs.end()) { + nodePairs[temp]++; + }else{ + nodePairs[temp] = 1; + } + } + } + + + //add each leaf to terminate recursion in consensus + //you want the leaves in there but with insignifigant sightings value so it is added last + //for each leaf node get descendant info. + for (int j = 0; j < numLeaves; j++) { + + if (m->control_pressed) { return 1; } + + //only need the first one since leaves have no descendants but themselves + it = t[0]->tree[j].pcount.begin(); + temp.clear(); temp.push_back(it->first); + + //fill treeSet + treeSet.push_back(it->first); + + //add leaf to list but with sighting value less then all non leaf pairs + nodePairs[temp] = 0; + } + + sort(treeSet.begin(), treeSet.end()); + + + map< vector, int> nodePairsCopy = nodePairs; + + + //set initial rating on pairs to sightings + subgroup sightings + while (nodePairsCopy.size() != 0) { + if (m->control_pressed) { return 1; } + + vector smallOne = getSmallest(nodePairsCopy); + + int subgrouprate = getSubgroupRating(smallOne); + + nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate; + + nodePairsCopy.erase(smallOne); + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Consensus", "getSets"); + exit(1); + } +} +//********************************************************************************************************************** +vector Consensus::getSmallest(map< vector, int> nodes) { + try{ + vector smallest = nodes.begin()->first; + int smallsize = smallest.size(); + + for(it2 = nodes.begin(); it2 != nodes.end(); it2++) { + if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; } + } + + return smallest; + } + catch(exception& e) { + m->errorOut(e, "Consensus", "getSmallest"); + exit(1); + } +} + +//********************************************************************************************************************** +vector Consensus::getNextAvailableSet(vector bigset, vector& rest) { + try { +//cout << "new call " << endl << endl << endl; + vector largest; largest.clear(); + rest.clear(); + + //if you are just 2 groups + if (bigset.size() == 2) { + rest.push_back(bigset[0]); + largest.push_back(bigset[1]); + }else{ + rest = bestSplit[bigset][0]; + largest = bestSplit[bigset][1]; + } + + + //save for printing out later and for branch lengths + nodePairsInTree[rest] = nodePairs[rest]; + + //delete whatever set you return because it is no longer available + nodePairs.erase(rest); + + //save for printing out later and for branch lengths + nodePairsInTree[largest] = nodePairs[largest]; + + //delete whatever set you return because it is no longer available + nodePairs.erase(largest); + + return largest; + + } + catch(exception& e) { + m->errorOut(e, "Consensus", "getNextAvailableSet"); + exit(1); + } +} + +/**********************************************************************************************************************/ +int Consensus::getSubgroupRating(vector group) { + try { + map< vector, int>::iterator ittemp; + map< vector< vector > , int >::iterator it3; + int rate = 0; + + // ***********************************************************************************// + //1. this function must be called passing it littlest sets to biggest + // since it the rating is made from your sighting plus you best splits rating + //2. it saves the top pair to use later + // ***********************************************************************************// + + + if (group.size() < 3) { return rate; } + + map< vector, int> possiblePairing; //this is all the subsets of group + + //go through the sets + for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { + //are you a subset of bigset, then save in possiblePairings + if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; } + } + + map< vector< vector > , int > rating; + + while (possiblePairing.size() != 0) { + + it2 = possiblePairing.begin(); + vector temprest = getRestSet(group, it2->first); + + //is the rest a set available in possiblePairings + ittemp = possiblePairing.find(temprest); + if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map + vector< vector > temprate; + temprate.push_back(it2->first); temprate.push_back(temprest); + + rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]); + + //erase so you dont add 1,2 and 2,1. + possiblePairing.erase(temprest); + } + + possiblePairing.erase(it2); + } + + + it3 = rating.begin(); + rate = it3->second; + vector< vector > topPair = it3->first; + + //choose the split with the best rating + for (it3 = rating.begin(); it3 != rating.end(); it3++) { + + if (it3->second > rate) { rate = it3->second; topPair = it3->first; } + } + + bestSplit[group] = topPair; + + return rate; + } + catch(exception& e) { + m->errorOut(e, "Consensus", "getSubgroupRating"); + exit(1); + } +} + +//********************************************************************************************************************** +vector Consensus::getRestSet(vector bigset, vector subset) { + try { + vector rest; + + for (int i = 0; i < bigset.size(); i++) { + bool inSubset = false; + for (int j = 0; j < subset.size(); j++) { + if (bigset[i] == subset[j]) { inSubset = true; break; } + } + + //its not in the subset so put it in the rest + if (inSubset == false) { rest.push_back(bigset[i]); } + } + + return rest; + + } + catch(exception& e) { + m->errorOut(e, "Consensus", "getRestSet"); + exit(1); + } +} + +//********************************************************************************************************************** +bool Consensus::isSubset(vector bigset, vector subset) { + try { + + + if (subset.size() > bigset.size()) { return false; } + + //check if each guy in suset is also in bigset + for (int i = 0; i < subset.size(); i++) { + bool match = false; + for (int j = 0; j < bigset.size(); j++) { + if (subset[i] == bigset[j]) { match = true; break; } + } + + //you have a guy in subset that had no match in bigset + if (match == false) { return false; } + } + + return true; + + } + catch(exception& e) { + m->errorOut(e, "Consensus", "isSubset"); + exit(1); + } +} +//********************************************************************************************************************** +int Consensus::findSpot(string node) { + try { + int spot; + + //check if each guy in suset is also in bigset + for (int i = 0; i < treeSet.size(); i++) { + if (treeSet[i] == node) { spot = i; break; } + } + + return spot; + + } + catch(exception& e) { + m->errorOut(e, "Consensus", "findSpot"); + exit(1); + } +} +//********************************************************************************************************************** + + + + diff --git a/consensus.h b/consensus.h new file mode 100644 index 0000000..630ee8d --- /dev/null +++ b/consensus.h @@ -0,0 +1,61 @@ +#ifndef CONCENSUS_H +#define CONCENSUS_H +/* + * consensus.h + * Mothur + * + * Created by Sarah Westcott on 4/29/09. + * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved. + * + */ + + +#include "tree.h" +#include "treemap.h" + +//NOTE: This class assumes all leaf nodes have 1 member. +// Mothur does allow for names files with trees which would make a tree with multiple members at one leaf. +// This class is currently only called internally by commands that have leaf node containing only 1 member. +// But if in the future, this changes things will need to be reworked in getSets and buildConsensus. + + +class Consensus { + +public: + Consensus() { m = MothurOut::getInstance(); } + ~Consensus() {} + + Tree* getTree(vector&, TreeMap*); + +private: + MothurOut* m; + Tree* consensusTree; + + vector treeSet; //set containing all members of the tree to start recursion. filled in getSets(). + map< vector, int > nodePairs; //, vector< vector > > bestSplit; //maps a group to its best split + map< vector, int > nodePairsInitialRate; + map< vector, int > nodePairsInTree; + map::iterator it; + map< vector, int>::iterator it2; + string outputFile, notIncluded, filename; + int numNodes, numLeaves, count, numTrees; //count is the next available spot in the tree vector + vector outputNames; + + int getSets(vector&); + int getSubgroupRating(vector); + vector getSmallest(map< vector, int>); + vector getNextAvailableSet(vector, vector&); + vector getRestSet(vector, vector); + bool isSubset(vector, vector); + int findSpot(string); + int buildConsensusTree(vector); + int printSetsInfo(); + +}; + +#endif + diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index 73c38f7..0bfb009 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -65,7 +65,6 @@ MatrixOutputCommand::MatrixOutputCommand(){ setParameters(); vector tempOutNames; outputTypes["phylip"] = tempOutNames; - outputTypes["subsample"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand"); @@ -100,7 +99,6 @@ MatrixOutputCommand::MatrixOutputCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["phylip"] = tempOutNames; - outputTypes["subsample"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -459,10 +457,7 @@ void MatrixOutputCommand::printSims(ostream& out, vector< vector >& simM /***********************************************************/ int MatrixOutputCommand::process(vector thisLookup){ try { - EstOutput data; - vector subset; vector< vector< vector > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files - vector< vector > calcDists; calcDists.resize(matrixCalculators.size()); for (int thisIter = 0; thisIter < iters; thisIter++) { diff --git a/otuassociationcommand.cpp b/otuassociationcommand.cpp index b520ca6..eeefb41 100644 --- a/otuassociationcommand.cpp +++ b/otuassociationcommand.cpp @@ -308,7 +308,8 @@ int OTUAssociationCommand::process(vector& lookup){ else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; } - out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; + if (m->binLabelsInFile.size() != 0) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; } + else { out << i+1 << '\t' << k+1 << '\t' << coef << '\t' << sig << endl; } } } @@ -436,7 +437,8 @@ int OTUAssociationCommand::process(vector& lookup){ else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); } else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; } - out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << coef << '\t' << sig << endl; + if (m->binLabelsInFile.size() != 0) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; } + else { out << i+1 << '\t' << k+1 << '\t' << coef << '\t' << sig << endl; } } } diff --git a/pcrseqscommand.h b/pcrseqscommand.h index 03092bc..45ce6f3 100644 --- a/pcrseqscommand.h +++ b/pcrseqscommand.h @@ -315,11 +315,11 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; }else { if (pDataArray->keepdots) { - currSeq.filterToPos(start); - currSeq.filterFromPos(end); + currSeq.filterToPos(pDataArray->start); + currSeq.filterFromPos(pDataArray->end); }else { - string seqString = currSeq.getAligned().substr(0, end); - seqString = seqString.substr(start); + string seqString = currSeq.getAligned().substr(0, pDataArray->end); + seqString = seqString.substr(pDataArray->start); currSeq.setAligned(seqString); } } @@ -331,17 +331,17 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ if (pDataArray->end != -1) { if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; } else { - if (pDataArray->keepdots) { currSeq.filterFromPos(end); } + if (pDataArray->keepdots) { currSeq.filterFromPos(pDataArray->end); } else { - string seqString = currSeq.getAligned().substr(0, end); + string seqString = currSeq.getAligned().substr(0, pDataArray->end); currSeq.setAligned(seqString); } } } if (pDataArray->start != -1) { - if (pDataArray->keepdots) { currSeq.filterToPos(start); } + if (pDataArray->keepdots) { currSeq.filterToPos(pDataArray->start); } else { - string seqString = currSeq.getAligned().substr(start); + string seqString = currSeq.getAligned().substr(pDataArray->start); currSeq.setAligned(seqString); } } diff --git a/sharedrabundfloatvector.cpp b/sharedrabundfloatvector.cpp index 8abf920..9f6c156 100644 --- a/sharedrabundfloatvector.cpp +++ b/sharedrabundfloatvector.cpp @@ -76,7 +76,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma }else { label = m->saveNextLabel; } //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling - m->currentBinLabels = m-> binLabelsInFile; + m->currentBinLabels = m->binLabelsInFile; //read in first row since you know there is at least 1 group. f >> groupN >> num; @@ -108,6 +108,8 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma //read the rest of the groups info in while ((nextLabel == holdLabel) && (f.eof() != true)) { f >> groupN >> num; + + if (num != 1000) { break; } count++; allGroups.push_back(groupN); diff --git a/sharedrabundfloatvector.h b/sharedrabundfloatvector.h index 12df9e8..965b03a 100644 --- a/sharedrabundfloatvector.h +++ b/sharedrabundfloatvector.h @@ -23,7 +23,6 @@ An individual which knows the OTU from which it came, the group it is in and its abundance. */ -class GlobalData; class SharedRAbundFloatVector : public DataVector { diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index 8c5aa4b..0150a7a 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -8,47 +8,8 @@ */ #include "treegroupscommand.h" -#include "sharedsobscollectsummary.h" -#include "sharedchao1.h" -#include "sharedace.h" -#include "sharednseqs.h" -#include "sharedjabund.h" -#include "sharedsorabund.h" -#include "sharedjclass.h" -#include "sharedsorclass.h" -#include "sharedjest.h" -#include "sharedsorest.h" -#include "sharedthetayc.h" -#include "sharedthetan.h" -#include "sharedkstest.h" -#include "whittaker.h" -#include "sharedochiai.h" -#include "sharedanderbergs.h" -#include "sharedkulczynski.h" -#include "sharedkulczynskicody.h" -#include "sharedlennon.h" -#include "sharedmorisitahorn.h" -#include "sharedbraycurtis.h" -#include "sharedjackknife.h" -#include "whittaker.h" -#include "odum.h" -#include "canberra.h" -#include "structeuclidean.h" -#include "structchord.h" -#include "hellinger.h" -#include "manhattan.h" -#include "structpearson.h" -#include "soergel.h" -#include "spearman.h" -#include "structkulczynski.h" -#include "structchi2.h" -#include "speciesprofile.h" -#include "hamming.h" -#include "gower.h" -#include "memchi2.h" -#include "memchord.h" -#include "memeuclidean.h" -#include "mempearson.h" +#include "subsample.h" +#include "consensus.h" //********************************************************************************************************************** vector TreeGroupCommand::setParameters(){ @@ -56,13 +17,17 @@ vector TreeGroupCommand::setParameters(){ CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared); CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname); - CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn); - CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff); + CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn); + CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); + CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample); + CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc); - CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput); + + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); + CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -81,12 +46,14 @@ string TreeGroupCommand::getHelpString(){ string helpString = ""; ValidCalculators validCalculator; helpString += "The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n"; - helpString += "The tree.shared command parameters are shared, groups, calc, phylip, column, name, cutoff, precision and label.\n"; + helpString += "The tree.shared command parameters are shared, groups, calc, phylip, column, name, cutoff, precision, processors, subsample, iters and label.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n"; helpString += "The group names are separated by dashes. The label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n"; helpString += "The phylip or column parameter are required if you do not provide a sharedfile, and only one may be used. If you use a column file the name filename is required. \n"; helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n"; helpString += "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n"; + helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n"; + helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a shared file.\n"; helpString += "Example tree.shared(groups=A-B-C, calc=jabund-sorabund).\n"; helpString += "The default value for groups is all the groups in your groupfile.\n"; helpString += "The default value for calc is jclass-thetayc.\n"; @@ -263,6 +230,24 @@ TreeGroupCommand::TreeGroupCommand(string option) { m->mothurConvert(temp, cutoff); cutoff += (5 / (precision * 10.0)); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } + m->setProcessors(temp); + m->mothurConvert(temp, processors); + + temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + m->mothurConvert(temp, iters); + + temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; } + if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; } + else { + if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later + else { subsample = false; } + } + + if (subsample == false) { iters = 1; } + + if (subsample && (format != "sharedfile")) { m->mothurOut("[ERROR]: the subsample parameter can only be used with a shared file.\n"); abort=true; } + //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; @@ -452,15 +437,17 @@ int TreeGroupCommand::execute(){ if (m->control_pressed) { return 0; } - makeSimsDist(); + vector< vector > matrix = makeSimsDist(); if (m->control_pressed) { return 0; } //create a new filename - outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre"; outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); - createTree(); + Tree* newTree = createTree(matrix); + + if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; } if (m->control_pressed) { return 0; } @@ -492,17 +479,21 @@ int TreeGroupCommand::execute(){ } //********************************************************************************************************************** -int TreeGroupCommand::createTree(){ +Tree* TreeGroupCommand::createTree(vector< vector >& simMatrix){ try { //create tree t = new Tree(tmap); + + //initialize index + map index; //maps row in simMatrix to vector index in the tree + for (int g = 0; g < numGroups; g++) { index[g] = g; } //do merges and create tree structure by setting parents and children //there are numGroups - 1 merges to do for (int i = 0; i < (numGroups - 1); i++) { float largest = -1000.0; - if (m->control_pressed) { delete t; return 1; } + if (m->control_pressed) { delete t; t = NULL; return t; } int row, column; //find largest value in sims matrix by searching lower triangle @@ -557,17 +548,9 @@ int TreeGroupCommand::createTree(){ //assemble tree t->assembleTree(); - if (m->control_pressed) { delete t; return 1; } - - //print newick file - t->createNewickFile(outputFile); - - //delete tree - delete t; + if (m->control_pressed) { delete t; t = NULL; return t; } - if (m->control_pressed) { m->mothurRemove(outputFile); outputNames.pop_back(); return 1; } - - return 0; + return t; } catch(exception& e) { @@ -576,16 +559,28 @@ int TreeGroupCommand::createTree(){ } } /***********************************************************/ -void TreeGroupCommand::printSims(ostream& out) { +int TreeGroupCommand::writeTree(string out, Tree* T) { try { - //output column headers - //out << '\t'; - //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; } - //out << endl; + //print newick file + t->createNewickFile(out); + if (m->control_pressed) { m->mothurRemove(out); outputNames.pop_back(); return 1; } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "TreeGroupCommand", "printSims"); + exit(1); + } +} + +/***********************************************************/ +void TreeGroupCommand::printSims(ostream& out, vector< vector >& simMatrix) { + try { - for (int m = 0; m < simMatrix.size(); m++) { + for (int m = 0; m < simMatrix.size(); m++) { //out << lookup[m]->getGroup() << '\t'; for (int n = 0; n < simMatrix.size(); n++) { out << simMatrix[m][n] << '\t'; @@ -600,16 +595,12 @@ void TreeGroupCommand::printSims(ostream& out) { } } /***********************************************************/ -int TreeGroupCommand::makeSimsDist() { +vector< vector > TreeGroupCommand::makeSimsDist() { try { numGroups = list->size(); - //initialize index - index.clear(); - for (int g = 0; g < numGroups; g++) { index[g] = g; } - //initialize simMatrix - simMatrix.clear(); + vector< vector > simMatrix; simMatrix.resize(numGroups); for (int k = 0; k < simMatrix.size(); k++) { for (int j = 0; j < simMatrix.size(); j++) { @@ -624,11 +615,11 @@ int TreeGroupCommand::makeSimsDist() { simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0); simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0); - if (m->control_pressed) { return 1; } + if (m->control_pressed) { return simMatrix; } } - return 0; + return simMatrix; } catch(exception& e) { m->errorOut(e, "TreeGroupCommand", "makeSimsDist"); @@ -639,6 +630,40 @@ int TreeGroupCommand::makeSimsDist() { /***********************************************************/ int TreeGroupCommand::makeSimsShared() { try { + + numGroups = lookup.size(); + lines.resize(processors); + for (int i = 0; i < processors; i++) { + lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups); + lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups); + } + + if (subsample) { + if (subsampleSize == -1) { //user has not set size, set size = smallest samples size + subsampleSize = lookup[0]->getNumSeqs(); + for (int i = 1; i < lookup.size(); i++) { + int thisSize = lookup[i]->getNumSeqs(); + + if (thisSize < subsampleSize) { subsampleSize = thisSize; } + } + }else { + m->clearGroups(); + Groups.clear(); + vector temp; + for (int i = 0; i < lookup.size(); i++) { + if (lookup[i]->getNumSeqs() < subsampleSize) { + m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); + delete lookup[i]; + }else { + Groups.push_back(lookup[i]->getGroup()); + temp.push_back(lookup[i]); + } + } + lookup = temp; + m->setGroups(Groups); + } + } + set processedLabels; set userLabels = labels; @@ -715,84 +740,355 @@ int TreeGroupCommand::makeSimsShared() { /***********************************************************/ int TreeGroupCommand::process(vector thisLookup) { try{ - EstOutput data; - vector subset; - numGroups = thisLookup.size(); - - //for each calculator - for(int i = 0 ; i < treeCalculators.size(); i++) { - //initialize simMatrix - simMatrix.clear(); - simMatrix.resize(numGroups); - for (int k = 0; k < simMatrix.size(); k++) { - for (int j = 0; j < simMatrix.size(); j++) { - simMatrix[k].push_back(0.0); - } - } + vector< vector< vector > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files + vector< vector > calcDists; calcDists.resize(treeCalculators.size()); + + for (int thisIter = 0; thisIter < iters; thisIter++) { + + vector thisItersLookup = thisLookup; + + if (subsample) { + SubSample sample; + vector tempLabels; //dont need since we arent printing the sampled sharedRabunds + + //make copy of lookup so we don't get access violations + vector newLookup; + for (int k = 0; k < thisItersLookup.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisItersLookup[k]->getLabel()); + temp->setGroup(thisItersLookup[k]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } + } + + tempLabels = sample.getSample(newLookup, subsampleSize); + thisItersLookup = newLookup; + } + + if(processors == 1){ + driver(thisItersLookup, 0, numGroups, calcDists); + }else{ + int process = 1; + vector processIDS; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); + process++; + }else if (pid == 0){ + + driver(thisItersLookup, lines[process].start, lines[process].end, calcDists); + + string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist"; + ofstream outtemp; + m->openOutputFile(tempdistFileName, outtemp); + + for (int i = 0; i < calcDists.size(); i++) { + outtemp << calcDists[i].size() << endl; + + for (int j = 0; j < calcDists[i].size(); j++) { + outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl; + } + } + outtemp.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //parent do your part + driver(thisItersLookup, lines[0].start, lines[0].end, calcDists); + + //force parent to wait until all the processes are done + for (int i = 0; i < processIDS.size(); i++) { + int temp = processIDS[i]; + wait(&temp); + } + + for (int i = 0; i < processIDS.size(); i++) { + string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) + ".dist"; + ifstream intemp; + m->openInputFile(tempdistFileName, intemp); + + for (int k = 0; k < calcDists.size(); k++) { + int size = 0; + intemp >> size; m->gobble(intemp); + + for (int j = 0; j < size; j++) { + int seq1 = 0; + int seq2 = 0; + float dist = 1.0; + + intemp >> seq1 >> seq2 >> dist; m->gobble(intemp); + + seqDist tempDist(seq1, seq2, dist); + calcDists[k].push_back(tempDist); + } + } + intemp.close(); + m->mothurRemove(tempdistFileName); + } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the treeSharedData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to pass results vectors. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i newLookup; + for (int k = 0; k < thisItersLookup.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisItersLookup[k]->getLabel()); + temp->setGroup(thisItersLookup[k]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } + } + + // Allocate memory for thread data. + treeSharedData* tempSum = new treeSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup); + pDataArray.push_back(tempSum); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyTreeSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + //parent do your part + driver(thisItersLookup, lines[0].start, lines[0].end, calcDists); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; } + + for (int k = 0; k < calcDists.size(); k++) { + int size = pDataArray[i]->calcDists[k].size(); + for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); } + } + + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + +#endif + } + + calcDistsTotals.push_back(calcDists); + + if (subsample) { + + //clean up memory + for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; } + thisItersLookup.clear(); + for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } + } + } - //initialize index - index.clear(); - for (int g = 0; g < numGroups; g++) { index[g] = g; } - - //create a new filename - outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre"; - outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); - - for (int k = 0; k < thisLookup.size(); k++) { - for (int l = k; l < thisLookup.size(); l++) { - if (k != l) { //we dont need to similiarity of a groups to itself - //get estimated similarity between 2 groups - - subset.clear(); //clear out old pair of sharedrabunds - //add new pair of sharedrabunds - subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); - - //if this calc needs all groups to calculate the pair load all groups - if (treeCalculators[i]->getNeedsAll()) { - //load subset with rest of lookup for those calcs that need everyone to calc for a pair - for (int w = 0; w < thisLookup.size(); w++) { - if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); } - } - } - - data = treeCalculators[i]->getValues(subset); //saves the calculator outputs - //cout << thisLookup[k]->getGroup() << '\t' << thisLookup[l]->getGroup() << '\t' << (1.0 - data[0]) << endl; - if (m->control_pressed) { return 1; } - - //save values in similarity matrix - simMatrix[k][l] = -(data[0]-1.0); - simMatrix[l][k] = -(data[0]-1.0); - } - } - } + if (iters != 1) { + //we need to find the average distance and standard deviation for each groups distance + + vector< vector > calcAverages; calcAverages.resize(treeCalculators.size()); + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + calcAverages[i].resize(calcDistsTotals[0][i].size()); + + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].seq1 = calcDists[i][j].seq1; + calcAverages[i][j].seq2 = calcDists[i][j].seq2; + calcAverages[i][j].dist = 0.0; + } + } + + for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator + for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist; + } + } + } + + for (int i = 0; i < calcAverages.size(); i++) { //finds average. + for (int j = 0; j < calcAverages[i].size(); j++) { + calcAverages[i][j].dist /= (float) iters; + } + } + + //create average tree for each calc + for (int i = 0; i < calcDists.size(); i++) { + vector< vector > matrix; //square matrix to represent the distance + matrix.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } + + for (int j = 0; j < calcAverages[i].size(); j++) { + int row = calcAverages[i][j].seq1; + int column = calcAverages[i][j].seq2; + float dist = calcAverages[i][j].dist; + + matrix[row][column] = dist; + matrix[column][row] = dist; + } + + //create a new filename + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave.tre"; + outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); + + //creates tree from similarity matrix and write out file + Tree* newTree = createTree(matrix); + if (newTree != NULL) { writeTree(outputFile, newTree); } + } + + //create all trees for each calc and find their consensus tree + for (int i = 0; i < calcDists.size(); i++) { + if (m->control_pressed) { break; } + + //create a new filename + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all.tre"; + outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); + + ofstream outAll; + m->openOutputFile(outputFile, outAll); + + vector trees; + for (int myIter = 0; myIter < iters; myIter++) { + + if(m->control_pressed) { break; } + + //initialize matrix + vector< vector > matrix; //square matrix to represent the distance + matrix.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } + + for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) { + int row = calcDistsTotals[myIter][i][j].seq1; + int column = calcDistsTotals[myIter][i][j].seq2; + double dist = calcDistsTotals[myIter][i][j].dist; + + matrix[row][column] = dist; + matrix[column][row] = dist; + } + + //creates tree from similarity matrix and write out file + Tree* newTree = createTree(matrix); + if (newTree != NULL) { + newTree->print(outAll); + trees.push_back(newTree); + } + } + outAll.close(); + if (m->control_pressed) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } } + + Consensus consensus; + Tree* conTree = consensus.getTree(trees, tmap); + + //create a new filename + string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons.tre"; + outputNames.push_back(conFile); outputTypes["tree"].push_back(outputFile); + ofstream outTree; + m->openOutputFile(conFile, outTree); + + if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; } + } + + }else { + + for (int i = 0; i < calcDists.size(); i++) { + if (m->control_pressed) { break; } + + //initialize matrix + vector< vector > matrix; //square matrix to represent the distance + matrix.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } + + for (int j = 0; j < calcDists[i].size(); j++) { + int row = calcDists[i][j].seq1; + int column = calcDists[i][j].seq2; + double dist = calcDists[i][j].dist; + + matrix[row][column] = dist; + matrix[column][row] = dist; + } + + //create a new filename + string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre"; + outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); + + //creates tree from similarity matrix and write out file + Tree* newTree = createTree(matrix); + if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; } + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "TreeGroupCommand", "process"); + exit(1); + } +} +/**************************************************************************************************/ +int TreeGroupCommand::driver(vector thisLookup, int start, int end, vector< vector >& calcDists) { + try { + vector subset; + for (int k = start; k < end; k++) { // pass cdd each set of groups to compare + + for (int l = 0; l < k; l++) { + + if (k != l) { //we dont need to similiarity of a groups to itself + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); - //createdistance file from simMatrix - /*string o = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist"; - ofstream outDist; - m->openOutputFile(o, outDist); - outDist << simMatrix.size() << endl; - for (int k = 0; k < simMatrix.size(); k++) { - outDist << thisLookup[k]->getGroup() << '\t'; - for (int l = 0; l < k; l++) { - outDist << (1.0-simMatrix[k][l]) << '\t'; + for(int i=0;igetNeedsAll()) { + //load subset with rest of lookup for those calcs that need everyone to calc for a pair + for (int w = 0; w < thisLookup.size(); w++) { + if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); } + } } - outDist << endl; + + vector tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs + + if (m->control_pressed) { return 1; } + + seqDist temp(l, k, -(tempdata[0]-1.0)); + calcDists[i].push_back(temp); } - outDist.close();*/ - - - if (m->control_pressed) { return 1; } - //creates tree from similarity matrix and write out file - createTree(); - - if (m->control_pressed) { return 1; } } - - return 0; - + } + } + + return 0; } catch(exception& e) { - m->errorOut(e, "TreeGroupCommand", "process"); + m->errorOut(e, "TreeGroupCommand", "driver"); exit(1); } } diff --git a/treegroupscommand.h b/treegroupscommand.h index d2d6c72..d3c1b3e 100644 --- a/treegroupscommand.h +++ b/treegroupscommand.h @@ -20,6 +20,48 @@ #include "readcolumn.h" #include "readphylip.h" #include "sparsematrix.hpp" +#include "sharedsobscollectsummary.h" +#include "sharedchao1.h" +#include "sharedace.h" +#include "sharednseqs.h" +#include "sharedjabund.h" +#include "sharedsorabund.h" +#include "sharedjclass.h" +#include "sharedsorclass.h" +#include "sharedjest.h" +#include "sharedsorest.h" +#include "sharedthetayc.h" +#include "sharedthetan.h" +#include "sharedkstest.h" +#include "whittaker.h" +#include "sharedochiai.h" +#include "sharedanderbergs.h" +#include "sharedkulczynski.h" +#include "sharedkulczynskicody.h" +#include "sharedlennon.h" +#include "sharedmorisitahorn.h" +#include "sharedbraycurtis.h" +#include "sharedjackknife.h" +#include "whittaker.h" +#include "odum.h" +#include "canberra.h" +#include "structeuclidean.h" +#include "structchord.h" +#include "hellinger.h" +#include "manhattan.h" +#include "structpearson.h" +#include "soergel.h" +#include "spearman.h" +#include "structkulczynski.h" +#include "structchi2.h" +#include "speciesprofile.h" +#include "hamming.h" +#include "gower.h" +#include "memchi2.h" +#include "memchord.h" +#include "memeuclidean.h" +#include "mempearson.h" + /* This command create a tree file for each similarity calculator at distance level, using various calculators to find the similiarity between groups. @@ -48,10 +90,19 @@ public: void help() { m->mothurOut(getHelpString()); } private: - int createTree(); - void printSims(ostream&); + + struct linePair { + int start; + int end; + }; + vector lines; + + Tree* createTree(vector< vector >&); + void printSims(ostream&, vector< vector >&); int makeSimsShared(); - int makeSimsDist(); + vector< vector > makeSimsDist(); + int writeTree(string, Tree*); + int driver(vector, int, int, vector< vector >&); ReadMatrix* readMatrix; SparseMatrix* matrix; @@ -59,18 +110,16 @@ private: ListVector* list; TreeMap* tmap; Tree* t; + InputData* input; vector treeCalculators; - vector< vector > simMatrix; - map index; //maps row in simMatrix to vector index in the tree - InputData* input; vector lookup; string lastLabel; - string format, outputFile, groupNames, filename, sharedfile, inputfile; - int numGroups; + string format, groupNames, filename, sharedfile, inputfile; + int numGroups, subsampleSize, iters, processors; ofstream out; float precision, cutoff; - bool abort, allLines; + bool abort, allLines, subsample; set labels; //holds labels to be used string phylipfile, columnfile, namefile, calc, groups, label, outputDir; vector Estimators, Groups, outputNames; //holds estimators to be used @@ -81,7 +130,169 @@ private: }; + +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct treeSharedData { + vector thisLookup; + vector< vector > calcDists; + vector Estimators; + unsigned long long start; + unsigned long long end; + MothurOut* m; + + treeSharedData(){} + treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector est, vector lu) { + m = mout; + start = st; + end = en; + Estimators = est; + thisLookup = lu; + } +}; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ + treeSharedData* pDataArray; + pDataArray = (treeSharedData*)lpParam; + try { + + vector treeCalculators; + ValidCalculators validCalculator; + for (int i=0; iEstimators.size(); i++) { + if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { + if (pDataArray->Estimators[i] == "sharedsobs") { + treeCalculators.push_back(new SharedSobsCS()); + }else if (pDataArray->Estimators[i] == "sharedchao") { + treeCalculators.push_back(new SharedChao1()); + }else if (pDataArray->Estimators[i] == "sharedace") { + treeCalculators.push_back(new SharedAce()); + }else if (pDataArray->Estimators[i] == "jabund") { + treeCalculators.push_back(new JAbund()); + }else if (pDataArray->Estimators[i] == "sorabund") { + treeCalculators.push_back(new SorAbund()); + }else if (pDataArray->Estimators[i] == "jclass") { + treeCalculators.push_back(new Jclass()); + }else if (pDataArray->Estimators[i] == "sorclass") { + treeCalculators.push_back(new SorClass()); + }else if (pDataArray->Estimators[i] == "jest") { + treeCalculators.push_back(new Jest()); + }else if (pDataArray->Estimators[i] == "sorest") { + treeCalculators.push_back(new SorEst()); + }else if (pDataArray->Estimators[i] == "thetayc") { + treeCalculators.push_back(new ThetaYC()); + }else if (pDataArray->Estimators[i] == "thetan") { + treeCalculators.push_back(new ThetaN()); + }else if (pDataArray->Estimators[i] == "kstest") { + treeCalculators.push_back(new KSTest()); + }else if (pDataArray->Estimators[i] == "sharednseqs") { + treeCalculators.push_back(new SharedNSeqs()); + }else if (pDataArray->Estimators[i] == "ochiai") { + treeCalculators.push_back(new Ochiai()); + }else if (pDataArray->Estimators[i] == "anderberg") { + treeCalculators.push_back(new Anderberg()); + }else if (pDataArray->Estimators[i] == "kulczynski") { + treeCalculators.push_back(new Kulczynski()); + }else if (pDataArray->Estimators[i] == "kulczynskicody") { + treeCalculators.push_back(new KulczynskiCody()); + }else if (pDataArray->Estimators[i] == "lennon") { + treeCalculators.push_back(new Lennon()); + }else if (pDataArray->Estimators[i] == "morisitahorn") { + treeCalculators.push_back(new MorHorn()); + }else if (pDataArray->Estimators[i] == "braycurtis") { + treeCalculators.push_back(new BrayCurtis()); + }else if (pDataArray->Estimators[i] == "whittaker") { + treeCalculators.push_back(new Whittaker()); + }else if (pDataArray->Estimators[i] == "odum") { + treeCalculators.push_back(new Odum()); + }else if (pDataArray->Estimators[i] == "canberra") { + treeCalculators.push_back(new Canberra()); + }else if (pDataArray->Estimators[i] == "structeuclidean") { + treeCalculators.push_back(new StructEuclidean()); + }else if (pDataArray->Estimators[i] == "structchord") { + treeCalculators.push_back(new StructChord()); + }else if (pDataArray->Estimators[i] == "hellinger") { + treeCalculators.push_back(new Hellinger()); + }else if (pDataArray->Estimators[i] == "manhattan") { + treeCalculators.push_back(new Manhattan()); + }else if (pDataArray->Estimators[i] == "structpearson") { + treeCalculators.push_back(new StructPearson()); + }else if (pDataArray->Estimators[i] == "soergel") { + treeCalculators.push_back(new Soergel()); + }else if (pDataArray->Estimators[i] == "spearman") { + treeCalculators.push_back(new Spearman()); + }else if (pDataArray->Estimators[i] == "structkulczynski") { + treeCalculators.push_back(new StructKulczynski()); + }else if (pDataArray->Estimators[i] == "speciesprofile") { + treeCalculators.push_back(new SpeciesProfile()); + }else if (pDataArray->Estimators[i] == "hamming") { + treeCalculators.push_back(new Hamming()); + }else if (pDataArray->Estimators[i] == "structchi2") { + treeCalculators.push_back(new StructChi2()); + }else if (pDataArray->Estimators[i] == "gower") { + treeCalculators.push_back(new Gower()); + }else if (pDataArray->Estimators[i] == "memchi2") { + treeCalculators.push_back(new MemChi2()); + }else if (pDataArray->Estimators[i] == "memchord") { + treeCalculators.push_back(new MemChord()); + }else if (pDataArray->Estimators[i] == "memeuclidean") { + treeCalculators.push_back(new MemEuclidean()); + }else if (pDataArray->Estimators[i] == "mempearson") { + treeCalculators.push_back(new MemPearson()); + } + } + } + + pDataArray->calcDists.resize(treeCalculators.size()); + + vector subset; + for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare + + for (int l = 0; l < k; l++) { + + if (k != l) { //we dont need to similiarity of a groups to itself + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); + + for(int i=0;igetNeedsAll()) { + //load subset with rest of lookup for those calcs that need everyone to calc for a pair + for (int w = 0; w < pDataArray->thisLookup.size(); w++) { + if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); } + } + } + + vector tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs + + if (pDataArray->m->control_pressed) { return 1; } + + seqDist temp(l, k, -(tempdata[0]-1.0)); + pDataArray->calcDists[i].push_back(temp); + } + } + } + } + + for(int i=0;im->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction"); + exit(1); + } +} +#endif + + #endif