A7E9B88C12D37EC400DA6239 /* blastdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66412D37EC400DA6239 /* blastdb.cpp */; };
A7E9B88D12D37EC400DA6239 /* boneh.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66612D37EC400DA6239 /* boneh.cpp */; };
A7E9B88E12D37EC400DA6239 /* bootstrap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66812D37EC400DA6239 /* bootstrap.cpp */; };
- A7E9B88F12D37EC400DA6239 /* bootstrapsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66A12D37EC400DA6239 /* bootstrapsharedcommand.cpp */; };
A7E9B89012D37EC400DA6239 /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66C12D37EC400DA6239 /* bstick.cpp */; };
A7E9B89112D37EC400DA6239 /* calculator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B66E12D37EC400DA6239 /* calculator.cpp */; };
A7E9B89212D37EC400DA6239 /* canberra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B67012D37EC400DA6239 /* canberra.cpp */; };
A7E9B8B012D37EC400DA6239 /* commandfactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6AF12D37EC400DA6239 /* commandfactory.cpp */; };
A7E9B8B112D37EC400DA6239 /* commandoptionparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B112D37EC400DA6239 /* commandoptionparser.cpp */; };
A7E9B8B212D37EC400DA6239 /* completelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */; };
- A7E9B8B312D37EC400DA6239 /* consensuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */; };
+ A7E9B8B312D37EC400DA6239 /* consensus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B512D37EC400DA6239 /* consensus.cpp */; };
A7E9B8B412D37EC400DA6239 /* consensusseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */; };
A7E9B8B512D37EC400DA6239 /* corraxescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */; };
A7E9B8B612D37EC400DA6239 /* coverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6BB12D37EC400DA6239 /* coverage.cpp */; };
A7E9B66712D37EC400DA6239 /* boneh.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = boneh.h; sourceTree = "<group>"; };
A7E9B66812D37EC400DA6239 /* bootstrap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bootstrap.cpp; sourceTree = "<group>"; };
A7E9B66912D37EC400DA6239 /* bootstrap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bootstrap.h; sourceTree = "<group>"; };
- A7E9B66A12D37EC400DA6239 /* bootstrapsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bootstrapsharedcommand.cpp; sourceTree = "<group>"; };
- A7E9B66B12D37EC400DA6239 /* bootstrapsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bootstrapsharedcommand.h; sourceTree = "<group>"; };
A7E9B66C12D37EC400DA6239 /* bstick.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bstick.cpp; sourceTree = "<group>"; };
A7E9B66D12D37EC400DA6239 /* bstick.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bstick.h; sourceTree = "<group>"; };
A7E9B66E12D37EC400DA6239 /* calculator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = calculator.cpp; sourceTree = "<group>"; };
A7E9B6B212D37EC400DA6239 /* commandoptionparser.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = commandoptionparser.hpp; sourceTree = SOURCE_ROOT; };
A7E9B6B312D37EC400DA6239 /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = common.h; sourceTree = "<group>"; };
A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = completelinkage.cpp; sourceTree = SOURCE_ROOT; };
- A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consensuscommand.cpp; sourceTree = "<group>"; };
- A7E9B6B612D37EC400DA6239 /* consensuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = consensuscommand.h; sourceTree = "<group>"; };
+ A7E9B6B512D37EC400DA6239 /* consensus.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consensus.cpp; sourceTree = "<group>"; };
+ A7E9B6B612D37EC400DA6239 /* consensus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = consensus.h; sourceTree = "<group>"; };
A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = consensusseqscommand.cpp; sourceTree = "<group>"; };
A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = consensusseqscommand.h; sourceTree = "<group>"; };
A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = corraxescommand.cpp; sourceTree = "<group>"; };
A7DAAFA3133A254E003956EB /* commandparameter.h */,
A7E9B6B412D37EC400DA6239 /* completelinkage.cpp */,
A7E9BA4212D3960D00DA6239 /* containers */,
+ A7E9B6B612D37EC400DA6239 /* consensus.h */,
+ A7E9B6B512D37EC400DA6239 /* consensus.cpp */,
A7AACFBA132FE008003D6C4D /* currentfile.h */,
A7E9B6C912D37EC400DA6239 /* display.h */,
A7E9B6D112D37EC400DA6239 /* dlibshuff.cpp */,
A71CB15E130B04A2001E7287 /* anosimcommand.cpp */,
A7E9B66112D37EC300DA6239 /* binsequencecommand.h */,
A7E9B66012D37EC300DA6239 /* binsequencecommand.cpp */,
- A7E9B66B12D37EC400DA6239 /* bootstrapsharedcommand.h */,
- A7E9B66A12D37EC400DA6239 /* bootstrapsharedcommand.cpp */,
A7E9B67312D37EC400DA6239 /* catchallcommand.h */,
A7E9B67212D37EC400DA6239 /* catchallcommand.cpp */,
A7E9B67B12D37EC400DA6239 /* chimerabellerophoncommand.h */,
A7E9B6A812D37EC400DA6239 /* collectcommand.cpp */,
A7E9B6AD12D37EC400DA6239 /* collectsharedcommand.h */,
A7E9B6AC12D37EC400DA6239 /* collectsharedcommand.cpp */,
- A7E9B6B612D37EC400DA6239 /* consensuscommand.h */,
- A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */,
A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */,
A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */,
A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */,
A7E9B88C12D37EC400DA6239 /* blastdb.cpp in Sources */,
A7E9B88D12D37EC400DA6239 /* boneh.cpp in Sources */,
A7E9B88E12D37EC400DA6239 /* bootstrap.cpp in Sources */,
- A7E9B88F12D37EC400DA6239 /* bootstrapsharedcommand.cpp in Sources */,
A7E9B89012D37EC400DA6239 /* bstick.cpp in Sources */,
A7E9B89112D37EC400DA6239 /* calculator.cpp in Sources */,
A7E9B89212D37EC400DA6239 /* canberra.cpp in Sources */,
A7E9B8B012D37EC400DA6239 /* commandfactory.cpp in Sources */,
A7E9B8B112D37EC400DA6239 /* commandoptionparser.cpp in Sources */,
A7E9B8B212D37EC400DA6239 /* completelinkage.cpp in Sources */,
- A7E9B8B312D37EC400DA6239 /* consensuscommand.cpp in Sources */,
+ A7E9B8B312D37EC400DA6239 /* consensus.cpp in Sources */,
A7E9B8B412D37EC400DA6239 /* consensusseqscommand.cpp in Sources */,
A7E9B8B512D37EC400DA6239 /* corraxescommand.cpp in Sources */,
A7E9B8B612D37EC400DA6239 /* coverage.cpp in Sources */,
+++ /dev/null
-/*
- * bootstrapsharedcommand.cpp
- * Mothur
- *
- * Created by Sarah Westcott on 4/16/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "bootstrapsharedcommand.h"
-#include "sharedjabund.h"
-#include "sharedsorabund.h"
-#include "sharedjclass.h"
-#include "sharedsorclass.h"
-#include "sharedjest.h"
-#include "sharedsorest.h"
-#include "sharedthetayc.h"
-#include "sharedthetan.h"
-#include "sharedmorisitahorn.h"
-#include "sharedbraycurtis.h"
-
-
-//**********************************************************************************************************************
-vector<string> BootSharedCommand::setParameters(){
- try {
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
- CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
- CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
- CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
- CommandParameter pcalc("calc", "Multiple", "jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-morisitahorn-braycurtis", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-
- vector<string> myArray;
- for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "setParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-string BootSharedCommand::getHelpString(){
- try {
- string helpString = "";
- helpString += "The bootstrap.shared command parameters are shared, groups, calc, iters and label. shared is required.\n";
- helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n";
- helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like trees created for, and is also separated by dashes.\n";
- helpString += "The bootstrap.shared command should be in the following format: bootstrap.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels, iters=yourIters).\n";
- helpString += "Example bootstrap.shared(groups=A-B-C, calc=jabund-sorabund, iters=100).\n";
- helpString += "The default value for groups is all the groups in your groupfile.\n";
- helpString += "The default value for calc is jclass-thetayc. The default for iters is 1000.\n";
- return helpString;
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "getHelpString");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-BootSharedCommand::BootSharedCommand(){
- try {
- abort = true; calledHelp = true;
- setParameters();
- vector<string> tempOutNames;
- outputTypes["tree"] = tempOutNames;
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "BootSharedCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-BootSharedCommand::BootSharedCommand(string option) {
- try {
- abort = false; calledHelp = false;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-
- else {
- vector<string> myArray = setParameters();
-
- OptionParser parser(option);
- map<string,string> parameters = parser.getParameters();
- map<string,string>::iterator it;
-
- ValidParameters validParameter;
-
- //check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
- }
-
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["tree"] = tempOutNames;
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
- if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("shared");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["shared"] = inputDir + it->second; }
- }
- }
-
- sharedfile = validParameter.validFile(parameters, "shared", true);
- if (sharedfile == "not found") {
- sharedfile = m->getSharedFile();
- if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current shared file and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
- }
- else if (sharedfile == "not open") { sharedfile = ""; abort = true; }
- else { m->setSharedFile(sharedfile); }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
- outputDir = "";
- outputDir += m->hasPath(sharedfile); //if user entered a file with a path then preserve it
- }
-
- //check for optional parameter and set defaults
- // ...at some point should added some additional type checking...
- label = validParameter.validFile(parameters, "label", false);
- if (label == "not found") { label = ""; }
- else {
- if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
- else { allLines = 1; }
- }
-
- groups = validParameter.validFile(parameters, "groups", false);
- if (groups == "not found") { groups = ""; }
- else {
- m->splitAtDash(groups, Groups);
- m->setGroups(Groups);
- }
-
- calc = validParameter.validFile(parameters, "calc", false);
- if (calc == "not found") { calc = "jclass-thetayc"; }
- else {
- if (calc == "default") { calc = "jclass-thetayc"; }
- }
- m->splitAtDash(calc, Estimators);
- if (m->inUsersGroups("citation", Estimators)) {
- ValidCalculators validCalc; validCalc.printCitations(Estimators);
- //remove citation from list of calcs
- for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
- }
-
- string temp;
- temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
- m->mothurConvert(temp, iters);
-
- if (abort == false) {
-
- //used in tree constructor
- m->runParse = false;
-
- validCalculator = new ValidCalculators();
-
-
- //NOTE: if you add a calc to this if statement you must add it to the setParameters function
- //or it will not be visible in the gui
- int i;
- for (i=0; i<Estimators.size(); i++) {
- if (validCalculator->isValidCalculator("boot", Estimators[i]) == true) {
- if (Estimators[i] == "jabund") {
- treeCalculators.push_back(new JAbund());
- }else if (Estimators[i] == "sorabund") {
- treeCalculators.push_back(new SorAbund());
- }else if (Estimators[i] == "jclass") {
- treeCalculators.push_back(new Jclass());
- }else if (Estimators[i] == "sorclass") {
- treeCalculators.push_back(new SorClass());
- }else if (Estimators[i] == "jest") {
- treeCalculators.push_back(new Jest());
- }else if (Estimators[i] == "sorest") {
- treeCalculators.push_back(new SorEst());
- }else if (Estimators[i] == "thetayc") {
- treeCalculators.push_back(new ThetaYC());
- }else if (Estimators[i] == "thetan") {
- treeCalculators.push_back(new ThetaN());
- }else if (Estimators[i] == "morisitahorn") {
- treeCalculators.push_back(new MorHorn());
- }else if (Estimators[i] == "braycurtis") {
- treeCalculators.push_back(new BrayCurtis());
- }
- }
- }
-
- delete validCalculator;
-
- ofstream* tempo;
- for (int i=0; i < treeCalculators.size(); i++) {
- tempo = new ofstream;
- out.push_back(tempo);
- }
-
- //make a vector of tree* for each calculator
- trees.resize(treeCalculators.size());
- }
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "BootSharedCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-BootSharedCommand::~BootSharedCommand(){}
-//**********************************************************************************************************************
-
-int BootSharedCommand::execute(){
- try {
-
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- m->mothurOut("bootstrap.shared command is no longer available."); m->mothurOutEndLine();
- /*
- //read first line
- input = new InputData(sharedfile, "sharedfile");
- order = input->getSharedOrderVector();
- string lastLabel = order->getLabel();
-
- //if the users entered no valid calculators don't execute command
- if (treeCalculators.size() == 0) { return 0; }
-
- //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
- set<string> processedLabels;
- set<string> userLabels = labels;
-
- //set users groups
- util = new SharedUtil();
- util->setGroups(m->Groups, m->namesOfGroups, "treegroup");
-
- numGroups = m->Groups.size();
-
- //clear globaldatas old tree names if any
- globaldata->Treenames.clear();
-
- //fills globaldatas tree names
- globaldata->Treenames = m->Groups;
-
- //create treemap class from groupmap for tree class to use
- tmap = new TreeMap();
- tmap->makeSim(globaldata->gGroupmap);
- globaldata->gTreemap = tmap;
-
- while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input;delete util; return 0; }
-
- if(allLines == 1 || labels.count(order->getLabel()) == 1){
-
- m->mothurOut(order->getLabel()); m->mothurOutEndLine();
- int error = process(order);
- if (error == 1) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input;delete util; return 0; }
-
- processedLabels.insert(order->getLabel());
- userLabels.erase(order->getLabel());
- }
-
- //you have a label the user want that is smaller than this label and the last label has not already been processed
- if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
- string saveLabel = order->getLabel();
-
- delete order;
- order = input->getSharedOrderVector(lastLabel);
- m->mothurOut(order->getLabel()); m->mothurOutEndLine();
- int error = process(order);
- if (error == 1) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input;delete util; return 0; }
-
- processedLabels.insert(order->getLabel());
- userLabels.erase(order->getLabel());
-
- //restore real lastlabel to save below
- order->setLabel(saveLabel);
- }
-
-
- lastLabel = order->getLabel();
-
- //get next line to process
- delete order;
- order = input->getSharedOrderVector();
- }
-
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input; delete util; return 0; }
-
- //output error messages about any remaining user labels
- set<string>::iterator it;
- bool needToRun = false;
- for (it = userLabels.begin(); it != userLabels.end(); it++) {
- m->mothurOut("Your file does not include the label " + *it);
- if (processedLabels.count(lastLabel) != 1) {
- m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
- needToRun = true;
- }else {
- m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
- }
- }
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input; delete util; return 0; }
-
- //run last line if you need to
- if (needToRun == true) {
- if (order != NULL) { delete order; }
- order = input->getSharedOrderVector(lastLabel);
- m->mothurOut(order->getLabel()); m->mothurOutEndLine();
- int error = process(order);
- if (error == 1) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear(); delete input; delete util; return 0; }
-
- delete order;
-
- }
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } globaldata->Groups.clear();delete input; delete util; return 0; }
-
- //reset groups parameter
- globaldata->Groups.clear();
-
- //set first tree file as new current treefile
- string currentTree = "";
- itTypes = outputTypes.find("tree");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
- }
-
- delete input;
- delete util;
-
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
-*/
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "execute");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-int BootSharedCommand::createTree(ostream* out, Tree* t){
- try {
- /*
- //do merges and create tree structure by setting parents and children
- //there are numGroups - 1 merges to do
- for (int i = 0; i < (numGroups - 1); i++) {
-
- if (m->control_pressed) { return 1; }
-
- float largest = -1000.0;
- int row, column;
- //find largest value in sims matrix by searching lower triangle
- for (int j = 1; j < simMatrix.size(); j++) {
- for (int k = 0; k < j; k++) {
- if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
- }
- }
-
- //set non-leaf node info and update leaves to know their parents
- //non-leaf
- t->tree[numGroups + i].setChildren(index[row], index[column]);
-
- //parents
- t->tree[index[row]].setParent(numGroups + i);
- t->tree[index[column]].setParent(numGroups + i);
-
- //blength = distance / 2;
- float blength = ((1.0 - largest) / 2);
-
- //branchlengths
- t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
- t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
-
- //set your length to leaves to your childs length plus branchlength
- t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
-
-
- //update index
- index[row] = numGroups+i;
- index[column] = numGroups+i;
-
- //zero out highest value that caused the merge.
- simMatrix[row][column] = -1000.0;
- simMatrix[column][row] = -1000.0;
-
- //merge values in simsMatrix
- for (int n = 0; n < simMatrix.size(); n++) {
- //row becomes merge of 2 groups
- simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
- simMatrix[n][row] = simMatrix[row][n];
- //delete column
- simMatrix[column][n] = -1000.0;
- simMatrix[n][column] = -1000.0;
- }
- }
-
- //adjust tree to make sure root to tip length is .5
- int root = t->findRoot();
- t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
-
- //assemble tree
- t->assembleTree();
-
- if (m->control_pressed) { return 1; }
-
- //print newick file
- t->print(*out);*/
-
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "createTree");
- exit(1);
- }
-}
-/***********************************************************/
-void BootSharedCommand::printSims() {
- try {
- m->mothurOut("simsMatrix"); m->mothurOutEndLine();
- for (int k = 0; k < simMatrix.size(); k++) {
- for (int n = 0; n < simMatrix.size(); n++) {
- m->mothurOut(toString(simMatrix[k][n])); m->mothurOut("\t");
- }
- m->mothurOutEndLine();
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "printSims");
- exit(1);
- }
-}
-/***********************************************************/
-int BootSharedCommand::process(SharedOrderVector* order) {
- try{
- /* EstOutput data;
- vector<SharedRAbundVector*> subset;
-
- //open an ostream for each calc to print to
- for (int z = 0; z < treeCalculators.size(); z++) {
- //create a new filename
- outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + treeCalculators[z]->getName() + ".boot" + order->getLabel() + ".tre";
- m->openOutputFile(outputFile, *(out[z]));
- outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
- }
-
- m->mothurOut("Generating bootstrap trees..."); cout.flush();
-
- //create a file for each calculator with the 1000 trees in it.
- for (int p = 0; p < iters; p++) {
-
- if (m->control_pressed) { return 1; }
-
- util->getSharedVectorswithReplacement(m->Groups, lookup, order); //fills group vectors from order vector.
-
-
- //for each calculator
- for(int i = 0 ; i < treeCalculators.size(); i++) {
-
- if (m->control_pressed) { return 1; }
-
- //initialize simMatrix
- simMatrix.clear();
- simMatrix.resize(numGroups);
- for (int o = 0; o < simMatrix.size(); o++) {
- for (int j = 0; j < simMatrix.size(); j++) {
- simMatrix[o].push_back(0.0);
- }
- }
-
- //initialize index
- index.clear();
- for (int g = 0; g < numGroups; g++) { index[g] = g; }
-
- for (int k = 0; k < lookup.size(); k++) { // pass cdd each set of groups to commpare
- for (int l = k; l < lookup.size(); l++) {
- if (k != l) { //we dont need to similiarity of a groups to itself
- subset.clear(); //clear out old pair of sharedrabunds
- //add new pair of sharedrabunds
- subset.push_back(lookup[k]); subset.push_back(lookup[l]);
-
- //get estimated similarity between 2 groups
- data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
- //save values in similarity matrix
- simMatrix[k][l] = data[0];
- simMatrix[l][k] = data[0];
- }
- }
- }
-
- tempTree = new Tree();
-
- if (m->control_pressed) { delete tempTree; return 1; }
-
- //creates tree from similarity matrix and write out file
- createTree(out[i], tempTree);
-
- if (m->control_pressed) { delete tempTree; return 1; }
-
- //save trees for consensus command.
- trees[i].push_back(tempTree);
- }
- }
-
- m->mothurOut("\tDone."); m->mothurOutEndLine();
-
-
- //create consensus trees for each bootstrapped tree set
- for (int k = 0; k < trees.size(); k++) {
-
- m->mothurOut("Generating consensus tree for " + treeCalculators[k]->getName()); m->mothurOutEndLine();
-
- if (m->control_pressed) { return 1; }
-
- //set global data to calc trees
- globaldata->gTree = trees[k];
-
- string filename = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + treeCalculators[k]->getName() + ".boot" + order->getLabel();
- consensus = new ConcensusCommand(filename);
- consensus->execute();
- delete consensus;
-
- outputNames.push_back(filename + ".cons.pairs");
- outputNames.push_back(filename + ".cons.tre");
-
- }
-
-
-
- //close ostream for each calc
- for (int z = 0; z < treeCalculators.size(); z++) { out[z]->close(); }
- */
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "BootSharedCommand", "process");
- exit(1);
- }
-}
-/***********************************************************/
-
-
-
+++ /dev/null
-#ifndef BOOTSTRAPSHAREDCOMMAND_H
-#define BOOTSTRAPSHAREDCOMMAND_H
-
-/*
- * bootstrapsharedcommand.h
- * Mothur
- *
- * Created by Sarah Westcott on 4/16/09.
- * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "command.hpp"
-#include "sharedordervector.h"
-#include "inputdata.h"
-#include "groupmap.h"
-#include "validcalculator.h"
-#include "tree.h"
-#include "treemap.h"
-#include "sharedutilities.h"
-#include "consensuscommand.h"
-
-class BootSharedCommand : public Command {
-
-public:
- BootSharedCommand(string);
- BootSharedCommand();
- ~BootSharedCommand();
-
- vector<string> setParameters();
- string getCommandName() { return "bootstrap.shared"; }
- string getCommandCategory() { return "Hidden"; }
- string getHelpString();
- string getCitation() { return "no citation"; }
- string getDescription() { return "bootstrap.shared"; }
-
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-private:
- int createTree(ostream*, Tree*);
- void printSims();
- int process(SharedOrderVector*);
-
- SharedUtil* util;
- TreeMap* tmap;
- Tree* t;
- Tree* tempTree;
- ConcensusCommand* consensus;
- vector< vector<Tree*> > trees; //a vector of trees for each calculator chosen
- vector<Calculator*> treeCalculators;
- vector<ofstream*> out;
- vector< vector<float> > simMatrix;
- map<int, int> index; //maps row in simMatrix to vector index in the tree
- InputData* input;
- ValidCalculators* validCalculator;
- SharedOrderVector* order;
- vector<SharedRAbundVector*> lookup;
-
- bool abort, allLines;
- set<string> labels; //holds labels to be used
- string outputFile, calc, groups, label, outputDir, sharedfile;
- int numGroups, iters;
- vector<string> Estimators, Groups, outputNames; //holds estimators to be used
-};
-
-
-#endif
-
-
#include "binsequencecommand.h"
#include "getoturepcommand.h"
#include "treegroupscommand.h"
-#include "bootstrapsharedcommand.h"
-//#include "consensuscommand.h"
#include "distancecommand.h"
#include "aligncommand.h"
#include "matrixoutputcommand.h"
commands["get.label"] = "get.label";
commands["get.sabund"] = "get.sabund";
commands["get.rabund"] = "get.rabund";
- commands["bootstrap.shared"] = "bootstrap.shared";
- //commands["consensus"] = "consensus";
commands["help"] = "help";
commands["reverse.seqs"] = "reverse.seqs";
commands["trim.seqs"] = "trim.seqs";
else if(commandName == "get.oturep") { command = new GetOTURepCommand(optionString); }
else if(commandName == "tree.shared") { command = new TreeGroupCommand(optionString); }
else if(commandName == "dist.shared") { command = new MatrixOutputCommand(optionString); }
- else if(commandName == "bootstrap.shared") { command = new BootSharedCommand(optionString); }
- else if(commandName == "consensus") { command = new ConcensusCommand(optionString); }
else if(commandName == "dist.seqs") { command = new DistanceCommand(optionString); }
else if(commandName == "align.seqs") { command = new AlignCommand(optionString); }
else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(optionString); }
else if(commandName == "get.oturep") { pipecommand = new GetOTURepCommand(optionString); }
else if(commandName == "tree.shared") { pipecommand = new TreeGroupCommand(optionString); }
else if(commandName == "dist.shared") { pipecommand = new MatrixOutputCommand(optionString); }
- else if(commandName == "bootstrap.shared") { pipecommand = new BootSharedCommand(optionString); }
- else if(commandName == "consensus") { pipecommand = new ConcensusCommand(optionString); }
else if(commandName == "dist.seqs") { pipecommand = new DistanceCommand(optionString); }
else if(commandName == "align.seqs") { pipecommand = new AlignCommand(optionString); }
else if(commandName == "summary.seqs") { pipecommand = new SeqSummaryCommand(optionString); }
else if(commandName == "get.oturep") { shellcommand = new GetOTURepCommand(); }
else if(commandName == "tree.shared") { shellcommand = new TreeGroupCommand(); }
else if(commandName == "dist.shared") { shellcommand = new MatrixOutputCommand(); }
- else if(commandName == "bootstrap.shared") { shellcommand = new BootSharedCommand(); }
- else if(commandName == "consensus") { shellcommand = new ConcensusCommand(); }
else if(commandName == "dist.seqs") { shellcommand = new DistanceCommand(); }
else if(commandName == "align.seqs") { shellcommand = new AlignCommand(); }
else if(commandName == "summary.seqs") { shellcommand = new SeqSummaryCommand(); }
--- /dev/null
+/*
+ * consensuscommand.cpp
+ * Mothur
+ *
+ * Created by Sarah Westcott on 4/29/09.
+ * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
+ *
+ */
+
+#include "consensus.h"
+
+//**********************************************************************************************************************
+Tree* Consensus::getTree(vector<Tree*>& t, TreeMap* tmap){
+ try {
+ numNodes = t[0]->getNumNodes();
+ numLeaves = t[0]->getNumLeaves();
+ numTrees = t.size();
+
+ //get the possible pairings
+ getSets(t);
+
+ if (m->control_pressed) { return 0; }
+
+ consensusTree = new Tree(tmap);
+
+ it2 = nodePairs.find(treeSet);
+
+ nodePairsInTree[treeSet] = it2->second;
+
+ //erase treeset because you are adding it
+ nodePairs.erase(treeSet);
+
+ //set count to numLeaves;
+ count = numLeaves;
+
+ buildConsensusTree(treeSet);
+
+ if (m->control_pressed) { delete consensusTree; return 0; }
+
+ consensusTree->assembleTree();
+
+ if (m->control_pressed) { delete consensusTree; return 0; }
+
+ return consensusTree;
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int Consensus::printSetsInfo() {
+ try {
+ //open file for pairing not included in the tree
+ string notIncluded = "cons.pairs";
+ ofstream out2;
+ m->openOutputFile(notIncluded, out2);
+
+ //output species in order
+ out2 << "Species in Order: " << endl << endl;
+ for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; }
+
+ //output sets included
+ out2 << endl << "Sets included in the consensus tree:" << endl << endl;
+
+ if (m->control_pressed) { return 0; }
+
+ vector<string> temp;
+ for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
+
+ if (m->control_pressed) { return 0; }
+
+ //only output pairs not leaves
+ if (it2->first.size() > 1) {
+ temp.clear();
+ //initialize temp to all "."
+ temp.resize(treeSet.size(), ".");
+
+ //set the spot in temp that represents it2->first[i] to a "*"
+ for (int i = 0; i < it2->first.size(); i++) {
+ //find spot
+ int index = findSpot(it2->first[i]);
+ temp[index] = "*";
+ //temp[index] = it2->first[i] + " ";
+ }
+
+ //output temp
+ for (int j = 0; j < temp.size(); j++) {
+ out2 << temp[j];
+ }
+ out2 << '\t' << it2->second << endl;
+ }
+ }
+
+ //output sets not included
+ out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
+ for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+
+ if (m->control_pressed) { return 0; }
+
+ temp.clear();
+ //initialize temp to all "."
+ temp.resize(treeSet.size(), ".");
+
+ //set the spot in temp that represents it2->first[i] to a "*"
+ for (int i = 0; i < it2->first.size(); i++) {
+ //find spot
+ int index = findSpot(it2->first[i]);
+ temp[index] = "*";
+ }
+
+ //output temp
+ for (int j = 0; j < temp.size(); j++) {
+ out2 << temp[j];
+ }
+ out2 << '\t' << it2->second << endl;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "printSetsInfo");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int Consensus::buildConsensusTree(vector<string> nodeSet) {
+ try {
+ vector<string> leftChildSet;
+ vector<string> rightChildSet;
+
+ if (m->control_pressed) { return 1; }
+
+ //if you are at a leaf
+ if (nodeSet.size() == 1) {
+ //return the vector index of the leaf you are at
+ return consensusTree->getIndex(nodeSet[0]);
+ //terminate recursion
+ }else if (count == numNodes) { return 0; }
+ else {
+ //finds best child pair
+ leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
+ int left = buildConsensusTree(leftChildSet);
+ int right = buildConsensusTree(rightChildSet);
+ consensusTree->tree[count].setChildren(left, right);
+ consensusTree->tree[count].setLabel((nodePairsInTree[nodeSet]/(float)numTrees));
+ consensusTree->tree[left].setParent(count);
+ consensusTree->tree[right].setParent(count);
+ count++;
+ return (count-1);
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "buildConcensusTree");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+int Consensus::getSets(vector<Tree*>& t) {
+ try {
+ vector<string> temp;
+ treeSet.clear();
+
+ //for each tree add the possible pairs you find
+ for (int i = 0; i < t.size(); i++) {
+
+ //for each non-leaf node get descendant info.
+ for (int j = numLeaves; j < numNodes; j++) {
+
+ if (m->control_pressed) { return 1; }
+
+ temp.clear();
+ //go through pcounts and pull out descendants
+ for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
+ temp.push_back(it->first);
+ }
+
+ //sort temp
+ sort(temp.begin(), temp.end());
+
+ it2 = nodePairs.find(temp);
+ if (it2 != nodePairs.end()) {
+ nodePairs[temp]++;
+ }else{
+ nodePairs[temp] = 1;
+ }
+ }
+ }
+
+
+ //add each leaf to terminate recursion in consensus
+ //you want the leaves in there but with insignifigant sightings value so it is added last
+ //for each leaf node get descendant info.
+ for (int j = 0; j < numLeaves; j++) {
+
+ if (m->control_pressed) { return 1; }
+
+ //only need the first one since leaves have no descendants but themselves
+ it = t[0]->tree[j].pcount.begin();
+ temp.clear(); temp.push_back(it->first);
+
+ //fill treeSet
+ treeSet.push_back(it->first);
+
+ //add leaf to list but with sighting value less then all non leaf pairs
+ nodePairs[temp] = 0;
+ }
+
+ sort(treeSet.begin(), treeSet.end());
+
+
+ map< vector<string>, int> nodePairsCopy = nodePairs;
+
+
+ //set initial rating on pairs to sightings + subgroup sightings
+ while (nodePairsCopy.size() != 0) {
+ if (m->control_pressed) { return 1; }
+
+ vector<string> smallOne = getSmallest(nodePairsCopy);
+
+ int subgrouprate = getSubgroupRating(smallOne);
+
+ nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
+
+ nodePairsCopy.erase(smallOne);
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "getSets");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> Consensus::getSmallest(map< vector<string>, int> nodes) {
+ try{
+ vector<string> smallest = nodes.begin()->first;
+ int smallsize = smallest.size();
+
+ for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
+ if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; }
+ }
+
+ return smallest;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "getSmallest");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+vector<string> Consensus::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
+ try {
+//cout << "new call " << endl << endl << endl;
+ vector<string> largest; largest.clear();
+ rest.clear();
+
+ //if you are just 2 groups
+ if (bigset.size() == 2) {
+ rest.push_back(bigset[0]);
+ largest.push_back(bigset[1]);
+ }else{
+ rest = bestSplit[bigset][0];
+ largest = bestSplit[bigset][1];
+ }
+
+
+ //save for printing out later and for branch lengths
+ nodePairsInTree[rest] = nodePairs[rest];
+
+ //delete whatever set you return because it is no longer available
+ nodePairs.erase(rest);
+
+ //save for printing out later and for branch lengths
+ nodePairsInTree[largest] = nodePairs[largest];
+
+ //delete whatever set you return because it is no longer available
+ nodePairs.erase(largest);
+
+ return largest;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "getNextAvailableSet");
+ exit(1);
+ }
+}
+
+/**********************************************************************************************************************/
+int Consensus::getSubgroupRating(vector<string> group) {
+ try {
+ map< vector<string>, int>::iterator ittemp;
+ map< vector< vector<string> > , int >::iterator it3;
+ int rate = 0;
+
+ // ***********************************************************************************//
+ //1. this function must be called passing it littlest sets to biggest
+ // since it the rating is made from your sighting plus you best splits rating
+ //2. it saves the top pair to use later
+ // ***********************************************************************************//
+
+
+ if (group.size() < 3) { return rate; }
+
+ map< vector<string>, int> possiblePairing; //this is all the subsets of group
+
+ //go through the sets
+ for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+ //are you a subset of bigset, then save in possiblePairings
+ if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; }
+ }
+
+ map< vector< vector<string> > , int > rating;
+
+ while (possiblePairing.size() != 0) {
+
+ it2 = possiblePairing.begin();
+ vector<string> temprest = getRestSet(group, it2->first);
+
+ //is the rest a set available in possiblePairings
+ ittemp = possiblePairing.find(temprest);
+ if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map
+ vector< vector<string> > temprate;
+ temprate.push_back(it2->first); temprate.push_back(temprest);
+
+ rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
+
+ //erase so you dont add 1,2 and 2,1.
+ possiblePairing.erase(temprest);
+ }
+
+ possiblePairing.erase(it2);
+ }
+
+
+ it3 = rating.begin();
+ rate = it3->second;
+ vector< vector<string> > topPair = it3->first;
+
+ //choose the split with the best rating
+ for (it3 = rating.begin(); it3 != rating.end(); it3++) {
+
+ if (it3->second > rate) { rate = it3->second; topPair = it3->first; }
+ }
+
+ bestSplit[group] = topPair;
+
+ return rate;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "getSubgroupRating");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+vector<string> Consensus::getRestSet(vector<string> bigset, vector<string> subset) {
+ try {
+ vector<string> rest;
+
+ for (int i = 0; i < bigset.size(); i++) {
+ bool inSubset = false;
+ for (int j = 0; j < subset.size(); j++) {
+ if (bigset[i] == subset[j]) { inSubset = true; break; }
+ }
+
+ //its not in the subset so put it in the rest
+ if (inSubset == false) { rest.push_back(bigset[i]); }
+ }
+
+ return rest;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "getRestSet");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+bool Consensus::isSubset(vector<string> bigset, vector<string> subset) {
+ try {
+
+
+ if (subset.size() > bigset.size()) { return false; }
+
+ //check if each guy in suset is also in bigset
+ for (int i = 0; i < subset.size(); i++) {
+ bool match = false;
+ for (int j = 0; j < bigset.size(); j++) {
+ if (subset[i] == bigset[j]) { match = true; break; }
+ }
+
+ //you have a guy in subset that had no match in bigset
+ if (match == false) { return false; }
+ }
+
+ return true;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "isSubset");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int Consensus::findSpot(string node) {
+ try {
+ int spot;
+
+ //check if each guy in suset is also in bigset
+ for (int i = 0; i < treeSet.size(); i++) {
+ if (treeSet[i] == node) { spot = i; break; }
+ }
+
+ return spot;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Consensus", "findSpot");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
+
+
--- /dev/null
+#ifndef CONCENSUS_H
+#define CONCENSUS_H
+/*
+ * consensus.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 4/29/09.
+ * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
+ *
+ */
+
+
+#include "tree.h"
+#include "treemap.h"
+
+//NOTE: This class assumes all leaf nodes have 1 member.
+// Mothur does allow for names files with trees which would make a tree with multiple members at one leaf.
+// This class is currently only called internally by commands that have leaf node containing only 1 member.
+// But if in the future, this changes things will need to be reworked in getSets and buildConsensus.
+
+
+class Consensus {
+
+public:
+ Consensus() { m = MothurOut::getInstance(); }
+ ~Consensus() {}
+
+ Tree* getTree(vector<Tree*>&, TreeMap*);
+
+private:
+ MothurOut* m;
+ Tree* consensusTree;
+
+ vector<string> treeSet; //set containing all members of the tree to start recursion. filled in getSets().
+ map< vector<string>, int > nodePairs; //<map of possible combinations these combos are the pcounts or descendants info, to how many times they occured
+ // ie. combos FI and EGK would create nodePairs[vector containing F and I] = 1; nodePairs[vector containing E, G and K] = 1
+ // if you saw the combo FI again in another tree you would then update nodePairs[vector containing F and I] = 2;
+ // requires vectors to be sorted to find key.
+ map< vector<string>, vector< vector<string> > > bestSplit; //maps a group to its best split
+ map< vector<string>, int > nodePairsInitialRate;
+ map< vector<string>, int > nodePairsInTree;
+ map<string, int>::iterator it;
+ map< vector<string>, int>::iterator it2;
+ string outputFile, notIncluded, filename;
+ int numNodes, numLeaves, count, numTrees; //count is the next available spot in the tree vector
+ vector<string> outputNames;
+
+ int getSets(vector<Tree*>&);
+ int getSubgroupRating(vector<string>);
+ vector<string> getSmallest(map< vector<string>, int>);
+ vector<string> getNextAvailableSet(vector<string>, vector<string>&);
+ vector<string> getRestSet(vector<string>, vector<string>);
+ bool isSubset(vector<string>, vector<string>);
+ int findSpot(string);
+ int buildConsensusTree(vector<string>);
+ int printSetsInfo();
+
+};
+
+#endif
+
setParameters();
vector<string> tempOutNames;
outputTypes["phylip"] = tempOutNames;
- outputTypes["subsample"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "MatrixOutputCommand", "MatrixOutputCommand");
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["phylip"] = tempOutNames;
- outputTypes["subsample"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
/***********************************************************/
int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
try {
- EstOutput data;
- vector<SharedRAbundVector*> subset;
vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
-
vector< vector<seqDist> > calcDists; calcDists.resize(matrixCalculators.size());
for (int thisIter = 0; thisIter < iters; thisIter++) {
else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
- out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl;
+ if (m->binLabelsInFile.size() != 0) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
+ else { out << i+1 << '\t' << k+1 << '\t' << coef << '\t' << sig << endl; }
}
}
else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
- out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << coef << '\t' << sig << endl;
+ if (m->binLabelsInFile.size() != 0) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
+ else { out << i+1 << '\t' << k+1 << '\t' << coef << '\t' << sig << endl; }
}
}
pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break;
}else {
if (pDataArray->keepdots) {
- currSeq.filterToPos(start);
- currSeq.filterFromPos(end);
+ currSeq.filterToPos(pDataArray->start);
+ currSeq.filterFromPos(pDataArray->end);
}else {
- string seqString = currSeq.getAligned().substr(0, end);
- seqString = seqString.substr(start);
+ string seqString = currSeq.getAligned().substr(0, pDataArray->end);
+ seqString = seqString.substr(pDataArray->start);
currSeq.setAligned(seqString);
}
}
if (pDataArray->end != -1) {
if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
else {
- if (pDataArray->keepdots) { currSeq.filterFromPos(end); }
+ if (pDataArray->keepdots) { currSeq.filterFromPos(pDataArray->end); }
else {
- string seqString = currSeq.getAligned().substr(0, end);
+ string seqString = currSeq.getAligned().substr(0, pDataArray->end);
currSeq.setAligned(seqString);
}
}
}
if (pDataArray->start != -1) {
- if (pDataArray->keepdots) { currSeq.filterToPos(start); }
+ if (pDataArray->keepdots) { currSeq.filterToPos(pDataArray->start); }
else {
- string seqString = currSeq.getAligned().substr(start);
+ string seqString = currSeq.getAligned().substr(pDataArray->start);
currSeq.setAligned(seqString);
}
}
}else { label = m->saveNextLabel; }
//reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
- m->currentBinLabels = m-> binLabelsInFile;
+ m->currentBinLabels = m->binLabelsInFile;
//read in first row since you know there is at least 1 group.
f >> groupN >> num;
//read the rest of the groups info in
while ((nextLabel == holdLabel) && (f.eof() != true)) {
f >> groupN >> num;
+
+ if (num != 1000) { break; }
count++;
allGroups.push_back(groupN);
An individual which knows the OTU from which it came,
the group it is in and its abundance. */
-class GlobalData;
class SharedRAbundFloatVector : public DataVector {
*/
#include "treegroupscommand.h"
-#include "sharedsobscollectsummary.h"
-#include "sharedchao1.h"
-#include "sharedace.h"
-#include "sharednseqs.h"
-#include "sharedjabund.h"
-#include "sharedsorabund.h"
-#include "sharedjclass.h"
-#include "sharedsorclass.h"
-#include "sharedjest.h"
-#include "sharedsorest.h"
-#include "sharedthetayc.h"
-#include "sharedthetan.h"
-#include "sharedkstest.h"
-#include "whittaker.h"
-#include "sharedochiai.h"
-#include "sharedanderbergs.h"
-#include "sharedkulczynski.h"
-#include "sharedkulczynskicody.h"
-#include "sharedlennon.h"
-#include "sharedmorisitahorn.h"
-#include "sharedbraycurtis.h"
-#include "sharedjackknife.h"
-#include "whittaker.h"
-#include "odum.h"
-#include "canberra.h"
-#include "structeuclidean.h"
-#include "structchord.h"
-#include "hellinger.h"
-#include "manhattan.h"
-#include "structpearson.h"
-#include "soergel.h"
-#include "spearman.h"
-#include "structkulczynski.h"
-#include "structchi2.h"
-#include "speciesprofile.h"
-#include "hamming.h"
-#include "gower.h"
-#include "memchi2.h"
-#include "memchord.h"
-#include "memeuclidean.h"
-#include "mempearson.h"
+#include "subsample.h"
+#include "consensus.h"
//**********************************************************************************************************************
vector<string> TreeGroupCommand::setParameters(){
CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);
CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
- CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);
- CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
+ CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);
+ CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
+ CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
+ CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
- CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
+
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+ CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string helpString = "";
ValidCalculators validCalculator;
helpString += "The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n";
- helpString += "The tree.shared command parameters are shared, groups, calc, phylip, column, name, cutoff, precision and label.\n";
+ helpString += "The tree.shared command parameters are shared, groups, calc, phylip, column, name, cutoff, precision, processors, subsample, iters and label.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n";
helpString += "The group names are separated by dashes. The label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n";
helpString += "The phylip or column parameter are required if you do not provide a sharedfile, and only one may be used. If you use a column file the name filename is required. \n";
helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
helpString += "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n";
+ helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
+ helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a shared file.\n";
helpString += "Example tree.shared(groups=A-B-C, calc=jabund-sorabund).\n";
helpString += "The default value for groups is all the groups in your groupfile.\n";
helpString += "The default value for calc is jclass-thetayc.\n";
m->mothurConvert(temp, cutoff);
cutoff += (5 / (precision * 10.0));
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
+ m->mothurConvert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
+ m->mothurConvert(temp, iters);
+
+ temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
+ if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+ else {
+ if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
+ else { subsample = false; }
+ }
+
+ if (subsample == false) { iters = 1; }
+
+ if (subsample && (format != "sharedfile")) { m->mothurOut("[ERROR]: the subsample parameter can only be used with a shared file.\n"); abort=true; }
+
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
outputDir = "";
if (m->control_pressed) { return 0; }
- makeSimsDist();
+ vector< vector<double> > matrix = makeSimsDist();
if (m->control_pressed) { return 0; }
//create a new filename
- outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre";
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre";
outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
- createTree();
+ Tree* newTree = createTree(matrix);
+
+ if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }
if (m->control_pressed) { return 0; }
}
//**********************************************************************************************************************
-int TreeGroupCommand::createTree(){
+Tree* TreeGroupCommand::createTree(vector< vector<double> >& simMatrix){
try {
//create tree
t = new Tree(tmap);
+
+ //initialize index
+ map<int, int> index; //maps row in simMatrix to vector index in the tree
+ for (int g = 0; g < numGroups; g++) { index[g] = g; }
//do merges and create tree structure by setting parents and children
//there are numGroups - 1 merges to do
for (int i = 0; i < (numGroups - 1); i++) {
float largest = -1000.0;
- if (m->control_pressed) { delete t; return 1; }
+ if (m->control_pressed) { delete t; t = NULL; return t; }
int row, column;
//find largest value in sims matrix by searching lower triangle
//assemble tree
t->assembleTree();
- if (m->control_pressed) { delete t; return 1; }
-
- //print newick file
- t->createNewickFile(outputFile);
-
- //delete tree
- delete t;
+ if (m->control_pressed) { delete t; t = NULL; return t; }
- if (m->control_pressed) { m->mothurRemove(outputFile); outputNames.pop_back(); return 1; }
-
- return 0;
+ return t;
}
catch(exception& e) {
}
}
/***********************************************************/
-void TreeGroupCommand::printSims(ostream& out) {
+int TreeGroupCommand::writeTree(string out, Tree* T) {
try {
- //output column headers
- //out << '\t';
- //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
- //out << endl;
+ //print newick file
+ t->createNewickFile(out);
+ if (m->control_pressed) { m->mothurRemove(out); outputNames.pop_back(); return 1; }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeGroupCommand", "printSims");
+ exit(1);
+ }
+}
+
+/***********************************************************/
+void TreeGroupCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
+ try {
- for (int m = 0; m < simMatrix.size(); m++) {
+ for (int m = 0; m < simMatrix.size(); m++) {
//out << lookup[m]->getGroup() << '\t';
for (int n = 0; n < simMatrix.size(); n++) {
out << simMatrix[m][n] << '\t';
}
}
/***********************************************************/
-int TreeGroupCommand::makeSimsDist() {
+vector< vector<double> > TreeGroupCommand::makeSimsDist() {
try {
numGroups = list->size();
- //initialize index
- index.clear();
- for (int g = 0; g < numGroups; g++) { index[g] = g; }
-
//initialize simMatrix
- simMatrix.clear();
+ vector< vector<double> > simMatrix;
simMatrix.resize(numGroups);
for (int k = 0; k < simMatrix.size(); k++) {
for (int j = 0; j < simMatrix.size(); j++) {
simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
- if (m->control_pressed) { return 1; }
+ if (m->control_pressed) { return simMatrix; }
}
- return 0;
+ return simMatrix;
}
catch(exception& e) {
m->errorOut(e, "TreeGroupCommand", "makeSimsDist");
/***********************************************************/
int TreeGroupCommand::makeSimsShared() {
try {
+
+ numGroups = lookup.size();
+ lines.resize(processors);
+ for (int i = 0; i < processors; i++) {
+ lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
+ lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
+ }
+
+ if (subsample) {
+ if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+ subsampleSize = lookup[0]->getNumSeqs();
+ for (int i = 1; i < lookup.size(); i++) {
+ int thisSize = lookup[i]->getNumSeqs();
+
+ if (thisSize < subsampleSize) { subsampleSize = thisSize; }
+ }
+ }else {
+ m->clearGroups();
+ Groups.clear();
+ vector<SharedRAbundVector*> temp;
+ for (int i = 0; i < lookup.size(); i++) {
+ if (lookup[i]->getNumSeqs() < subsampleSize) {
+ m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+ delete lookup[i];
+ }else {
+ Groups.push_back(lookup[i]->getGroup());
+ temp.push_back(lookup[i]);
+ }
+ }
+ lookup = temp;
+ m->setGroups(Groups);
+ }
+ }
+
set<string> processedLabels;
set<string> userLabels = labels;
/***********************************************************/
int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
try{
- EstOutput data;
- vector<SharedRAbundVector*> subset;
- numGroups = thisLookup.size();
-
- //for each calculator
- for(int i = 0 ; i < treeCalculators.size(); i++) {
- //initialize simMatrix
- simMatrix.clear();
- simMatrix.resize(numGroups);
- for (int k = 0; k < simMatrix.size(); k++) {
- for (int j = 0; j < simMatrix.size(); j++) {
- simMatrix[k].push_back(0.0);
- }
- }
+ vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
+ vector< vector<seqDist> > calcDists; calcDists.resize(treeCalculators.size());
+
+ for (int thisIter = 0; thisIter < iters; thisIter++) {
+
+ vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+
+ if (subsample) {
+ SubSample sample;
+ vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisItersLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisItersLookup[k]->getLabel());
+ temp->setGroup(thisItersLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+ for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+ }
+
+ tempLabels = sample.getSample(newLookup, subsampleSize);
+ thisItersLookup = newLookup;
+ }
+
+ if(processors == 1){
+ driver(thisItersLookup, 0, numGroups, calcDists);
+ }else{
+ int process = 1;
+ vector<int> processIDS;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid);
+ process++;
+ }else if (pid == 0){
+
+ driver(thisItersLookup, lines[process].start, lines[process].end, calcDists);
+
+ string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(getpid()) + ".dist";
+ ofstream outtemp;
+ m->openOutputFile(tempdistFileName, outtemp);
+
+ for (int i = 0; i < calcDists.size(); i++) {
+ outtemp << calcDists[i].size() << endl;
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+ }
+ }
+ outtemp.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //parent do your part
+ driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);
+
+ //force parent to wait until all the processes are done
+ for (int i = 0; i < processIDS.size(); i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ string tempdistFileName = m->getRootName(m->getSimpleName(sharedfile)) + toString(processIDS[i]) + ".dist";
+ ifstream intemp;
+ m->openInputFile(tempdistFileName, intemp);
+
+ for (int k = 0; k < calcDists.size(); k++) {
+ int size = 0;
+ intemp >> size; m->gobble(intemp);
+
+ for (int j = 0; j < size; j++) {
+ int seq1 = 0;
+ int seq2 = 0;
+ float dist = 1.0;
+
+ intemp >> seq1 >> seq2 >> dist; m->gobble(intemp);
+
+ seqDist tempDist(seq1, seq2, dist);
+ calcDists[k].push_back(tempDist);
+ }
+ }
+ intemp.close();
+ m->mothurRemove(tempdistFileName);
+ }
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the treeSharedData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //Taking advantage of shared memory to pass results vectors.
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<treeSharedData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisItersLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisItersLookup[k]->getLabel());
+ temp->setGroup(thisItersLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+ for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+ }
+
+ // Allocate memory for thread data.
+ treeSharedData* tempSum = new treeSharedData(m, lines[i].start, lines[i].end, Estimators, newLookup);
+ pDataArray.push_back(tempSum);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyTreeSharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ //parent do your part
+ driver(thisItersLookup, lines[0].start, lines[0].end, calcDists);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) { delete pDataArray[i]->thisLookup[j]; }
+
+ for (int k = 0; k < calcDists.size(); k++) {
+ int size = pDataArray[i]->calcDists[k].size();
+ for (int j = 0; j < size; j++) { calcDists[k].push_back(pDataArray[i]->calcDists[k][j]); }
+ }
+
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
+ }
+
+ calcDistsTotals.push_back(calcDists);
+
+ if (subsample) {
+
+ //clean up memory
+ for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+ thisItersLookup.clear();
+ for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
+ }
+ }
- //initialize index
- index.clear();
- for (int g = 0; g < numGroups; g++) { index[g] = g; }
-
- //create a new filename
- outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
- outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
-
- for (int k = 0; k < thisLookup.size(); k++) {
- for (int l = k; l < thisLookup.size(); l++) {
- if (k != l) { //we dont need to similiarity of a groups to itself
- //get estimated similarity between 2 groups
-
- subset.clear(); //clear out old pair of sharedrabunds
- //add new pair of sharedrabunds
- subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
-
- //if this calc needs all groups to calculate the pair load all groups
- if (treeCalculators[i]->getNeedsAll()) {
- //load subset with rest of lookup for those calcs that need everyone to calc for a pair
- for (int w = 0; w < thisLookup.size(); w++) {
- if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
- }
- }
-
- data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
- //cout << thisLookup[k]->getGroup() << '\t' << thisLookup[l]->getGroup() << '\t' << (1.0 - data[0]) << endl;
- if (m->control_pressed) { return 1; }
-
- //save values in similarity matrix
- simMatrix[k][l] = -(data[0]-1.0);
- simMatrix[l][k] = -(data[0]-1.0);
- }
- }
- }
+ if (iters != 1) {
+ //we need to find the average distance and standard deviation for each groups distance
+
+ vector< vector<seqDist> > calcAverages; calcAverages.resize(treeCalculators.size());
+ for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ calcAverages[i].resize(calcDistsTotals[0][i].size());
+
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].seq1 = calcDists[i][j].seq1;
+ calcAverages[i][j].seq2 = calcDists[i][j].seq2;
+ calcAverages[i][j].dist = 0.0;
+ }
+ }
+
+ for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
+ for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+ }
+ }
+ }
+
+ for (int i = 0; i < calcAverages.size(); i++) { //finds average.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist /= (float) iters;
+ }
+ }
+
+ //create average tree for each calc
+ for (int i = 0; i < calcDists.size(); i++) {
+ vector< vector<double> > matrix; //square matrix to represent the distance
+ matrix.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ int row = calcAverages[i][j].seq1;
+ int column = calcAverages[i][j].seq2;
+ float dist = calcAverages[i][j].dist;
+
+ matrix[row][column] = dist;
+ matrix[column][row] = dist;
+ }
+
+ //create a new filename
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave.tre";
+ outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
+
+ //creates tree from similarity matrix and write out file
+ Tree* newTree = createTree(matrix);
+ if (newTree != NULL) { writeTree(outputFile, newTree); }
+ }
+
+ //create all trees for each calc and find their consensus tree
+ for (int i = 0; i < calcDists.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ //create a new filename
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all.tre";
+ outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
+
+ ofstream outAll;
+ m->openOutputFile(outputFile, outAll);
+
+ vector<Tree*> trees;
+ for (int myIter = 0; myIter < iters; myIter++) {
+
+ if(m->control_pressed) { break; }
+
+ //initialize matrix
+ vector< vector<double> > matrix; //square matrix to represent the distance
+ matrix.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcDistsTotals[myIter][i].size(); j++) {
+ int row = calcDistsTotals[myIter][i][j].seq1;
+ int column = calcDistsTotals[myIter][i][j].seq2;
+ double dist = calcDistsTotals[myIter][i][j].dist;
+
+ matrix[row][column] = dist;
+ matrix[column][row] = dist;
+ }
+
+ //creates tree from similarity matrix and write out file
+ Tree* newTree = createTree(matrix);
+ if (newTree != NULL) {
+ newTree->print(outAll);
+ trees.push_back(newTree);
+ }
+ }
+ outAll.close();
+ if (m->control_pressed) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } }
+
+ Consensus consensus;
+ Tree* conTree = consensus.getTree(trees, tmap);
+
+ //create a new filename
+ string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons.tre";
+ outputNames.push_back(conFile); outputTypes["tree"].push_back(outputFile);
+ ofstream outTree;
+ m->openOutputFile(conFile, outTree);
+
+ if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
+ }
+
+ }else {
+
+ for (int i = 0; i < calcDists.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ //initialize matrix
+ vector< vector<double> > matrix; //square matrix to represent the distance
+ matrix.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ int row = calcDists[i][j].seq1;
+ int column = calcDists[i][j].seq2;
+ double dist = calcDists[i][j].dist;
+
+ matrix[row][column] = dist;
+ matrix[column][row] = dist;
+ }
+
+ //create a new filename
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
+ outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
+
+ //creates tree from similarity matrix and write out file
+ Tree* newTree = createTree(matrix);
+ if (newTree != NULL) { writeTree(outputFile, newTree); delete newTree; }
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TreeGroupCommand", "process");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int TreeGroupCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, vector< vector<seqDist> >& calcDists) {
+ try {
+ vector<SharedRAbundVector*> subset;
+ for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
+
+ for (int l = 0; l < k; l++) {
+
+ if (k != l) { //we dont need to similiarity of a groups to itself
+ subset.clear(); //clear out old pair of sharedrabunds
+ //add new pair of sharedrabunds
+ subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
- //createdistance file from simMatrix
- /*string o = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
- ofstream outDist;
- m->openOutputFile(o, outDist);
- outDist << simMatrix.size() << endl;
- for (int k = 0; k < simMatrix.size(); k++) {
- outDist << thisLookup[k]->getGroup() << '\t';
- for (int l = 0; l < k; l++) {
- outDist << (1.0-simMatrix[k][l]) << '\t';
+ for(int i=0;i<treeCalculators.size();i++) {
+
+ //if this calc needs all groups to calculate the pair load all groups
+ if (treeCalculators[i]->getNeedsAll()) {
+ //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+ for (int w = 0; w < thisLookup.size(); w++) {
+ if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
+ }
}
- outDist << endl;
+
+ vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
+
+ if (m->control_pressed) { return 1; }
+
+ seqDist temp(l, k, -(tempdata[0]-1.0));
+ calcDists[i].push_back(temp);
}
- outDist.close();*/
-
-
- if (m->control_pressed) { return 1; }
- //creates tree from similarity matrix and write out file
- createTree();
-
- if (m->control_pressed) { return 1; }
}
-
- return 0;
-
+ }
+ }
+
+ return 0;
}
catch(exception& e) {
- m->errorOut(e, "TreeGroupCommand", "process");
+ m->errorOut(e, "TreeGroupCommand", "driver");
exit(1);
}
}
#include "readcolumn.h"
#include "readphylip.h"
#include "sparsematrix.hpp"
+#include "sharedsobscollectsummary.h"
+#include "sharedchao1.h"
+#include "sharedace.h"
+#include "sharednseqs.h"
+#include "sharedjabund.h"
+#include "sharedsorabund.h"
+#include "sharedjclass.h"
+#include "sharedsorclass.h"
+#include "sharedjest.h"
+#include "sharedsorest.h"
+#include "sharedthetayc.h"
+#include "sharedthetan.h"
+#include "sharedkstest.h"
+#include "whittaker.h"
+#include "sharedochiai.h"
+#include "sharedanderbergs.h"
+#include "sharedkulczynski.h"
+#include "sharedkulczynskicody.h"
+#include "sharedlennon.h"
+#include "sharedmorisitahorn.h"
+#include "sharedbraycurtis.h"
+#include "sharedjackknife.h"
+#include "whittaker.h"
+#include "odum.h"
+#include "canberra.h"
+#include "structeuclidean.h"
+#include "structchord.h"
+#include "hellinger.h"
+#include "manhattan.h"
+#include "structpearson.h"
+#include "soergel.h"
+#include "spearman.h"
+#include "structkulczynski.h"
+#include "structchi2.h"
+#include "speciesprofile.h"
+#include "hamming.h"
+#include "gower.h"
+#include "memchi2.h"
+#include "memchord.h"
+#include "memeuclidean.h"
+#include "mempearson.h"
+
/* This command create a tree file for each similarity calculator at distance level, using various calculators to find the similiarity between groups.
void help() { m->mothurOut(getHelpString()); }
private:
- int createTree();
- void printSims(ostream&);
+
+ struct linePair {
+ int start;
+ int end;
+ };
+ vector<linePair> lines;
+
+ Tree* createTree(vector< vector<double> >&);
+ void printSims(ostream&, vector< vector<double> >&);
int makeSimsShared();
- int makeSimsDist();
+ vector< vector<double> > makeSimsDist();
+ int writeTree(string, Tree*);
+ int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
ReadMatrix* readMatrix;
SparseMatrix* matrix;
ListVector* list;
TreeMap* tmap;
Tree* t;
+ InputData* input;
vector<Calculator*> treeCalculators;
- vector< vector<float> > simMatrix;
- map<int, int> index; //maps row in simMatrix to vector index in the tree
- InputData* input;
vector<SharedRAbundVector*> lookup;
string lastLabel;
- string format, outputFile, groupNames, filename, sharedfile, inputfile;
- int numGroups;
+ string format, groupNames, filename, sharedfile, inputfile;
+ int numGroups, subsampleSize, iters, processors;
ofstream out;
float precision, cutoff;
- bool abort, allLines;
+ bool abort, allLines, subsample;
set<string> labels; //holds labels to be used
string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
vector<string> Estimators, Groups, outputNames; //holds estimators to be used
};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct treeSharedData {
+ vector<SharedRAbundVector*> thisLookup;
+ vector< vector<seqDist> > calcDists;
+ vector<string> Estimators;
+ unsigned long long start;
+ unsigned long long end;
+ MothurOut* m;
+
+ treeSharedData(){}
+ treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
+ m = mout;
+ start = st;
+ end = en;
+ Estimators = est;
+ thisLookup = lu;
+ }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
+ treeSharedData* pDataArray;
+ pDataArray = (treeSharedData*)lpParam;
+ try {
+
+ vector<Calculator*> treeCalculators;
+ ValidCalculators validCalculator;
+ for (int i=0; i<pDataArray->Estimators.size(); i++) {
+ if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
+ if (pDataArray->Estimators[i] == "sharedsobs") {
+ treeCalculators.push_back(new SharedSobsCS());
+ }else if (pDataArray->Estimators[i] == "sharedchao") {
+ treeCalculators.push_back(new SharedChao1());
+ }else if (pDataArray->Estimators[i] == "sharedace") {
+ treeCalculators.push_back(new SharedAce());
+ }else if (pDataArray->Estimators[i] == "jabund") {
+ treeCalculators.push_back(new JAbund());
+ }else if (pDataArray->Estimators[i] == "sorabund") {
+ treeCalculators.push_back(new SorAbund());
+ }else if (pDataArray->Estimators[i] == "jclass") {
+ treeCalculators.push_back(new Jclass());
+ }else if (pDataArray->Estimators[i] == "sorclass") {
+ treeCalculators.push_back(new SorClass());
+ }else if (pDataArray->Estimators[i] == "jest") {
+ treeCalculators.push_back(new Jest());
+ }else if (pDataArray->Estimators[i] == "sorest") {
+ treeCalculators.push_back(new SorEst());
+ }else if (pDataArray->Estimators[i] == "thetayc") {
+ treeCalculators.push_back(new ThetaYC());
+ }else if (pDataArray->Estimators[i] == "thetan") {
+ treeCalculators.push_back(new ThetaN());
+ }else if (pDataArray->Estimators[i] == "kstest") {
+ treeCalculators.push_back(new KSTest());
+ }else if (pDataArray->Estimators[i] == "sharednseqs") {
+ treeCalculators.push_back(new SharedNSeqs());
+ }else if (pDataArray->Estimators[i] == "ochiai") {
+ treeCalculators.push_back(new Ochiai());
+ }else if (pDataArray->Estimators[i] == "anderberg") {
+ treeCalculators.push_back(new Anderberg());
+ }else if (pDataArray->Estimators[i] == "kulczynski") {
+ treeCalculators.push_back(new Kulczynski());
+ }else if (pDataArray->Estimators[i] == "kulczynskicody") {
+ treeCalculators.push_back(new KulczynskiCody());
+ }else if (pDataArray->Estimators[i] == "lennon") {
+ treeCalculators.push_back(new Lennon());
+ }else if (pDataArray->Estimators[i] == "morisitahorn") {
+ treeCalculators.push_back(new MorHorn());
+ }else if (pDataArray->Estimators[i] == "braycurtis") {
+ treeCalculators.push_back(new BrayCurtis());
+ }else if (pDataArray->Estimators[i] == "whittaker") {
+ treeCalculators.push_back(new Whittaker());
+ }else if (pDataArray->Estimators[i] == "odum") {
+ treeCalculators.push_back(new Odum());
+ }else if (pDataArray->Estimators[i] == "canberra") {
+ treeCalculators.push_back(new Canberra());
+ }else if (pDataArray->Estimators[i] == "structeuclidean") {
+ treeCalculators.push_back(new StructEuclidean());
+ }else if (pDataArray->Estimators[i] == "structchord") {
+ treeCalculators.push_back(new StructChord());
+ }else if (pDataArray->Estimators[i] == "hellinger") {
+ treeCalculators.push_back(new Hellinger());
+ }else if (pDataArray->Estimators[i] == "manhattan") {
+ treeCalculators.push_back(new Manhattan());
+ }else if (pDataArray->Estimators[i] == "structpearson") {
+ treeCalculators.push_back(new StructPearson());
+ }else if (pDataArray->Estimators[i] == "soergel") {
+ treeCalculators.push_back(new Soergel());
+ }else if (pDataArray->Estimators[i] == "spearman") {
+ treeCalculators.push_back(new Spearman());
+ }else if (pDataArray->Estimators[i] == "structkulczynski") {
+ treeCalculators.push_back(new StructKulczynski());
+ }else if (pDataArray->Estimators[i] == "speciesprofile") {
+ treeCalculators.push_back(new SpeciesProfile());
+ }else if (pDataArray->Estimators[i] == "hamming") {
+ treeCalculators.push_back(new Hamming());
+ }else if (pDataArray->Estimators[i] == "structchi2") {
+ treeCalculators.push_back(new StructChi2());
+ }else if (pDataArray->Estimators[i] == "gower") {
+ treeCalculators.push_back(new Gower());
+ }else if (pDataArray->Estimators[i] == "memchi2") {
+ treeCalculators.push_back(new MemChi2());
+ }else if (pDataArray->Estimators[i] == "memchord") {
+ treeCalculators.push_back(new MemChord());
+ }else if (pDataArray->Estimators[i] == "memeuclidean") {
+ treeCalculators.push_back(new MemEuclidean());
+ }else if (pDataArray->Estimators[i] == "mempearson") {
+ treeCalculators.push_back(new MemPearson());
+ }
+ }
+ }
+
+ pDataArray->calcDists.resize(treeCalculators.size());
+
+ vector<SharedRAbundVector*> subset;
+ for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
+
+ for (int l = 0; l < k; l++) {
+
+ if (k != l) { //we dont need to similiarity of a groups to itself
+ subset.clear(); //clear out old pair of sharedrabunds
+ //add new pair of sharedrabunds
+ subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
+
+ for(int i=0;i<treeCalculators.size();i++) {
+
+ //if this calc needs all groups to calculate the pair load all groups
+ if (treeCalculators[i]->getNeedsAll()) {
+ //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+ for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
+ if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
+ }
+ }
+
+ vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
+
+ if (pDataArray->m->control_pressed) { return 1; }
+
+ seqDist temp(l, k, -(tempdata[0]-1.0));
+ pDataArray->calcDists[i].push_back(temp);
+ }
+ }
+ }
+ }
+
+ for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+
#endif