2 * treegroupscommand.cpp
5 * Created by Sarah Westcott on 4/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 TreeGroupCommand::TreeGroupCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
36 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
39 //valid paramters for this command
40 string Array[] = {"line","label","calc","groups", "phylip", "column", "name", "precision","cutoff"};
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 OptionParser parser(option);
44 map<string, string> parameters = parser. getParameters();
46 ValidParameters validParameter;
48 //check to make sure all parameters are valid for command
49 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
54 phylipfile = validParameter.validFile(parameters, "phylip", true);
55 if (phylipfile == "not open") { abort = true; }
56 else if (phylipfile == "not found") { phylipfile = ""; }
57 else { format = "phylip"; }
59 columnfile = validParameter.validFile(parameters, "column", true);
60 if (columnfile == "not open") { abort = true; }
61 else if (columnfile == "not found") { columnfile = ""; }
62 else { format = "column"; }
64 namefile = validParameter.validFile(parameters, "name", true);
65 if (namefile == "not open") { abort = true; }
66 else if (namefile == "not found") { namefile = ""; }
67 else { globaldata->setNameFile(namefile); }
69 format = globaldata->getFormat();
71 //error checking on files
72 if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == ""))) { mothurOut("You must run the read.otu command or provide a distance file before running the tree.shared command."); mothurOutEndLine(); abort = true; }
73 else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); mothurOutEndLine(); abort = true; }
75 if (columnfile != "") {
76 if (namefile == "") { mothurOut("You need to provide a namefile if you are going to use the column format."); mothurOutEndLine(); abort = true; }
79 //check for optional parameter and set defaults
80 // ...at some point should added some additional type checking...
81 line = validParameter.validFile(parameters, "line", false);
82 if (line == "not found") { line = ""; }
84 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
85 else { allLines = 1; }
88 label = validParameter.validFile(parameters, "label", false);
89 if (label == "not found") { label = ""; }
91 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
92 else { allLines = 1; }
95 //make sure user did not use both the line and label parameters
96 if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
97 //if the user has not specified any line or labels use the ones from read.otu
98 else if((line == "") && (label == "")) {
99 allLines = globaldata->allLines;
100 labels = globaldata->labels;
101 lines = globaldata->lines;
104 groups = validParameter.validFile(parameters, "groups", false);
105 if (groups == "not found") { groups = ""; }
107 splitAtDash(groups, Groups);
108 globaldata->Groups = Groups;
111 calc = validParameter.validFile(parameters, "calc", false);
112 if (calc == "not found") { calc = "jclass-thetayc"; }
114 if (calc == "default") { calc = "jclass-thetayc"; }
116 splitAtDash(calc, Estimators);
119 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
120 convert(temp, precision);
122 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
123 convert(temp, cutoff);
124 cutoff += (5 / (precision * 10.0));
127 if (abort == false) {
129 validCalculator = new ValidCalculators();
131 if (format == "sharedfile") {
133 for (i=0; i<Estimators.size(); i++) {
134 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
135 if (Estimators[i] == "jabund") {
136 treeCalculators.push_back(new JAbund());
137 }else if (Estimators[i] == "sorabund") {
138 treeCalculators.push_back(new SorAbund());
139 }else if (Estimators[i] == "jclass") {
140 treeCalculators.push_back(new Jclass());
141 }else if (Estimators[i] == "sorclass") {
142 treeCalculators.push_back(new SorClass());
143 }else if (Estimators[i] == "jest") {
144 treeCalculators.push_back(new Jest());
145 }else if (Estimators[i] == "sorest") {
146 treeCalculators.push_back(new SorEst());
147 }else if (Estimators[i] == "thetayc") {
148 treeCalculators.push_back(new ThetaYC());
149 }else if (Estimators[i] == "thetan") {
150 treeCalculators.push_back(new ThetaN());
151 }else if (Estimators[i] == "morisitahorn") {
152 treeCalculators.push_back(new MorHorn());
153 }else if (Estimators[i] == "braycurtis") {
154 treeCalculators.push_back(new BrayCurtis());
163 catch(exception& e) {
164 errorOut(e, "TreeGroupCommand", "TreeGroupCommand");
169 //**********************************************************************************************************************
171 void TreeGroupCommand::help(){
173 mothurOut("The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n");
174 mothurOut("The tree.shared command can only be executed after a successful read.otu command or by providing a distance file.\n");
175 mothurOut("The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision, line and label. You may not use line and label at the same time.\n");
176 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
177 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
178 mothurOut("The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used. If you use a column file the name filename is required. \n");
179 mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
180 mothurOut("The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels).\n");
181 mothurOut("Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund).\n");
182 mothurOut("The default value for groups is all the groups in your groupfile.\n");
183 mothurOut("The default value for calc is jclass-thetayc.\n");
184 mothurOut("The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose.\n");
185 validCalculator->printCalc("treegroup", cout);
186 mothurOut("Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile).\n");
187 mothurOut("Example tree.shared(phylip=abrecovery.dist).\n");
188 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
190 catch(exception& e) {
191 errorOut(e, "TreeGroupCommand", "help");
197 //**********************************************************************************************************************
199 TreeGroupCommand::~TreeGroupCommand(){
200 if (abort == false) {
202 if (format == "sharedfile") { delete read; delete input; globaldata->ginput = NULL;}
203 else { delete readMatrix; delete matrix; delete list; }
205 delete validCalculator;
210 //**********************************************************************************************************************
212 int TreeGroupCommand::execute(){
215 if (abort == true) { return 0; }
217 if (format == "sharedfile") {
218 //if the users entered no valid calculators don't execute command
219 if (treeCalculators.size() == 0) { mothurOut("You have given no valid calculators."); mothurOutEndLine(); return 0; }
222 read = new ReadOTUFile(globaldata->inputFileName);
223 read->read(&*globaldata);
225 input = globaldata->ginput;
226 lookup = input->getSharedRAbundVectors();
227 lastLabel = lookup[0]->getLabel();
229 if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups. I cannot run the command."); mothurOutEndLine(); return 0; }
231 globaldata->runParse = false;
237 filename = globaldata->inputFileName;
239 if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }
240 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
242 readMatrix->setCutoff(cutoff);
245 nameMap = new NameAssignment(namefile);
252 readMatrix->read(nameMap);
253 list = readMatrix->getListVector();
254 matrix = readMatrix->getMatrix();
257 tmap = new TreeMap();
259 globaldata->gTreemap = tmap;
261 globaldata->Groups = tmap->namesOfGroups;
263 //clear globaldatas old tree names if any
264 globaldata->Treenames.clear();
266 //fills globaldatas tree names
267 globaldata->Treenames = globaldata->Groups;
269 globaldata->runParse = false;
273 //create a new filename
274 outputFile = getRootName(globaldata->inputFileName) + "tre";
277 mothurOut("Tree complete. "); mothurOutEndLine();
280 //reset groups parameter
281 globaldata->Groups.clear();
285 catch(exception& e) {
286 errorOut(e, "TreeGroupCommand", "execute");
290 //**********************************************************************************************************************
292 void TreeGroupCommand::createTree(){
297 //do merges and create tree structure by setting parents and children
298 //there are numGroups - 1 merges to do
299 for (int i = 0; i < (numGroups - 1); i++) {
300 float largest = -1000.0;
303 //find largest value in sims matrix by searching lower triangle
304 for (int j = 1; j < simMatrix.size(); j++) {
305 for (int k = 0; k < j; k++) {
306 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
310 //set non-leaf node info and update leaves to know their parents
312 t->tree[numGroups + i].setChildren(index[row], index[column]);
315 t->tree[index[row]].setParent(numGroups + i);
316 t->tree[index[column]].setParent(numGroups + i);
318 //blength = distance / 2;
319 float blength = ((1.0 - largest) / 2);
322 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
323 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
325 //set your length to leaves to your childs length plus branchlength
326 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
330 index[row] = numGroups+i;
331 index[column] = numGroups+i;
333 //remove highest value that caused the merge.
334 simMatrix[row][column] = -1000.0;
335 simMatrix[column][row] = -1000.0;
337 //merge values in simsMatrix
338 for (int n = 0; n < simMatrix.size(); n++) {
339 //row becomes merge of 2 groups
340 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
341 simMatrix[n][row] = simMatrix[row][n];
343 simMatrix[column][n] = -1000.0;
344 simMatrix[n][column] = -1000.0;
348 //adjust tree to make sure root to tip length is .5
349 int root = t->findRoot();
350 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
356 t->createNewickFile(outputFile);
362 catch(exception& e) {
363 errorOut(e, "TreeGroupCommand", "createTree");
367 /***********************************************************/
368 void TreeGroupCommand::printSims(ostream& out) {
371 //output column headers
373 //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
377 for (int m = 0; m < simMatrix.size(); m++) {
378 //out << lookup[m]->getGroup() << '\t';
379 for (int n = 0; n < simMatrix.size(); n++) {
380 out << simMatrix[m][n] << '\t';
386 catch(exception& e) {
387 errorOut(e, "TreeGroupCommand", "printSims");
391 /***********************************************************/
392 void TreeGroupCommand::makeSimsDist() {
394 numGroups = list->size();
398 for (int g = 0; g < numGroups; g++) { index[g] = g; }
400 //initialize simMatrix
402 simMatrix.resize(numGroups);
403 for (int m = 0; m < simMatrix.size(); m++) {
404 for (int j = 0; j < simMatrix.size(); j++) {
405 simMatrix[m].push_back(0.0);
409 //go through sparse matrix and fill sims
410 //go through each cell in the sparsematrix
411 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
412 //similairity = -(distance-1)
413 simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
414 simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
419 catch(exception& e) {
420 errorOut(e, "TreeGroupCommand", "makeSimsDist");
425 /***********************************************************/
426 void TreeGroupCommand::makeSimsShared() {
430 //clear globaldatas old tree names if any
431 globaldata->Treenames.clear();
433 //fills globaldatas tree names
434 globaldata->Treenames = globaldata->Groups;
436 //create treemap class from groupmap for tree class to use
437 tmap = new TreeMap();
438 tmap->makeSim(globaldata->gGroupmap);
439 globaldata->gTreemap = tmap;
441 set<string> processedLabels;
442 set<string> userLabels = labels;
443 set<int> userLines = lines;
445 //as long as you are not at the end of the file or done wih the lines you want
446 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
448 if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){
449 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
452 processedLabels.insert(lookup[0]->getLabel());
453 userLabels.erase(lookup[0]->getLabel());
454 userLines.erase(count);
457 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
458 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
459 lookup = input->getSharedRAbundVectors(lastLabel);
461 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
464 processedLabels.insert(lookup[0]->getLabel());
465 userLabels.erase(lookup[0]->getLabel());
468 lastLabel = lookup[0]->getLabel();
470 //get next line to process
471 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
472 lookup = input->getSharedRAbundVectors();
476 //output error messages about any remaining user labels
477 set<string>::iterator it;
478 bool needToRun = false;
479 for (it = userLabels.begin(); it != userLabels.end(); it++) {
480 mothurOut("Your file does not include the label " + *it);
481 if (processedLabels.count(lastLabel) != 1) {
482 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
485 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
489 //run last line if you need to
490 if (needToRun == true) {
491 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
492 lookup = input->getSharedRAbundVectors(lastLabel);
494 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
496 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
499 for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; }
501 catch(exception& e) {
502 errorOut(e, "TreeGroupCommand", "makeSimsShared");
507 /***********************************************************/
508 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
511 vector<SharedRAbundVector*> subset;
512 numGroups = thisLookup.size();
514 //for each calculator
515 for(int i = 0 ; i < treeCalculators.size(); i++) {
516 //initialize simMatrix
518 simMatrix.resize(numGroups);
519 for (int m = 0; m < simMatrix.size(); m++) {
520 for (int j = 0; j < simMatrix.size(); j++) {
521 simMatrix[m].push_back(0.0);
527 for (int g = 0; g < numGroups; g++) { index[g] = g; }
529 //create a new filename
530 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
532 for (int k = 0; k < thisLookup.size(); k++) {
533 for (int l = k; l < thisLookup.size(); l++) {
534 if (k != l) { //we dont need to similiarity of a groups to itself
535 //get estimated similarity between 2 groups
537 subset.clear(); //clear out old pair of sharedrabunds
538 //add new pair of sharedrabunds
539 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
541 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
542 //save values in similarity matrix
543 simMatrix[k][l] = data[0];
544 simMatrix[l][k] = data[0];
549 //creates tree from similarity matrix and write out file
554 catch(exception& e) {
555 errorOut(e, "TreeGroupCommand", "process");
559 /***********************************************************/