2 * treegroupscommand.cpp
5 * Created by Sarah Westcott on 4/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 TreeGroupCommand::TreeGroupCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
36 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
39 //valid paramters for this command
40 string Array[] = {"line","label","calc","groups", "phylip", "column", "name", "precision","cutoff"};
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 OptionParser parser(option);
44 map<string, string> parameters = parser. getParameters();
46 ValidParameters validParameter;
48 //check to make sure all parameters are valid for command
49 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
54 phylipfile = validParameter.validFile(parameters, "phylip", true);
55 if (phylipfile == "not open") { abort = true; }
56 else if (phylipfile == "not found") { phylipfile = ""; }
57 else { format = "phylip"; }
59 columnfile = validParameter.validFile(parameters, "column", true);
60 if (columnfile == "not open") { abort = true; }
61 else if (columnfile == "not found") { columnfile = ""; }
62 else { format = "column"; }
64 namefile = validParameter.validFile(parameters, "name", true);
65 if (namefile == "not open") { abort = true; }
66 else if (namefile == "not found") { namefile = ""; }
67 else { globaldata->setNameFile(namefile); }
69 format = globaldata->getFormat();
71 //error checking on files
72 if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == ""))) { mothurOut("You must run the read.otu command or provide a distance file before running the tree.shared command."); mothurOutEndLine(); abort = true; }
73 else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); mothurOutEndLine(); abort = true; }
75 if (columnfile != "") {
76 if (namefile == "") { mothurOut("You need to provide a namefile if you are going to use the column format."); mothurOutEndLine(); abort = true; }
79 //check for optional parameter and set defaults
80 // ...at some point should added some additional type checking...
81 line = validParameter.validFile(parameters, "line", false);
82 if (line == "not found") { line = ""; }
84 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
85 else { allLines = 1; }
88 label = validParameter.validFile(parameters, "label", false);
89 if (label == "not found") { label = ""; }
91 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
92 else { allLines = 1; }
95 //make sure user did not use both the line and label parameters
96 if ((line != "") && (label != "")) { mothurOut("You cannot use both the line and label parameters at the same time. "); mothurOutEndLine(); abort = true; }
97 //if the user has not specified any line or labels use the ones from read.otu
98 else if((line == "") && (label == "")) {
99 allLines = globaldata->allLines;
100 labels = globaldata->labels;
101 lines = globaldata->lines;
104 groups = validParameter.validFile(parameters, "groups", false);
105 if (groups == "not found") { groups = ""; }
107 splitAtDash(groups, Groups);
108 globaldata->Groups = Groups;
111 calc = validParameter.validFile(parameters, "calc", false);
112 if (calc == "not found") { calc = "jclass-thetayc"; }
114 if (calc == "default") { calc = "jclass-thetayc"; }
116 splitAtDash(calc, Estimators);
119 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
120 convert(temp, precision);
122 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
123 convert(temp, cutoff);
124 cutoff += (5 / (precision * 10.0));
127 if (abort == false) {
129 validCalculator = new ValidCalculators();
131 if (format == "sharedfile") {
133 for (i=0; i<Estimators.size(); i++) {
134 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
135 if (Estimators[i] == "jabund") {
136 treeCalculators.push_back(new JAbund());
137 }else if (Estimators[i] == "sorabund") {
138 treeCalculators.push_back(new SorAbund());
139 }else if (Estimators[i] == "jclass") {
140 treeCalculators.push_back(new Jclass());
141 }else if (Estimators[i] == "sorclass") {
142 treeCalculators.push_back(new SorClass());
143 }else if (Estimators[i] == "jest") {
144 treeCalculators.push_back(new Jest());
145 }else if (Estimators[i] == "sorest") {
146 treeCalculators.push_back(new SorEst());
147 }else if (Estimators[i] == "thetayc") {
148 treeCalculators.push_back(new ThetaYC());
149 }else if (Estimators[i] == "thetan") {
150 treeCalculators.push_back(new ThetaN());
151 }else if (Estimators[i] == "morisitahorn") {
152 treeCalculators.push_back(new MorHorn());
153 }else if (Estimators[i] == "braycurtis") {
154 treeCalculators.push_back(new BrayCurtis());
163 catch(exception& e) {
164 errorOut(e, "TreeGroupCommand", "TreeGroupCommand");
169 //**********************************************************************************************************************
171 void TreeGroupCommand::help(){
173 mothurOut("The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n");
174 mothurOut("The tree.shared command can only be executed after a successful read.otu command or by providing a distance file.\n");
175 mothurOut("The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision, line and label. You may not use line and label at the same time.\n");
176 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n");
177 mothurOut("The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n");
178 mothurOut("The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used. If you use a column file the name filename is required. \n");
179 mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
180 mothurOut("The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels).\n");
181 mothurOut("Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund).\n");
182 mothurOut("The default value for groups is all the groups in your groupfile.\n");
183 mothurOut("The default value for calc is jclass-thetayc.\n");
184 mothurOut("The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose.\n");
185 validCalculator->printCalc("treegroup", cout);
186 mothurOut("Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile).\n");
187 mothurOut("Example tree.shared(phylip=abrecovery.dist).\n");
188 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
190 catch(exception& e) {
191 errorOut(e, "TreeGroupCommand", "help");
197 //**********************************************************************************************************************
199 TreeGroupCommand::~TreeGroupCommand(){
200 if (abort == false) {
202 if (format == "sharedfile") { delete read; delete input; globaldata->ginput = NULL;}
203 else { delete readMatrix; delete matrix; delete list; }
205 delete validCalculator;
210 //**********************************************************************************************************************
212 int TreeGroupCommand::execute(){
215 if (abort == true) { return 0; }
217 if (format == "sharedfile") {
218 //if the users entered no valid calculators don't execute command
219 if (treeCalculators.size() == 0) { mothurOut("You have given no valid calculators."); mothurOutEndLine(); return 0; }
222 read = new ReadOTUFile(globaldata->inputFileName);
223 read->read(&*globaldata);
225 input = globaldata->ginput;
226 lookup = input->getSharedRAbundVectors();
227 lastLabel = lookup[0]->getLabel();
229 if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups. I cannot run the command."); mothurOutEndLine(); return 0; }
231 //used in tree constructor
232 globaldata->runParse = false;
238 filename = globaldata->inputFileName;
240 if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }
241 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
243 readMatrix->setCutoff(cutoff);
246 nameMap = new NameAssignment(namefile);
253 readMatrix->read(nameMap);
254 list = readMatrix->getListVector();
255 matrix = readMatrix->getMatrix();
258 tmap = new TreeMap();
260 globaldata->gTreemap = tmap;
262 globaldata->Groups = tmap->namesOfGroups;
264 //clear globaldatas old tree names if any
265 globaldata->Treenames.clear();
267 //fills globaldatas tree names
268 globaldata->Treenames = globaldata->Groups;
270 //used in tree constructor
271 globaldata->runParse = false;
275 //create a new filename
276 outputFile = getRootName(globaldata->inputFileName) + "tre";
279 mothurOut("Tree complete. "); mothurOutEndLine();
282 //reset groups parameter
283 globaldata->Groups.clear();
287 catch(exception& e) {
288 errorOut(e, "TreeGroupCommand", "execute");
292 //**********************************************************************************************************************
294 void TreeGroupCommand::createTree(){
299 //do merges and create tree structure by setting parents and children
300 //there are numGroups - 1 merges to do
301 for (int i = 0; i < (numGroups - 1); i++) {
302 float largest = -1000.0;
305 //find largest value in sims matrix by searching lower triangle
306 for (int j = 1; j < simMatrix.size(); j++) {
307 for (int k = 0; k < j; k++) {
308 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
312 //set non-leaf node info and update leaves to know their parents
314 t->tree[numGroups + i].setChildren(index[row], index[column]);
317 t->tree[index[row]].setParent(numGroups + i);
318 t->tree[index[column]].setParent(numGroups + i);
320 //blength = distance / 2;
321 float blength = ((1.0 - largest) / 2);
324 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
325 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
327 //set your length to leaves to your childs length plus branchlength
328 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
332 index[row] = numGroups+i;
333 index[column] = numGroups+i;
335 //remove highest value that caused the merge.
336 simMatrix[row][column] = -1000.0;
337 simMatrix[column][row] = -1000.0;
339 //merge values in simsMatrix
340 for (int n = 0; n < simMatrix.size(); n++) {
341 //row becomes merge of 2 groups
342 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
343 simMatrix[n][row] = simMatrix[row][n];
345 simMatrix[column][n] = -1000.0;
346 simMatrix[n][column] = -1000.0;
350 //adjust tree to make sure root to tip length is .5
351 int root = t->findRoot();
352 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
358 t->createNewickFile(outputFile);
364 catch(exception& e) {
365 errorOut(e, "TreeGroupCommand", "createTree");
369 /***********************************************************/
370 void TreeGroupCommand::printSims(ostream& out) {
373 //output column headers
375 //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
379 for (int m = 0; m < simMatrix.size(); m++) {
380 //out << lookup[m]->getGroup() << '\t';
381 for (int n = 0; n < simMatrix.size(); n++) {
382 out << simMatrix[m][n] << '\t';
388 catch(exception& e) {
389 errorOut(e, "TreeGroupCommand", "printSims");
393 /***********************************************************/
394 void TreeGroupCommand::makeSimsDist() {
396 numGroups = list->size();
400 for (int g = 0; g < numGroups; g++) { index[g] = g; }
402 //initialize simMatrix
404 simMatrix.resize(numGroups);
405 for (int m = 0; m < simMatrix.size(); m++) {
406 for (int j = 0; j < simMatrix.size(); j++) {
407 simMatrix[m].push_back(0.0);
411 //go through sparse matrix and fill sims
412 //go through each cell in the sparsematrix
413 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
414 //similairity = -(distance-1)
415 simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
416 simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
421 catch(exception& e) {
422 errorOut(e, "TreeGroupCommand", "makeSimsDist");
427 /***********************************************************/
428 void TreeGroupCommand::makeSimsShared() {
432 //clear globaldatas old tree names if any
433 globaldata->Treenames.clear();
435 //fills globaldatas tree names
436 globaldata->Treenames = globaldata->Groups;
438 //create treemap class from groupmap for tree class to use
439 tmap = new TreeMap();
440 tmap->makeSim(globaldata->gGroupmap);
441 globaldata->gTreemap = tmap;
443 set<string> processedLabels;
444 set<string> userLabels = labels;
445 set<int> userLines = lines;
447 //as long as you are not at the end of the file or done wih the lines you want
448 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
450 if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){
451 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
454 processedLabels.insert(lookup[0]->getLabel());
455 userLabels.erase(lookup[0]->getLabel());
456 userLines.erase(count);
459 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
460 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
461 lookup = input->getSharedRAbundVectors(lastLabel);
463 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
466 processedLabels.insert(lookup[0]->getLabel());
467 userLabels.erase(lookup[0]->getLabel());
470 lastLabel = lookup[0]->getLabel();
472 //get next line to process
473 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
474 lookup = input->getSharedRAbundVectors();
478 //output error messages about any remaining user labels
479 set<string>::iterator it;
480 bool needToRun = false;
481 for (it = userLabels.begin(); it != userLabels.end(); it++) {
482 mothurOut("Your file does not include the label " + *it);
483 if (processedLabels.count(lastLabel) != 1) {
484 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
487 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
491 //run last line if you need to
492 if (needToRun == true) {
493 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
494 lookup = input->getSharedRAbundVectors(lastLabel);
496 mothurOut(lookup[0]->getLabel() + "\t" + toString(count)); mothurOutEndLine();
498 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
501 for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; }
503 catch(exception& e) {
504 errorOut(e, "TreeGroupCommand", "makeSimsShared");
509 /***********************************************************/
510 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
513 vector<SharedRAbundVector*> subset;
514 numGroups = thisLookup.size();
516 //for each calculator
517 for(int i = 0 ; i < treeCalculators.size(); i++) {
518 //initialize simMatrix
520 simMatrix.resize(numGroups);
521 for (int m = 0; m < simMatrix.size(); m++) {
522 for (int j = 0; j < simMatrix.size(); j++) {
523 simMatrix[m].push_back(0.0);
529 for (int g = 0; g < numGroups; g++) { index[g] = g; }
531 //create a new filename
532 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
534 for (int k = 0; k < thisLookup.size(); k++) {
535 for (int l = k; l < thisLookup.size(); l++) {
536 if (k != l) { //we dont need to similiarity of a groups to itself
537 //get estimated similarity between 2 groups
539 subset.clear(); //clear out old pair of sharedrabunds
540 //add new pair of sharedrabunds
541 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
543 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
544 //save values in similarity matrix
545 simMatrix[k][l] = data[0];
546 simMatrix[l][k] = data[0];
551 //creates tree from similarity matrix and write out file
556 catch(exception& e) {
557 errorOut(e, "TreeGroupCommand", "process");
561 /***********************************************************/