2 * treegroupscommand.cpp
5 * Created by Sarah Westcott on 4/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
// Builds the tree.shared command from the raw option string.
// Visible steps: handle "help", parse the options, flag any parameter outside the
// allowed list, resolve the phylip/column/name files, read the line/label/groups/calc/
// precision/cutoff settings, and finally create one tree calculator per valid
// estimator name when the input format is "sharedfile".
// Any validation failure sets abort = true rather than throwing.
// NOTE(review): the numbering embedded in this listing skips lines, so some
// statements of this constructor (presumably try/else/closing-brace lines) are not
// visible here; the comments below describe only what is shown.
25 TreeGroupCommand::TreeGroupCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
// help() prints the calculator list through validCalculator, so it is allocated before the call.
36 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
39 //valid parameters for this command
40 string Array[] = {"line","label","calc","groups", "phylip", "column", "name", "precision","cutoff"};
// copy the array into a vector; sizeof(Array)/sizeof(string) is the element count
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 OptionParser parser(option);
44 map<string, string> parameters = parser. getParameters();
46 ValidParameters validParameter;
48 //check to make sure all parameters are valid for command
49 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
// validFile() results are compared against the sentinels "not open" (abort) and
// "not found" (parameter not supplied, treated as empty).
54 phylipfile = validParameter.validFile(parameters, "phylip", true);
55 if (phylipfile == "not open") { abort = true; }
56 else if (phylipfile == "not found") { phylipfile = ""; }
57 else { format = "phylip"; }
59 columnfile = validParameter.validFile(parameters, "column", true);
60 if (columnfile == "not open") { abort = true; }
61 else if (columnfile == "not found") { columnfile = ""; }
62 else { format = "column"; }
64 namefile = validParameter.validFile(parameters, "name", true);
65 if (namefile == "not open") { abort = true; }
66 else if (namefile == "not found") { namefile = ""; }
67 else { globaldata->setNameFile(namefile); }
// NOTE(review): this assignment comes after the "phylip"/"column" assignments above;
// if it runs unconditionally it replaces them with whatever globaldata reports.
// Confirm against the full file that this is intended.
69 format = globaldata->getFormat();
71 //error checking on files
72 if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == ""))) { cout << "You must run the read.otu command or provide a distance file before running the tree.shared command." << endl; abort = true; }
73 else if ((phylipfile != "") && (columnfile != "")) { cout << "When running the tree.shared command with a distance file you may not use both the column and the phylip parameters." << endl; abort = true; }
// a column-format distance file additionally requires a name file
75 if (columnfile != "") {
76 if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
79 //check for optional parameter and set defaults
80 // ...at some point should add some additional type checking...
81 line = validParameter.validFile(parameters, "line", false);
82 if (line == "not found") { line = ""; }
// "all" selects every line; anything else is split into the individual requested lines.
// NOTE(review): both the line and the label handling assign allLines; whether each
// pair below is guarded by an else on a line not shown here cannot be told from this view.
84 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
85 else { allLines = 1; }
88 label = validParameter.validFile(parameters, "label", false);
89 if (label == "not found") { label = ""; }
91 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
92 else { allLines = 1; }
95 //make sure user did not use both the line and label parameters
96 if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
97 //if the user has not specified any line or labels use the ones from read.otu
98 else if((line == "") && (label == "")) {
99 allLines = globaldata->allLines;
100 labels = globaldata->labels;
101 lines = globaldata->lines;
// the groups selection is also published to globaldata
104 groups = validParameter.validFile(parameters, "groups", false);
105 if (groups == "not found") { groups = ""; }
107 splitAtDash(groups, Groups);
108 globaldata->Groups = Groups;
// calc falls back to "jclass-thetayc" both when absent and when given as "default"
111 calc = validParameter.validFile(parameters, "calc", false);
112 if (calc == "not found") { calc = "jclass-thetayc"; }
114 if (calc == "default") { calc = "jclass-thetayc"; }
116 splitAtDash(calc, Estimators);
// precision defaults to "100", cutoff to "10"; both are converted from their string form
119 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
120 convert(temp, precision);
122 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
123 convert(temp, cutoff);
// pads the cutoff by 5/(precision*10), i.e. 0.005 for the default precision of 100
// (presumably so values that round to the cutoff are still included - TODO confirm)
124 cutoff += (5 / (precision * 10.0));
127 if (abort == false) {
129 validCalculator = new ValidCalculators();
// calculators are only needed when trees are computed from a shared file
131 if (format == "sharedfile") {
133 for (i=0; i<Estimators.size(); i++) {
// names rejected by isValidCalculator("treegroup", ...) add no calculator
134 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
135 if (Estimators[i] == "jabund") {
136 treeCalculators.push_back(new JAbund());
137 }else if (Estimators[i] == "sorabund") {
138 treeCalculators.push_back(new SorAbund());
139 }else if (Estimators[i] == "jclass") {
140 treeCalculators.push_back(new Jclass());
141 }else if (Estimators[i] == "sorclass") {
142 treeCalculators.push_back(new SorClass());
143 }else if (Estimators[i] == "jest") {
144 treeCalculators.push_back(new Jest());
145 }else if (Estimators[i] == "sorest") {
146 treeCalculators.push_back(new SorEst());
147 }else if (Estimators[i] == "thetayc") {
148 treeCalculators.push_back(new ThetaYC());
149 }else if (Estimators[i] == "thetan") {
150 treeCalculators.push_back(new ThetaN());
151 }else if (Estimators[i] == "morisitahorn") {
152 treeCalculators.push_back(new MorHorn());
153 }else if (Estimators[i] == "braycurtis") {
154 treeCalculators.push_back(new BrayCurtis());
// error handlers: report the failure on cout (what follows each message is not visible here)
163 catch(exception& e) {
164 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168 cout << "An unknown error has occurred in the TreeGroupCommand class function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
173 //**********************************************************************************************************************
// Prints the usage text for the tree.shared command to cout: the accepted parameters,
// their defaults, example invocations, and the list of calculators available for
// "treegroup" (via validCalculator->printCalc).
// validCalculator must already point to a ValidCalculators instance when this is called.
// NOTE(review): the numbering embedded in this listing skips lines, so the try block
// opening and the end of the catch handlers are not visible here.
175 void TreeGroupCommand::help(){
177 cout << "The tree.shared command creates a .tre to represent the similiarity between groups or sequences." << "\n";
178 cout << "The tree.shared command can only be executed after a successful read.otu command or by providing a distance file." << "\n";
179 cout << "The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision, line and label. You may not use line and label at the same time." << "\n";
180 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
181 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes." << "\n";
182 cout << "The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used. If you use a column file the name filename is required. " << "\n";
183 cout << "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed." << "\n";
184 cout << "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
185 cout << "Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
186 cout << "The default value for groups is all the groups in your groupfile." << "\n";
187 cout << "The default value for calc is jclass-thetayc." << "\n";
188 cout << "The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose." << "\n";
// lists the calculators registered under "treegroup"
189 validCalculator->printCalc("treegroup", cout);
190 cout << "Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile)." << "\n";
191 cout << "Example tree.shared(phylip=abrecovery.dist)." << "\n";
192 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
// error handlers: report the failure on cout (what follows each message is not visible here)
194 catch(exception& e) {
195 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
199 cout << "An unknown error has occurred in the TreeGroupCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205 //**********************************************************************************************************************
// Releases the objects this command allocated. Cleanup is keyed on abort and on the
// input format: the shared-file path frees read/input and clears globaldata->ginput,
// the distance-file path frees readMatrix/matrix/list.
// NOTE(review): the numbering embedded in this listing skips lines, so the closing
// braces (and any further deletes) are not visible here. The constructor's help path
// allocates validCalculator while setting abort = true; if the delete below sits inside
// the abort guard, that instance is not freed - confirm against the full file.
207 TreeGroupCommand::~TreeGroupCommand(){
208 if (abort == false) {
210 if (format == "sharedfile") { delete read; delete input; globaldata->ginput = NULL;}
211 else { delete readMatrix; delete matrix; delete list; }
213 delete validCalculator;
218 //**********************************************************************************************************************
// Runs the tree.shared command.
// Shared-file input: requires at least one calculator and at least two groups, reads
// the OTU file and fetches the first set of shared rabund vectors.
// Distance-file input: reads a column or phylip matrix with the configured cutoff,
// then sets up the tree map, group names and the output file name.
// Every return visible here yields 0 (abort, no calculators, too few groups).
// NOTE(review): the numbering embedded in this listing skips lines, so the branch
// structure and the calls that actually build the similarity matrix and tree are
// not visible here.
220 int TreeGroupCommand::execute(){
223 if (abort == true) { return 0; }
225 if (format == "sharedfile") {
226 //if the users entered no valid calculators don't execute command
227 if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." << endl; return 0; }
// read the OTU file; the reader publishes its input object through globaldata->ginput
230 read = new ReadOTUFile(globaldata->inputFileName);
231 read->read(&*globaldata);
233 input = globaldata->ginput;
234 lookup = input->getSharedRAbundVectors();
// a tree needs at least two groups
237 if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0; }
239 globaldata->runParse = false;
// distance-file input: pick the matrix reader that matches the format
245 filename = globaldata->inputFileName;
247 if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }
248 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
250 readMatrix->setCutoff(cutoff);
// NOTE(review): any guard on namefile being non-empty is not visible in this listing
253 nameMap = new NameAssignment(namefile);
254 nameMap->readMap(1,2);
// read the matrix, then keep the resulting list vector and sparse matrix
260 readMatrix->read(nameMap);
261 list = readMatrix->getListVector();
262 matrix = readMatrix->getMatrix();
// new tree map, shared through globaldata; its group names become the active groups
265 tmap = new TreeMap();
267 globaldata->gTreemap = tmap;
269 globaldata->Groups = tmap->namesOfGroups;
271 //clear globaldatas old tree names if any
272 globaldata->Treenames.clear();
274 //fills globaldatas tree names
275 globaldata->Treenames = globaldata->Groups;
277 globaldata->runParse = false;
281 //create a new filename
282 outputFile = getRootName(globaldata->inputFileName) + "tre";
285 cout << "Tree complete. " << endl;
288 //reset groups parameter
289 globaldata->Groups.clear();
// error handlers: report the failure on cout (what follows each message is not visible here)
293 catch(exception& e) {
294 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
298 cout << "An unknown error has occurred in the TreeGroupCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
302 //**********************************************************************************************************************
// Builds a tree from simMatrix by repeated pairwise merging and writes it to
// outputFile in Newick form (via t->createNewickFile).
// Each of the numGroups - 1 rounds picks the largest remaining similarity in the lower
// triangle, joins the two corresponding nodes under a new internal node
// (numGroups + i), sets branch lengths from (1 - similarity) / 2, and replaces the
// merged pair's similarities with their average. -1000.0 is used as the sentinel for
// "removed" cells and as the starting value of the search.
// index[] maps a matrix row/column to the tree node that currently represents it.
// NOTE(review): the numbering embedded in this listing skips lines, so the creation of
// t, the declarations of row/column and the closing braces are not visible here.
304 void TreeGroupCommand::createTree(){
309 //do merges and create tree structure by setting parents and children
310 //there are numGroups - 1 merges to do
311 for (int i = 0; i < (numGroups - 1); i++) {
312 float largest = -1000.0;
315 //find largest value in sims matrix by searching lower triangle
316 for (int j = 1; j < simMatrix.size(); j++) {
317 for (int k = 0; k < j; k++) {
318 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
322 //set non-leaf node info and update leaves to know their parents
324 t->tree[numGroups + i].setChildren(index[row], index[column]);
327 t->tree[index[row]].setParent(numGroups + i);
328 t->tree[index[column]].setParent(numGroups + i);
330 //blength = distance / 2;
331 float blength = ((1.0 - largest) / 2);
// each child's branch is the new node's height minus the height the child already has
334 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
335 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
337 //set your length to leaves to your childs length plus branchlength
338 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
// both matrix positions now refer to the new internal node
342 index[row] = numGroups+i;
343 index[column] = numGroups+i;
345 //remove highest value that caused the merge.
346 simMatrix[row][column] = -1000.0;
347 simMatrix[column][row] = -1000.0;
349 //merge values in simsMatrix
350 for (int n = 0; n < simMatrix.size(); n++) {
351 //row becomes merge of 2 groups
352 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
353 simMatrix[n][row] = simMatrix[row][n];
// column is retired: mark its cells with the sentinel so it is never selected again
355 simMatrix[column][n] = -1000.0;
356 simMatrix[n][column] = -1000.0;
360 //adjust tree to make sure root to tip length is .5
361 int root = t->findRoot();
362 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
// write the finished tree to outputFile
368 t->createNewickFile(outputFile);
// error handlers: report the failure on cout (what follows each message is not visible here)
374 catch(exception& e) {
375 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
379 cout << "An unknown error has occurred in the TreeGroupCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
383 /***********************************************************/
// Writes the contents of simMatrix to the given stream, each value followed by a tab.
// The group-name headers are currently commented out.
// out: destination stream.
// NOTE(review): the numbering embedded in this listing skips lines, so any row
// terminators and the closing braces are not visible here.
384 void TreeGroupCommand::printSims(ostream& out) {
387 //output column headers
389 //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
393 for (int m = 0; m < simMatrix.size(); m++) {
394 //out << lookup[m]->getGroup() << '\t';
395 for (int n = 0; n < simMatrix.size(); n++) {
396 out << simMatrix[m][n] << '\t';
// error handlers: report the failure on cout (what follows each message is not visible here)
402 catch(exception& e) {
403 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
407 cout << "An unknown error has occurred in the TreeGroupCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
411 /***********************************************************/
// Fills simMatrix from the sparse distance matrix read from a distance file.
// numGroups is taken from list->size(), index[] starts as the identity mapping, every
// cell starts at 0.0, and each sparse cell is stored symmetrically as 1 - distance.
// NOTE(review): the numbering embedded in this listing skips lines, so statements
// between the visible ones (and the closing braces) are not visible here.
412 void TreeGroupCommand::makeSimsDist() {
414 numGroups = list->size();
// each matrix position initially maps to the node with the same number
418 for (int g = 0; g < numGroups; g++) { index[g] = g; }
420 //initialize simMatrix
// NOTE(review): push_back appends to each row, so this relies on the rows being empty
// beforehand (a clear may sit on a line not visible here - confirm)
422 simMatrix.resize(numGroups);
423 for (int m = 0; m < simMatrix.size(); m++) {
424 for (int j = 0; j < simMatrix.size(); j++) {
425 simMatrix[m].push_back(0.0);
429 //go through sparse matrix and fill sims
430 //go through each cell in the sparsematrix
431 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
432 //similarity = -(distance-1)
433 simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
434 simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
// error handlers: report the failure on cout (what follows each message is not visible here)
439 catch(exception& e) {
440 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
444 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
449 /***********************************************************/
// Drives tree creation for shared-file input: walks the data sets returned by
// input->getSharedRAbundVectors() and selects the ones matching the requested lines or
// labels (or all of them when allLines == 1).
// Labels that were requested but never seen are reported at the end, and the last data
// set read may stand in for them. The calculators are deleted before returning.
// NOTE(review): the numbering embedded in this listing skips lines, so the calls that
// actually process a data set, the updates of count/lastLookup/needToRun and the
// closing braces are not visible here.
450 void TreeGroupCommand::makeSimsShared() {
454 //clear globaldatas old tree names if any
455 globaldata->Treenames.clear();
457 //fills globaldatas tree names
458 globaldata->Treenames = globaldata->Groups;
460 //create treemap class from groupmap for tree class to use
461 tmap = new TreeMap();
462 tmap->makeSim(globaldata->gGroupmap);
463 globaldata->gTreemap = tmap;
// working copies: entries are erased from these as they are satisfied
465 set<string> processedLabels;
466 set<string> userLabels = labels;
467 set<int> userLines = lines;
469 //as long as you are not at the end of the file or done with the lines you want
470 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
// this data set was requested, either by line number (count) or by label
472 if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){
473 cout << lookup[0]->getLabel() << '\t' << count << endl;
476 processedLabels.insert(lookup[0]->getLabel());
477 userLabels.erase(lookup[0]->getLabel());
478 userLines.erase(count);
// anyLabelsToProcess is defined elsewhere; when it returns true and the previous data
// set has not been handled yet, the previous one (lastLookup) is reported instead
481 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
482 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
485 processedLabels.insert(lastLookup[0]->getLabel());
486 userLabels.erase(lastLookup[0]->getLabel());
489 //prevent memory leak
// nothing to free while count is still 1
490 if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } }
493 //get next line to process
494 lookup = input->getSharedRAbundVectors();
498 //output error messages about any remaining user labels
499 set<string>::iterator it;
500 bool needToRun = false;
501 for (it = userLabels.begin(); it != userLabels.end(); it++) {
502 cout << "Your file does not include the label "<< *it;
// if the last data set read was not processed it is offered as the substitute,
// otherwise the user is pointed at its already-produced output
503 if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
504 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
507 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
511 //run last line if you need to
512 if (needToRun == true) {
513 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
// final cleanup of the last data set and of the calculators
// NOTE(review): treeCalculators keeps the deleted pointers unless it is cleared on a
// line not visible here
517 for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; }
518 for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; }
// error handlers: report the failure on cout (what follows each message is not visible here)
520 catch(exception& e) {
521 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
525 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
530 /***********************************************************/
// For one data set (one vector per group), fills simMatrix once per tree calculator
// with the pairwise values the calculator returns, and sets outputFile to
// <input root><calculator name>.<label>.tre.
// thisLookup: the shared rabund vectors of the groups to compare; its size becomes numGroups.
// NOTE(review): the numbering embedded in this listing skips lines, so the call that
// turns the matrix into a tree, any clearing of simMatrix and the closing braces are
// not visible here.
531 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
534 vector<SharedRAbundVector*> subset;
535 numGroups = thisLookup.size();
537 //for each calculator
538 for(int i = 0 ; i < treeCalculators.size(); i++) {
539 //initialize simMatrix
// NOTE(review): push_back appends to each row, so this relies on the rows being empty
// at this point (a clear may sit on a line not visible here - confirm)
541 simMatrix.resize(numGroups);
542 for (int m = 0; m < simMatrix.size(); m++) {
543 for (int j = 0; j < simMatrix.size(); j++) {
544 simMatrix[m].push_back(0.0);
// each matrix position initially maps to the node with the same number
550 for (int g = 0; g < numGroups; g++) { index[g] = g; }
552 //create a new filename
553 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
// visit every unordered pair of groups once (l starts at k, the diagonal is skipped)
555 for (int k = 0; k < thisLookup.size(); k++) {
556 for (int l = k; l < thisLookup.size(); l++) {
557 if (k != l) { //we don't need the similarity of a group to itself
558 //get estimated similarity between 2 groups
560 subset.clear(); //clear out old pair of sharedrabunds
561 //add new pair of sharedrabunds
562 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
564 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
565 //save values in similarity matrix
// only the first returned value is used; it is stored symmetrically
566 simMatrix[k][l] = data[0];
567 simMatrix[l][k] = data[0];
572 //creates tree from similarity matrix and write out file
// error handlers: report the failure on cout (what follows each message is not visible here)
577 catch(exception& e) {
578 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
582 cout << "An unknown error has occurred in the TreeGroupCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
586 /***********************************************************/