2 * treegroupscommand.cpp
5 * Created by Sarah Westcott on 4/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
// Builds the tree.shared command from the raw option string typed by the user.
// option: either "help" or the parameter list that is handed to OptionParser.
// Any validation failure sets the abort flag; execute() checks that flag and returns immediately.
25 TreeGroupCommand::TreeGroupCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
// help() prints the calculator list through validCalculator, so it is allocated before the call.
36 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
39 //valid parameters for this command
40 string Array[] = {"line","label","calc","groups", "phylip", "column", "name", "precision","cutoff"};
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 OptionParser parser(option);
44 map<string, string> parameters = parser. getParameters();
46 ValidParameters validParameter;
48 //check to make sure all parameters are valid for command
49 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
// Input files: validFile() answers "not open" (abort), "not found" (treated as not supplied),
// or a usable file name. A supplied phylip/column file also selects the matrix format.
54 phylipfile = validParameter.validFile(parameters, "phylip", true);
55 if (phylipfile == "not open") { abort = true; }
56 else if (phylipfile == "not found") { phylipfile = ""; }
57 else { format = "phylip"; }
59 columnfile = validParameter.validFile(parameters, "column", true);
60 if (columnfile == "not open") { abort = true; }
61 else if (columnfile == "not found") { columnfile = ""; }
62 else { format = "column"; }
64 namefile = validParameter.validFile(parameters, "name", true);
65 if (namefile == "not open") { abort = true; }
66 else if (namefile == "not found") { namefile = ""; }
67 else { globaldata->setNameFile(namefile); }
69 // format = globaldata->getFormat();
71 //error checking on files
// Either a shared file must already be registered in globaldata or exactly one distance file must be given.
72 if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == ""))) { cout << "You must run the read.otu command or provide a distance file before running the tree.shared command." << endl; abort = true; }
73 else if ((phylipfile != "") && (columnfile != "")) { cout << "When running the tree.shared command with a distance file you may not use both the column and the phylip parameters." << endl; abort = true; }
// A column-format distance file is only accepted together with a name file.
75 if (columnfile != "") {
76 if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
79 //check for optional parameter and set defaults
80 // ...at some point should add some additional type checking...
81 line = validParameter.validFile(parameters, "line", false);
82 if (line == "not found") { line = ""; }
// Anything other than "all" is split on dashes into the set of requested line numbers.
84 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
85 else { allLines = 1; }
88 label = validParameter.validFile(parameters, "label", false);
89 if (label == "not found") { label = ""; }
// NOTE(review): this assignment overwrites the allLines value chosen from the line parameter just above;
// with line=all and no label, allLines ends up 0 here — confirm this is intended.
91 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
92 else { allLines = 1; }
95 //make sure user did not use both the line and label parameters
96 if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
97 //if the user has not specified any line or labels use the ones from read.otu
98 else if((line == "") && (label == "")) {
99 allLines = globaldata->allLines;
100 labels = globaldata->labels;
101 lines = globaldata->lines;
// Requested groups are split on dashes and also published through globaldata->Groups.
104 groups = validParameter.validFile(parameters, "groups", false);
105 if (groups == "not found") { groups = ""; }
107 splitAtDash(groups, Groups);
108 globaldata->Groups = Groups;
// Calculators: a missing value or the literal "default" both mean jclass-thetayc.
111 calc = validParameter.validFile(parameters, "calc", false);
112 if (calc == "not found") { calc = "jclass-thetayc"; }
114 if (calc == "default") { calc = "jclass-thetayc"; }
116 splitAtDash(calc, Estimators);
// precision defaults to 100 and cutoff to 10.
119 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
120 convert(temp, precision);
122 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
123 convert(temp, cutoff);
// Widens the cutoff by 5/(precision*10), i.e. 0.005 at the default precision
// (presumably to absorb rounding at that precision — TODO confirm).
124 cutoff += (5 / (precision * 10.0));
127 if (abort == false) {
129 validCalculator = new ValidCalculators();
// NOTE(review): no visible statement assigns "sharedfile" to format (the getFormat() call above is commented out)
// — confirm where it is set.
131 if (format == "sharedfile") {
// One calculator object is created per requested estimator that isValidCalculator accepts for "treegroup";
// names matching none of the branches below add nothing to treeCalculators.
133 for (i=0; i<Estimators.size(); i++) {
134 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
135 if (Estimators[i] == "jabund") {
136 treeCalculators.push_back(new JAbund());
137 }else if (Estimators[i] == "sorabund") {
138 treeCalculators.push_back(new SorAbund());
139 }else if (Estimators[i] == "jclass") {
140 treeCalculators.push_back(new Jclass());
141 }else if (Estimators[i] == "sorclass") {
142 treeCalculators.push_back(new SorClass());
143 }else if (Estimators[i] == "jest") {
144 treeCalculators.push_back(new Jest());
145 }else if (Estimators[i] == "sorest") {
146 treeCalculators.push_back(new SorEst());
147 }else if (Estimators[i] == "thetayc") {
148 treeCalculators.push_back(new ThetaYC());
149 }else if (Estimators[i] == "thetan") {
150 treeCalculators.push_back(new ThetaN());
151 }else if (Estimators[i] == "morisitahorn") {
152 treeCalculators.push_back(new MorHorn());
153 }else if (Estimators[i] == "braycurtis") {
154 treeCalculators.push_back(new BrayCurtis());
// Failures are reported on cout with the class and function name.
163 catch(exception& e) {
164 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
168 cout << "An unknown error has occurred in the TreeGroupCommand class function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
173 //**********************************************************************************************************************
// Prints the usage text for the tree.shared command to cout: parameters, defaults, examples,
// and the list of calculators valid for "treegroup".
// Dereferences validCalculator, so that member must point at a ValidCalculators object before this is called.
175 void TreeGroupCommand::help(){
177 cout << "The tree.shared command creates a .tre to represent the similiarity between groups or sequences." << "\n";
178 cout << "The tree.shared command can only be executed after a successful read.otu command or by providing a distance file." << "\n";
179 cout << "The tree.shared command parameters are groups, calc, phylip, column, name, cutoff, precision, line and label. You may not use line and label at the same time." << "\n";
180 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
181 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like trees created for, and are also separated by dashes." << "\n";
182 cout << "The phylip or column parameter are required if you do not run the read.otu command first, and only one may be used. If you use a column file the name filename is required. " << "\n";
183 cout << "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed." << "\n";
184 cout << "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
185 cout << "Example tree.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
186 cout << "The default value for groups is all the groups in your groupfile." << "\n";
187 cout << "The default value for calc is jclass-thetayc." << "\n";
188 cout << "The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose." << "\n";
// Lists the calculator names registered under "treegroup".
189 validCalculator->printCalc("treegroup", cout);
190 cout << "Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile)." << "\n";
191 cout << "Example tree.shared(phylip=abrecovery.dist)." << "\n";
192 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
// Failures are reported on cout with the class and function name.
194 catch(exception& e) {
195 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
199 cout << "An unknown error has occurred in the TreeGroupCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205 //**********************************************************************************************************************
// Frees the objects held by the command: the OTU reader when the format is "sharedfile",
// otherwise the distance-matrix reader, the matrix and the list; finally the calculator validator.
// NOTE(review): read, readMatrix, matrix and list are assigned in execute(), which returns early when abort is set,
// and validCalculator is only visibly allocated on the help path or when abort is false.
// Confirm these pointers are initialised elsewhere so the deletes below are safe on every path.
207 TreeGroupCommand::~TreeGroupCommand(){
209 if (format == "sharedfile") {delete read;}
210 else { delete readMatrix; delete matrix; delete list; }
212 delete validCalculator;
216 //**********************************************************************************************************************
// Runs the command. Returns 0 without doing any work when the constructor set abort,
// when no valid calculator was supplied, or when fewer than two groups are available.
// Two input paths are visible: a shared (OTU) file read through ReadOTUFile, and a phylip/column
// distance matrix read through the matching ReadMatrix implementation.
// NOTE(review): the calls that build the similarity matrices and trees, and the final return, are not visible
// among these lines — confirm against the full function.
218 int TreeGroupCommand::execute(){
221 if (abort == true) { return 0; }
223 if (format == "sharedfile") {
224 //if the users entered no valid calculators don't execute command
225 if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." << endl; return 0; }
// Read the OTU file named in globaldata, then fetch the first set of shared rabund vectors.
228 read = new ReadOTUFile(globaldata->inputFileName);
229 read->read(&*globaldata);
231 input = globaldata->ginput;
232 lookup = input->getSharedRAbundVectors();
// lookup holds one vector per group; at least two are required here.
235 if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0; }
// Distance-matrix path: choose the reader by format and apply the cutoff before reading.
241 filename = globaldata->inputFileName;
243 if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }
244 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
246 readMatrix->setCutoff(cutoff);
// Name map built from the name file (any condition guarding this is not visible here).
249 nameMap = new NameAssignment(namefile);
250 nameMap->readMap(1,2);
256 readMatrix->read(nameMap);
257 list = readMatrix->getListVector();
258 matrix = readMatrix->getMatrix();
// A fresh TreeMap is published through globaldata and its group names become the tree names;
// how the TreeMap is populated is not visible here.
261 tmap = new TreeMap();
263 globaldata->gTreemap = tmap;
265 globaldata->Groups = tmap->namesOfGroups;
267 //clear globaldatas old tree names if any
268 globaldata->Treenames.clear();
270 //fills globaldatas tree names
271 globaldata->Treenames = globaldata->Groups;
275 //create a new filename
// Output name is the input file's root name with "tre" appended.
276 outputFile = getRootName(globaldata->inputFileName) + "tre";
279 cout << "Tree complete. " << endl;
282 //reset groups parameter
283 globaldata->Groups.clear();
// Failures are reported on cout with the class and function name.
287 catch(exception& e) {
288 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
292 cout << "An unknown error has occurred in the TreeGroupCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
296 //**********************************************************************************************************************
// Builds a tree from simMatrix by repeatedly merging the most similar pair, then writes it to outputFile
// in Newick format. Reads numGroups, simMatrix and index; modifies simMatrix, index and the tree held in t.
// Leaves occupy tree slots [0, numGroups); the node created by merge i is stored at slot numGroups + i.
// index[r] maps matrix row r to the tree node currently representing that row.
298 void TreeGroupCommand::createTree(){
303 //do merges and create tree structure by setting parents and children
304 //there are numGroups - 1 merges to do
305 for (int i = 0; i < (numGroups - 1); i++) {
// -1000.0 marks "no candidate"; merged-away cells are set to the same value below so they are never picked.
306 float largest = -1000.0;
309 //find largest value in sims matrix by searching lower triangle
310 for (int j = 1; j < simMatrix.size(); j++) {
311 for (int k = 0; k < j; k++) {
312 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
316 //set non-leaf node info and update leaves to know their parents
318 t->tree[numGroups + i].setChildren(index[row], index[column]);
321 t->tree[index[row]].setParent(numGroups + i);
322 t->tree[index[column]].setParent(numGroups + i);
324 //blength = distance / 2;
// distance is taken as (1 - similarity); the new node sits at half that distance above the leaves.
325 float blength = ((1.0 - largest) / 2);
// Each child's branch covers only the part of blength not already below that child.
328 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
329 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
331 //set your length to leaves to your child's length plus branchlength
332 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
// Both merged rows now refer to the new internal node.
336 index[row] = numGroups+i;
337 index[column] = numGroups+i;
339 //remove highest value that caused the merge.
340 simMatrix[row][column] = -1000.0;
341 simMatrix[column][row] = -1000.0;
343 //merge values in simsMatrix
344 for (int n = 0; n < simMatrix.size(); n++) {
345 //row becomes merge of 2 groups
// The merged similarity is the arithmetic mean of the two rows' values; the matrix is kept symmetric.
346 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
347 simMatrix[n][row] = simMatrix[row][n];
// The absorbed row/column is disabled with the sentinel value.
349 simMatrix[column][n] = -1000.0;
350 simMatrix[n][column] = -1000.0;
354 //adjust tree to make sure root to tip length is .5
355 int root = t->findRoot();
356 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
362 t->createNewickFile(outputFile);
// Failures are reported on cout with the class and function name.
368 catch(exception& e) {
369 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
373 cout << "An unknown error has occurred in the TreeGroupCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
377 /***********************************************************/
// Writes the values of simMatrix to the given stream, row by row, each value followed by a tab.
// out: destination stream. The matrix is treated as square (both loops run to simMatrix.size()).
378 void TreeGroupCommand::printSims(ostream& out) {
381 //output column headers
383 //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
387 for (int m = 0; m < simMatrix.size(); m++) {
388 //out << lookup[m]->getGroup() << '\t';
389 for (int n = 0; n < simMatrix.size(); n++) {
390 out << simMatrix[m][n] << '\t';
// Failures are reported on cout with the class and function name.
396 catch(exception& e) {
397 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
401 cout << "An unknown error has occurred in the TreeGroupCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
405 /***********************************************************/
// Prepares the similarity matrix for the distance-file path.
// Sets numGroups from the list read with the matrix, resets index to the identity mapping,
// fills simMatrix with zeros and then stores similarity = 1 - distance for every cell of the sparse matrix.
406 void TreeGroupCommand::makeSimsDist() {
408 numGroups = list->size();
// Initially matrix row g is represented by tree node g.
412 for (int g = 0; g < numGroups; g++) { index[g] = g; }
414 //initialize simMatrix
// Each of the numGroups rows has numGroups zeros appended to it.
// NOTE(review): push_back appends, so this yields a square matrix only if the rows start empty
// — confirm simMatrix is cleared before this point.
416 simMatrix.resize(numGroups);
417 for (int m = 0; m < simMatrix.size(); m++) {
418 for (int j = 0; j < simMatrix.size(); j++) {
419 simMatrix[m].push_back(0.0);
423 //go through sparse matrix and fill sims
424 //go through each cell in the sparsematrix
425 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
426 //similarity = -(distance-1)
// Written to both [row][column] and [column][row] so the matrix stays symmetric.
427 simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
428 simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
// Failures are reported on cout with the class and function name.
433 catch(exception& e) {
434 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
438 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
443 /***********************************************************/
// Walks through the shared file one line (distance level) at a time and selects the lines the user
// asked for by line number or label, or every line when allLines is 1.
// Also publishes the tree names and a TreeMap built from the group map through globaldata,
// reports requested labels that were never found, and frees the last lookup and the calculators at the end.
// NOTE(review): the per-line processing calls and the maintenance of count are not visible among these lines
// — confirm against the full function.
444 void TreeGroupCommand::makeSimsShared() {
448 //clear globaldatas old tree names if any
449 globaldata->Treenames.clear();
451 //fills globaldatas tree names
452 globaldata->Treenames = globaldata->Groups;
454 //create treemap class from groupmap for tree class to use
455 tmap = new TreeMap();
456 tmap->makeSim(globaldata->gGroupmap);
457 globaldata->gTreemap = tmap;
// Working copies: entries are erased from userLabels/userLines as they are satisfied,
// while the labels/lines members stay intact for the membership tests below.
459 set<string> processedLabels;
460 set<string> userLabels = labels;
461 set<int> userLines = lines;
463 //as long as you are not at the end of the file or done with the lines you want
464 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
// The current line is selected when everything is wanted, or its number or its label was requested.
466 if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){
467 cout << lookup[0]->getLabel() << '\t' << count << endl;
470 processedLabels.insert(lookup[0]->getLabel());
471 userLabels.erase(lookup[0]->getLabel());
472 userLines.erase(count);
// When anyLabelsToProcess() reports true for the current label and the previous line has not been handled yet,
// the previous line (lastLookup) is reported instead; the helper's exact criterion is not shown here.
475 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
476 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
479 processedLabels.insert(lastLookup[0]->getLabel());
480 userLabels.erase(lastLookup[0]->getLabel());
483 //prevent memory leak
// Skipped when count is 1 (presumably there is no previous lookup to free on the first pass — TODO confirm).
484 if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } }
487 //get next line to process
488 lookup = input->getSharedRAbundVectors();
492 //output error messages about any remaining user labels
493 set<string>::iterator it;
494 bool needToRun = false;
495 for (it = userLabels.begin(); it != userLabels.end(); it++) {
496 cout << "Your file does not include the label "<< *it;
// If the last line read was never processed it is offered as the substitute; otherwise the user is pointed to it.
497 if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
498 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
501 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
505 //run last line if you need to
506 if (needToRun == true) {
507 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
// Cleanup: frees the vectors of the last lookup and every calculator object.
// The treeCalculators vector itself is not cleared here.
511 for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; }
512 for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; }
// Failures are reported on cout with the class and function name.
514 catch(exception& e) {
515 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
519 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
524 /***********************************************************/
// For one line of the shared file, fills the pairwise similarity matrix once per calculator.
// thisLookup: one SharedRAbundVector per group (the vector of pointers is passed by value; the objects are shared).
// For each calculator it sets numGroups, simMatrix, index and outputFile
// (<input root><calculator name>.<label>.tre).
// NOTE(review): the call that turns each matrix into a tree file is not visible among these lines
// — confirm against the full function.
525 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
528 vector<SharedRAbundVector*> subset;
529 numGroups = thisLookup.size();
531 //for each calculator
532 for(int i = 0 ; i < treeCalculators.size(); i++) {
533 //initialize simMatrix
// Each of the numGroups rows has numGroups zeros appended to it.
// NOTE(review): push_back appends, so this yields a square matrix only if the rows start empty
// — confirm simMatrix is cleared before this point.
535 simMatrix.resize(numGroups);
536 for (int m = 0; m < simMatrix.size(); m++) {
537 for (int j = 0; j < simMatrix.size(); j++) {
538 simMatrix[m].push_back(0.0);
// Initially matrix row g is represented by tree node g.
544 for (int g = 0; g < numGroups; g++) { index[g] = g; }
546 //create a new filename
547 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
// Visit every unordered pair (k, l) with k < l; the diagonal is left at its initial value.
549 for (int k = 0; k < thisLookup.size(); k++) {
550 for (int l = k; l < thisLookup.size(); l++) {
551 if (k != l) { //we don't need the similarity of a group to itself
552 //get estimated similarity between 2 groups
554 subset.clear(); //clear out old pair of sharedrabunds
555 //add new pair of sharedrabunds
556 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
558 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
559 //save values in similarity matrix
// Only the first value returned by the calculator is used, stored symmetrically.
560 simMatrix[k][l] = data[0];
561 simMatrix[l][k] = data[0];
566 //creates tree from similarity matrix and write out file
// Failures are reported on cout with the class and function name.
571 catch(exception& e) {
572 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
576 cout << "An unknown error has occurred in the TreeGroupCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
580 /***********************************************************/