2 * treegroupscommand.cpp
5 * Created by Sarah Westcott on 4/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 TreeGroupCommand::TreeGroupCommand(){
27 globaldata = GlobalData::getInstance();
28 format = globaldata->getFormat();
29 validCalculator = new ValidCalculators();
31 if (format == "sharedfile") {
33 for (i=0; i<globaldata->Estimators.size(); i++) {
34 if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) {
35 if (globaldata->Estimators[i] == "jabund") {
36 treeCalculators.push_back(new JAbund());
37 }else if (globaldata->Estimators[i] == "sorabund") {
38 treeCalculators.push_back(new SorAbund());
39 }else if (globaldata->Estimators[i] == "jclass") {
40 treeCalculators.push_back(new Jclass());
41 }else if (globaldata->Estimators[i] == "sorclass") {
42 treeCalculators.push_back(new SorClass());
43 }else if (globaldata->Estimators[i] == "jest") {
44 treeCalculators.push_back(new Jest());
45 }else if (globaldata->Estimators[i] == "sorest") {
46 treeCalculators.push_back(new SorEst());
47 }else if (globaldata->Estimators[i] == "thetayc") {
48 treeCalculators.push_back(new ThetaYC());
49 }else if (globaldata->Estimators[i] == "thetan") {
50 treeCalculators.push_back(new ThetaN());
51 }else if (globaldata->Estimators[i] == "morisitahorn") {
52 treeCalculators.push_back(new MorHorn());
53 }else if (globaldata->Estimators[i] == "braycurtis") {
54 treeCalculators.push_back(new BrayCurtis());
60 //reset calc for next command
61 globaldata->setCalc("");
65 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69 cout << "An unknown error has occurred in the TreeGroupCommand class function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
73 //**********************************************************************************************************************
75 TreeGroupCommand::~TreeGroupCommand(){
77 if (format == "sharedfile") {delete read;}
78 else { delete readMatrix; delete matrix; delete list; }
83 //**********************************************************************************************************************
85 int TreeGroupCommand::execute(){
87 if (format == "sharedfile") {
88 //if the users entered no valid calculators don't execute command
89 if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." << endl; return 0; }
92 read = new ReadOTUFile(globaldata->inputFileName);
93 read->read(&*globaldata);
95 input = globaldata->ginput;
96 lookup = input->getSharedRAbundVectors();
98 if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0; }
104 filename = globaldata->inputFileName;
106 if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }
107 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
109 if(globaldata->getPrecision() != ""){
110 convert(globaldata->getPrecision(), precision);
113 if(globaldata->getCutOff() != ""){
114 convert(globaldata->getCutOff(), cutoff);
115 cutoff += (5 / (precision * 10.0));
117 readMatrix->setCutoff(cutoff);
119 if(globaldata->getNameFile() != ""){
120 nameMap = new NameAssignment(globaldata->getNameFile());
121 nameMap->readMap(1,2);
127 readMatrix->read(nameMap);
128 list = readMatrix->getListVector();
129 matrix = readMatrix->getMatrix();
132 tmap = new TreeMap();
134 globaldata->gTreemap = tmap;
136 globaldata->Groups = tmap->namesOfGroups;
138 //clear globaldatas old tree names if any
139 globaldata->Treenames.clear();
141 //fills globaldatas tree names
142 globaldata->Treenames = globaldata->Groups;
146 //create a new filename
147 outputFile = getRootName(globaldata->inputFileName) + "tre";
152 //reset groups parameter
153 globaldata->Groups.clear(); globaldata->setGroups("");
157 catch(exception& e) {
158 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
162 cout << "An unknown error has occurred in the TreeGroupCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
166 //**********************************************************************************************************************
168 void TreeGroupCommand::createTree(){
173 //do merges and create tree structure by setting parents and children
174 //there are numGroups - 1 merges to do
175 for (int i = 0; i < (numGroups - 1); i++) {
176 float largest = -1000.0;
179 //find largest value in sims matrix by searching lower triangle
180 for (int j = 1; j < simMatrix.size(); j++) {
181 for (int k = 0; k < j; k++) {
182 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
186 //set non-leaf node info and update leaves to know their parents
188 t->tree[numGroups + i].setChildren(index[row], index[column]);
191 t->tree[index[row]].setParent(numGroups + i);
192 t->tree[index[column]].setParent(numGroups + i);
194 //blength = distance / 2;
195 float blength = ((1.0 - largest) / 2);
198 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
199 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
201 //set your length to leaves to your childs length plus branchlength
202 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
206 index[row] = numGroups+i;
207 index[column] = numGroups+i;
209 //remove highest value that caused the merge.
210 simMatrix[row][column] = -1000.0;
211 simMatrix[column][row] = -1000.0;
213 //merge values in simsMatrix
214 for (int n = 0; n < simMatrix.size(); n++) {
215 //row becomes merge of 2 groups
216 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
217 simMatrix[n][row] = simMatrix[row][n];
219 simMatrix[column][n] = -1000.0;
220 simMatrix[n][column] = -1000.0;
224 //adjust tree to make sure root to tip length is .5
225 int root = t->findRoot();
226 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
232 t->createNewickFile(outputFile);
238 catch(exception& e) {
239 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
243 cout << "An unknown error has occurred in the TreeGroupCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
247 /***********************************************************/
248 void TreeGroupCommand::printSims(ostream& out) {
251 //output column headers
253 //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
257 for (int m = 0; m < simMatrix.size(); m++) {
258 //out << lookup[m]->getGroup() << '\t';
259 for (int n = 0; n < simMatrix.size(); n++) {
260 out << simMatrix[m][n] << '\t';
266 catch(exception& e) {
267 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
271 cout << "An unknown error has occurred in the TreeGroupCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
275 /***********************************************************/
276 void TreeGroupCommand::makeSimsDist() {
278 numGroups = list->size();
282 for (int g = 0; g < numGroups; g++) { index[g] = g; }
284 //initialize simMatrix
286 simMatrix.resize(numGroups);
287 for (int m = 0; m < simMatrix.size(); m++) {
288 for (int j = 0; j < simMatrix.size(); j++) {
289 simMatrix[m].push_back(0.0);
293 //go through sparse matrix and fill sims
294 //go through each cell in the sparsematrix
295 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
296 //similairity = -(distance-1)
297 simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
298 simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
303 catch(exception& e) {
304 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
308 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
313 /***********************************************************/
314 void TreeGroupCommand::makeSimsShared() {
318 vector<SharedRAbundVector*> subset;
320 numGroups = globaldata->Groups.size();
322 //clear globaldatas old tree names if any
323 globaldata->Treenames.clear();
325 //fills globaldatas tree names
326 globaldata->Treenames = globaldata->Groups;
328 //create treemap class from groupmap for tree class to use
329 tmap = new TreeMap();
330 tmap->makeSim(globaldata->gGroupmap);
331 globaldata->gTreemap = tmap;
333 while(lookup[0] != NULL){
335 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){
337 cout << lookup[0]->getLabel() << '\t' << count << endl;
339 //for each calculator
340 for(int i = 0 ; i < treeCalculators.size(); i++) {
341 //initialize simMatrix
343 simMatrix.resize(numGroups);
344 for (int m = 0; m < simMatrix.size(); m++) {
345 for (int j = 0; j < simMatrix.size(); j++) {
346 simMatrix[m].push_back(0.0);
352 for (int g = 0; g < numGroups; g++) { index[g] = g; }
354 //create a new filename
355 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";
357 for (int k = 0; k < lookup.size(); k++) {
358 for (int l = k; l < lookup.size(); l++) {
359 if (k != l) { //we dont need to similiarity of a groups to itself
360 //get estimated similarity between 2 groups
362 subset.clear(); //clear out old pair of sharedrabunds
363 //add new pair of sharedrabunds
364 subset.push_back(lookup[k]); subset.push_back(lookup[l]);
366 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
367 //save values in similarity matrix
368 simMatrix[k][l] = data[0];
369 simMatrix[l][k] = data[0];
374 //creates tree from similarity matrix and write out file
379 //prevent memory leak
380 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
382 //get next line to process
383 lookup = input->getSharedRAbundVectors();
387 for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; }
389 catch(exception& e) {
390 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
394 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
399 /***********************************************************/