2 * treegroupscommand.cpp
5 * Created by Sarah Westcott on 4/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "treegroupscommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 TreeGroupCommand::TreeGroupCommand(){
27 globaldata = GlobalData::getInstance();
28 format = globaldata->getFormat();
29 validCalculator = new ValidCalculators();
31 if (format == "sharedfile") {
33 for (i=0; i<globaldata->Estimators.size(); i++) {
34 if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) {
35 if (globaldata->Estimators[i] == "jabund") {
36 treeCalculators.push_back(new JAbund());
37 }else if (globaldata->Estimators[i] == "sorabund") {
38 treeCalculators.push_back(new SorAbund());
39 }else if (globaldata->Estimators[i] == "jclass") {
40 treeCalculators.push_back(new Jclass());
41 }else if (globaldata->Estimators[i] == "sorclass") {
42 treeCalculators.push_back(new SorClass());
43 }else if (globaldata->Estimators[i] == "jest") {
44 treeCalculators.push_back(new Jest());
45 }else if (globaldata->Estimators[i] == "sorest") {
46 treeCalculators.push_back(new SorEst());
47 }else if (globaldata->Estimators[i] == "thetayc") {
48 treeCalculators.push_back(new ThetaYC());
49 }else if (globaldata->Estimators[i] == "thetan") {
50 treeCalculators.push_back(new ThetaN());
51 }else if (globaldata->Estimators[i] == "morisitahorn") {
52 treeCalculators.push_back(new MorHorn());
53 }else if (globaldata->Estimators[i] == "braycurtis") {
54 treeCalculators.push_back(new BrayCurtis());
60 //reset calc for next command
61 globaldata->setCalc("");
65 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69 cout << "An unknown error has occurred in the TreeGroupCommand class function TreeGroupCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
73 //**********************************************************************************************************************
75 TreeGroupCommand::~TreeGroupCommand(){
77 if (format == "sharedfile") {delete read;}
78 else { delete readMatrix; delete matrix; delete list; }
83 //**********************************************************************************************************************
85 int TreeGroupCommand::execute(){
87 if (format == "sharedfile") {
88 //if the users entered no valid calculators don't execute command
89 if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." << endl; return 0; }
92 read = new ReadOTUFile(globaldata->inputFileName);
93 read->read(&*globaldata);
95 input = globaldata->ginput;
96 lookup = input->getSharedRAbundVectors();
99 if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0; }
105 filename = globaldata->inputFileName;
107 if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }
108 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
110 if(globaldata->getPrecision() != ""){
111 convert(globaldata->getPrecision(), precision);
114 if(globaldata->getCutOff() != ""){
115 convert(globaldata->getCutOff(), cutoff);
116 cutoff += (5 / (precision * 10.0));
118 readMatrix->setCutoff(cutoff);
120 if(globaldata->getNameFile() != ""){
121 nameMap = new NameAssignment(globaldata->getNameFile());
122 nameMap->readMap(1,2);
128 readMatrix->read(nameMap);
129 list = readMatrix->getListVector();
130 matrix = readMatrix->getMatrix();
133 tmap = new TreeMap();
135 globaldata->gTreemap = tmap;
137 globaldata->Groups = tmap->namesOfGroups;
139 //clear globaldatas old tree names if any
140 globaldata->Treenames.clear();
142 //fills globaldatas tree names
143 globaldata->Treenames = globaldata->Groups;
147 //create a new filename
148 outputFile = getRootName(globaldata->inputFileName) + "tre";
153 //reset groups parameter
154 globaldata->Groups.clear(); globaldata->setGroups("");
158 catch(exception& e) {
159 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
163 cout << "An unknown error has occurred in the TreeGroupCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
167 //**********************************************************************************************************************
169 void TreeGroupCommand::createTree(){
174 //do merges and create tree structure by setting parents and children
175 //there are numGroups - 1 merges to do
176 for (int i = 0; i < (numGroups - 1); i++) {
177 float largest = -1000.0;
180 //find largest value in sims matrix by searching lower triangle
181 for (int j = 1; j < simMatrix.size(); j++) {
182 for (int k = 0; k < j; k++) {
183 if (simMatrix[j][k] > largest) { largest = simMatrix[j][k]; row = j; column = k; }
187 //set non-leaf node info and update leaves to know their parents
189 t->tree[numGroups + i].setChildren(index[row], index[column]);
192 t->tree[index[row]].setParent(numGroups + i);
193 t->tree[index[column]].setParent(numGroups + i);
195 //blength = distance / 2;
196 float blength = ((1.0 - largest) / 2);
199 t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
200 t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
202 //set your length to leaves to your childs length plus branchlength
203 t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
207 index[row] = numGroups+i;
208 index[column] = numGroups+i;
210 //remove highest value that caused the merge.
211 simMatrix[row][column] = -1000.0;
212 simMatrix[column][row] = -1000.0;
214 //merge values in simsMatrix
215 for (int n = 0; n < simMatrix.size(); n++) {
216 //row becomes merge of 2 groups
217 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
218 simMatrix[n][row] = simMatrix[row][n];
220 simMatrix[column][n] = -1000.0;
221 simMatrix[n][column] = -1000.0;
225 //adjust tree to make sure root to tip length is .5
226 int root = t->findRoot();
227 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
233 t->createNewickFile(outputFile);
239 catch(exception& e) {
240 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
244 cout << "An unknown error has occurred in the TreeGroupCommand class function createTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
248 /***********************************************************/
249 void TreeGroupCommand::printSims(ostream& out) {
252 //output column headers
254 //for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
258 for (int m = 0; m < simMatrix.size(); m++) {
259 //out << lookup[m]->getGroup() << '\t';
260 for (int n = 0; n < simMatrix.size(); n++) {
261 out << simMatrix[m][n] << '\t';
267 catch(exception& e) {
268 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
272 cout << "An unknown error has occurred in the TreeGroupCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
276 /***********************************************************/
277 void TreeGroupCommand::makeSimsDist() {
279 numGroups = list->size();
283 for (int g = 0; g < numGroups; g++) { index[g] = g; }
285 //initialize simMatrix
287 simMatrix.resize(numGroups);
288 for (int m = 0; m < simMatrix.size(); m++) {
289 for (int j = 0; j < simMatrix.size(); j++) {
290 simMatrix[m].push_back(0.0);
294 //go through sparse matrix and fill sims
295 //go through each cell in the sparsematrix
296 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
297 //similairity = -(distance-1)
298 simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);
299 simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);
304 catch(exception& e) {
305 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
309 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
314 /***********************************************************/
315 void TreeGroupCommand::makeSimsShared() {
319 //clear globaldatas old tree names if any
320 globaldata->Treenames.clear();
322 //fills globaldatas tree names
323 globaldata->Treenames = globaldata->Groups;
325 //create treemap class from groupmap for tree class to use
326 tmap = new TreeMap();
327 tmap->makeSim(globaldata->gGroupmap);
328 globaldata->gTreemap = tmap;
330 set<string> processedLabels;
331 set<string> userLabels = globaldata->labels;
333 //as long as you are not at the end of the file or done wih the lines you want
334 while((lookup[0] != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
336 if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){
337 cout << lookup[0]->getLabel() << '\t' << count << endl;
340 processedLabels.insert(lookup[0]->getLabel());
341 userLabels.erase(lookup[0]->getLabel());
344 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
345 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
348 processedLabels.insert(lastLookup[0]->getLabel());
349 userLabels.erase(lastLookup[0]->getLabel());
352 //prevent memory leak
353 if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } }
356 //get next line to process
357 lookup = input->getSharedRAbundVectors();
361 //output error messages about any remaining user labels
362 set<string>::iterator it;
363 bool needToRun = false;
364 for (it = userLabels.begin(); it != userLabels.end(); it++) {
365 cout << "Your file does not include the label "<< *it;
366 if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
367 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
370 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
374 //run last line if you need to
375 if (needToRun == true) {
376 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
380 for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; }
381 for(int i = 0 ; i < treeCalculators.size(); i++) { delete treeCalculators[i]; }
383 catch(exception& e) {
384 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
388 cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
393 /***********************************************************/
394 void TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
397 vector<SharedRAbundVector*> subset;
398 numGroups = globaldata->Groups.size();
400 //for each calculator
401 for(int i = 0 ; i < treeCalculators.size(); i++) {
402 //initialize simMatrix
404 simMatrix.resize(numGroups);
405 for (int m = 0; m < simMatrix.size(); m++) {
406 for (int j = 0; j < simMatrix.size(); j++) {
407 simMatrix[m].push_back(0.0);
413 for (int g = 0; g < numGroups; g++) { index[g] = g; }
415 //create a new filename
416 outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";
418 for (int k = 0; k < thisLookup.size(); k++) {
419 for (int l = k; l < thisLookup.size(); l++) {
420 if (k != l) { //we dont need to similiarity of a groups to itself
421 //get estimated similarity between 2 groups
423 subset.clear(); //clear out old pair of sharedrabunds
424 //add new pair of sharedrabunds
425 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
427 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
428 //save values in similarity matrix
429 simMatrix[k][l] = data[0];
430 simMatrix[l][k] = data[0];
435 //creates tree from similarity matrix and write out file
440 catch(exception& e) {
441 cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
445 cout << "An unknown error has occurred in the TreeGroupCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
449 /***********************************************************/