]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.cpp
working on current change
[mothur.git] / treegroupscommand.cpp
1 /*
2  *  treegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "treegroupscommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34 #include "odum.h"
35 #include "canberra.h"
36 #include "structeuclidean.h"
37 #include "structchord.h"
38 #include "hellinger.h"
39 #include "manhattan.h"
40 #include "structpearson.h"
41 #include "soergel.h"
42 #include "spearman.h"
43 #include "structkulczynski.h"
44 #include "structchi2.h"
45 #include "speciesprofile.h"
46 #include "hamming.h"
47 #include "gower.h"
48 #include "memchi2.h"
49 #include "memchord.h"
50 #include "memeuclidean.h"
51 #include "mempearson.h"
52
53 //**********************************************************************************************************************
54 vector<string> TreeGroupCommand::setParameters(){       
55         try {
56                 CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);
57                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
58                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
59                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);          
60                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
61                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);                
62                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
63                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
64                 CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
65                 CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
66                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
67                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
68                 
69                 vector<string> myArray;
70                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
71                 return myArray;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "TreeGroupCommand", "setParameters");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 string TreeGroupCommand::getHelpString(){       
80         try {
81                 string helpString = "";
82                 ValidCalculators validCalculator;
83                 helpString += "The tree.shared command creates a .tre to represent the similiarity between groups or sequences.\n";
84                 helpString += "The tree.shared command parameters are shared, groups, calc, phylip, column, name, cutoff, precision and label.\n";
85                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included used.\n";
86                 helpString += "The group names are separated by dashes. The label allow you to select what distance levels you would like trees created for, and are also separated by dashes.\n";
87                 helpString += "The phylip or column parameter are required if you do not provide a sharedfile, and only one may be used.  If you use a column file the name filename is required. \n";
88                 helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
89                 helpString += "The tree.shared command should be in the following format: tree.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n";
90                 helpString += "Example tree.shared(groups=A-B-C, calc=jabund-sorabund).\n";
91                 helpString += "The default value for groups is all the groups in your groupfile.\n";
92                 helpString += "The default value for calc is jclass-thetayc.\n";
93                 helpString += "The tree.shared command outputs a .tre file for each calculator you specify at each distance you choose.\n";
94                 helpString += validCalculator.printCalc("treegroup");
95                 helpString += "Or the tree.shared command can be in the following format: tree.shared(phylip=yourPhylipFile).\n";
96                 helpString += "Example tree.shared(phylip=abrecovery.dist).\n";
97                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
98                 return helpString;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "TreeGroupCommand", "getHelpString");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 TreeGroupCommand::TreeGroupCommand(){   
107         try {
108                 abort = true; calledHelp = true;
109                 setParameters();
110                 //initialize outputTypes
111                 vector<string> tempOutNames;
112                 outputTypes["tree"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TreeGroupCommand", "TreeGroupCommand");
116                 exit(1);
117         }
118 }
119 //**********************************************************************************************************************
120
121 TreeGroupCommand::TreeGroupCommand(string option)  {
122         try {
123                 abort = false; calledHelp = false;   
124                 allLines = 1;
125                 
126                 //allow user to run help
127                 if(option == "help") { help(); abort = true; calledHelp = true; }
128                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
129                 
130                 else {
131                         vector<string> myArray = setParameters();
132                         
133                         OptionParser parser(option);
134                         map<string, string> parameters = parser. getParameters();
135                         
136                         ValidParameters validParameter;
137                         map<string, string>::iterator it;
138                 
139                         //check to make sure all parameters are valid for command
140                         for (it = parameters.begin(); it != parameters.end(); it++) { 
141                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
142                         }
143                         
144                         //initialize outputTypes
145                         vector<string> tempOutNames;
146                         outputTypes["tree"] = tempOutNames;
147                         
148                         //if the user changes the input directory command factory will send this info to us in the output parameter 
149                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
150                         if (inputDir == "not found"){   inputDir = "";          }
151                         else {
152                                 string path;
153                                 it = parameters.find("phylip");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
159                                 }
160                                 
161                                 it = parameters.find("column");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
167                                 }
168                                 
169                                 it = parameters.find("name");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
175                                 }
176                         }
177                         
178                         //check for required parameters
179                         phylipfile = validParameter.validFile(parameters, "phylip", true);
180                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
181                         else if (phylipfile == "not found") { phylipfile = ""; }        
182                         else {  inputfile = phylipfile;  format = "phylip"; m->setPhylipFile(phylipfile);       }
183                         
184                         columnfile = validParameter.validFile(parameters, "column", true);
185                         if (columnfile == "not open") { columnfile = ""; abort = true; }        
186                         else if (columnfile == "not found") { columnfile = ""; }
187                         else {  inputfile = columnfile; format = "column";      m->setColumnFile(columnfile); }
188                         
189                         sharedfile = validParameter.validFile(parameters, "shared", true);
190                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
191                         else if (sharedfile == "not found") { sharedfile = ""; }
192                         else {  inputfile = sharedfile; format = "sharedfile";  m->setSharedFile(sharedfile); }
193                         
194                         namefile = validParameter.validFile(parameters, "name", true);
195                         if (namefile == "not open") { abort = true; }   
196                         else if (namefile == "not found") { namefile = ""; }
197                         else { m->setNameFile(namefile); }
198                         
199                         if ((phylipfile == "") && (columnfile == "") && (sharedfile == "")) { 
200                                 //is there are current file available for either of these?
201                                 //give priority to shared, then column, then phylip
202                                 sharedfile = m->getSharedFile(); 
203                                 if (sharedfile != "") {  inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
204                                 else { 
205                                         columnfile = m->getColumnFile(); 
206                                         if (columnfile != "") { inputfile = columnfile; format = "column";  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
207                                         else { 
208                                                 phylipfile = m->getPhylipFile(); 
209                                                 if (phylipfile != "") { inputfile = phylipfile;  format = "phylip";  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
210                                                 else { 
211                                                         m->mothurOut("No valid current files. You must provide a shared, phylip or column file."); m->mothurOutEndLine(); 
212                                                         abort = true;
213                                                 }
214                                         }
215                                 }
216                         }
217                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); m->mothurOutEndLine(); abort = true; }
218                         
219                         if (columnfile != "") {
220                                 if (namefile == "") { 
221                                         namefile = m->getNameFile(); 
222                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
223                                         else { 
224                                                 m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
225                                                 abort = true; 
226                                         }       
227                                 }
228                         }
229                         
230                         //check for optional parameter and set defaults
231                         // ...at some point should added some additional type checking...
232                         label = validParameter.validFile(parameters, "label", false);                   
233                         if (label == "not found") { label = ""; }
234                         else { 
235                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
236                                 else { allLines = 1;  }
237                         }
238                         
239                         groups = validParameter.validFile(parameters, "groups", false);                 
240                         if (groups == "not found") { groups = ""; }
241                         else { 
242                                 m->splitAtDash(groups, Groups);
243                                 m->Groups = Groups;
244                         }
245                                 
246                         calc = validParameter.validFile(parameters, "calc", false);                     
247                         if (calc == "not found") { calc = "jclass-thetayc";  }
248                         else { 
249                                  if (calc == "default")  {  calc = "jclass-thetayc";  }
250                         }
251                         m->splitAtDash(calc, Estimators);
252                         if (m->inUsersGroups("citation", Estimators)) { 
253                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
254                                 //remove citation from list of calcs
255                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
256                         }
257
258                         string temp;
259                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
260                         convert(temp, precision); 
261                         
262                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
263                         convert(temp, cutoff); 
264                         cutoff += (5 / (precision * 10.0));
265                         
266                         //if the user changes the output directory command factory will send this info to us in the output parameter 
267                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
268                                 outputDir = ""; 
269                                 outputDir += m->hasPath(inputfile); //if user entered a file with a path then preserve it       
270                         }
271                 }
272
273         }
274         catch(exception& e) {
275                 m->errorOut(e, "TreeGroupCommand", "TreeGroupCommand");
276                 exit(1);
277         }
278 }
279 //**********************************************************************************************************************
280
281 TreeGroupCommand::~TreeGroupCommand(){
282         if (abort == false) {
283                 if (format == "sharedfile") {  delete input; }
284                 else { delete readMatrix;  delete matrix; delete list; }
285                 delete tmap;  
286         }
287         
288 }
289
290 //**********************************************************************************************************************
291
292 int TreeGroupCommand::execute(){
293         try {
294         
295                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
296                 
297                 if (format == "sharedfile") {
298                         
299                         ValidCalculators validCalculator;
300                 
301                         for (int i=0; i<Estimators.size(); i++) {
302                                 if (validCalculator.isValidCalculator("treegroup", Estimators[i]) == true) { 
303                                         if (Estimators[i] == "sharedsobs") { 
304                                                 treeCalculators.push_back(new SharedSobsCS());
305                                         }else if (Estimators[i] == "sharedchao") { 
306                                                 treeCalculators.push_back(new SharedChao1());
307                                         }else if (Estimators[i] == "sharedace") { 
308                                                 treeCalculators.push_back(new SharedAce());
309                                         }else if (Estimators[i] == "jabund") {  
310                                                 treeCalculators.push_back(new JAbund());
311                                         }else if (Estimators[i] == "sorabund") { 
312                                                 treeCalculators.push_back(new SorAbund());
313                                         }else if (Estimators[i] == "jclass") { 
314                                                 treeCalculators.push_back(new Jclass());
315                                         }else if (Estimators[i] == "sorclass") { 
316                                                 treeCalculators.push_back(new SorClass());
317                                         }else if (Estimators[i] == "jest") { 
318                                                 treeCalculators.push_back(new Jest());
319                                         }else if (Estimators[i] == "sorest") { 
320                                                 treeCalculators.push_back(new SorEst());
321                                         }else if (Estimators[i] == "thetayc") { 
322                                                 treeCalculators.push_back(new ThetaYC());
323                                         }else if (Estimators[i] == "thetan") { 
324                                                 treeCalculators.push_back(new ThetaN());
325                                         }else if (Estimators[i] == "kstest") { 
326                                                 treeCalculators.push_back(new KSTest());
327                                         }else if (Estimators[i] == "sharednseqs") { 
328                                                 treeCalculators.push_back(new SharedNSeqs());
329                                         }else if (Estimators[i] == "ochiai") { 
330                                                 treeCalculators.push_back(new Ochiai());
331                                         }else if (Estimators[i] == "anderberg") { 
332                                                 treeCalculators.push_back(new Anderberg());
333                                         }else if (Estimators[i] == "kulczynski") { 
334                                                 treeCalculators.push_back(new Kulczynski());
335                                         }else if (Estimators[i] == "kulczynskicody") { 
336                                                 treeCalculators.push_back(new KulczynskiCody());
337                                         }else if (Estimators[i] == "lennon") { 
338                                                 treeCalculators.push_back(new Lennon());
339                                         }else if (Estimators[i] == "morisitahorn") { 
340                                                 treeCalculators.push_back(new MorHorn());
341                                         }else if (Estimators[i] == "braycurtis") { 
342                                                 treeCalculators.push_back(new BrayCurtis());
343                                         }else if (Estimators[i] == "whittaker") { 
344                                                 treeCalculators.push_back(new Whittaker());
345                                         }else if (Estimators[i] == "odum") { 
346                                                 treeCalculators.push_back(new Odum());
347                                         }else if (Estimators[i] == "canberra") { 
348                                                 treeCalculators.push_back(new Canberra());
349                                         }else if (Estimators[i] == "structeuclidean") { 
350                                                 treeCalculators.push_back(new StructEuclidean());
351                                         }else if (Estimators[i] == "structchord") { 
352                                                 treeCalculators.push_back(new StructChord());
353                                         }else if (Estimators[i] == "hellinger") { 
354                                                 treeCalculators.push_back(new Hellinger());
355                                         }else if (Estimators[i] == "manhattan") { 
356                                                 treeCalculators.push_back(new Manhattan());
357                                         }else if (Estimators[i] == "structpearson") { 
358                                                 treeCalculators.push_back(new StructPearson());
359                                         }else if (Estimators[i] == "soergel") { 
360                                                 treeCalculators.push_back(new Soergel());
361                                         }else if (Estimators[i] == "spearman") { 
362                                                 treeCalculators.push_back(new Spearman());
363                                         }else if (Estimators[i] == "structkulczynski") { 
364                                                 treeCalculators.push_back(new StructKulczynski());
365                                         }else if (Estimators[i] == "speciesprofile") { 
366                                                 treeCalculators.push_back(new SpeciesProfile());
367                                         }else if (Estimators[i] == "hamming") { 
368                                                 treeCalculators.push_back(new Hamming());
369                                         }else if (Estimators[i] == "structchi2") { 
370                                                 treeCalculators.push_back(new StructChi2());
371                                         }else if (Estimators[i] == "gower") { 
372                                                 treeCalculators.push_back(new Gower());
373                                         }else if (Estimators[i] == "memchi2") { 
374                                                 treeCalculators.push_back(new MemChi2());
375                                         }else if (Estimators[i] == "memchord") { 
376                                                 treeCalculators.push_back(new MemChord());
377                                         }else if (Estimators[i] == "memeuclidean") { 
378                                                 treeCalculators.push_back(new MemEuclidean());
379                                         }else if (Estimators[i] == "mempearson") { 
380                                                 treeCalculators.push_back(new MemPearson());
381                                         }
382                                 }
383                         }
384                         
385                         //if the users entered no valid calculators don't execute command
386                         if (treeCalculators.size() == 0) { m->mothurOut("You have given no valid calculators."); m->mothurOutEndLine(); return 0; }
387                         
388                         input = new InputData(sharedfile, "sharedfile");
389                         lookup = input->getSharedRAbundVectors();
390                         lastLabel = lookup[0]->getLabel();
391                         
392                         if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); return 0; }
393                         
394                         //used in tree constructor 
395                         m->runParse = false;
396                         
397                         //create treemap class from groupmap for tree class to use
398                         tmap = new TreeMap();
399                         tmap->makeSim(m->namesOfGroups);
400                         
401                         //clear globaldatas old tree names if any
402                         m->Treenames.clear();
403                         
404                         //fills globaldatas tree names
405                         m->Treenames = m->Groups;
406                 
407                         if (m->control_pressed) { return 0; }
408                         
409                         //create tree file
410                         makeSimsShared();
411                         
412                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
413                 }else{
414                         //read in dist file
415                         filename = inputfile;
416                 
417                         if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
418                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
419                                 
420                         readMatrix->setCutoff(cutoff);
421         
422                         if(namefile != ""){     
423                                 nameMap = new NameAssignment(namefile);
424                                 nameMap->readMap();
425                         }
426                         else{
427                                 nameMap = NULL;
428                         }
429         
430                         readMatrix->read(nameMap);
431                         list = readMatrix->getListVector();
432                         matrix = readMatrix->getMatrix();
433
434                         //make treemap
435                         tmap = new TreeMap();
436                         
437                         if (m->control_pressed) { return 0; }
438                         
439                         tmap->makeSim(list);
440                         
441                         m->Groups = tmap->namesOfGroups;
442                 
443                         //clear globaldatas old tree names if any
444                         m->Treenames.clear();
445                 
446                         //fills globaldatas tree names
447                         m->Treenames = m->Groups;
448                         
449                         //used in tree constructor 
450                         m->runParse = false;
451                         
452                         if (m->control_pressed) { return 0; }
453                         
454                         makeSimsDist();
455                         
456                         if (m->control_pressed) { return 0; }
457
458                         //create a new filename
459                         outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre";   
460                         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
461                                 
462                         createTree();
463                         
464                         if (m->control_pressed) { return 0; }
465
466                         m->mothurOut("Tree complete. "); m->mothurOutEndLine();
467                         
468                 }
469                                 
470                 //reset groups parameter
471                 m->Groups.clear(); 
472                 
473                 //set tree file as new current treefile
474                 string current = "";
475                 itTypes = outputTypes.find("tree");
476                 if (itTypes != outputTypes.end()) {
477                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
478                 }
479                 
480                 m->mothurOutEndLine();
481                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
482                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
483                 m->mothurOutEndLine();
484
485                 return 0;
486         }
487         catch(exception& e) {
488                 m->errorOut(e, "TreeGroupCommand", "execute");
489                 exit(1);
490         }
491 }
492 //**********************************************************************************************************************
493
494 int TreeGroupCommand::createTree(){
495         try {
496                 //create tree
497                 t = new Tree(tmap);
498                 
499                 //do merges and create tree structure by setting parents and children
500                 //there are numGroups - 1 merges to do
501                 for (int i = 0; i < (numGroups - 1); i++) {
502                         float largest = -1000.0;
503                         
504                         if (m->control_pressed) { delete t; return 1; }
505                         
506                         int row, column;
507                         //find largest value in sims matrix by searching lower triangle
508                         for (int j = 1; j < simMatrix.size(); j++) {
509                                 for (int k = 0; k < j; k++) {
510                                         if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
511                                 }
512                         }
513
514                         //set non-leaf node info and update leaves to know their parents
515                         //non-leaf
516                         t->tree[numGroups + i].setChildren(index[row], index[column]);
517                         
518                         //parents
519                         t->tree[index[row]].setParent(numGroups + i);
520                         t->tree[index[column]].setParent(numGroups + i);
521                         
522                         //blength = distance / 2;
523                         float blength = ((1.0 - largest) / 2);
524                         
525                         //branchlengths
526                         t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
527                         t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
528                         
529                         //set your length to leaves to your childs length plus branchlength
530                         t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
531                         
532                         
533                         //update index 
534                         index[row] = numGroups+i;
535                         index[column] = numGroups+i;
536                         
537                         //remove highest value that caused the merge.
538                         simMatrix[row][column] = -1000.0;
539                         simMatrix[column][row] = -1000.0;
540                         
541                         //merge values in simsMatrix
542                         for (int n = 0; n < simMatrix.size(); n++)      {
543                                 //row becomes merge of 2 groups
544                                 simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
545                                 simMatrix[n][row] = simMatrix[row][n];
546                                 //delete column
547                                 simMatrix[column][n] = -1000.0;
548                                 simMatrix[n][column] = -1000.0;
549                         }
550                 }
551                 
552                 //adjust tree to make sure root to tip length is .5
553                 int root = t->findRoot();
554                 t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
555                 
556                 //assemble tree
557                 t->assembleTree();
558                 
559                 if (m->control_pressed) { delete t; return 1; }
560                 
561                 //print newick file
562                 t->createNewickFile(outputFile);
563                 
564                 //delete tree
565                 delete t;
566                 
567                 if (m->control_pressed) { remove(outputFile.c_str()); outputNames.pop_back(); return 1; }
568                 
569                 return 0;
570         
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "TreeGroupCommand", "createTree");
574                 exit(1);
575         }
576 }
577 /***********************************************************/
578 void TreeGroupCommand::printSims(ostream& out) {
579         try {
580                 
581                 //output column headers
582                 //out << '\t';
583                 //for (int i = 0; i < lookup.size(); i++) {     out << lookup[i]->getGroup() << '\t';           }
584                 //out << endl;
585                 
586                 
587                 for (int m = 0; m < simMatrix.size(); m++)      {
588                         //out << lookup[m]->getGroup() << '\t';
589                         for (int n = 0; n < simMatrix.size(); n++)      {
590                                 out << simMatrix[m][n] << '\t'; 
591                         }
592                         out << endl;
593                 }
594
595         }
596         catch(exception& e) {
597                 m->errorOut(e, "TreeGroupCommand", "printSims");
598                 exit(1);
599         }
600 }
601 /***********************************************************/
602 int TreeGroupCommand::makeSimsDist() {
603         try {
604                 numGroups = list->size();
605                 
606                 //initialize index
607                 index.clear();
608                 for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
609                 
610                 //initialize simMatrix
611                 simMatrix.clear();
612                 simMatrix.resize(numGroups);
613                 for (int k = 0; k < simMatrix.size(); k++)      {
614                         for (int j = 0; j < simMatrix.size(); j++)      {
615                                 simMatrix[k].push_back(0.0);
616                         }
617                 }
618                 
619                 //go through sparse matrix and fill sims
620                 //go through each cell in the sparsematrix
621                 for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
622                         //similairity = -(distance-1)
623                         simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
624                         simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);   
625                         
626                         if (m->control_pressed) { return 1; }
627                         
628                 }
629
630                 return 0;
631         }
632         catch(exception& e) {
633                 m->errorOut(e, "TreeGroupCommand", "makeSimsDist");
634                 exit(1);
635         }
636 }
637
638 /***********************************************************/
639 int TreeGroupCommand::makeSimsShared() {
640         try {
641                 set<string> processedLabels;
642                 set<string> userLabels = labels;
643                 
644                 //as long as you are not at the end of the file or done wih the lines you want
645                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
646                         if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; } return 1; }
647                 
648                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
649                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
650                                 process(lookup);
651                                 
652                                 processedLabels.insert(lookup[0]->getLabel());
653                                 userLabels.erase(lookup[0]->getLabel());
654                         }
655                         
656                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
657                                 string saveLabel = lookup[0]->getLabel();
658                         
659                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
660                                 lookup = input->getSharedRAbundVectors(lastLabel);
661
662                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
663                                 process(lookup);
664                                         
665                                 processedLabels.insert(lookup[0]->getLabel());
666                                 userLabels.erase(lookup[0]->getLabel());
667                                 
668                                 //restore real lastlabel to save below
669                                 lookup[0]->setLabel(saveLabel);
670                         }
671
672                         lastLabel = lookup[0]->getLabel();                      
673                         
674                         //get next line to process
675                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
676                         lookup = input->getSharedRAbundVectors();
677                 }
678                 
679                 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; } return 1; }
680
681                 //output error messages about any remaining user labels
682                 set<string>::iterator it;
683                 bool needToRun = false;
684                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
685                         m->mothurOut("Your file does not include the label " + *it); 
686                         if (processedLabels.count(lastLabel) != 1) {
687                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
688                                 needToRun = true;
689                         }else {
690                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
691                         }
692                 }
693                 
694                 //run last label if you need to
695                 if (needToRun == true)  {
696                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {             delete lookup[i]; }             } 
697                         lookup = input->getSharedRAbundVectors(lastLabel);
698
699                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
700                         process(lookup);
701                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }         
702                 }
703                 
704                 for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; }
705                 
706                 return 0;
707         }
708         catch(exception& e) {
709                 m->errorOut(e, "TreeGroupCommand", "makeSimsShared");
710                 exit(1);
711         }
712 }
713
714 /***********************************************************/
715 int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
716         try{
717                                 EstOutput data;
718                                 vector<SharedRAbundVector*> subset;
719                                 numGroups = thisLookup.size();
720                                 
721                                 //for each calculator                                                                                           
722                                 for(int i = 0 ; i < treeCalculators.size(); i++) {
723                                         //initialize simMatrix
724                                         simMatrix.clear();
725                                         simMatrix.resize(numGroups);
726                                         for (int k = 0; k < simMatrix.size(); k++)      {
727                                                 for (int j = 0; j < simMatrix.size(); j++)      {
728                                                         simMatrix[k].push_back(0.0);
729                                                 }
730                                         }
731                 
732                                         //initialize index
733                                         index.clear();
734                                         for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
735                 
736                                         //create a new filename
737                                         outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";                                
738                                         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
739                                                                                                 
740                                         for (int k = 0; k < thisLookup.size(); k++) { 
741                                                 for (int l = k; l < thisLookup.size(); l++) {
742                                                         if (k != l) { //we dont need to similiarity of a groups to itself
743                                                                 //get estimated similarity between 2 groups
744                                                                 
745                                                                 subset.clear(); //clear out old pair of sharedrabunds
746                                                                 //add new pair of sharedrabunds
747                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
748                                                                 
749                                                                 //if this calc needs all groups to calculate the pair load all groups
750                                                                 if (treeCalculators[i]->getNeedsAll()) { 
751                                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
752                                                                         for (int w = 0; w < thisLookup.size(); w++) {
753                                                                                 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
754                                                                         }
755                                                                 }
756                                                                 
757                                                                 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
758                                                 //cout << thisLookup[k]->getGroup() << '\t' << thisLookup[l]->getGroup() << '\t' << (1.0 - data[0]) << endl;
759                                                                 if (m->control_pressed) { return 1; }
760                                                                 
761                                                                 //save values in similarity matrix
762                                                                 simMatrix[k][l] = -(data[0]-1.0);
763                                                                 simMatrix[l][k] = -(data[0]-1.0);
764                                                         }
765                                                 }
766                                         }
767                                         
768                                         //createdistance file from simMatrix
769                                         /*string o = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
770                                         ofstream outDist;
771                                         m->openOutputFile(o, outDist);
772                                         outDist << simMatrix.size() << endl;
773                                         for (int k = 0; k < simMatrix.size(); k++) {
774                                                 outDist << thisLookup[k]->getGroup() << '\t';
775                                                 for (int l = 0; l < k; l++) {
776                                                         outDist << (1.0-simMatrix[k][l]) << '\t';
777                                                 }
778                                                 outDist << endl;
779                                         }
780                                         outDist.close();*/
781
782                                         
783                                         if (m->control_pressed) { return 1; }
784                                         //creates tree from similarity matrix and write out file
785                                         createTree();
786                                         
787                                         if (m->control_pressed) { return 1; }
788                                 }
789                                 
790                                 return 0;
791
792         }
793         catch(exception& e) {
794                 m->errorOut(e, "TreeGroupCommand", "process");
795                 exit(1);
796         }
797 }
798 /***********************************************************/
799
800         
801