]> git.donarmstrong.com Git - mothur.git/blob - heatmapsimcommand.cpp
you can now use a distance matrix as input for the heatmap.sim command.
[mothur.git] / heatmapsimcommand.cpp
1 /*
2  *  heatmapsimcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 6/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "heatmapsimcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
21
22
23 //**********************************************************************************************************************
24
25 HeatMapSimCommand::HeatMapSimCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 labels.clear();
31                 Groups.clear();
32                 Estimators.clear();
33                 
34                 //allow user to run help
35                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
36                 
37                 else {
38                         //valid paramters for this command
39                         string AlignArray[] =  {"groups","label", "calc","phylip","column","name"};
40                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
41                         
42                         OptionParser parser(option);
43                         map<string,string> parameters = parser.getParameters();
44                         
45                         ValidParameters validParameter;
46                         
47                         //check to make sure all parameters are valid for command
48                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
49                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
50                         }
51                         
52                         format = "";
53                         
54                         //required parameters
55                         phylipfile = validParameter.validFile(parameters, "phylip", true);
56                         if (phylipfile == "not open") { abort = true; }
57                         else if (phylipfile == "not found") { phylipfile = ""; }        
58                         else {  format = "phylip";      }
59                         
60                         columnfile = validParameter.validFile(parameters, "column", true);
61                         if (columnfile == "not open") { abort = true; } 
62                         else if (columnfile == "not found") { columnfile = ""; }
63                         else {  format = "column";      }
64                         
65                         namefile = validParameter.validFile(parameters, "name", true);
66                         if (namefile == "not open") { abort = true; }   
67                         else if (namefile == "not found") { namefile = ""; }
68                         
69                         
70                         //error checking on files                       
71                         if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == "")))  { mothurOut("You must run the read.otu command or provide a distance file before running the heatmap.sim command."); mothurOutEndLine(); abort = true; }
72                         else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When running the heatmap.sim command with a distance file you may not use both the column and the phylip parameters."); mothurOutEndLine(); abort = true; }
73                         
74                         if (columnfile != "") {
75                                 if (namefile == "") {  mothurOut("You need to provide a namefile if you are going to use the column format."); mothurOutEndLine(); abort = true; }
76                         }
77                         
78                         if (format == "") { format = "shared"; }
79                         
80                         //check for optional parameter and set defaults
81                         // ...at some point should added some additional type checking...
82                         if (format == "shared") {
83                                 label = validParameter.validFile(parameters, "label", false);                   
84                                 if (label == "not found") { label = ""; }
85                                 else { 
86                                         if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
87                                         else { allLines = 1;  }
88                                 }
89                                 
90                                 //if the user has not specified any labels use the ones from read.otu
91                                 if (label == "") {  
92                                         allLines = globaldata->allLines; 
93                                         labels = globaldata->labels; 
94                                 }
95                                 
96                                 calc = validParameter.validFile(parameters, "calc", false);                     
97                                 if (calc == "not found") { calc = "jest-thetayc";  }
98                                 else { 
99                                         if (calc == "default")  {  calc = "jest-thetayc";  }
100                                 }
101                                 splitAtDash(calc, Estimators);
102                                 
103                                 groups = validParameter.validFile(parameters, "groups", false);                 
104                                 if (groups == "not found") { groups = ""; }
105                                 else { 
106                                         splitAtDash(groups, Groups);
107                                         globaldata->Groups = Groups;
108                                 }
109                         }
110                         
111                         if (abort == false) {
112                                 validCalculator = new ValidCalculators();
113                         
114                                 int i;
115                                 for (i=0; i<Estimators.size(); i++) {
116                                         if (validCalculator->isValidCalculator("heat", Estimators[i]) == true) { 
117                                                 if (Estimators[i] == "jabund") {        
118                                                         heatCalculators.push_back(new JAbund());
119                                                 }else if (Estimators[i] == "sorabund") { 
120                                                         heatCalculators.push_back(new SorAbund());
121                                                 }else if (Estimators[i] == "jclass") { 
122                                                         heatCalculators.push_back(new Jclass());
123                                                 }else if (Estimators[i] == "sorclass") { 
124                                                         heatCalculators.push_back(new SorClass());
125                                                 }else if (Estimators[i] == "jest") { 
126                                                         heatCalculators.push_back(new Jest());
127                                                 }else if (Estimators[i] == "sorest") { 
128                                                         heatCalculators.push_back(new SorEst());
129                                                 }else if (Estimators[i] == "thetayc") { 
130                                                         heatCalculators.push_back(new ThetaYC());
131                                                 }else if (Estimators[i] == "thetan") { 
132                                                         heatCalculators.push_back(new ThetaN());
133                                                 }else if (Estimators[i] == "morisitahorn") { 
134                                                         heatCalculators.push_back(new MorHorn());
135                                                 }else if (Estimators[i] == "braycurtis") { 
136                                                         heatCalculators.push_back(new BrayCurtis());
137                                                 }
138                                         }
139                                 }
140                                 
141                         }
142                 }
143
144                                 
145
146         }
147         catch(exception& e) {
148                 errorOut(e, "HeatMapSimCommand", "HeatMapSimCommand");
149                 exit(1);
150         }
151 }
152
153 //**********************************************************************************************************************
154
155 void HeatMapSimCommand::help(){
156         try {
157                 mothurOut("The heatmap.sim command can only be executed after a successful read.otu command, or by providing a distance file.\n");
158                 mothurOut("The heatmap.sim command parameters are phylip, column, name, groups, calc and label.  No parameters are required.\n");
159                 mothurOut("There are two ways to use the heatmap.sim command. The first is with the read.otu command. \n");
160                 mothurOut("With the read.otu command you may use the groups, label and calc parameters. \n");
161                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n");
162                 mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n");
163                 mothurOut("The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, label=yourLabels).\n");
164                 mothurOut("Example heatmap.sim(groups=A-B-C, calc=jabund).\n");
165                 mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
166                 validCalculator->printCalc("heat", cout);
167                 mothurOut("The default value for calc is jclass-thetayc.\n");
168                 mothurOut("The heatmap.sim command outputs a .svg file for each calculator you choose at each label you specify.\n");
169                 mothurOut("The second way to use the heatmap.sim command is with a distance file representing the distance bewteen your groups. \n");
170                 mothurOut("Using the command this way, the phylip or column parameter are required, and only one may be used.  If you use a column file the name filename is required. \n");
171                 mothurOut("The heatmap.sim command should be in the following format: heatmap.sim(phylip=yourDistanceFile).\n");
172                 mothurOut("Example heatmap.sim(phylip=amazonGroups.dist).\n");
173                 mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
174
175         }
176         catch(exception& e) {
177                 errorOut(e, "HeatMapSimCommand", "help");
178                 exit(1);
179         }
180 }
181
182 //**********************************************************************************************************************
183
184 HeatMapSimCommand::~HeatMapSimCommand(){}
185
186 //**********************************************************************************************************************
187
188 int HeatMapSimCommand::execute(){
189         try {
190         
191                 if (abort == true)  { return 0; }
192                 
193                 heatmap = new HeatMapSim();
194                 
195                 if (format == "shared") {
196                         runCommandShared();
197                 }else if (format == "phylip") {
198                         globaldata->inputFileName = phylipfile;
199                         runCommandDist();
200                 }else if (format == "column") {
201                         globaldata->inputFileName = columnfile;
202                         runCommandDist();
203                 }
204                 
205                 delete heatmap;
206                 delete validCalculator;
207
208                 return 0;
209         }
210         catch(exception& e) {
211                 errorOut(e, "HeatMapSimCommand", "execute");
212                 exit(1);
213         }
214 }
215
216 //**********************************************************************************************************************
217 int HeatMapSimCommand::runCommandShared() {
218         try {
219                 //if the users entered no valid calculators don't execute command
220                 if (heatCalculators.size() == 0) { mothurOut("No valid calculators."); mothurOutEndLine(); return 0; }
221                 
222                 //you have groups
223                 read = new ReadOTUFile(globaldata->inputFileName);      
224                 read->read(&*globaldata); 
225                         
226                 input = globaldata->ginput;
227                 lookup = input->getSharedRAbundVectors();
228                 string lastLabel = lookup[0]->getLabel();
229                 
230                 if (lookup.size() < 2) { mothurOut("You have not provided enough valid groups.  I cannot run the command."); mothurOutEndLine(); return 0;}
231                                 
232                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
233                 set<string> processedLabels;
234                 set<string> userLabels = labels;
235                 
236                 //as long as you are not at the end of the file or done wih the lines you want
237                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
238                 
239                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
240         
241                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
242                                 heatmap->getPic(lookup, heatCalculators);
243                                         
244                                 processedLabels.insert(lookup[0]->getLabel());
245                                 userLabels.erase(lookup[0]->getLabel());
246                         }
247                                 
248                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
249                                 string saveLabel = lookup[0]->getLabel();
250                         
251                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
252                                 lookup = input->getSharedRAbundVectors(lastLabel);                              
253
254                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
255                                 heatmap->getPic(lookup, heatCalculators);
256                                         
257                                 processedLabels.insert(lookup[0]->getLabel());
258                                 userLabels.erase(lookup[0]->getLabel());
259                                 
260                                 //restore real lastlabel to save below
261                                 lookup[0]->setLabel(saveLabel);
262                         }
263                                 
264                         //prevent memory leak
265                          
266                         lastLabel = lookup[0]->getLabel();                      
267
268                         //get next line to process
269                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
270                         lookup = input->getSharedRAbundVectors();                               
271                 }
272                         
273                 //output error messages about any remaining user labels
274                 set<string>::iterator it;
275                 bool needToRun = false;
276                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
277                         mothurOut("Your file does not include the label " + *it); 
278                         if (processedLabels.count(lastLabel) != 1) {
279                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
280                                 needToRun = true;
281                         }else {
282                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
283                         }
284                 }
285                 
286                 //run last label if you need to
287                 if (needToRun == true)  {
288                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) { delete lookup[i]; } } 
289                         lookup = input->getSharedRAbundVectors(lastLabel);                              
290
291                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
292                         heatmap->getPic(lookup, heatCalculators);
293                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
294                 }
295                 
296                         
297                 //reset groups parameter
298                 globaldata->Groups.clear();  
299                 
300                 delete input;  globaldata->ginput = NULL;
301                 delete read;
302
303                 return 0;
304         }
305         catch(exception& e) {
306                 errorOut(e, "HeatMapSimCommand", "runCommandShared");
307                 exit(1);
308         }
309 }
310 //**********************************************************************************************************************
311 int HeatMapSimCommand::runCommandDist() {
312         try {
313         
314                 vector< vector<double> > matrix;
315                 vector<string> names;
316                 ifstream in;
317                 
318                 //read distance file and create distance vector and names vector
319                 if (format == "phylip") {
320                         //read phylip file
321                         openInputFile(phylipfile, in);
322                         
323                         string name;
324                         int numSeqs;
325                         in >> numSeqs >> name; 
326                         
327                         //save name
328                         names.push_back(name);
329                 
330                         //resize the matrix and fill with zeros
331                         matrix.resize(numSeqs); 
332                         for(int i = 0; i < numSeqs; i++) {
333                                 matrix[i].resize(numSeqs, 0.0);
334                         }
335                                         
336                         //determine if matrix is square or lower triangle
337                         //if it is square read the distances for the first sequence
338                         char d;
339                         bool square;
340                         while((d=in.get()) != EOF){
341                                 
342                                 //is d a number meaning its square
343                                 if(isalnum(d)){ 
344                                         square = true;
345                                         in.putback(d);
346                                         
347                                         for(int i=0;i<numSeqs;i++){
348                                                 in >> matrix[0][i];
349                                         }
350                                         break;
351                                 }
352                                 
353                                 //is d a line return meaning its lower triangle
354                                 if(d == '\n'){
355                                         square = false;
356                                         break;
357                                 }
358                         }
359                         
360                         //read rest of matrix
361                         if (square == true) { 
362                                 for(int i=1;i<numSeqs;i++){
363                                         in >> name;             
364                                         names.push_back(name);
365                                         
366                                         for(int j=0;j<numSeqs;j++) {  in >> matrix[i][j];  }
367                                         gobble(in);
368                                 }
369                         }else { 
370                                 double dist;
371                                 for(int i=1;i<numSeqs;i++){
372                                         in >> name;     
373                                         names.push_back(name);  
374                                         
375                                         for(int j=0;j<i;j++){
376                                                 in >> dist;
377                                                 matrix[i][j] = dist;  matrix[j][i] = dist;
378                                         }
379                                         gobble(in);
380                                 }
381                         }
382                         in.close();
383                 }else {
384                         //read names file
385                         NameAssignment* nameMap = new NameAssignment(namefile);
386                         nameMap->readMap();
387                         
388                         //put names in order in vector
389                         for (int i = 0; i < nameMap->size(); i++) {
390                                 names.push_back(nameMap->get(i));
391                         }
392                         
393                         //resize matrix
394                         matrix.resize(nameMap->size());
395                         for (int i = 0; i < nameMap->size(); i++) {
396                                 matrix[i].resize(nameMap->size(), 0.0);
397                         }
398                         
399                         //read column file
400                         string first, second;
401                         double dist;
402                         openInputFile(columnfile, in);
403                         
404                         while (!in.eof()) {
405                                 in >> first >> second >> dist; gobble(in);
406                                 
407                                 map<string, int>::iterator itA = nameMap->find(first);
408                                 map<string, int>::iterator itB = nameMap->find(second);
409                                 
410                                 if(itA == nameMap->end()){  cerr << "AAError: Sequence '" << first << "' was not found in the names file, please correct\n"; exit(1);  }
411                                 if(itB == nameMap->end()){  cerr << "ABError: Sequence '" << second << "' was not found in the names file, please correct\n"; exit(1);  }
412                                 
413                                 //save distance
414                                 matrix[itA->second][itB->second] = dist;
415                                 matrix[itB->second][itA->second] = dist;
416                         }
417                         in.close();
418                         
419                         delete nameMap;
420                 }
421                 
422
423                 heatmap->getPic(matrix, names); //vector<vector<double>>, vector<string>
424                 
425                 return 0;
426         }
427         catch(exception& e) {
428                 errorOut(e, "HeatMapSimCommand", "runCommandDist");
429                 exit(1);
430         }
431 }
432 //**********************************************************************************************************************
433
434
435
436
437
438