]> git.donarmstrong.com Git - mothur.git/blob - sensspeccommand.cpp
fixed bug with tree reading
[mothur.git] / sensspeccommand.cpp
1 /*
2  *  sensspeccommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/6/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sensspeccommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::setParameters(){        
14         try {
15                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
16                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
17                 //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
18                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
19                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20                 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
21                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22                 CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SensSpecCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SensSpecCommand::getHelpString(){        
37         try {
38                 string helpString = "";
39                 helpString += "The sens.spec command....\n";
40                 return helpString;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "SensSpecCommand", "getHelpString");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 SensSpecCommand::SensSpecCommand(){     
49         try {
50                 abort = true; calledHelp = true; 
51                 setParameters();
52                 vector<string> tempOutNames;
53                 outputTypes["sensspec"] = tempOutNames;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
57                 exit(1);
58         }
59 }
60 //***************************************************************************************************************
61
62 SensSpecCommand::SensSpecCommand(string option)  {
63         try {
64                 
65                 abort = false; calledHelp = false;   
66                 
67                 //allow user to run help
68                 if(option == "help") { help(); abort = true; calledHelp = true; }
69                 
70                 else {
71                         string temp;
72                         
73                         vector<string> myArray = setParameters();
74                         
75                         OptionParser parser(option);
76                         map<string,string> parameters = parser.getParameters();
77                         
78                         ValidParameters validParameter;
79                         map<string,string>::iterator it;
80                         
81                         //check to make sure all parameters are valid for command
82                         for (it = parameters.begin(); it != parameters.end(); it++) { 
83                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
84                         }
85                         
86                         //initialize outputTypes
87                         vector<string> tempOutNames;
88                         outputTypes["sensspec"] = tempOutNames;
89                         
90                         //if the user changes the input directory command factory will send this info to us in the output parameter 
91                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
92                         if (inputDir == "not found"){   inputDir = "";          }
93                         else {
94                                 string path;
95                                 it = parameters.find("list");
96                                 //user has given a template file
97                                 if(it != parameters.end()){ 
98                                         path = m->hasPath(it->second);
99                                         //if the user has not given a path then, add inputdir. else leave path alone.
100                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
101                                 }
102                                 
103                                 it = parameters.find("phylip");
104                                 //user has given a template file
105                                 if(it != parameters.end()){ 
106                                         path = m->hasPath(it->second);
107                                         //if the user has not given a path then, add inputdir. else leave path alone.
108                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
109                                 }
110                                 
111                                 it = parameters.find("column");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
117                                 }
118                                 
119                                 //it = parameters.find("name");
120                                 //user has given a template file
121                                 //if(it != parameters.end()){ 
122                                         //path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         //if (path == "") {     parameters["name"] = inputDir + it->second;             }
125                                 //}
126                                 
127                         }
128                         //check for required parameters
129                         listFile = validParameter.validFile(parameters, "list", true);
130                         if (listFile == "not found") {          
131                                 listFile = m->getListFile(); 
132                                 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
133                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
134                         }
135                         else if (listFile == "not open") { abort = true; }      
136                         
137                         phylipfile = validParameter.validFile(parameters, "phylip", true);
138                         if (phylipfile == "not found") { phylipfile = "";  }
139                         else if (phylipfile == "not open") { abort = true; }    
140                         else { distFile = phylipfile; format = "phylip";   }
141                         
142                         columnfile = validParameter.validFile(parameters, "column", true);
143                         if (columnfile == "not found") { columnfile = ""; }
144                         else if (columnfile == "not open") { abort = true; }    
145                         else { distFile = columnfile; format = "column";   }
146                         
147                         if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
148                                 //give priority to column, then phylip
149                                 columnfile = m->getColumnFile(); 
150                                 if (columnfile != "") {  distFile = columnfile; format = "column";  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
151                                 else { 
152                                         phylipfile = m->getPhylipFile(); 
153                                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
154                                         else { 
155                                                 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); 
156                                                 abort = true;
157                                         }
158                                 }
159                         }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
160                         
161                         
162                         //if the user changes the output directory command factory will send this info to us in the output parameter 
163                         outputDir = validParameter.validFile(parameters, "outputdir", false);
164                         if (outputDir == "not found"){  
165                                 outputDir = ""; 
166                                 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it        
167                         }
168
169                         //check for optional parameter and set defaults
170                         // ...at some point should added some additional type checking...
171                         temp = validParameter.validFile(parameters, "hard", false);
172                         if (temp == "not found"){       hard = 0;       }
173                         else if(!m->isTrue(temp))       {       hard = 0;       }
174                         else if(m->isTrue(temp))        {       hard = 1;       }
175                         
176 //                      temp = validParameter.validFile(parameters, "name", true);
177 //                      if (temp == "not found")        {       nameFile = "";          }
178 //                      else if(temp == "not open")     {       abort = true;           }
179 //                      else                                            {       nameFile = temp;        }
180 //                      cout << "name:\t" << nameFile << endl;
181
182                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
183                         convert(temp, cutoff);  
184 //                      cout << cutoff << endl;
185                         
186                         temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
187                         convert(temp, precision);  
188 //                      cout << precision << endl;
189                         
190                         lineLabel = validParameter.validFile(parameters, "label", false);       if (lineLabel == "not found") { lineLabel = ""; }
191                         
192                         sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
193                 }
194         }
195         catch(exception& e) {
196                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
197                 exit(1);
198         }
199 }
200 //***************************************************************************************************************
201
202 int SensSpecCommand::execute(){
203         try{
204                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
205
206                 setUpOutput();
207                 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
208                 if(format == "phylip")          {       processPhylip();        }
209                 else if(format == "column")     {       processColumn();        }
210                 
211                 m->mothurOutEndLine();
212                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
213                 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();  
214                 m->mothurOutEndLine();
215                 
216                 
217                 return 0;       
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "SensSpecCommand", "execute");
221                 exit(1);
222         }
223 }
224
225 //***************************************************************************************************************
226
227 void SensSpecCommand::processPhylip(){
228         try{
229                 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
230                 
231                 ifstream inputListFile;
232                 m->openInputFile(listFile, inputListFile);
233                 
234                 string origCutoff = "";
235                 bool getCutoff = 0;
236                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
237                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
238                 
239                 string label;
240                 int numOTUs;
241
242                 map<string, int> seqMap;
243                 string seqList;
244                 
245                 while(inputListFile){
246                         inputListFile >> label >> numOTUs;
247                         for(int i=0;i<numOTUs;i++){
248                                 inputListFile >> seqList;
249                                 int seqListLength = seqList.length();
250                                 string seqName = "";
251                                 for(int j=0;j<seqListLength;j++){
252                                         
253                                         if(seqList[j] == ','){
254                                                 seqMap[seqName] = i;
255                                                 seqName = "";
256                                         }
257                                         else{
258                                                 seqName += seqList[j];
259                                         }
260                                         
261                                 }
262                                 seqMap[seqName] = i;
263                         }
264                         m->gobble(inputListFile);
265                 
266                         int lNumSeqs = seqMap.size();
267                         int pNumSeqs = 0;
268
269                         ifstream phylipFile;
270                         m->openInputFile(distFile, phylipFile);
271                         phylipFile >> pNumSeqs;
272                         if(pNumSeqs != lNumSeqs){       cout << "numSeq mismatch!" << endl;     }
273                         
274                         string seqName;
275                         double distance;
276                         vector<int> otuIndices(lNumSeqs, -1);
277                                 
278                         truePositives = 0;
279                         falsePositives = 0;
280                         trueNegatives = 0;
281                         falseNegatives = 0;
282                         
283                         if(getCutoff == 1){
284                                 if(label != "unique"){
285                                         origCutoff = label;
286                                         convert(label, cutoff);
287                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
288                                 }
289                                 else{
290                                         origCutoff = "unique";
291                                         cutoff = 0.0000;
292                                 }
293                         }
294                                    
295                         cout << label << endl;
296                         
297                         for(int i=0;i<lNumSeqs;i++){
298                                 phylipFile >> seqName;
299                                 otuIndices[i] = seqMap[seqName];
300                                 
301                                 for(int j=0;j<i;j++){
302                                         phylipFile >> distance;
303                                         
304                                         if(distance <= cutoff){
305                                                 if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
306                                                 else                                                            {       falseNegatives++;       }
307                                         }
308                                         else{
309                                                 if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
310                                                 else                                                            {       trueNegatives++;        }
311                                         }
312                                 }
313                         }
314                         phylipFile.close();
315                         
316                         outputStatistics(label, origCutoff);
317                 }
318                 inputListFile.close();
319
320         }
321         catch(exception& e) {
322                 m->errorOut(e, "SensSpecCommand", "processPhylip");
323                 exit(1);
324         }
325 }
326
327 //***************************************************************************************************************
328
329 void SensSpecCommand::processColumn(){
330         try{            
331                 ifstream inputListFile;
332                 m->openInputFile(listFile, inputListFile);
333                 
334                 string origCutoff = "";
335                 bool getCutoff = 0;
336                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
337                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
338                 
339                 set<string> seqPairSet;
340                 
341                 string label, seqList;
342                 int numOTUs;
343                 int numSeqs;
344                 
345                 while(inputListFile){
346                         numSeqs = 0;
347                         
348                         inputListFile >> label >> numOTUs;
349                         for(int i=0;i<numOTUs;i++){
350                                 
351                                 vector<string> seqNameVector;
352                                 
353                                 inputListFile >> seqList;
354                                 int seqListLength = seqList.length();
355                                 string seqName = "";
356                                 for(int j=0;j<seqListLength;j++){
357                                         
358                                         if(seqList[j] == ','){
359                                                 seqNameVector.push_back(seqName);
360                                                 seqName = "";
361                                         }
362                                         else{
363                                                 seqName += seqList[j];
364                                         }
365                                 }
366                                 seqNameVector.push_back(seqName);
367
368                                 numSeqs += seqNameVector.size();
369                                 
370                                 int numSeqsInOTU = seqNameVector.size();
371                                 for(int j=0;j<numSeqsInOTU;j++){
372                                         string seqPairString = "";                              
373                                         for(int k=0;k<j;k++){
374                                                 if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
375                                                 else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
376                                                 seqPairSet.insert(seqPairString);
377                                         }
378                                 }
379                         }
380                         m->gobble(inputListFile);
381                         
382                         int numDists = (numSeqs * (numSeqs-1) / 2);
383
384                         ifstream columnFile;
385                         m->openInputFile(distFile, columnFile);
386                         string seqNameA, seqNameB, seqPairString;
387                         double distance;
388                         
389                         truePositives = 0;
390                         falsePositives = 0;
391                         trueNegatives = numDists;
392                         falseNegatives = 0;
393                         
394                         if(getCutoff == 1){
395                                 if(label != "unique"){
396                                         origCutoff = label;
397                                         convert(label, cutoff);
398                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
399                                 }
400                                 else{
401                                         origCutoff = "unique";
402                                         cutoff = 0.0000;
403                                 }
404                         }
405                         
406                         cout << label << endl;
407                         
408                         while(columnFile){
409                                 columnFile >> seqNameA >> seqNameB >> distance;
410                                 if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
411                                 else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
412
413                                 set<string>::iterator it = seqPairSet.find(seqPairString);
414                         
415                                 if(distance <= cutoff){
416                                         if(it != seqPairSet.end()){
417                                                 truePositives++;
418                                                 seqPairSet.erase(it);   
419                                         }
420                                         else{
421                                                 falseNegatives++;
422                                         }
423                                         trueNegatives--;
424                                 }
425                                 else if(it != seqPairSet.end()){        
426                                         falsePositives++;
427                                         trueNegatives--;
428                                         seqPairSet.erase(it);   
429                                 }
430                                 
431                                 m->gobble(columnFile);
432                         }
433                         falsePositives += seqPairSet.size();
434                         
435                         outputStatistics(label, origCutoff);
436                 }
437         }
438         catch(exception& e) {
439                 m->errorOut(e, "SensSpecCommand", "processColumn");
440                 exit(1);
441         }
442 }
443
444 //***************************************************************************************************************
445
446 void SensSpecCommand::setUpOutput(){
447         try{            
448                 ofstream sensSpecFile;
449                 m->openOutputFile(sensSpecFileName, sensSpecFile);
450                 
451                 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
452
453                 sensSpecFile.close();
454         }
455         catch(exception& e) {
456                 m->errorOut(e, "SensSpecCommand", "setUpOutput");
457                 exit(1);
458         }
459 }
460
461 //***************************************************************************************************************
462
463 void SensSpecCommand::outputStatistics(string label, string cutoff){
464         try{            
465                 double tp = (double) truePositives;
466                 double fp = (double) falsePositives;
467                 double tn = (double) trueNegatives;
468                 double fn = (double) falseNegatives;
469                 
470                 double p = tp + fn;
471                 double n = fp + tn;
472                 double pPrime = tp + fp;
473                 double nPrime = tn + fn;
474                 
475                 double sensitivity = tp / p;    
476                 double specificity = tn / n;
477                 double positivePredictiveValue = tp / pPrime;
478                 double negativePredictiveValue = tn / nPrime;
479                 double falseDiscoveryRate = fp / pPrime;
480                 
481                 double accuracy = (tp + tn) / (p + n);
482                 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime);  if(p == 0 || n == 0){   matthewsCorrCoef = 0;   }
483                 double f1Score = 2.0 * tp / (p + pPrime);
484                 
485                 
486                 if(p == 0)                      {       sensitivity = 0;        matthewsCorrCoef = 0;   }
487                 if(n == 0)                      {       specificity = 0;        matthewsCorrCoef = 0;   }
488                 if(p + n == 0)          {       accuracy = 0;                                                           }
489                 if(p + pPrime == 0)     {       f1Score = 0;                                                            }
490                 if(pPrime == 0)         {       positivePredictiveValue = 0;    falseDiscoveryRate = 0; matthewsCorrCoef = 0;   }
491                 if(nPrime == 0)         {       negativePredictiveValue = 0;    matthewsCorrCoef = 0;                                                   }
492                 
493                 ofstream sensSpecFile;
494                 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
495                 
496                 sensSpecFile << label << '\t' << cutoff << '\t';
497                 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
498                 sensSpecFile << setprecision(4);
499                 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
500                 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
501                 
502                 sensSpecFile.close();
503         }
504         catch(exception& e) {
505                 m->errorOut(e, "SensSpecCommand", "outputStatistics");
506                 exit(1);
507         }
508 }
509
510 //***************************************************************************************************************
511
512
513