]> git.donarmstrong.com Git - mothur.git/blob - sensspeccommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / sensspeccommand.cpp
1 /*
2  *  sensspeccommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/6/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sensspeccommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::getValidParameters(){   
14         try {
15                 string Array[] =  {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SensSpecCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SensSpecCommand::SensSpecCommand(){     
26         try {
27                 abort = true; calledHelp = true; 
28                 vector<string> tempOutNames;
29                 outputTypes["sensspec"] = tempOutNames;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 vector<string> SensSpecCommand::getRequiredParameters(){        
38         try {
39                 string Array[] =  {"list","phylip","column","or"};
40                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
41                 return myArray;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "SensSpecCommand", "getRequiredParameters");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49 vector<string> SensSpecCommand::getRequiredFiles(){     
50         try {
51                 vector<string> myArray;
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "SensSpecCommand", "getRequiredFiles");
56                 exit(1);
57         }
58 }
59 //***************************************************************************************************************
60
61 SensSpecCommand::SensSpecCommand(string option)  {
62         try {
63                 
64                 abort = false; calledHelp = false;   
65                 
66                 //allow user to run help
67                 if(option == "help") { help(); abort = true; calledHelp = true; }
68                 
69                 else {
70                         string temp;
71
72                         //valid paramters for this command
73                         string AlignArray[] =  {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
74                         
75                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string,string> parameters = parser.getParameters();
79                         
80                         ValidParameters validParameter;
81                         map<string,string>::iterator it;
82                         
83                         //check to make sure all parameters are valid for command
84                         for (it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         
88                         //initialize outputTypes
89                         vector<string> tempOutNames;
90                         outputTypes["sensspec"] = tempOutNames;
91                         
92                         //if the user changes the input directory command factory will send this info to us in the output parameter 
93                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
94                         if (inputDir == "not found"){   inputDir = "";          }
95                         else {
96                                 string path;
97                                 it = parameters.find("list");
98                                 //user has given a template file
99                                 if(it != parameters.end()){ 
100                                         path = m->hasPath(it->second);
101                                         //if the user has not given a path then, add inputdir. else leave path alone.
102                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
103                                 }
104                                 
105                                 it = parameters.find("phylip");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
111                                 }
112                                 
113                                 it = parameters.find("column");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
119                                 }
120                                 
121                                 it = parameters.find("name");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
127                                 }
128                                 
129                         }
130                         //check for required parameters
131                         listFile = validParameter.validFile(parameters, "list", true);
132                         if (listFile == "not found") { m->mothurOut("list is a required parameter for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
133                         else if (listFile == "not open") { abort = true; }      
134                         
135                         distFile = validParameter.validFile(parameters, "column", true);
136                         format = "column";
137                         if(distFile == "not found")     {
138                                 distFile = validParameter.validFile(parameters, "phylip", true);
139                                 format = "phylip";      
140                         }
141                         if(distFile == "not found") { m->mothurOut("either column or phylip are required for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
142                         else if (distFile == "not open") { abort = true; }      
143                 
144                         //if the user changes the output directory command factory will send this info to us in the output parameter 
145                         outputDir = validParameter.validFile(parameters, "outputdir", false);
146                         if (outputDir == "not found"){  
147                                 outputDir = ""; 
148                                 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it        
149                         }
150
151                         //check for optional parameter and set defaults
152                         // ...at some point should added some additional type checking...
153                         temp = validParameter.validFile(parameters, "hard", false);
154                         if (temp == "not found"){       hard = 0;       }
155                         else if(!m->isTrue(temp))       {       hard = 0;       }
156                         else if(m->isTrue(temp))        {       hard = 1;       }
157                         
158 //                      temp = validParameter.validFile(parameters, "name", true);
159 //                      if (temp == "not found")        {       nameFile = "";          }
160 //                      else if(temp == "not open")     {       abort = true;           }
161 //                      else                                            {       nameFile = temp;        }
162 //                      cout << "name:\t" << nameFile << endl;
163
164                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
165                         convert(temp, cutoff);  
166 //                      cout << cutoff << endl;
167                         
168                         temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
169                         convert(temp, precision);  
170 //                      cout << precision << endl;
171                         
172                         lineLabel = validParameter.validFile(parameters, "label", false);       if (lineLabel == "not found") { lineLabel = ""; }
173                         
174                         sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
175                 }
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
179                 exit(1);
180         }
181 }
182
183 //**********************************************************************************************************************
184
185 void SensSpecCommand::help(){
186         try {
187                 m->mothurOut("The sens.spec command reads a fastaFile and creates .....\n");
188
189                 
190                 
191                 m->mothurOut("Example sens.spec(...).\n");
192                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
193                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
194                 
195         }
196         catch(exception& e) {
197                 m->errorOut(e, "SensSpecCommand", "help");
198                 exit(1);
199         }
200 }
201
202 //***************************************************************************************************************
203
204 SensSpecCommand::~SensSpecCommand(){    /*      do nothing      */      }
205
206 //***************************************************************************************************************
207
208 int SensSpecCommand::execute(){
209         try{
210                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
211
212                 setUpOutput();
213                 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
214                 if(format == "phylip")          {       processPhylip();        }
215                 else if(format == "column")     {       processColumn();        }
216                 
217                 
218                 return 0;       
219         }
220         catch(exception& e) {
221                 m->errorOut(e, "SensSpecCommand", "execute");
222                 exit(1);
223         }
224 }
225
226 //***************************************************************************************************************
227
228 void SensSpecCommand::processPhylip(){
229         try{
230                 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
231                 
232                 ifstream inputListFile;
233                 m->openInputFile(listFile, inputListFile);
234                 
235                 string origCutoff = "";
236                 bool getCutoff = 0;
237                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
238                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
239                 
240                 string label;
241                 int numOTUs;
242
243                 map<string, int> seqMap;
244                 string seqList;
245                 
246                 while(inputListFile){
247                         inputListFile >> label >> numOTUs;
248                         for(int i=0;i<numOTUs;i++){
249                                 inputListFile >> seqList;
250                                 int seqListLength = seqList.length();
251                                 string seqName = "";
252                                 for(int j=0;j<seqListLength;j++){
253                                         
254                                         if(seqList[j] == ','){
255                                                 seqMap[seqName] = i;
256                                                 seqName = "";
257                                         }
258                                         else{
259                                                 seqName += seqList[j];
260                                         }
261                                         
262                                 }
263                                 seqMap[seqName] = i;
264                         }
265                         m->gobble(inputListFile);
266                 
267                         int lNumSeqs = seqMap.size();
268                         int pNumSeqs = 0;
269
270                         ifstream phylipFile;
271                         m->openInputFile(distFile, phylipFile);
272                         phylipFile >> pNumSeqs;
273                         if(pNumSeqs != lNumSeqs){       cout << "numSeq mismatch!" << endl;     }
274                         
275                         string seqName;
276                         double distance;
277                         vector<int> otuIndices(lNumSeqs, -1);
278                                 
279                         truePositives = 0;
280                         falsePositives = 0;
281                         trueNegatives = 0;
282                         falseNegatives = 0;
283                         
284                         if(getCutoff == 1){
285                                 if(label != "unique"){
286                                         origCutoff = label;
287                                         convert(label, cutoff);
288                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
289                                 }
290                                 else{
291                                         origCutoff = "unique";
292                                         cutoff = 0.0000;
293                                 }
294                         }
295                                    
296                         cout << label << endl;
297                         
298                         for(int i=0;i<lNumSeqs;i++){
299                                 phylipFile >> seqName;
300                                 otuIndices[i] = seqMap[seqName];
301                                 
302                                 for(int j=0;j<i;j++){
303                                         phylipFile >> distance;
304                                         
305                                         if(distance <= cutoff){
306                                                 if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
307                                                 else                                                            {       falseNegatives++;       }
308                                         }
309                                         else{
310                                                 if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
311                                                 else                                                            {       trueNegatives++;        }
312                                         }
313                                 }
314                         }
315                         phylipFile.close();
316                         
317                         outputStatistics(label, origCutoff);
318                 }
319                 inputListFile.close();
320
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "SensSpecCommand", "processPhylip");
324                 exit(1);
325         }
326 }
327
328 //***************************************************************************************************************
329
330 void SensSpecCommand::processColumn(){
331         try{            
332                 ifstream inputListFile;
333                 m->openInputFile(listFile, inputListFile);
334                 
335                 string origCutoff = "";
336                 bool getCutoff = 0;
337                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
338                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
339                 
340                 set<string> seqPairSet;
341                 
342                 string label, seqList;
343                 int numOTUs;
344                 int numSeqs;
345                 
346                 while(inputListFile){
347                         numSeqs = 0;
348                         
349                         inputListFile >> label >> numOTUs;
350                         for(int i=0;i<numOTUs;i++){
351                                 
352                                 vector<string> seqNameVector;
353                                 
354                                 inputListFile >> seqList;
355                                 int seqListLength = seqList.length();
356                                 string seqName = "";
357                                 for(int j=0;j<seqListLength;j++){
358                                         
359                                         if(seqList[j] == ','){
360                                                 seqNameVector.push_back(seqName);
361                                                 seqName = "";
362                                         }
363                                         else{
364                                                 seqName += seqList[j];
365                                         }
366                                 }
367                                 seqNameVector.push_back(seqName);
368
369                                 numSeqs += seqNameVector.size();
370                                 
371                                 int numSeqsInOTU = seqNameVector.size();
372                                 for(int j=0;j<numSeqsInOTU;j++){
373                                         string seqPairString = "";                              
374                                         for(int k=0;k<j;k++){
375                                                 if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
376                                                 else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
377                                                 seqPairSet.insert(seqPairString);
378                                         }
379                                 }
380                         }
381                         m->gobble(inputListFile);
382                         
383                         int numDists = (numSeqs * (numSeqs-1) / 2);
384
385                         ifstream columnFile;
386                         m->openInputFile(distFile, columnFile);
387                         string seqNameA, seqNameB, seqPairString;
388                         double distance;
389                         
390                         truePositives = 0;
391                         falsePositives = 0;
392                         trueNegatives = numDists;
393                         falseNegatives = 0;
394                         
395                         if(getCutoff == 1){
396                                 if(label != "unique"){
397                                         origCutoff = label;
398                                         convert(label, cutoff);
399                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
400                                 }
401                                 else{
402                                         origCutoff = "unique";
403                                         cutoff = 0.0000;
404                                 }
405                         }
406                         
407                         cout << label << endl;
408                         
409                         while(columnFile){
410                                 columnFile >> seqNameA >> seqNameB >> distance;
411                                 if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
412                                 else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
413
414                                 set<string>::iterator it = seqPairSet.find(seqPairString);
415                         
416                                 if(distance <= cutoff){
417                                         if(it != seqPairSet.end()){
418                                                 truePositives++;
419                                                 seqPairSet.erase(it);   
420                                         }
421                                         else{
422                                                 falseNegatives++;
423                                         }
424                                         trueNegatives--;
425                                 }
426                                 else if(it != seqPairSet.end()){        
427                                         falsePositives++;
428                                         trueNegatives--;
429                                         seqPairSet.erase(it);   
430                                 }
431                                 
432                                 m->gobble(columnFile);
433                         }
434                         falsePositives += seqPairSet.size();
435                         
436                         outputStatistics(label, origCutoff);
437                 }
438         }
439         catch(exception& e) {
440                 m->errorOut(e, "SensSpecCommand", "processColumn");
441                 exit(1);
442         }
443 }
444
445 //***************************************************************************************************************
446
447 void SensSpecCommand::setUpOutput(){
448         try{            
449                 ofstream sensSpecFile;
450                 m->openOutputFile(sensSpecFileName, sensSpecFile);
451                 
452                 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
453
454                 sensSpecFile.close();
455         }
456         catch(exception& e) {
457                 m->errorOut(e, "SensSpecCommand", "setUpOutput");
458                 exit(1);
459         }
460 }
461
462 //***************************************************************************************************************
463
464 void SensSpecCommand::outputStatistics(string label, string cutoff){
465         try{            
466                 double tp = (double) truePositives;
467                 double fp = (double) falsePositives;
468                 double tn = (double) trueNegatives;
469                 double fn = (double) falseNegatives;
470                 
471                 double p = tp + fn;
472                 double n = fp + tn;
473                 double pPrime = tp + fp;
474                 double nPrime = tn + fn;
475                 
476                 double sensitivity = tp / p;    
477                 double specificity = tn / n;
478                 double positivePredictiveValue = tp / pPrime;
479                 double negativePredictiveValue = tn / nPrime;
480                 double falseDiscoveryRate = fp / pPrime;
481                 
482                 double accuracy = (tp + tn) / (p + n);
483                 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime);  if(p == 0 || n == 0){   matthewsCorrCoef = 0;   }
484                 double f1Score = 2.0 * tp / (p + pPrime);
485                 
486                 
487                 if(p == 0)                      {       sensitivity = 0;        matthewsCorrCoef = 0;   }
488                 if(n == 0)                      {       specificity = 0;        matthewsCorrCoef = 0;   }
489                 if(p + n == 0)          {       accuracy = 0;                                                           }
490                 if(p + pPrime == 0)     {       f1Score = 0;                                                            }
491                 if(pPrime == 0)         {       positivePredictiveValue = 0;    falseDiscoveryRate = 0; matthewsCorrCoef = 0;   }
492                 if(nPrime == 0)         {       negativePredictiveValue = 0;    matthewsCorrCoef = 0;                                                   }
493                 
494                 ofstream sensSpecFile;
495                 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
496                 
497                 sensSpecFile << label << '\t' << cutoff << '\t';
498                 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
499                 sensSpecFile << setprecision(4);
500                 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
501                 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
502                 
503                 sensSpecFile.close();
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "SensSpecCommand", "outputStatistics");
507                 exit(1);
508         }
509 }
510
511 //***************************************************************************************************************
512
513
514