]> git.donarmstrong.com Git - mothur.git/blob - sensspeccommand.cpp
fixes while testing
[mothur.git] / sensspeccommand.cpp
1 /*
2  *  sensspeccommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/6/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sensspeccommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::getValidParameters(){   
14         try {
15                 string Array[] =  {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SensSpecCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SensSpecCommand::SensSpecCommand(){     
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["sensspec"] = tempOutNames;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> SensSpecCommand::getRequiredParameters(){        
39         try {
40                 string Array[] =  {"list","phylip","column","or"};
41                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "SensSpecCommand", "getRequiredParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 vector<string> SensSpecCommand::getRequiredFiles(){     
51         try {
52                 vector<string> myArray;
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "SensSpecCommand", "getRequiredFiles");
57                 exit(1);
58         }
59 }
60 //***************************************************************************************************************
61
62 SensSpecCommand::SensSpecCommand(string option)  {
63         try {
64                 
65                 abort = false;
66                 
67                 //allow user to run help
68                 if(option == "help") { help(); abort = true; }
69                 
70                 else {
71                         string temp;
72
73                         //valid paramters for this command
74                         string AlignArray[] =  {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
75                         
76                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string,string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                         map<string,string>::iterator it;
83                         
84                         //check to make sure all parameters are valid for command
85                         for (it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         
89                         //initialize outputTypes
90                         vector<string> tempOutNames;
91                         outputTypes["sensspec"] = tempOutNames;
92                         
93                         //if the user changes the input directory command factory will send this info to us in the output parameter 
94                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
95                         if (inputDir == "not found"){   inputDir = "";          }
96                         else {
97                                 string path;
98                                 it = parameters.find("list");
99                                 //user has given a template file
100                                 if(it != parameters.end()){ 
101                                         path = m->hasPath(it->second);
102                                         //if the user has not given a path then, add inputdir. else leave path alone.
103                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
104                                 }
105                                 
106                                 it = parameters.find("phylip");
107                                 //user has given a template file
108                                 if(it != parameters.end()){ 
109                                         path = m->hasPath(it->second);
110                                         //if the user has not given a path then, add inputdir. else leave path alone.
111                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
112                                 }
113                                 
114                                 it = parameters.find("column");
115                                 //user has given a template file
116                                 if(it != parameters.end()){ 
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
120                                 }
121                                 
122                                 it = parameters.find("name");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
128                                 }
129                                 
130                         }
131                         //check for required parameters
132                         listFile = validParameter.validFile(parameters, "list", true);
133                         if (listFile == "not found") { m->mothurOut("list is a required parameter for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
134                         else if (listFile == "not open") { abort = true; }      
135                         
136                         distFile = validParameter.validFile(parameters, "column", true);
137                         format = "column";
138                         if(distFile == "not found")     {
139                                 distFile = validParameter.validFile(parameters, "phylip", true);
140                                 format = "phylip";      
141                         }
142                         if(distFile == "not found") { m->mothurOut("either column or phylip are required for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
143                         else if (distFile == "not open") { abort = true; }      
144                 
145                         //if the user changes the output directory command factory will send this info to us in the output parameter 
146                         outputDir = validParameter.validFile(parameters, "outputdir", false);
147                         if (outputDir == "not found"){  
148                                 outputDir = ""; 
149                                 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it        
150                         }
151
152                         //check for optional parameter and set defaults
153                         // ...at some point should added some additional type checking...
154                         temp = validParameter.validFile(parameters, "hard", false);
155                         if (temp == "not found"){       hard = 0;       }
156                         else if(!m->isTrue(temp))       {       hard = 0;       }
157                         else if(m->isTrue(temp))        {       hard = 1;       }
158                         
159 //                      temp = validParameter.validFile(parameters, "name", true);
160 //                      if (temp == "not found")        {       nameFile = "";          }
161 //                      else if(temp == "not open")     {       abort = true;           }
162 //                      else                                            {       nameFile = temp;        }
163 //                      cout << "name:\t" << nameFile << endl;
164
165                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
166                         convert(temp, cutoff);  
167 //                      cout << cutoff << endl;
168                         
169                         temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
170                         convert(temp, precision);  
171 //                      cout << precision << endl;
172                         
173                         lineLabel = validParameter.validFile(parameters, "label", false);       if (lineLabel == "not found") { lineLabel = ""; }
174                         
175                         sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
176                 }
177         }
178         catch(exception& e) {
179                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
180                 exit(1);
181         }
182 }
183
184 //**********************************************************************************************************************
185
186 void SensSpecCommand::help(){
187         try {
188                 m->mothurOut("The sens.spec command reads a fastaFile and creates .....\n");
189
190                 
191                 
192                 m->mothurOut("Example sens.spec(...).\n");
193                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
194                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
195                 
196         }
197         catch(exception& e) {
198                 m->errorOut(e, "SensSpecCommand", "help");
199                 exit(1);
200         }
201 }
202
203 //***************************************************************************************************************
204
205 SensSpecCommand::~SensSpecCommand(){    /*      do nothing      */      }
206
207 //***************************************************************************************************************
208
209 int SensSpecCommand::execute(){
210         try{
211                 if (abort == true) { return 0; }
212
213                 setUpOutput();
214                 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
215                 if(format == "phylip")          {       processPhylip();        }
216                 else if(format == "column")     {       processColumn();        }
217                 
218                 
219                 return 0;       
220         }
221         catch(exception& e) {
222                 m->errorOut(e, "SensSpecCommand", "execute");
223                 exit(1);
224         }
225 }
226
227 //***************************************************************************************************************
228
229 void SensSpecCommand::processPhylip(){
230         try{
231                 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
232                 
233                 ifstream inputListFile;
234                 m->openInputFile(listFile, inputListFile);
235                 
236                 string origCutoff = "";
237                 bool getCutoff = 0;
238                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
239                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
240                 
241                 string label;
242                 int numOTUs;
243
244                 map<string, int> seqMap;
245                 string seqList;
246                 
247                 while(inputListFile){
248                         inputListFile >> label >> numOTUs;
249                         for(int i=0;i<numOTUs;i++){
250                                 inputListFile >> seqList;
251                                 int seqListLength = seqList.length();
252                                 string seqName = "";
253                                 for(int j=0;j<seqListLength;j++){
254                                         
255                                         if(seqList[j] == ','){
256                                                 seqMap[seqName] = i;
257                                                 seqName = "";
258                                         }
259                                         else{
260                                                 seqName += seqList[j];
261                                         }
262                                         
263                                 }
264                                 seqMap[seqName] = i;
265                         }
266                         m->gobble(inputListFile);
267                 
268                         int lNumSeqs = seqMap.size();
269                         int pNumSeqs = 0;
270
271                         ifstream phylipFile;
272                         m->openInputFile(distFile, phylipFile);
273                         phylipFile >> pNumSeqs;
274                         if(pNumSeqs != lNumSeqs){       cout << "numSeq mismatch!" << endl;     }
275                         
276                         string seqName;
277                         double distance;
278                         vector<int> otuIndices(lNumSeqs, -1);
279                                 
280                         truePositives = 0;
281                         falsePositives = 0;
282                         trueNegatives = 0;
283                         falseNegatives = 0;
284                         
285                         if(getCutoff == 1){
286                                 if(label != "unique"){
287                                         origCutoff = label;
288                                         convert(label, cutoff);
289                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
290                                 }
291                                 else{
292                                         origCutoff = "unique";
293                                         cutoff = 0.0000;
294                                 }
295                         }
296                                    
297                         cout << label << endl;
298                         
299                         for(int i=0;i<lNumSeqs;i++){
300                                 phylipFile >> seqName;
301                                 otuIndices[i] = seqMap[seqName];
302                                 
303                                 for(int j=0;j<i;j++){
304                                         phylipFile >> distance;
305                                         
306                                         if(distance <= cutoff){
307                                                 if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
308                                                 else                                                            {       falseNegatives++;       }
309                                         }
310                                         else{
311                                                 if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
312                                                 else                                                            {       trueNegatives++;        }
313                                         }
314                                 }
315                         }
316                         phylipFile.close();
317                         
318                         outputStatistics(label, origCutoff);
319                 }
320                 inputListFile.close();
321
322         }
323         catch(exception& e) {
324                 m->errorOut(e, "SensSpecCommand", "processPhylip");
325                 exit(1);
326         }
327 }
328
329 //***************************************************************************************************************
330
331 void SensSpecCommand::processColumn(){
332         try{            
333                 ifstream inputListFile;
334                 m->openInputFile(listFile, inputListFile);
335                 
336                 string origCutoff = "";
337                 bool getCutoff = 0;
338                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
339                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
340                 
341                 set<string> seqPairSet;
342                 
343                 string label, seqList;
344                 int numOTUs;
345                 int numSeqs;
346                 
347                 while(inputListFile){
348                         numSeqs = 0;
349                         
350                         inputListFile >> label >> numOTUs;
351                         for(int i=0;i<numOTUs;i++){
352                                 
353                                 vector<string> seqNameVector;
354                                 
355                                 inputListFile >> seqList;
356                                 int seqListLength = seqList.length();
357                                 string seqName = "";
358                                 for(int j=0;j<seqListLength;j++){
359                                         
360                                         if(seqList[j] == ','){
361                                                 seqNameVector.push_back(seqName);
362                                                 seqName = "";
363                                         }
364                                         else{
365                                                 seqName += seqList[j];
366                                         }
367                                 }
368                                 seqNameVector.push_back(seqName);
369
370                                 numSeqs += seqNameVector.size();
371                                 
372                                 int numSeqsInOTU = seqNameVector.size();
373                                 for(int j=0;j<numSeqsInOTU;j++){
374                                         string seqPairString = "";                              
375                                         for(int k=0;k<j;k++){
376                                                 if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
377                                                 else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
378                                                 seqPairSet.insert(seqPairString);
379                                         }
380                                 }
381                         }
382                         m->gobble(inputListFile);
383                         
384                         int numDists = (numSeqs * (numSeqs-1) / 2);
385
386                         ifstream columnFile;
387                         m->openInputFile(distFile, columnFile);
388                         string seqNameA, seqNameB, seqPairString;
389                         double distance;
390                         
391                         truePositives = 0;
392                         falsePositives = 0;
393                         trueNegatives = numDists;
394                         falseNegatives = 0;
395                         
396                         if(getCutoff == 1){
397                                 if(label != "unique"){
398                                         origCutoff = label;
399                                         convert(label, cutoff);
400                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
401                                 }
402                                 else{
403                                         origCutoff = "unique";
404                                         cutoff = 0.0000;
405                                 }
406                         }
407                         
408                         cout << label << endl;
409                         
410                         while(columnFile){
411                                 columnFile >> seqNameA >> seqNameB >> distance;
412                                 if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
413                                 else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
414
415                                 set<string>::iterator it = seqPairSet.find(seqPairString);
416                         
417                                 if(distance <= cutoff){
418                                         if(it != seqPairSet.end()){
419                                                 truePositives++;
420                                                 seqPairSet.erase(it);   
421                                         }
422                                         else{
423                                                 falseNegatives++;
424                                         }
425                                         trueNegatives--;
426                                 }
427                                 else if(it != seqPairSet.end()){        
428                                         falsePositives++;
429                                         trueNegatives--;
430                                         seqPairSet.erase(it);   
431                                 }
432                                 
433                                 m->gobble(columnFile);
434                         }
435                         falsePositives += seqPairSet.size();
436                         
437                         outputStatistics(label, origCutoff);
438                 }
439         }
440         catch(exception& e) {
441                 m->errorOut(e, "SensSpecCommand", "processColumn");
442                 exit(1);
443         }
444 }
445
446 //***************************************************************************************************************
447
448 void SensSpecCommand::setUpOutput(){
449         try{            
450                 ofstream sensSpecFile;
451                 m->openOutputFile(sensSpecFileName, sensSpecFile);
452                 
453                 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
454
455                 sensSpecFile.close();
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "SensSpecCommand", "setUpOutput");
459                 exit(1);
460         }
461 }
462
463 //***************************************************************************************************************
464
465 void SensSpecCommand::outputStatistics(string label, string cutoff){
466         try{            
467                 double tp = (double) truePositives;
468                 double fp = (double) falsePositives;
469                 double tn = (double) trueNegatives;
470                 double fn = (double) falseNegatives;
471                 
472                 double p = tp + fn;
473                 double n = fp + tn;
474                 double pPrime = tp + fp;
475                 double nPrime = tn + fn;
476                 
477                 double sensitivity = tp / p;    
478                 double specificity = tn / n;
479                 double positivePredictiveValue = tp / pPrime;
480                 double negativePredictiveValue = tn / nPrime;
481                 double falseDiscoveryRate = fp / pPrime;
482                 
483                 double accuracy = (tp + tn) / (p + n);
484                 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime);  if(p == 0 || n == 0){   matthewsCorrCoef = 0;   }
485                 double f1Score = 2.0 * tp / (p + pPrime);
486                 
487                 
488                 if(p == 0)                      {       sensitivity = 0;        matthewsCorrCoef = 0;   }
489                 if(n == 0)                      {       specificity = 0;        matthewsCorrCoef = 0;   }
490                 if(p + n == 0)          {       accuracy = 0;                                                           }
491                 if(p + pPrime == 0)     {       f1Score = 0;                                                            }
492                 if(pPrime == 0)         {       positivePredictiveValue = 0;    falseDiscoveryRate = 0; matthewsCorrCoef = 0;   }
493                 if(nPrime == 0)         {       negativePredictiveValue = 0;    matthewsCorrCoef = 0;                                                   }
494                 
495                 ofstream sensSpecFile;
496                 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
497                 
498                 sensSpecFile << label << '\t' << cutoff << '\t';
499                 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
500                 sensSpecFile << setprecision(4);
501                 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
502                 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
503                 
504                 sensSpecFile.close();
505         }
506         catch(exception& e) {
507                 m->errorOut(e, "SensSpecCommand", "outputStatistics");
508                 exit(1);
509         }
510 }
511
512 //***************************************************************************************************************
513
514
515