]> git.donarmstrong.com Git - mothur.git/blob - sensspeccommand.cpp
pat's updates on 7/19/10
[mothur.git] / sensspeccommand.cpp
1 /*
2  *  sensspeccommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/6/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sensspeccommand.h"
11
12 //***************************************************************************************************************
13
14 SensSpecCommand::SensSpecCommand(string option)  {
15         try {
16                 
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         string temp;
24
25                         //valid paramters for this command
26                         string AlignArray[] =  {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
27                         
28                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
29                         
30                         OptionParser parser(option);
31                         map<string,string> parameters = parser.getParameters();
32                         
33                         ValidParameters validParameter;
34                         map<string,string>::iterator it;
35                         
36                         //check to make sure all parameters are valid for command
37                         for (it = parameters.begin(); it != parameters.end(); it++) { 
38                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
39                         }
40                         
41                         //if the user changes the input directory command factory will send this info to us in the output parameter 
42                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
43                         if (inputDir == "not found"){   inputDir = "";          }
44                         else {
45                                 string path;
46                                 it = parameters.find("list");
47                                 //user has given a template file
48                                 if(it != parameters.end()){ 
49                                         path = hasPath(it->second);
50                                         //if the user has not given a path then, add inputdir. else leave path alone.
51                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
52                                 }
53                                 
54                                 it = parameters.find("phylip");
55                                 //user has given a template file
56                                 if(it != parameters.end()){ 
57                                         path = hasPath(it->second);
58                                         //if the user has not given a path then, add inputdir. else leave path alone.
59                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
60                                 }
61                                 
62                                 it = parameters.find("column");
63                                 //user has given a template file
64                                 if(it != parameters.end()){ 
65                                         path = hasPath(it->second);
66                                         //if the user has not given a path then, add inputdir. else leave path alone.
67                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
68                                 }
69                                 
70                                 it = parameters.find("name");
71                                 //user has given a template file
72                                 if(it != parameters.end()){ 
73                                         path = hasPath(it->second);
74                                         //if the user has not given a path then, add inputdir. else leave path alone.
75                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
76                                 }
77                                 
78                         }
79                         //check for required parameters
80                         listFile = validParameter.validFile(parameters, "list", true);
81                         if (listFile == "not found") { m->mothurOut("list is a required parameter for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
82                         else if (listFile == "not open") { abort = true; }      
83                         
84                         distFile = validParameter.validFile(parameters, "column", true);
85                         format = "column";
86                         if(distFile == "not found")     {
87                                 distFile = validParameter.validFile(parameters, "phylip", true);
88                                 format = "phylip";      
89                         }
90                         if(distFile == "not found") { m->mothurOut("either column or phylip are required for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
91                         else if (distFile == "not open") { abort = true; }      
92                 
93                         //if the user changes the output directory command factory will send this info to us in the output parameter 
94                         outputDir = validParameter.validFile(parameters, "outputdir", false);
95                         if (outputDir == "not found"){  
96                                 outputDir = ""; 
97                                 outputDir += hasPath(listFile); //if user entered a file with a path then preserve it   
98                         }
99
100                         //check for optional parameter and set defaults
101                         // ...at some point should added some additional type checking...
102                         temp = validParameter.validFile(parameters, "hard", false);
103                         if (temp == "not found"){       hard = 0;       }
104                         else if(!isTrue(temp))  {       hard = 0;       }
105                         else if(isTrue(temp))   {       hard = 1;       }
106                         
107 //                      temp = validParameter.validFile(parameters, "name", true);
108 //                      if (temp == "not found")        {       nameFile = "";          }
109 //                      else if(temp == "not open")     {       abort = true;           }
110 //                      else                                            {       nameFile = temp;        }
111 //                      cout << "name:\t" << nameFile << endl;
112
113                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
114                         convert(temp, cutoff);  
115 //                      cout << cutoff << endl;
116                         
117                         temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
118                         convert(temp, precision);  
119 //                      cout << precision << endl;
120                         
121                         lineLabel = validParameter.validFile(parameters, "label", false);       if (lineLabel == "not found") { lineLabel = ""; }
122                         
123                         sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
124                 }
125         }
126         catch(exception& e) {
127                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
128                 exit(1);
129         }
130 }
131
132 //**********************************************************************************************************************
133
134 void SensSpecCommand::help(){
135         try {
136                 m->mothurOut("The sens.spec command reads a fastaFile and creates .....\n");
137
138                 
139                 
140                 m->mothurOut("Example sens.spec(...).\n");
141                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
142                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
143                 
144         }
145         catch(exception& e) {
146                 m->errorOut(e, "SensSpecCommand", "help");
147                 exit(1);
148         }
149 }
150
151 //***************************************************************************************************************
152
153 SensSpecCommand::~SensSpecCommand(){    /*      do nothing      */      }
154
155 //***************************************************************************************************************
156
157 int SensSpecCommand::execute(){
158         try{
159                 if (abort == true) { return 0; }
160
161                 setUpOutput();
162                 if(format == "phylip")          {       processPhylip();        }
163                 else if(format == "column")     {       processColumn();        }
164                 
165                 
166                 return 0;       
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "SensSpecCommand", "execute");
170                 exit(1);
171         }
172 }
173
174 //***************************************************************************************************************
175
176 void SensSpecCommand::processPhylip(){
177         try{
178                 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
179                 
180                 ifstream inputListFile;
181                 openInputFile(listFile, inputListFile);
182                 
183                 string origCutoff = "";
184                 bool getCutoff = 0;
185                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
186                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
187                 
188                 string label;
189                 int numOTUs;
190
191                 map<string, int> seqMap;
192                 string seqList;
193                 
194                 while(inputListFile){
195                         inputListFile >> label >> numOTUs;
196                         for(int i=0;i<numOTUs;i++){
197                                 inputListFile >> seqList;
198                                 int seqListLength = seqList.length();
199                                 string seqName = "";
200                                 for(int j=0;j<seqListLength;j++){
201                                         
202                                         if(seqList[j] == ','){
203                                                 seqMap[seqName] = i;
204                                                 seqName = "";
205                                         }
206                                         else{
207                                                 seqName += seqList[j];
208                                         }
209                                         
210                                 }
211                                 seqMap[seqName] = i;
212                         }
213                         gobble(inputListFile);
214                 
215                         int lNumSeqs = seqMap.size();
216                         int pNumSeqs = 0;
217
218                         ifstream phylipFile;
219                         openInputFile(distFile, phylipFile);
220                         phylipFile >> pNumSeqs;
221                         if(pNumSeqs != lNumSeqs){       cout << "numSeq mismatch!" << endl;     }
222                         
223                         string seqName;
224                         double distance;
225                         vector<int> otuIndices(lNumSeqs, -1);
226                                 
227                         truePositives = 0;
228                         falsePositives = 0;
229                         trueNegatives = 0;
230                         falseNegatives = 0;
231                         
232                         if(getCutoff == 1){
233                                 if(label != "unique"){
234                                         origCutoff = label;
235                                         convert(label, cutoff);
236                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
237                                 }
238                                 else{
239                                         origCutoff = "unique";
240                                         cutoff = 0.0000;
241                                 }
242                         }
243                                    
244                         cout << label << endl;
245                         
246                         for(int i=0;i<lNumSeqs;i++){
247                                 phylipFile >> seqName;
248                                 otuIndices[i] = seqMap[seqName];
249                                 
250                                 for(int j=0;j<i;j++){
251                                         phylipFile >> distance;
252                                         
253                                         if(distance <= cutoff){
254                                                 if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
255                                                 else                                                            {       falseNegatives++;       }
256                                         }
257                                         else{
258                                                 if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
259                                                 else                                                            {       trueNegatives++;        }
260                                         }
261                                 }
262                         }
263                         phylipFile.close();
264                         
265                         outputStatistics(label, origCutoff);
266                 }
267                 inputListFile.close();
268
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "SensSpecCommand", "processPhylip");
272                 exit(1);
273         }
274 }
275
276 //***************************************************************************************************************
277
278 void SensSpecCommand::processColumn(){
279         try{            
280                 ifstream inputListFile;
281                 openInputFile(listFile, inputListFile);
282                 
283                 string origCutoff = "";
284                 bool getCutoff = 0;
285                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
286                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
287                 
288                 set<string> seqPairSet;
289                 
290                 string label, seqList;
291                 int numOTUs;
292                 int numSeqs;
293                 
294                 while(inputListFile){
295                         numSeqs = 0;
296                         
297                         inputListFile >> label >> numOTUs;
298                         for(int i=0;i<numOTUs;i++){
299                                 
300                                 vector<string> seqNameVector;
301                                 
302                                 inputListFile >> seqList;
303                                 int seqListLength = seqList.length();
304                                 string seqName = "";
305                                 for(int j=0;j<seqListLength;j++){
306                                         
307                                         if(seqList[j] == ','){
308                                                 seqNameVector.push_back(seqName);
309                                                 seqName = "";
310                                         }
311                                         else{
312                                                 seqName += seqList[j];
313                                         }
314                                 }
315                                 seqNameVector.push_back(seqName);
316
317                                 numSeqs += seqNameVector.size();
318                                 
319                                 int numSeqsInOTU = seqNameVector.size();
320                                 for(int j=0;j<numSeqsInOTU;j++){
321                                         string seqPairString = "";                              
322                                         for(int k=0;k<j;k++){
323                                                 if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
324                                                 else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
325                                                 seqPairSet.insert(seqPairString);
326                                         }
327                                 }
328                         }
329                         gobble(inputListFile);
330                         
331                         int numDists = (numSeqs * (numSeqs-1) / 2);
332
333                         ifstream columnFile;
334                         openInputFile(distFile, columnFile);
335                         string seqNameA, seqNameB, seqPairString;
336                         double distance;
337                         
338                         truePositives = 0;
339                         falsePositives = 0;
340                         trueNegatives = numDists;
341                         falseNegatives = 0;
342                         
343                         if(getCutoff == 1){
344                                 if(label != "unique"){
345                                         origCutoff = label;
346                                         convert(label, cutoff);
347                                         if(hard == 0){  cutoff += (0.49 / double(precision));   }               
348                                 }
349                                 else{
350                                         origCutoff = "unique";
351                                         cutoff = 0.0000;
352                                 }
353                         }
354                         
355                         cout << label << endl;
356                         
357                         while(columnFile){
358                                 columnFile >> seqNameA >> seqNameB >> distance;
359                                 if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
360                                 else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
361
362                                 set<string>::iterator it = seqPairSet.find(seqPairString);
363                         
364                                 if(distance <= cutoff){
365                                         if(it != seqPairSet.end()){
366                                                 truePositives++;
367                                                 seqPairSet.erase(it);   
368                                         }
369                                         else{
370                                                 falseNegatives++;
371                                         }
372                                         trueNegatives--;
373                                 }
374                                 else if(it != seqPairSet.end()){        
375                                         falsePositives++;
376                                         trueNegatives--;
377                                         seqPairSet.erase(it);   
378                                 }
379                                 
380                                 gobble(columnFile);
381                         }
382                         falsePositives += seqPairSet.size();
383                         
384                         outputStatistics(label, origCutoff);
385                 }
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "SensSpecCommand", "processColumn");
389                 exit(1);
390         }
391 }
392
393 //***************************************************************************************************************
394
395 void SensSpecCommand::setUpOutput(){
396         try{            
397                 ofstream sensSpecFile;
398                 openOutputFile(sensSpecFileName, sensSpecFile);
399                 
400                 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
401
402                 sensSpecFile.close();
403         }
404         catch(exception& e) {
405                 m->errorOut(e, "SensSpecCommand", "setUpOutput");
406                 exit(1);
407         }
408 }
409
410 //***************************************************************************************************************
411
412 void SensSpecCommand::outputStatistics(string label, string cutoff){
413         try{            
414                 double tp = (double) truePositives;
415                 double fp = (double) falsePositives;
416                 double tn = (double) trueNegatives;
417                 double fn = (double) falseNegatives;
418                 
419                 double p = tp + fn;
420                 double n = fp + tn;
421                 double pPrime = tp + fp;
422                 double nPrime = tn + fn;
423                 
424                 double sensitivity = tp / p;    
425                 double specificity = tn / n;
426                 double positivePredictiveValue = tp / pPrime;
427                 double negativePredictiveValue = tn / nPrime;
428                 double falseDiscoveryRate = fp / pPrime;
429                 
430                 double accuracy = (tp + tn) / (p + n);
431                 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime);  if(p == 0 || n == 0){   matthewsCorrCoef = 0;   }
432                 double f1Score = 2.0 * tp / (p + pPrime);
433                 
434                 
435                 if(p == 0)                      {       sensitivity = 0;        matthewsCorrCoef = 0;   }
436                 if(n == 0)                      {       specificity = 0;        matthewsCorrCoef = 0;   }
437                 if(p + n == 0)          {       accuracy = 0;                                                           }
438                 if(p + pPrime == 0)     {       f1Score = 0;                                                            }
439                 if(pPrime == 0)         {       positivePredictiveValue = 0;    falseDiscoveryRate = 0; matthewsCorrCoef = 0;   }
440                 if(nPrime == 0)         {       negativePredictiveValue = 0;    matthewsCorrCoef = 0;                                                   }
441                 
442                 ofstream sensSpecFile;
443                 openOutputFileAppend(sensSpecFileName, sensSpecFile);
444                 
445                 sensSpecFile << label << '\t' << cutoff << '\t';
446                 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
447                 sensSpecFile << setprecision(4);
448                 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
449                 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
450                 
451                 sensSpecFile.close();
452         }
453         catch(exception& e) {
454                 m->errorOut(e, "SensSpecCommand", "outputStatistics");
455                 exit(1);
456         }
457 }
458
459 //***************************************************************************************************************