]> git.donarmstrong.com Git - mothur.git/blob - sensspeccommand.cpp
changed random forest output filename
[mothur.git] / sensspeccommand.cpp
1 /*
2  *  sensspeccommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/6/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sensspeccommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::setParameters(){        
14         try {
15                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist);
16                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip);
17                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn);
18                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
19                 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
21                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "SensSpecCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string SensSpecCommand::getHelpString(){        
36         try {
37                 string helpString = "";
38                 helpString += "The sens.spec command....\n";
39                 return helpString;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "SensSpecCommand", "getHelpString");
43                 exit(1);
44         }
45 }
46 //**********************************************************************************************************************
47 string SensSpecCommand::getOutputPattern(string type) {
48     try {
49         string pattern = "";
50         
51         if (type == "sensspec") {  pattern = "[filename],sensspec"; } 
52         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
53         
54         return pattern;
55     }
56     catch(exception& e) {
57         m->errorOut(e, "SensSpecCommand", "getOutputPattern");
58         exit(1);
59     }
60 }
61 //**********************************************************************************************************************
62 SensSpecCommand::SensSpecCommand(){     
63         try {
64                 abort = true; calledHelp = true; 
65                 setParameters();
66                 vector<string> tempOutNames;
67                 outputTypes["sensspec"] = tempOutNames;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
71                 exit(1);
72         }
73 }
74 //***************************************************************************************************************
75
76 SensSpecCommand::SensSpecCommand(string option)  {
77         try {
78                 
79                 abort = false; calledHelp = false;  
80                 allLines = 1;
81                 
82                 //allow user to run help
83                 if(option == "help") { help(); abort = true; calledHelp = true; }
84                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
85                 
86                 else {
87                         string temp;
88                         
89                         vector<string> myArray = setParameters();
90                         
91                         OptionParser parser(option);
92                         map<string,string> parameters = parser.getParameters();
93                         
94                         ValidParameters validParameter;
95                         map<string,string>::iterator it;
96                         
97                         //check to make sure all parameters are valid for command
98                         for (it = parameters.begin(); it != parameters.end(); it++) { 
99                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
100                         }
101                         
102                         //initialize outputTypes
103                         vector<string> tempOutNames;
104                         outputTypes["sensspec"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("list");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("phylip");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
125                                 }
126                                 
127                                 it = parameters.find("column");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
133                                 }                               
134                         }
135                         //check for required parameters
136                         listFile = validParameter.validFile(parameters, "list", true);
137                         if (listFile == "not found") {          
138                                 listFile = m->getListFile(); 
139                                 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
140                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
141                         }
142                         else if (listFile == "not open") { abort = true; }      
143                         else { m->setListFile(listFile); }
144                         
145                         phylipfile = validParameter.validFile(parameters, "phylip", true);
146                         if (phylipfile == "not found") { phylipfile = "";  }
147                         else if (phylipfile == "not open") { abort = true; }    
148                         else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);  }
149                         
150                         columnfile = validParameter.validFile(parameters, "column", true);
151                         if (columnfile == "not found") { columnfile = ""; }
152                         else if (columnfile == "not open") { abort = true; }    
153                         else { distFile = columnfile; format = "column";   m->setColumnFile(columnfile); }
154                         
155                         if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
156                                 //give priority to column, then phylip
157                                 columnfile = m->getColumnFile(); 
158                                 if (columnfile != "") {  distFile = columnfile; format = "column";  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
159                                 else { 
160                                         phylipfile = m->getPhylipFile(); 
161                                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
162                                         else { 
163                                                 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); 
164                                                 abort = true;
165                                         }
166                                 }
167                         }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
168                         
169                         
170                         //if the user changes the output directory command factory will send this info to us in the output parameter 
171                         outputDir = validParameter.validFile(parameters, "outputdir", false);
172                         if (outputDir == "not found"){  
173                                 outputDir = ""; 
174                                 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it        
175                         }
176
177                         //check for optional parameter and set defaults
178                         // ...at some point should added some additional type checking...
179                         temp = validParameter.validFile(parameters, "hard", false);
180                         if (temp == "not found"){       hard = 0;       }
181                         else if(!m->isTrue(temp))       {       hard = 0;       }
182                         else if(m->isTrue(temp))        {       hard = 1;       }
183                         
184                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
185                         m->mothurConvert(temp, cutoff);  
186 //                      cout << cutoff << endl;
187                         
188                         temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
189                         m->mothurConvert(temp, precision);  
190 //                      cout << precision << endl;
191                         
192                         string label = validParameter.validFile(parameters, "label", false);                    
193                         if (label == "not found") { label = ""; }
194                         else { 
195                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
196                                 else { allLines = 1;  }
197                         }
198                         
199             map<string, string> variables; 
200             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile));
201                         sensSpecFileName = getOutputFileName("sensspec",variables);
202                 }
203         }
204         catch(exception& e) {
205                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
206                 exit(1);
207         }
208 }
209 //***************************************************************************************************************
210
211 int SensSpecCommand::execute(){
212         try{
213                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
214         
215         int startTime = time(NULL);
216         
217         //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file.
218         string newListFile = preProcessList();
219         if (newListFile != "") { listFile = newListFile; }
220         
221                 setUpOutput();
222                 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
223                 if(format == "phylip")          {       processPhylip();        }
224                 else if(format == "column")     {       processColumn();        }
225                 
226         //remove temp file if created
227         if (newListFile != "") { m->mothurRemove(newListFile); }
228         
229                 if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
230         
231         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine();
232         
233                 m->mothurOutEndLine();
234                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
235                 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();  
236                 m->mothurOutEndLine();
237                 
238                 
239                 return 0;       
240         }
241         catch(exception& e) {
242                 m->errorOut(e, "SensSpecCommand", "execute");
243                 exit(1);
244         }
245 }
246 //***************************************************************************************************************
247 bool SensSpecCommand::testFile(){
248         try{
249         ifstream fileHandle;
250         m->openInputFile(phylipfile, fileHandle);
251         
252         bool square = false;
253         string numTest, name;
254         fileHandle >> numTest >> name;
255
256         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
257
258         char d;
259         while((d=fileHandle.get()) != EOF){
260             if(isalnum(d)){
261                 square = true;
262                 break;
263             }
264             if(d == '\n'){
265                 square = false;
266                 break;
267             }
268         }
269         fileHandle.close();
270         
271         return square;
272     }
273         catch(exception& e) {
274                 m->errorOut(e, "SensSpecCommand", "testFile");
275                 exit(1);
276         }
277 }
278
279 //***************************************************************************************************************
280
281 int SensSpecCommand::processPhylip(){
282         try{
283                 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
284         square = testFile();
285                 string origCutoff = "";
286                 bool getCutoff = 0;
287                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
288                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
289                 
290                 map<string, int> seqMap;
291                 string seqList;
292                 
293                 InputData input(listFile, "list");
294                 ListVector* list = input.getListVector();
295                 string lastLabel = list->getLabel();
296                 
297                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
298                 set<string> processedLabels;
299                 set<string> userLabels = labels;
300                 
301                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
302                         
303                         if(m->control_pressed){
304                 for (int i = 0; i < outputNames.size(); i++){   m->mothurRemove(outputNames[i]);  }  delete list;  return 0;
305             }
306                         
307                         if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
308                                 
309                                 processedLabels.insert(list->getLabel());
310                                 userLabels.erase(list->getLabel());
311                                 
312                                 //process
313                                 fillSeqMap(seqMap, list);
314                                 process(seqMap, list->getLabel(), getCutoff, origCutoff);
315                         }
316                         
317                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
318                                 string saveLabel = list->getLabel();
319                                 
320                                 delete list;
321                                 list = input.getListVector(lastLabel);
322                                 
323                                 processedLabels.insert(list->getLabel());
324                                 userLabels.erase(list->getLabel());
325                                 
326                                 //process
327                                 fillSeqMap(seqMap, list);
328                                 process(seqMap, list->getLabel(), getCutoff, origCutoff);
329                                 
330                                 //restore real lastlabel to save below
331                                 list->setLabel(saveLabel);
332                         }               
333                         
334                         lastLabel = list->getLabel();                   
335                         
336                         delete list;
337                         list = input.getListVector();
338                 }
339                 
340                 
341                 //output error messages about any remaining user labels
342                 set<string>::iterator it;
343                 bool needToRun = false;
344                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
345                         m->mothurOut("Your file does not include the label " + *it); 
346                         if (processedLabels.count(lastLabel) != 1) {
347                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
348                                 needToRun = true;
349                         }else {
350                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
351                         }
352                 }
353                 
354                 //run last label if you need to
355                 if (needToRun == true)  {
356                         if (list != NULL) {     delete list;    }
357                         list = input.getListVector(lastLabel);
358                         
359                         //process
360                         fillSeqMap(seqMap, list);
361                         process(seqMap, list->getLabel(), getCutoff, origCutoff);
362                         
363                         delete list;
364                 }
365                 
366                 return 0;
367         }
368         catch(exception& e) {
369                 m->errorOut(e, "SensSpecCommand", "processPhylip");
370                 exit(1);
371         }
372 }
373
374 //***************************************************************************************************************
375
376 int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
377         try {
378                 //for each otu
379                 for(int i=0;i<list->getNumBins();i++){
380                         
381                         if (m->control_pressed) { return 0; }
382                         
383                         string seqList = list->get(i);
384                         int seqListLength = seqList.length();
385                         string seqName = "";
386                         
387                         //parse bin by name, mapping each name to its otu number
388                         for(int j=0;j<seqListLength;j++){
389                                 
390                                 if(seqList[j] == ','){
391                                         seqMap[seqName] = i;
392                                         seqName = "";
393                                 }
394                                 else{
395                                         seqName += seqList[j];
396                                 }
397                                 
398                         }
399                         seqMap[seqName] = i;
400                 }
401                 
402                 return 0;
403         }
404         catch(exception& e) {
405                 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
406                 exit(1);
407         }
408 }
409 //***************************************************************************************************************
410 int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
411         try {
412                 int numSeqs = 0;
413                 
414                 //for each otu
415                 for(int i=0;i<list->getNumBins();i++){
416                         
417                         if (m->control_pressed) { return 0; }
418                         
419                         vector<string> seqNameVector;
420                         string bin = list->get(i);
421                         m->splitAtComma(bin, seqNameVector);
422                         
423                         numSeqs += seqNameVector.size();
424                         
425                         for(int j=0;j<seqNameVector.size();j++){
426                                 string seqPairString = "";                              
427                                 for(int k=0;k<j;k++){
428                                         if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
429                                         else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
430                                         seqPairSet.insert(seqPairString);
431                                 }
432                         }
433                 }
434                 
435                 return numSeqs;
436         }
437         catch(exception& e) {
438                 m->errorOut(e, "SensSpecCommand", "fillSeqPairSet");
439                 exit(1);
440         }
441 }
442 //***************************************************************************************************************
443 int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
444         try {
445                                                 
446                 int lNumSeqs = seqMap.size();
447                 int pNumSeqs = 0;
448                 
449                 ifstream phylipFile;
450                 m->openInputFile(distFile, phylipFile);
451                 phylipFile >> pNumSeqs;
452                 if(pNumSeqs != lNumSeqs){       m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ }
453                 
454                 string seqName;
455                 double distance;
456                 vector<int> otuIndices(lNumSeqs, -1);
457                 
458                 truePositives = 0;
459                 falsePositives = 0;
460                 trueNegatives = 0;
461                 falseNegatives = 0;
462                 
463                 if(getCutoff == 1){
464                         if(label != "unique"){
465                                 origCutoff = label;
466                                 convert(label, cutoff);
467                                 if(hard == 0){  cutoff += (0.49 / double(precision));   }               
468                         }
469                         else{
470                                 origCutoff = "unique";
471                                 cutoff = 0.0000;
472                         }
473                 }
474                 
475                 m->mothurOut(label); m->mothurOutEndLine();
476                 
477                 for(int i=0;i<pNumSeqs;i++){
478                         
479                         if (m->control_pressed) { return 0; }
480                         
481                         phylipFile >> seqName;
482                         otuIndices[i] = seqMap[seqName];
483                         
484                         for(int j=0;j<i;j++){
485                                 phylipFile >> distance;
486                                 
487                                 if(distance <= cutoff){
488                                         if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
489                                         else                                                            {       falseNegatives++;       }
490                                 }
491                                 else{
492                                         if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
493                                         else                                                            {       trueNegatives++;        }
494                                 }
495                         }
496             
497             if (square) { m->getline(phylipFile); } //get rest of line - redundant distances
498             m->gobble(phylipFile);
499                 }
500                 phylipFile.close();
501                 
502                 outputStatistics(label, origCutoff);
503                 
504                 return 0;
505         }
506         catch(exception& e) {
507                 m->errorOut(e, "SensSpecCommand", "process");
508                 exit(1);
509         }
510 }
511 //***************************************************************************************************************
512 int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
513         try {
514                 int numDists = (numSeqs * (numSeqs-1) / 2);
515                 
516                 ifstream columnFile;
517                 m->openInputFile(distFile, columnFile);
518                 string seqNameA, seqNameB, seqPairString;
519                 double distance;
520                 
521                 truePositives = 0;
522                 falsePositives = 0;
523                 trueNegatives = numDists;
524                 falseNegatives = 0;
525                 
526                 if(getCutoff == 1){
527                         if(label != "unique"){
528                                 origCutoff = label;
529                                 convert(label, cutoff);
530                                 if(hard == 0){  cutoff += (0.49 / double(precision));   }               
531                         }
532                         else{
533                                 origCutoff = "unique";
534                                 cutoff = 0.0000;
535                         }
536                 }
537                 
538                 m->mothurOut(label); m->mothurOutEndLine();
539                 
540                 while(columnFile){
541                         columnFile >> seqNameA >> seqNameB >> distance;
542                         if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
543                         else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
544                         
545                         set<string>::iterator it = seqPairSet.find(seqPairString);
546                         
547                         if(distance <= cutoff){
548                                 if(it != seqPairSet.end()){
549                                         truePositives++;
550                                         seqPairSet.erase(it);   
551                                 }
552                                 else{
553                                         falseNegatives++;
554                                 }
555                                 trueNegatives--;
556                         }
557                         else if(it != seqPairSet.end()){        
558                                 falsePositives++;
559                                 trueNegatives--;
560                                 seqPairSet.erase(it);   
561                         }
562                         
563                         m->gobble(columnFile);
564                 }
565                 falsePositives += seqPairSet.size();
566                 
567                 outputStatistics(label, origCutoff);
568                 
569                                 
570                 return 0;
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "SensSpecCommand", "process");
574                 exit(1);
575         }
576 }
577 //***************************************************************************************************************
578
579 int SensSpecCommand::processColumn(){
580         try{            
581                 string origCutoff = "";
582                 bool getCutoff = 0;
583                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
584                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
585                 
586                 set<string> seqPairSet;
587                 int numSeqs = 0;
588                 
589                 InputData input(listFile, "list");
590                 ListVector* list = input.getListVector();
591                 string lastLabel = list->getLabel();
592                 
593                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
594                 set<string> processedLabels;
595                 set<string> userLabels = labels;
596                 
597                 
598                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
599                         
600                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  delete list;  return 0;  }
601                         
602                         if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
603                                 
604                                 processedLabels.insert(list->getLabel());
605                                 userLabels.erase(list->getLabel());
606                                 
607                                 //process
608                                 numSeqs = fillSeqPairSet(seqPairSet, list);
609                                 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
610                         }
611                         
612                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
613                                 string saveLabel = list->getLabel();
614                                 
615                                 delete list;
616                                 list = input.getListVector(lastLabel);
617                                 
618                                 processedLabels.insert(list->getLabel());
619                                 userLabels.erase(list->getLabel());
620                                 
621                                 //process
622                                 numSeqs = fillSeqPairSet(seqPairSet, list);
623                                 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
624                                 
625                                 //restore real lastlabel to save below
626                                 list->setLabel(saveLabel);
627                         }               
628                         
629                         lastLabel = list->getLabel();                   
630                         
631                         delete list;
632                         list = input.getListVector();
633                 }
634                 
635                 
636                 //output error messages about any remaining user labels
637                 set<string>::iterator it;
638                 bool needToRun = false;
639                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
640                         m->mothurOut("Your file does not include the label " + *it); 
641                         if (processedLabels.count(lastLabel) != 1) {
642                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
643                                 needToRun = true;
644                         }else {
645                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
646                         }
647                 }
648                 
649                 //run last label if you need to
650                 if (needToRun == true)  {
651                         if (list != NULL) {     delete list;    }
652                         list = input.getListVector(lastLabel);
653                         
654                         //process
655                         numSeqs = fillSeqPairSet(seqPairSet, list);
656                         delete list;
657                         process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
658                 }
659                 
660                 return 0;
661         }
662         catch(exception& e) {
663                 m->errorOut(e, "SensSpecCommand", "processColumn");
664                 exit(1);
665         }
666 }
667
668 //***************************************************************************************************************
669
670 void SensSpecCommand::setUpOutput(){
671         try{            
672                 ofstream sensSpecFile;
673                 m->openOutputFile(sensSpecFileName, sensSpecFile);
674                 
675                 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
676
677                 sensSpecFile.close();
678         }
679         catch(exception& e) {
680                 m->errorOut(e, "SensSpecCommand", "setUpOutput");
681                 exit(1);
682         }
683 }
684
685 //***************************************************************************************************************
686
687 void SensSpecCommand::outputStatistics(string label, string cutoff){
688         try{            
689                 double tp = (double) truePositives;
690                 double fp = (double) falsePositives;
691                 double tn = (double) trueNegatives;
692                 double fn = (double) falseNegatives;
693                 
694                 double p = tp + fn;
695                 double n = fp + tn;
696                 double pPrime = tp + fp;
697                 double nPrime = tn + fn;
698                 
699                 double sensitivity = tp / p;    
700                 double specificity = tn / n;
701                 double positivePredictiveValue = tp / pPrime;
702                 double negativePredictiveValue = tn / nPrime;
703                 double falseDiscoveryRate = fp / pPrime;
704                 
705                 double accuracy = (tp + tn) / (p + n);
706                 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime);  if(p == 0 || n == 0){   matthewsCorrCoef = 0;   }
707                 double f1Score = 2.0 * tp / (p + pPrime);
708                 
709                 
710                 if(p == 0)                      {       sensitivity = 0;        matthewsCorrCoef = 0;   }
711                 if(n == 0)                      {       specificity = 0;        matthewsCorrCoef = 0;   }
712                 if(p + n == 0)          {       accuracy = 0;                                                           }
713                 if(p + pPrime == 0)     {       f1Score = 0;                                                            }
714                 if(pPrime == 0)         {       positivePredictiveValue = 0;    falseDiscoveryRate = 0; matthewsCorrCoef = 0;   }
715                 if(nPrime == 0)         {       negativePredictiveValue = 0;    matthewsCorrCoef = 0;                                                   }
716                 
717                 ofstream sensSpecFile;
718                 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
719                 
720                 sensSpecFile << label << '\t' << cutoff << '\t';
721                 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
722                 sensSpecFile << setprecision(4);
723                 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
724                 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
725                 
726                 sensSpecFile.close();
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "SensSpecCommand", "outputStatistics");
730                 exit(1);
731         }
732 }
733 //***************************************************************************************************************
734
735 string SensSpecCommand::preProcessList(){
736     try {
737         set<string> uniqueNames;
738         //get unique names from distance file
739         if (format == "phylip") {
740             
741             ifstream phylipFile;
742             m->openInputFile(distFile, phylipFile);
743             string numTest;
744             int pNumSeqs;
745                         phylipFile >> numTest; m->gobble(phylipFile);
746                         
747                         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
748             else {
749                 m->mothurConvert(numTest, pNumSeqs);
750             }
751             
752             string seqName;
753             for(int i=0;i<pNumSeqs;i++){
754                 if (m->control_pressed) { return ""; }
755                 phylipFile >> seqName;  m->getline(phylipFile);  m->gobble(phylipFile);
756                 uniqueNames.insert(seqName);
757             }
758             phylipFile.close();
759         }else {
760             ifstream columnFile;
761             m->openInputFile(distFile, columnFile);
762             string seqNameA, seqNameB;
763             double distance;
764             
765             while(columnFile){
766                 if (m->control_pressed) { return ""; }
767                 columnFile >> seqNameA >> seqNameB >> distance;
768                 uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB);
769                 m->gobble(columnFile);
770             }
771             columnFile.close();
772         }
773         
774         //read list file, if numSeqs > unique names then remove redundant names
775         string newListFile = listFile + ".temp";
776         ofstream out;
777         m->openOutputFile(newListFile, out);
778         ifstream in;
779                 m->openInputFile(listFile, in);
780                 
781                 bool wroteSomething = false;
782                 
783                 while(!in.eof()){
784                         
785                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
786             
787                         //read in list vector
788                         ListVector list(in);
789             
790             //listfile is already unique
791             if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
792                         
793                         //make a new list vector
794                         ListVector newList;
795                         newList.setLabel(list.getLabel());
796                         
797                         //for each bin
798                         for (int i = 0; i < list.getNumBins(); i++) {
799                 
800                                 //parse out names that are in accnos file
801                                 string binnames = list.get(i);
802                 vector<string> bnames;
803                 m->splitAtComma(binnames, bnames);
804                                 
805                                 string newNames = "";
806                 for (int j = 0; j < bnames.size(); j++) {
807                                         string name = bnames[j];
808                                         //if that name is in the .accnos file, add it
809                                         if (uniqueNames.count(name) != 0) {  newNames += name + ",";  }
810                                 }
811                 
812                                 //if there are names in this bin add to new list
813                                 if (newNames != "") { 
814                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
815                                         newList.push_back(newNames);    
816                                 }
817                         }
818             
819                         //print new listvector
820                         if (newList.getNumBins() != 0) {
821                                 wroteSomething = true;
822                                 newList.print(out);
823                         }
824                         
825                         m->gobble(in);
826                 }
827                 in.close();     
828                 out.close();
829
830         if (wroteSomething) { return newListFile; }
831         else { m->mothurRemove(newListFile); }
832         
833         return ""; 
834     }
835     catch(exception& e) {
836         m->errorOut(e, "SensSpecCommand", "preProcessList");
837         exit(1);
838     }
839 }
840
841
842 //***************************************************************************************************************
843
844
845