]> git.donarmstrong.com Git - mothur.git/blob - sensspeccommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / sensspeccommand.cpp
1 /*
2  *  sensspeccommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/6/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "sensspeccommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::setParameters(){        
14         try {
15                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist);
16                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip);
17                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn);
18                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
19                 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff);
20                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
21                 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "SensSpecCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string SensSpecCommand::getHelpString(){        
36         try {
37                 string helpString = "";
38                 helpString += "The sens.spec command....\n";
39                 return helpString;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "SensSpecCommand", "getHelpString");
43                 exit(1);
44         }
45 }
46 //**********************************************************************************************************************
47 string SensSpecCommand::getOutputPattern(string type) {
48     try {
49         string pattern = "";
50         
51         if (type == "sensspec") {  pattern = "[filename],sensspec"; } 
52         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
53         
54         return pattern;
55     }
56     catch(exception& e) {
57         m->errorOut(e, "SensSpecCommand", "getOutputPattern");
58         exit(1);
59     }
60 }
61 //**********************************************************************************************************************
62 SensSpecCommand::SensSpecCommand(){     
63         try {
64                 abort = true; calledHelp = true; 
65                 setParameters();
66                 vector<string> tempOutNames;
67                 outputTypes["sensspec"] = tempOutNames;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
71                 exit(1);
72         }
73 }
74 //***************************************************************************************************************
75
76 SensSpecCommand::SensSpecCommand(string option)  {
77         try {
78                 
79                 abort = false; calledHelp = false;  
80                 allLines = 1;
81                 
82                 //allow user to run help
83                 if(option == "help") { help(); abort = true; calledHelp = true; }
84                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
85                 
86                 else {
87                         string temp;
88                         
89                         vector<string> myArray = setParameters();
90                         
91                         OptionParser parser(option);
92                         map<string,string> parameters = parser.getParameters();
93                         
94                         ValidParameters validParameter;
95                         map<string,string>::iterator it;
96                         
97                         //check to make sure all parameters are valid for command
98                         for (it = parameters.begin(); it != parameters.end(); it++) { 
99                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
100                         }
101                         
102                         //initialize outputTypes
103                         vector<string> tempOutNames;
104                         outputTypes["sensspec"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("list");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("phylip");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
125                                 }
126                                 
127                                 it = parameters.find("column");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
133                                 }                               
134                         }
135                         //check for required parameters
136                         listFile = validParameter.validFile(parameters, "list", true);
137                         if (listFile == "not found") {          
138                                 listFile = m->getListFile(); 
139                                 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
140                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
141                         }
142                         else if (listFile == "not open") { abort = true; }      
143                         else { m->setListFile(listFile); }
144                         
145                         phylipfile = validParameter.validFile(parameters, "phylip", true);
146                         if (phylipfile == "not found") { phylipfile = "";  }
147                         else if (phylipfile == "not open") { abort = true; }    
148                         else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);  }
149                         
150                         columnfile = validParameter.validFile(parameters, "column", true);
151                         if (columnfile == "not found") { columnfile = ""; }
152                         else if (columnfile == "not open") { abort = true; }    
153                         else { distFile = columnfile; format = "column";   m->setColumnFile(columnfile); }
154                         
155                         if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
156                                 //give priority to column, then phylip
157                                 columnfile = m->getColumnFile(); 
158                                 if (columnfile != "") {  distFile = columnfile; format = "column";  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
159                                 else { 
160                                         phylipfile = m->getPhylipFile(); 
161                                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
162                                         else { 
163                                                 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); 
164                                                 abort = true;
165                                         }
166                                 }
167                         }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
168                         
169                         
170                         //if the user changes the output directory command factory will send this info to us in the output parameter 
171                         outputDir = validParameter.validFile(parameters, "outputdir", false);
172                         if (outputDir == "not found"){  
173                                 outputDir = ""; 
174                                 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it        
175                         }
176
177                         //check for optional parameter and set defaults
178                         // ...at some point should added some additional type checking...
179                         temp = validParameter.validFile(parameters, "hard", false);
180                         if (temp == "not found"){       hard = 0;       }
181                         else if(!m->isTrue(temp))       {       hard = 0;       }
182                         else if(m->isTrue(temp))        {       hard = 1;       }
183                         
184                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
185                         m->mothurConvert(temp, cutoff);  
186 //                      cout << cutoff << endl;
187                         
188                         temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
189                         m->mothurConvert(temp, precision);  
190 //                      cout << precision << endl;
191                         
192                         string label = validParameter.validFile(parameters, "label", false);                    
193                         if (label == "not found") { label = ""; }
194                         else { 
195                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
196                                 else { allLines = 1;  }
197                         }
198                         
199             map<string, string> variables; 
200             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile));
201                         sensSpecFileName = getOutputFileName("sensspec",variables);
202                 }
203         }
204         catch(exception& e) {
205                 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
206                 exit(1);
207         }
208 }
209 //***************************************************************************************************************
210
211 int SensSpecCommand::execute(){
212         try{
213                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
214         
215         int startTime = time(NULL);
216         
217         //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file.
218         string newListFile = preProcessList();
219         if (newListFile != "") { listFile = newListFile; }
220         
221                 setUpOutput();
222                 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
223                 if(format == "phylip")          {       processPhylip();        }
224                 else if(format == "column")     {       processColumn();        }
225                 
226         //remove temp file if created
227         if (newListFile != "") { m->mothurRemove(newListFile); }
228         
229                 if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
230         
231         m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine();
232         
233                 m->mothurOutEndLine();
234                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
235                 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();  
236                 m->mothurOutEndLine();
237                 
238                 
239                 return 0;       
240         }
241         catch(exception& e) {
242                 m->errorOut(e, "SensSpecCommand", "execute");
243                 exit(1);
244         }
245 }
246
247 //***************************************************************************************************************
248
249 int SensSpecCommand::processPhylip(){
250         try{
251                 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
252                 string origCutoff = "";
253                 bool getCutoff = 0;
254                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
255                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }               
256                 
257                 map<string, int> seqMap;
258                 string seqList;
259                 
260                 InputData input(listFile, "list");
261                 ListVector* list = input.getListVector();
262                 string lastLabel = list->getLabel();
263                 
264                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
265                 set<string> processedLabels;
266                 set<string> userLabels = labels;
267                 
268                 
269                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
270                         
271                         if(m->control_pressed){
272                 for (int i = 0; i < outputNames.size(); i++){   m->mothurRemove(outputNames[i]);  }  delete list;  return 0;
273             }
274                         
275                         if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
276                                 
277                                 processedLabels.insert(list->getLabel());
278                                 userLabels.erase(list->getLabel());
279                                 
280                                 //process
281                                 fillSeqMap(seqMap, list);
282                                 process(seqMap, list->getLabel(), getCutoff, origCutoff);
283                         }
284                         
285                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
286                                 string saveLabel = list->getLabel();
287                                 
288                                 delete list;
289                                 list = input.getListVector(lastLabel);
290                                 
291                                 processedLabels.insert(list->getLabel());
292                                 userLabels.erase(list->getLabel());
293                                 
294                                 //process
295                                 fillSeqMap(seqMap, list);
296                                 process(seqMap, list->getLabel(), getCutoff, origCutoff);
297                                 
298                                 //restore real lastlabel to save below
299                                 list->setLabel(saveLabel);
300                         }               
301                         
302                         lastLabel = list->getLabel();                   
303                         
304                         delete list;
305                         list = input.getListVector();
306                 }
307                 
308                 
309                 //output error messages about any remaining user labels
310                 set<string>::iterator it;
311                 bool needToRun = false;
312                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
313                         m->mothurOut("Your file does not include the label " + *it); 
314                         if (processedLabels.count(lastLabel) != 1) {
315                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
316                                 needToRun = true;
317                         }else {
318                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
319                         }
320                 }
321                 
322                 //run last label if you need to
323                 if (needToRun == true)  {
324                         if (list != NULL) {     delete list;    }
325                         list = input.getListVector(lastLabel);
326                         
327                         //process
328                         fillSeqMap(seqMap, list);
329                         process(seqMap, list->getLabel(), getCutoff, origCutoff);
330                         
331                         delete list;
332                 }
333                 
334                 return 0;
335         }
336         catch(exception& e) {
337                 m->errorOut(e, "SensSpecCommand", "processPhylip");
338                 exit(1);
339         }
340 }
341
342 //***************************************************************************************************************
343
344 int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
345         try {
346                 //for each otu
347                 for(int i=0;i<list->getNumBins();i++){
348                         
349                         if (m->control_pressed) { return 0; }
350                         
351                         string seqList = list->get(i);
352                         int seqListLength = seqList.length();
353                         string seqName = "";
354                         
355                         //parse bin by name, mapping each name to its otu number
356                         for(int j=0;j<seqListLength;j++){
357                                 
358                                 if(seqList[j] == ','){
359                                         seqMap[seqName] = i;
360                                         seqName = "";
361                                 }
362                                 else{
363                                         seqName += seqList[j];
364                                 }
365                                 
366                         }
367                         seqMap[seqName] = i;
368                 }
369                 
370                 return 0;
371         }
372         catch(exception& e) {
373                 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
374                 exit(1);
375         }
376 }
377 //***************************************************************************************************************
378 int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
379         try {
380                 int numSeqs = 0;
381                 
382                 //for each otu
383                 for(int i=0;i<list->getNumBins();i++){
384                         
385                         if (m->control_pressed) { return 0; }
386                         
387                         vector<string> seqNameVector;
388                         string bin = list->get(i);
389                         m->splitAtComma(bin, seqNameVector);
390                         
391                         numSeqs += seqNameVector.size();
392                         
393                         for(int j=0;j<seqNameVector.size();j++){
394                                 string seqPairString = "";                              
395                                 for(int k=0;k<j;k++){
396                                         if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
397                                         else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
398                                         seqPairSet.insert(seqPairString);
399                                 }
400                         }
401                 }
402                 
403                 return numSeqs;
404         }
405         catch(exception& e) {
406                 m->errorOut(e, "SensSpecCommand", "fillSeqPairSet");
407                 exit(1);
408         }
409 }
410 //***************************************************************************************************************
411 int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
412         try {
413                                                 
414                 int lNumSeqs = seqMap.size();
415                 int pNumSeqs = 0;
416                 
417                 ifstream phylipFile;
418                 m->openInputFile(distFile, phylipFile);
419                 phylipFile >> pNumSeqs;
420                 if(pNumSeqs != lNumSeqs){       m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ }
421                 
422                 string seqName;
423                 double distance;
424                 vector<int> otuIndices(lNumSeqs, -1);
425                 
426                 truePositives = 0;
427                 falsePositives = 0;
428                 trueNegatives = 0;
429                 falseNegatives = 0;
430                 
431                 if(getCutoff == 1){
432                         if(label != "unique"){
433                                 origCutoff = label;
434                                 convert(label, cutoff);
435                                 if(hard == 0){  cutoff += (0.49 / double(precision));   }               
436                         }
437                         else{
438                                 origCutoff = "unique";
439                                 cutoff = 0.0000;
440                         }
441                 }
442                 
443                 m->mothurOut(label); m->mothurOutEndLine();
444                 
445                 for(int i=0;i<pNumSeqs;i++){
446                         
447                         if (m->control_pressed) { return 0; }
448                         
449                         phylipFile >> seqName;
450                         otuIndices[i] = seqMap[seqName];
451                         
452                         for(int j=0;j<i;j++){
453                                 phylipFile >> distance;
454                                 
455                                 if(distance <= cutoff){
456                                         if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
457                                         else                                                            {       falseNegatives++;       }
458                                 }
459                                 else{
460                                         if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
461                                         else                                                            {       trueNegatives++;        }
462                                 }
463                         }
464                 }
465                 phylipFile.close();
466                 
467                 outputStatistics(label, origCutoff);
468                 
469                 return 0;
470         }
471         catch(exception& e) {
472                 m->errorOut(e, "SensSpecCommand", "process");
473                 exit(1);
474         }
475 }
476 //***************************************************************************************************************
477 int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
478         try {
479                 int numDists = (numSeqs * (numSeqs-1) / 2);
480                 
481                 ifstream columnFile;
482                 m->openInputFile(distFile, columnFile);
483                 string seqNameA, seqNameB, seqPairString;
484                 double distance;
485                 
486                 truePositives = 0;
487                 falsePositives = 0;
488                 trueNegatives = numDists;
489                 falseNegatives = 0;
490                 
491                 if(getCutoff == 1){
492                         if(label != "unique"){
493                                 origCutoff = label;
494                                 convert(label, cutoff);
495                                 if(hard == 0){  cutoff += (0.49 / double(precision));   }               
496                         }
497                         else{
498                                 origCutoff = "unique";
499                                 cutoff = 0.0000;
500                         }
501                 }
502                 
503                 m->mothurOut(label); m->mothurOutEndLine();
504                 
505                 while(columnFile){
506                         columnFile >> seqNameA >> seqNameB >> distance;
507                         if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
508                         else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
509                         
510                         set<string>::iterator it = seqPairSet.find(seqPairString);
511                         
512                         if(distance <= cutoff){
513                                 if(it != seqPairSet.end()){
514                                         truePositives++;
515                                         seqPairSet.erase(it);   
516                                 }
517                                 else{
518                                         falseNegatives++;
519                                 }
520                                 trueNegatives--;
521                         }
522                         else if(it != seqPairSet.end()){        
523                                 falsePositives++;
524                                 trueNegatives--;
525                                 seqPairSet.erase(it);   
526                         }
527                         
528                         m->gobble(columnFile);
529                 }
530                 falsePositives += seqPairSet.size();
531                 
532                 outputStatistics(label, origCutoff);
533                 
534                                 
535                 return 0;
536         }
537         catch(exception& e) {
538                 m->errorOut(e, "SensSpecCommand", "process");
539                 exit(1);
540         }
541 }
542 //***************************************************************************************************************
543
544 int SensSpecCommand::processColumn(){
545         try{            
546                 string origCutoff = "";
547                 bool getCutoff = 0;
548                 if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
549                 else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
550                 
551                 set<string> seqPairSet;
552                 int numSeqs = 0;
553                 
554                 InputData input(listFile, "list");
555                 ListVector* list = input.getListVector();
556                 string lastLabel = list->getLabel();
557                 
558                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
559                 set<string> processedLabels;
560                 set<string> userLabels = labels;
561                 
562                 
563                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
564                         
565                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  delete list;  return 0;  }
566                         
567                         if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
568                                 
569                                 processedLabels.insert(list->getLabel());
570                                 userLabels.erase(list->getLabel());
571                                 
572                                 //process
573                                 numSeqs = fillSeqPairSet(seqPairSet, list);
574                                 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
575                         }
576                         
577                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
578                                 string saveLabel = list->getLabel();
579                                 
580                                 delete list;
581                                 list = input.getListVector(lastLabel);
582                                 
583                                 processedLabels.insert(list->getLabel());
584                                 userLabels.erase(list->getLabel());
585                                 
586                                 //process
587                                 numSeqs = fillSeqPairSet(seqPairSet, list);
588                                 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
589                                 
590                                 //restore real lastlabel to save below
591                                 list->setLabel(saveLabel);
592                         }               
593                         
594                         lastLabel = list->getLabel();                   
595                         
596                         delete list;
597                         list = input.getListVector();
598                 }
599                 
600                 
601                 //output error messages about any remaining user labels
602                 set<string>::iterator it;
603                 bool needToRun = false;
604                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
605                         m->mothurOut("Your file does not include the label " + *it); 
606                         if (processedLabels.count(lastLabel) != 1) {
607                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
608                                 needToRun = true;
609                         }else {
610                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
611                         }
612                 }
613                 
614                 //run last label if you need to
615                 if (needToRun == true)  {
616                         if (list != NULL) {     delete list;    }
617                         list = input.getListVector(lastLabel);
618                         
619                         //process
620                         numSeqs = fillSeqPairSet(seqPairSet, list);
621                         delete list;
622                         process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
623                 }
624                 
625                 return 0;
626         }
627         catch(exception& e) {
628                 m->errorOut(e, "SensSpecCommand", "processColumn");
629                 exit(1);
630         }
631 }
632
633 //***************************************************************************************************************
634
635 void SensSpecCommand::setUpOutput(){
636         try{            
637                 ofstream sensSpecFile;
638                 m->openOutputFile(sensSpecFileName, sensSpecFile);
639                 
640                 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
641
642                 sensSpecFile.close();
643         }
644         catch(exception& e) {
645                 m->errorOut(e, "SensSpecCommand", "setUpOutput");
646                 exit(1);
647         }
648 }
649
650 //***************************************************************************************************************
651
652 void SensSpecCommand::outputStatistics(string label, string cutoff){
653         try{            
654                 double tp = (double) truePositives;
655                 double fp = (double) falsePositives;
656                 double tn = (double) trueNegatives;
657                 double fn = (double) falseNegatives;
658                 
659                 double p = tp + fn;
660                 double n = fp + tn;
661                 double pPrime = tp + fp;
662                 double nPrime = tn + fn;
663                 
664                 double sensitivity = tp / p;    
665                 double specificity = tn / n;
666                 double positivePredictiveValue = tp / pPrime;
667                 double negativePredictiveValue = tn / nPrime;
668                 double falseDiscoveryRate = fp / pPrime;
669                 
670                 double accuracy = (tp + tn) / (p + n);
671                 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime);  if(p == 0 || n == 0){   matthewsCorrCoef = 0;   }
672                 double f1Score = 2.0 * tp / (p + pPrime);
673                 
674                 
675                 if(p == 0)                      {       sensitivity = 0;        matthewsCorrCoef = 0;   }
676                 if(n == 0)                      {       specificity = 0;        matthewsCorrCoef = 0;   }
677                 if(p + n == 0)          {       accuracy = 0;                                                           }
678                 if(p + pPrime == 0)     {       f1Score = 0;                                                            }
679                 if(pPrime == 0)         {       positivePredictiveValue = 0;    falseDiscoveryRate = 0; matthewsCorrCoef = 0;   }
680                 if(nPrime == 0)         {       negativePredictiveValue = 0;    matthewsCorrCoef = 0;                                                   }
681                 
682                 ofstream sensSpecFile;
683                 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
684                 
685                 sensSpecFile << label << '\t' << cutoff << '\t';
686                 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
687                 sensSpecFile << setprecision(4);
688                 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
689                 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
690                 
691                 sensSpecFile.close();
692         }
693         catch(exception& e) {
694                 m->errorOut(e, "SensSpecCommand", "outputStatistics");
695                 exit(1);
696         }
697 }
698 //***************************************************************************************************************
699
700 string SensSpecCommand::preProcessList(){
701     try {
702         set<string> uniqueNames;
703         //get unique names from distance file
704         if (format == "phylip") {
705             
706             ifstream phylipFile;
707             m->openInputFile(distFile, phylipFile);
708             string numTest;
709             int pNumSeqs;
710                         phylipFile >> numTest;
711                         
712                         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
713             else {
714                 m->mothurConvert(numTest, pNumSeqs);
715             }
716             phylipFile >> pNumSeqs; m->gobble(phylipFile);
717             
718             string seqName;
719             double distance;
720             
721             for(int i=0;i<pNumSeqs;i++){
722                 
723                 if (m->control_pressed) { return ""; }
724                 
725                 phylipFile >> seqName; 
726                 uniqueNames.insert(seqName);
727                 
728                 for(int j=0;j<i;j++){
729                     phylipFile >> distance;
730                 }
731                 m->gobble(phylipFile);
732             }
733             phylipFile.close();
734         }else {
735             ifstream columnFile;
736             m->openInputFile(distFile, columnFile);
737             string seqNameA, seqNameB;
738             double distance;
739             
740             while(columnFile){
741                 if (m->control_pressed) { return ""; }
742                 columnFile >> seqNameA >> seqNameB >> distance;
743                 uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB);
744                 m->gobble(columnFile);
745             }
746             columnFile.close();
747         }
748         
749         //read list file, if numSeqs > unique names then remove redundant names
750         string newListFile = listFile + ".temp";
751         ofstream out;
752         m->openOutputFile(newListFile, out);
753         ifstream in;
754                 m->openInputFile(listFile, in);
755                 
756                 bool wroteSomething = false;
757                 
758                 while(!in.eof()){
759                         
760                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
761             
762                         //read in list vector
763                         ListVector list(in);
764             
765             //listfile is already unique
766             if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
767                         
768                         //make a new list vector
769                         ListVector newList;
770                         newList.setLabel(list.getLabel());
771                         
772                         //for each bin
773                         for (int i = 0; i < list.getNumBins(); i++) {
774                 
775                                 //parse out names that are in accnos file
776                                 string binnames = list.get(i);
777                 vector<string> bnames;
778                 m->splitAtComma(binnames, bnames);
779                                 
780                                 string newNames = "";
781                 for (int i = 0; i < bnames.size(); i++) {
782                                         string name = bnames[i];
783                                         //if that name is in the .accnos file, add it
784                                         if (uniqueNames.count(name) != 0) {  newNames += name + ",";  }
785                                 }
786                 
787                                 //if there are names in this bin add to new list
788                                 if (newNames != "") { 
789                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
790                                         newList.push_back(newNames);    
791                                 }
792                         }
793             
794                         //print new listvector
795                         if (newList.getNumBins() != 0) {
796                                 wroteSomething = true;
797                                 newList.print(out);
798                         }
799                         
800                         m->gobble(in);
801                 }
802                 in.close();     
803                 out.close();
804
805         if (wroteSomething) { return newListFile; }
806         return ""; 
807     }
808     catch(exception& e) {
809         m->errorOut(e, "SensSpecCommand", "preProcessList");
810         exit(1);
811     }
812 }
813
814
815 //***************************************************************************************************************
816
817
818