5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
// Registers the CommandParameter descriptors understood by sens.spec in the
// `parameters` container and gathers the name of each registered parameter
// into a vector<string> (the option constructor assigns this function's result
// to a vector<string> and uses it to validate user-supplied option names).
// Exceptions are reported through m->errorOut.
13 vector<string> SensSpecCommand::setParameters(){
// Input-file parameters: list, plus phylip and column which share the
// "PhylipColumn" group strings (presumably marking them as alternatives --
// confirm against CommandParameter's constructor).
15 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
17 //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
18 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
// Option parameters. The "-1.00", "100" and "F" strings match the fallbacks
// the option constructor applies when cutoff / precision / hard are absent.
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect the parameter names in registration order.
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "SensSpecCommand", "setParameters");
35 //**********************************************************************************************************************
// Builds the help text for the sens.spec command. At present the text is a
// single placeholder sentence; exceptions are reported through m->errorOut.
36 string SensSpecCommand::getHelpString(){
38 string helpString = "";
// NOTE(review): placeholder wording -- the command's parameters are not described yet.
39 helpString += "The sens.spec command....\n";
43 m->errorOut(e, "SensSpecCommand", "getHelpString");
47 //**********************************************************************************************************************
// Default constructor: builds a command object that will not run.
// abort and calledHelp are both set, so execute() returns 0 immediately, and
// an empty "sensspec" entry is registered in outputTypes.
48 SensSpecCommand::SensSpecCommand(){
50 abort = true; calledHelp = true;
// Register the output type with no file names attached yet.
52 vector<string> tempOutNames;
53 outputTypes["sensspec"] = tempOutNames;
56 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
60 //***************************************************************************************************************
// Parses the option string for sens.spec and initializes the command state.
//
// "help" and "citation" print their text and mark the command as aborted with
// calledHelp set. Otherwise the options are checked against the names from
// setParameters(), file parameters given without a path are prefixed with
// inputdir, and the list, phylip/column, outputdir, hard, cutoff, precision
// and label settings are read.
//
// Within the visible lines, abort is set when a parameter name is invalid,
// when validFile reports "not open" for list/phylip/column, when no list file
// is given and there is no current one, or when both phylip and column are
// supplied.
//
// option: raw option string handed to OptionParser.
62 SensSpecCommand::SensSpecCommand(string option) {
65 abort = false; calledHelp = false;
68 //allow user to run help
69 if(option == "help") { help(); abort = true; calledHelp = true; }
70 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
75 vector<string> myArray = setParameters();
77 OptionParser parser(option);
// NOTE: this local map hides the `parameters` container that setParameters()
// fills with push_back; from here on `parameters` means the user's options.
78 map<string,string> parameters = parser.getParameters();
80 ValidParameters validParameter;
81 map<string,string>::iterator it;
83 //check to make sure all parameters are valid for command
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
88 //initialize outputTypes
89 vector<string> tempOutNames;
90 outputTypes["sensspec"] = tempOutNames;
92 //if the user changes the input directory command factory will send this info to us in the output parameter
93 string inputDir = validParameter.validFile(parameters, "inputdir", false);
94 if (inputDir == "not found"){ inputDir = ""; }
97 it = parameters.find("list");
98 //user has given a list file
99 if(it != parameters.end()){
100 path = m->hasPath(it->second);
101 //if the user has not given a path then, add inputdir. else leave path alone.
102 if (path == "") { parameters["list"] = inputDir + it->second; }
105 it = parameters.find("phylip");
106 //user has given a phylip distance file
107 if(it != parameters.end()){
108 path = m->hasPath(it->second);
109 //if the user has not given a path then, add inputdir. else leave path alone.
110 if (path == "") { parameters["phylip"] = inputDir + it->second; }
113 it = parameters.find("column");
114 //user has given a column distance file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["column"] = inputDir + it->second; }
121 //it = parameters.find("name");
122 //user has given a template file
123 //if(it != parameters.end()){
124 //path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 //if (path == "") { parameters["name"] = inputDir + it->second; }
130 //check for required parameters
// list: fall back to the current list file when none is given; a usable
// file is also recorded as the current list file.
131 listFile = validParameter.validFile(parameters, "list", true);
132 if (listFile == "not found") {
133 listFile = m->getListFile();
134 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
135 else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
137 else if (listFile == "not open") { abort = true; }
138 else { m->setListFile(listFile); }
// phylip / column: whichever one is supplied becomes distFile, and `format`
// records which reader execute() should dispatch to.
140 phylipfile = validParameter.validFile(parameters, "phylip", true);
141 if (phylipfile == "not found") { phylipfile = ""; }
142 else if (phylipfile == "not open") { abort = true; }
143 else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
145 columnfile = validParameter.validFile(parameters, "column", true);
146 if (columnfile == "not found") { columnfile = ""; }
147 else if (columnfile == "not open") { abort = true; }
148 else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); }
150 if ((phylipfile == "") && (columnfile == "")) { //is there a current file available for either of these?
151 //give priority to column, then phylip
152 columnfile = m->getColumnFile();
153 if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
155 phylipfile = m->getPhylipFile();
156 if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
158 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
162 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
165 //if the user changes the output directory command factory will send this info to us in the output parameter
166 outputDir = validParameter.validFile(parameters, "outputdir", false);
167 if (outputDir == "not found"){
169 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
172 //check for optional parameter and set defaults
173 // ...at some point we should add some additional type checking...
// hard ends up 1 only when the option is present and m->isTrue accepts it.
// NOTE(review): the third branch repeats the test the second one already
// negated, so a plain else would be equivalent.
174 temp = validParameter.validFile(parameters, "hard", false);
175 if (temp == "not found"){ hard = 0; }
176 else if(!m->isTrue(temp)) { hard = 0; }
177 else if(m->isTrue(temp)) { hard = 1; }
179 // temp = validParameter.validFile(parameters, "name", true);
180 // if (temp == "not found") { nameFile = ""; }
181 // else if(temp == "not open") { abort = true; }
182 // else { nameFile = temp; }
183 // cout << "name:\t" << nameFile << endl;
// cutoff defaults to -1.00, the value processPhylip()/processColumn() test for
// to decide that the cutoff should be derived rather than taken from the user.
185 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
186 convert(temp, cutoff);
187 // cout << cutoff << endl;
// precision defaults to 100; it is the divisor in the 0.49/precision padding
// added to the cutoff elsewhere in this file.
189 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
190 convert(temp, precision);
191 // cout << precision << endl;
// label: "all" selects every label; anything else is split at '-' into the
// `labels` set.
193 string label = validParameter.validFile(parameters, "label", false);
194 if (label == "not found") { label = ""; }
196 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
197 else { allLines = 1; }
// Output file: <outputDir><root name of the list file>sensspec
200 sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + "sensspec";
203 catch(exception& e) {
204 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
208 //***************************************************************************************************************
// Runs the command. When abort is set it returns at once: 0 if help/citation
// was shown, 2 otherwise. Otherwise it records the output file, runs the
// reader matching `format`, removes the output file and returns 0 if
// m->control_pressed is set, and finally prints the output file name.
210 int SensSpecCommand::execute(){
212 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Register the result file under the "sensspec" output type.
215 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
// `format` was chosen in the constructor from the phylip/column parameter.
216 if(format == "phylip") { processPhylip(); }
217 else if(format == "column") { processColumn(); }
// Do not leave a partial result behind after an interrupted run.
219 if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
221 m->mothurOutEndLine();
222 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
223 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
224 m->mothurOutEndLine();
229 catch(exception& e) {
230 m->errorOut(e, "SensSpecCommand", "execute");
235 //***************************************************************************************************************
// Walks the list file label by label and, for every label that is requested
// (or for all labels when allLines == 1), builds the name -> OTU map with
// fillSeqMap() and scores it against the phylip distance file via
// process(map, ...). Requested labels that are absent from the file fall back
// to the preceding label (lastLabel); labels still unmatched at the end are
// reported to the user.
// Returns 0 from the visible early exit taken when m->control_pressed is set,
// after removing every file in outputNames.
237 int SensSpecCommand::processPhylip(){
239 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
240 string origCutoff = "";
// cutoff == -1.00 is the constructor's default, i.e. the user gave no cutoff;
// getCutoff is then handed to process() so it can take the cutoff from the label.
// NOTE(review): getCutoff's declaration is not among the visible lines.
242 if(cutoff == -1.00) { getCutoff = 1; }
// NOTE(review): the 0.49/precision padding is applied here regardless of
// `hard`, whereas process() only pads when hard == 0 -- confirm this is intended.
243 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
245 map<string, int> seqMap;
248 InputData input(listFile, "list");
249 ListVector* list = input.getListVector();
// NOTE(review): list is dereferenced here before the loop below tests it
// against NULL; if getListVector() can return NULL for an empty list file this
// line would crash -- confirm and guard if so.
250 string lastLabel = list->getLabel();
252 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
253 set<string> processedLabels;
254 set<string> userLabels = labels;
// Continue while list vectors remain and there is still something to do
// (all labels requested, or requested labels not yet matched).
257 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
259 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; }
// Case 1: the current label is one the user asked for.
261 if(allLines == 1 || labels.count(list->getLabel()) == 1){
263 processedLabels.insert(list->getLabel());
264 userLabels.erase(list->getLabel());
267 fillSeqMap(seqMap, list);
268 process(seqMap, list->getLabel(), getCutoff, origCutoff);
// Case 2: m->anyLabelsToProcess reports a requested label that this position
// should satisfy and lastLabel has not been processed yet, so lastLabel is
// re-read and processed in its place.
271 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
272 string saveLabel = list->getLabel();
275 list = input.getListVector(lastLabel);
277 processedLabels.insert(list->getLabel());
278 userLabels.erase(list->getLabel());
281 fillSeqMap(seqMap, list);
282 process(seqMap, list->getLabel(), getCutoff, origCutoff);
284 //restore real lastlabel to save below
285 list->setLabel(saveLabel);
288 lastLabel = list->getLabel();
291 list = input.getListVector();
295 //output error messages about any remaining user labels
296 set<string>::iterator it;
297 bool needToRun = false;
298 for (it = userLabels.begin(); it != userLabels.end(); it++) {
299 m->mothurOut("Your file does not include the label " + *it);
300 if (processedLabels.count(lastLabel) != 1) {
301 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
304 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
308 //run last label if you need to
309 if (needToRun == true) {
310 if (list != NULL) { delete list; }
311 list = input.getListVector(lastLabel);
314 fillSeqMap(seqMap, list);
315 process(seqMap, list->getLabel(), getCutoff, origCutoff);
322 catch(exception& e) {
323 m->errorOut(e, "SensSpecCommand", "processPhylip");
327 //***************************************************************************************************************
// Fills seqMap from the bins (OTUs) of `list`. Per the inline comment below,
// every sequence name found in a bin is mapped to that bin's OTU number;
// process() later looks names up in this map to obtain OTU indices.
//
// seqMap: map to populate (passed by reference).
// list:   list vector whose bins are comma-separated name strings.
// Returns 0 early when m->control_pressed is set.
328 int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
331 for(int i=0;i<list->getNumBins();i++){
333 if (m->control_pressed) { return 0; }
335 string seqList = list->get(i);
336 int seqListLength = seqList.length();
339 //parse bin by name, mapping each name to its otu number
// The bin string is scanned one character at a time: a ',' ends the current
// name, any other character is appended to seqName.
340 for(int j=0;j<seqListLength;j++){
342 if(seqList[j] == ','){
347 seqName += seqList[j];
356 catch(exception& e) {
357 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
361 //***************************************************************************************************************
362 int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
367 for(int i=0;i<list->getNumBins();i++){
369 if (m->control_pressed) { return 0; }
371 vector<string> seqNameVector;
372 string bin = list->get(i);
373 m->splitAtComma(bin, seqNameVector);
375 numSeqs += seqNameVector.size();
377 for(int j=0;j<seqNameVector.size();j++){
378 string seqPairString = "";
379 for(int k=0;k<j;k++){
380 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
381 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
382 seqPairSet.insert(seqPairString);
389 catch(exception& e) {
390 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
394 //***************************************************************************************************************
// Scores one list-file label against the phylip distance file (distFile).
//
// The file is read as a sequence count followed by, for each sequence i, its
// name and i distances (one per earlier sequence j < i). Each pair is tallied
// into truePositives / falseNegatives / falsePositives / trueNegatives by
// comparing its distance with `cutoff` and its two OTU indices with each
// other, and the totals are passed to outputStatistics().
//
// seqMap:     sequence name -> OTU number for this label.
// label:      label being scored; also printed to the log.
// getCutoff:  forwarded by the caller (its use lies outside the visible lines).
// origCutoff: cutoff text written to the output row; set to "unique" in the
//             visible unique-label branch.
// Returns 0 early when m->control_pressed is set.
395 int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
398 int lNumSeqs = seqMap.size();
402 m->openInputFile(distFile, phylipFile);
403 phylipFile >> pNumSeqs;
// A size mismatch between list and matrix sets control_pressed, which makes
// the loop below return on its first iteration.
404 if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); m->control_pressed = true; }
408 vector<int> otuIndices(lNumSeqs, -1);
// For a numeric label the cutoff is parsed from the label text and, unless
// hard is set, padded by 0.49/precision.
416 if(label != "unique"){
418 convert(label, cutoff);
419 if(hard == 0){ cutoff += (0.49 / double(precision)); }
422 origCutoff = "unique";
427 m->mothurOut(label); m->mothurOutEndLine();
429 for(int i=0;i<lNumSeqs;i++){
431 if (m->control_pressed) { return 0; }
433 phylipFile >> seqName;
// NOTE(review): operator[] on the map inserts a zero entry for a name that is
// missing from the list file, so such a sequence is silently treated as OTU 0.
434 otuIndices[i] = seqMap[seqName];
436 for(int j=0;j<i;j++){
437 phylipFile >> distance;
// Within the cutoff: same OTU -> true positive, different OTU -> false negative.
439 if(distance <= cutoff){
440 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
441 else { falseNegatives++; }
// Remaining case (presumably distance > cutoff; the branch header is not
// visible here): same OTU -> false positive, different OTU -> true negative.
444 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
445 else { trueNegatives++; }
451 outputStatistics(label, origCutoff);
455 catch(exception& e) {
456 m->errorOut(e, "SensSpecCommand", "process");
460 //***************************************************************************************************************
// Scores one list-file label against the column distance file (distFile).
//
// Records of the form "nameA nameB distance" are read, each pair is turned
// into the same "smaller\tlarger" key that fillSeqPairSet() produces, and the
// key is looked up in seqPairSet (pairs that share an OTU). Pairs found in the
// set are erased as they are seen; whatever is left in the set afterwards is
// added to falsePositives. The totals go to outputStatistics().
//
// seqPairSet: same-OTU pair keys for this label; modified (entries erased).
// label:      label being scored; also printed to the log.
// getCutoff:  forwarded by the caller (its use lies outside the visible lines).
// origCutoff: cutoff text written to the output row; set to "unique" in the
//             visible unique-label branch.
// numSeqs:    number of sequences, used to derive the total number of pairs.
461 int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
// Total number of unordered pairs, n(n-1)/2.
// NOTE(review): the product is evaluated in int and overflows a 32-bit int once
// numSeqs exceeds roughly 46,000 -- consider a wider type if inputs can be that large.
463 int numDists = (numSeqs * (numSeqs-1) / 2);
466 m->openInputFile(distFile, columnFile);
467 string seqNameA, seqNameB, seqPairString;
// Start from "every pair is a true negative"; the adjustments made as pairs
// are classified are presumably on lines not visible here.
472 trueNegatives = numDists;
// For a numeric label the cutoff is parsed from the label text and, unless
// hard is set, padded by 0.49/precision.
476 if(label != "unique"){
478 convert(label, cutoff);
479 if(hard == 0){ cutoff += (0.49 / double(precision)); }
482 origCutoff = "unique";
487 m->mothurOut(label); m->mothurOutEndLine();
490 columnFile >> seqNameA >> seqNameB >> distance;
// Canonical key: lexicographically smaller name first.
491 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
492 else { seqPairString = seqNameB + '\t' + seqNameA; }
494 set<string>::iterator it = seqPairSet.find(seqPairString);
// Pair within the cutoff and in the same OTU: consume it from the set.
496 if(distance <= cutoff){
497 if(it != seqPairSet.end()){
499 seqPairSet.erase(it);
// Pair beyond the cutoff but in the same OTU: consume it from the set too.
506 else if(it != seqPairSet.end()){
509 seqPairSet.erase(it);
512 m->gobble(columnFile);
// Same-OTU pairs that never appeared in the distance file.
514 falsePositives += seqPairSet.size();
516 outputStatistics(label, origCutoff);
521 catch(exception& e) {
522 m->errorOut(e, "SensSpecCommand", "process");
526 //***************************************************************************************************************
// Column-format counterpart of processPhylip(): walks the list file label by
// label and, for every requested label (or all labels when allLines == 1),
// builds the same-OTU pair set with fillSeqPairSet() and scores it against the
// column distance file via process(set, ...). Requested labels that are absent
// from the file fall back to the preceding label (lastLabel); labels still
// unmatched at the end are reported to the user.
// Returns 0 from the visible early exit taken when m->control_pressed is set,
// after removing every file in outputNames.
528 int SensSpecCommand::processColumn(){
530 string origCutoff = "";
// cutoff == -1.00 is the constructor's default, i.e. the user gave no cutoff;
// getCutoff is then handed to process() so it can take the cutoff from the label.
// NOTE(review): getCutoff's declaration is not among the visible lines.
532 if(cutoff == -1.00) { getCutoff = 1; }
// NOTE(review): the 0.49/precision padding is applied here regardless of
// `hard`, whereas process() only pads when hard == 0 -- confirm this is intended.
533 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
535 set<string> seqPairSet;
538 InputData input(listFile, "list");
539 ListVector* list = input.getListVector();
// NOTE(review): list is dereferenced here before the loop below tests it
// against NULL; if getListVector() can return NULL for an empty list file this
// line would crash -- confirm and guard if so.
540 string lastLabel = list->getLabel();
542 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
543 set<string> processedLabels;
544 set<string> userLabels = labels;
// Continue while list vectors remain and there is still something to do
// (all labels requested, or requested labels not yet matched).
547 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
549 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; }
// Case 1: the current label is one the user asked for.
551 if(allLines == 1 || labels.count(list->getLabel()) == 1){
553 processedLabels.insert(list->getLabel());
554 userLabels.erase(list->getLabel());
// fillSeqPairSet's return value (the name count) sizes the pair total in process().
557 numSeqs = fillSeqPairSet(seqPairSet, list);
558 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
// Case 2: m->anyLabelsToProcess reports a requested label that this position
// should satisfy and lastLabel has not been processed yet, so lastLabel is
// re-read and processed in its place.
561 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
562 string saveLabel = list->getLabel();
565 list = input.getListVector(lastLabel);
567 processedLabels.insert(list->getLabel());
568 userLabels.erase(list->getLabel());
571 numSeqs = fillSeqPairSet(seqPairSet, list);
572 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
574 //restore real lastlabel to save below
575 list->setLabel(saveLabel);
578 lastLabel = list->getLabel();
581 list = input.getListVector();
585 //output error messages about any remaining user labels
586 set<string>::iterator it;
587 bool needToRun = false;
588 for (it = userLabels.begin(); it != userLabels.end(); it++) {
589 m->mothurOut("Your file does not include the label " + *it);
590 if (processedLabels.count(lastLabel) != 1) {
591 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
594 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
598 //run last label if you need to
599 if (needToRun == true) {
600 if (list != NULL) { delete list; }
601 list = input.getListVector(lastLabel);
604 numSeqs = fillSeqPairSet(seqPairSet, list);
606 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
611 catch(exception& e) {
612 m->errorOut(e, "SensSpecCommand", "processColumn");
617 //***************************************************************************************************************
// Opens sensSpecFileName through m->openOutputFile, writes the tab-separated
// column header row and closes the file. The column order here is the order
// in which outputStatistics() writes its values.
619 void SensSpecCommand::setUpOutput(){
621 ofstream sensSpecFile;
622 m->openOutputFile(sensSpecFileName, sensSpecFile);
// Header: identifiers, the four raw counts, then the derived statistics.
624 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
626 sensSpecFile.close();
628 catch(exception& e) {
629 m->errorOut(e, "SensSpecCommand", "setUpOutput");
634 //***************************************************************************************************************
// Derives the summary statistics from truePositives, falsePositives,
// trueNegatives and falseNegatives and appends one tab-separated row to
// sensSpecFileName, in the column order of the header that setUpOutput() writes.
//
// label:  label text for the first column.
// cutoff: cutoff text for the second column. This string parameter hides the
//         numeric `cutoff` used elsewhere in the class (presumably a member).
636 void SensSpecCommand::outputStatistics(string label, string cutoff){
638 double tp = (double) truePositives;
639 double fp = (double) falsePositives;
640 double tn = (double) trueNegatives;
641 double fn = (double) falseNegatives;
// NOTE(review): p and n are defined on lines not visible here; presumably
// p = tp + fn (actual positives) and n = fp + tn (actual negatives) -- confirm.
// pPrime / nPrime are the predicted-positive and predicted-negative totals.
645 double pPrime = tp + fp;
646 double nPrime = tn + fn;
648 double sensitivity = tp / p;
649 double specificity = tn / n;
650 double positivePredictiveValue = tp / pPrime;
651 double negativePredictiveValue = tn / nPrime;
652 double falseDiscoveryRate = fp / pPrime;
654 double accuracy = (tp + tn) / (p + n);
// NOTE(review): the inline reset at the end of the next line is repeated by
// the p == 0 and n == 0 guards further down.
655 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
656 double f1Score = 2.0 * tp / (p + pPrime);
// The divisions above are performed unconditionally; these guards replace any
// statistic whose denominator was zero with 0.
659 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
660 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
661 if(p + n == 0) { accuracy = 0; }
662 if(p + pPrime == 0) { f1Score = 0; }
663 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
664 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
666 ofstream sensSpecFile;
667 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
// Raw counts are written before setprecision(4) is applied to the stream;
// the derived statistics are written after it.
669 sensSpecFile << label << '\t' << cutoff << '\t';
670 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
671 sensSpecFile << setprecision(4);
672 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
673 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
675 sensSpecFile.close();
677 catch(exception& e) {
678 m->errorOut(e, "SensSpecCommand", "outputStatistics");
683 //***************************************************************************************************************