5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::setParameters(){
15 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist);
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip);
17 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn);
18 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
19 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff);
20 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
21 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
30 m->errorOut(e, "SensSpecCommand", "setParameters");
34 //**********************************************************************************************************************
35 string SensSpecCommand::getHelpString(){
37 string helpString = "";
38 helpString += "The sens.spec command....\n";
42 m->errorOut(e, "SensSpecCommand", "getHelpString");
46 //**********************************************************************************************************************
47 string SensSpecCommand::getOutputPattern(string type) {
51 if (type == "sensspec") { pattern = "[filename],sensspec"; }
52 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
57 m->errorOut(e, "SensSpecCommand", "getOutputPattern");
61 //**********************************************************************************************************************
62 SensSpecCommand::SensSpecCommand(){
64 abort = true; calledHelp = true;
66 vector<string> tempOutNames;
67 outputTypes["sensspec"] = tempOutNames;
70 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
74 //***************************************************************************************************************
76 SensSpecCommand::SensSpecCommand(string option) {
79 abort = false; calledHelp = false;
82 //allow user to run help
83 if(option == "help") { help(); abort = true; calledHelp = true; }
84 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89 vector<string> myArray = setParameters();
91 OptionParser parser(option);
92 map<string,string> parameters = parser.getParameters();
94 ValidParameters validParameter;
95 map<string,string>::iterator it;
97 //check to make sure all parameters are valid for command
98 for (it = parameters.begin(); it != parameters.end(); it++) {
99 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
102 //initialize outputTypes
103 vector<string> tempOutNames;
104 outputTypes["sensspec"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("list");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["list"] = inputDir + it->second; }
119 it = parameters.find("phylip");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["phylip"] = inputDir + it->second; }
127 it = parameters.find("column");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["column"] = inputDir + it->second; }
135 //check for required parameters
136 listFile = validParameter.validFile(parameters, "list", true);
137 if (listFile == "not found") {
138 listFile = m->getListFile();
139 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
140 else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
142 else if (listFile == "not open") { abort = true; }
143 else { m->setListFile(listFile); }
145 phylipfile = validParameter.validFile(parameters, "phylip", true);
146 if (phylipfile == "not found") { phylipfile = ""; }
147 else if (phylipfile == "not open") { abort = true; }
148 else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
150 columnfile = validParameter.validFile(parameters, "column", true);
151 if (columnfile == "not found") { columnfile = ""; }
152 else if (columnfile == "not open") { abort = true; }
153 else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); }
155 if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
156 //give priority to column, then phylip
157 columnfile = m->getColumnFile();
158 if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
160 phylipfile = m->getPhylipFile();
161 if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
163 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
167 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
170 //if the user changes the output directory command factory will send this info to us in the output parameter
171 outputDir = validParameter.validFile(parameters, "outputdir", false);
172 if (outputDir == "not found"){
174 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
177 //check for optional parameter and set defaults
178 // ...at some point should added some additional type checking...
179 temp = validParameter.validFile(parameters, "hard", false);
180 if (temp == "not found"){ hard = 0; }
181 else if(!m->isTrue(temp)) { hard = 0; }
182 else if(m->isTrue(temp)) { hard = 1; }
184 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
185 m->mothurConvert(temp, cutoff);
186 // cout << cutoff << endl;
188 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
189 m->mothurConvert(temp, precision);
190 // cout << precision << endl;
192 string label = validParameter.validFile(parameters, "label", false);
193 if (label == "not found") { label = ""; }
195 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
196 else { allLines = 1; }
199 map<string, string> variables;
200 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile));
201 sensSpecFileName = getOutputFileName("sensspec",variables);
204 catch(exception& e) {
205 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
209 //***************************************************************************************************************
211 int SensSpecCommand::execute(){
213 if (abort == true) { if (calledHelp) { return 0; } return 2; }
216 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
217 if(format == "phylip") { processPhylip(); }
218 else if(format == "column") { processColumn(); }
220 if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
222 m->mothurOutEndLine();
223 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
224 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
225 m->mothurOutEndLine();
230 catch(exception& e) {
231 m->errorOut(e, "SensSpecCommand", "execute");
236 //***************************************************************************************************************
238 int SensSpecCommand::processPhylip(){
240 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
241 string origCutoff = "";
243 if(cutoff == -1.00) { getCutoff = 1; }
244 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
246 map<string, int> seqMap;
249 InputData input(listFile, "list");
250 ListVector* list = input.getListVector();
251 string lastLabel = list->getLabel();
253 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
254 set<string> processedLabels;
255 set<string> userLabels = labels;
258 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
260 if(m->control_pressed){
261 for (int i = 0; i < outputNames.size(); i++){ m->mothurRemove(outputNames[i]); } delete list; return 0;
264 if(allLines == 1 || labels.count(list->getLabel()) == 1){
266 processedLabels.insert(list->getLabel());
267 userLabels.erase(list->getLabel());
270 fillSeqMap(seqMap, list);
271 process(seqMap, list->getLabel(), getCutoff, origCutoff);
274 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
275 string saveLabel = list->getLabel();
278 list = input.getListVector(lastLabel);
280 processedLabels.insert(list->getLabel());
281 userLabels.erase(list->getLabel());
284 fillSeqMap(seqMap, list);
285 process(seqMap, list->getLabel(), getCutoff, origCutoff);
287 //restore real lastlabel to save below
288 list->setLabel(saveLabel);
291 lastLabel = list->getLabel();
294 list = input.getListVector();
298 //output error messages about any remaining user labels
299 set<string>::iterator it;
300 bool needToRun = false;
301 for (it = userLabels.begin(); it != userLabels.end(); it++) {
302 m->mothurOut("Your file does not include the label " + *it);
303 if (processedLabels.count(lastLabel) != 1) {
304 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
307 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
311 //run last label if you need to
312 if (needToRun == true) {
313 if (list != NULL) { delete list; }
314 list = input.getListVector(lastLabel);
317 fillSeqMap(seqMap, list);
318 process(seqMap, list->getLabel(), getCutoff, origCutoff);
325 catch(exception& e) {
326 m->errorOut(e, "SensSpecCommand", "processPhylip");
331 //***************************************************************************************************************
333 int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
336 for(int i=0;i<list->getNumBins();i++){
338 if (m->control_pressed) { return 0; }
340 string seqList = list->get(i);
341 int seqListLength = seqList.length();
344 //parse bin by name, mapping each name to its otu number
345 for(int j=0;j<seqListLength;j++){
347 if(seqList[j] == ','){
352 seqName += seqList[j];
361 catch(exception& e) {
362 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
366 //***************************************************************************************************************
367 int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
372 for(int i=0;i<list->getNumBins();i++){
374 if (m->control_pressed) { return 0; }
376 vector<string> seqNameVector;
377 string bin = list->get(i);
378 m->splitAtComma(bin, seqNameVector);
380 numSeqs += seqNameVector.size();
382 for(int j=0;j<seqNameVector.size();j++){
383 string seqPairString = "";
384 for(int k=0;k<j;k++){
385 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
386 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
387 seqPairSet.insert(seqPairString);
394 catch(exception& e) {
395 m->errorOut(e, "SensSpecCommand", "fillSeqPairSet");
399 //***************************************************************************************************************
400 int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
403 int lNumSeqs = seqMap.size();
407 m->openInputFile(distFile, phylipFile);
408 phylipFile >> pNumSeqs;
409 if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ }
413 vector<int> otuIndices(lNumSeqs, -1);
421 if(label != "unique"){
423 convert(label, cutoff);
424 if(hard == 0){ cutoff += (0.49 / double(precision)); }
427 origCutoff = "unique";
432 m->mothurOut(label); m->mothurOutEndLine();
434 for(int i=0;i<pNumSeqs;i++){
436 if (m->control_pressed) { return 0; }
438 phylipFile >> seqName;
439 otuIndices[i] = seqMap[seqName];
441 for(int j=0;j<i;j++){
442 phylipFile >> distance;
444 if(distance <= cutoff){
445 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
446 else { falseNegatives++; }
449 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
450 else { trueNegatives++; }
456 outputStatistics(label, origCutoff);
460 catch(exception& e) {
461 m->errorOut(e, "SensSpecCommand", "process");
465 //***************************************************************************************************************
466 int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
468 int numDists = (numSeqs * (numSeqs-1) / 2);
471 m->openInputFile(distFile, columnFile);
472 string seqNameA, seqNameB, seqPairString;
477 trueNegatives = numDists;
481 if(label != "unique"){
483 convert(label, cutoff);
484 if(hard == 0){ cutoff += (0.49 / double(precision)); }
487 origCutoff = "unique";
492 m->mothurOut(label); m->mothurOutEndLine();
495 columnFile >> seqNameA >> seqNameB >> distance;
496 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
497 else { seqPairString = seqNameB + '\t' + seqNameA; }
499 set<string>::iterator it = seqPairSet.find(seqPairString);
501 if(distance <= cutoff){
502 if(it != seqPairSet.end()){
504 seqPairSet.erase(it);
511 else if(it != seqPairSet.end()){
514 seqPairSet.erase(it);
517 m->gobble(columnFile);
519 falsePositives += seqPairSet.size();
521 outputStatistics(label, origCutoff);
526 catch(exception& e) {
527 m->errorOut(e, "SensSpecCommand", "process");
531 //***************************************************************************************************************
533 int SensSpecCommand::processColumn(){
535 string origCutoff = "";
537 if(cutoff == -1.00) { getCutoff = 1; }
538 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
540 set<string> seqPairSet;
543 InputData input(listFile, "list");
544 ListVector* list = input.getListVector();
545 string lastLabel = list->getLabel();
547 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
548 set<string> processedLabels;
549 set<string> userLabels = labels;
552 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
554 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; }
556 if(allLines == 1 || labels.count(list->getLabel()) == 1){
558 processedLabels.insert(list->getLabel());
559 userLabels.erase(list->getLabel());
562 numSeqs = fillSeqPairSet(seqPairSet, list);
563 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
566 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
567 string saveLabel = list->getLabel();
570 list = input.getListVector(lastLabel);
572 processedLabels.insert(list->getLabel());
573 userLabels.erase(list->getLabel());
576 numSeqs = fillSeqPairSet(seqPairSet, list);
577 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
579 //restore real lastlabel to save below
580 list->setLabel(saveLabel);
583 lastLabel = list->getLabel();
586 list = input.getListVector();
590 //output error messages about any remaining user labels
591 set<string>::iterator it;
592 bool needToRun = false;
593 for (it = userLabels.begin(); it != userLabels.end(); it++) {
594 m->mothurOut("Your file does not include the label " + *it);
595 if (processedLabels.count(lastLabel) != 1) {
596 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
599 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
603 //run last label if you need to
604 if (needToRun == true) {
605 if (list != NULL) { delete list; }
606 list = input.getListVector(lastLabel);
609 numSeqs = fillSeqPairSet(seqPairSet, list);
611 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
616 catch(exception& e) {
617 m->errorOut(e, "SensSpecCommand", "processColumn");
622 //***************************************************************************************************************
624 void SensSpecCommand::setUpOutput(){
626 ofstream sensSpecFile;
627 m->openOutputFile(sensSpecFileName, sensSpecFile);
629 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
631 sensSpecFile.close();
633 catch(exception& e) {
634 m->errorOut(e, "SensSpecCommand", "setUpOutput");
639 //***************************************************************************************************************
641 void SensSpecCommand::outputStatistics(string label, string cutoff){
643 double tp = (double) truePositives;
644 double fp = (double) falsePositives;
645 double tn = (double) trueNegatives;
646 double fn = (double) falseNegatives;
650 double pPrime = tp + fp;
651 double nPrime = tn + fn;
653 double sensitivity = tp / p;
654 double specificity = tn / n;
655 double positivePredictiveValue = tp / pPrime;
656 double negativePredictiveValue = tn / nPrime;
657 double falseDiscoveryRate = fp / pPrime;
659 double accuracy = (tp + tn) / (p + n);
660 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
661 double f1Score = 2.0 * tp / (p + pPrime);
664 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
665 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
666 if(p + n == 0) { accuracy = 0; }
667 if(p + pPrime == 0) { f1Score = 0; }
668 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
669 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
671 ofstream sensSpecFile;
672 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
674 sensSpecFile << label << '\t' << cutoff << '\t';
675 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
676 sensSpecFile << setprecision(4);
677 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
678 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
680 sensSpecFile.close();
682 catch(exception& e) {
683 m->errorOut(e, "SensSpecCommand", "outputStatistics");
688 //***************************************************************************************************************