5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::setParameters(){
15 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
17 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
18 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
19 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
20 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
21 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
30 m->errorOut(e, "SensSpecCommand", "setParameters");
34 //**********************************************************************************************************************
35 string SensSpecCommand::getHelpString(){
37 string helpString = "";
38 helpString += "The sens.spec command....\n";
42 m->errorOut(e, "SensSpecCommand", "getHelpString");
46 //**********************************************************************************************************************
47 string SensSpecCommand::getOutputFileNameTag(string type, string inputName=""){
49 string outputFileName = "";
50 map<string, vector<string> >::iterator it;
52 //is this a type this command creates
53 it = outputTypes.find(type);
54 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
56 if (type == "sensspec") { outputFileName = "sensspec"; }
57 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
59 return outputFileName;
62 m->errorOut(e, "SensSpecCommand", "getOutputFileNameTag");
66 //**********************************************************************************************************************
67 SensSpecCommand::SensSpecCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["sensspec"] = tempOutNames;
75 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
79 //***************************************************************************************************************
81 SensSpecCommand::SensSpecCommand(string option) {
84 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
99 ValidParameters validParameter;
100 map<string,string>::iterator it;
102 //check to make sure all parameters are valid for command
103 for (it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["sensspec"] = tempOutNames;
111 //if the user changes the input directory command factory will send this info to us in the output parameter
112 string inputDir = validParameter.validFile(parameters, "inputdir", false);
113 if (inputDir == "not found"){ inputDir = ""; }
116 it = parameters.find("list");
117 //user has given a template file
118 if(it != parameters.end()){
119 path = m->hasPath(it->second);
120 //if the user has not given a path then, add inputdir. else leave path alone.
121 if (path == "") { parameters["list"] = inputDir + it->second; }
124 it = parameters.find("phylip");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["phylip"] = inputDir + it->second; }
132 it = parameters.find("column");
133 //user has given a template file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["column"] = inputDir + it->second; }
140 //check for required parameters
141 listFile = validParameter.validFile(parameters, "list", true);
142 if (listFile == "not found") {
143 listFile = m->getListFile();
144 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
145 else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
147 else if (listFile == "not open") { abort = true; }
148 else { m->setListFile(listFile); }
150 phylipfile = validParameter.validFile(parameters, "phylip", true);
151 if (phylipfile == "not found") { phylipfile = ""; }
152 else if (phylipfile == "not open") { abort = true; }
153 else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
155 columnfile = validParameter.validFile(parameters, "column", true);
156 if (columnfile == "not found") { columnfile = ""; }
157 else if (columnfile == "not open") { abort = true; }
158 else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); }
160 if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
161 //give priority to column, then phylip
162 columnfile = m->getColumnFile();
163 if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
165 phylipfile = m->getPhylipFile();
166 if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
168 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
172 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
175 //if the user changes the output directory command factory will send this info to us in the output parameter
176 outputDir = validParameter.validFile(parameters, "outputdir", false);
177 if (outputDir == "not found"){
179 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
182 //check for optional parameter and set defaults
183 // ...at some point should added some additional type checking...
184 temp = validParameter.validFile(parameters, "hard", false);
185 if (temp == "not found"){ hard = 0; }
186 else if(!m->isTrue(temp)) { hard = 0; }
187 else if(m->isTrue(temp)) { hard = 1; }
189 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
190 m->mothurConvert(temp, cutoff);
191 // cout << cutoff << endl;
193 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
194 m->mothurConvert(temp, precision);
195 // cout << precision << endl;
197 string label = validParameter.validFile(parameters, "label", false);
198 if (label == "not found") { label = ""; }
200 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
201 else { allLines = 1; }
204 sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + getOutputFileNameTag("sensspec");
207 catch(exception& e) {
208 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
212 //***************************************************************************************************************
214 int SensSpecCommand::execute(){
216 if (abort == true) { if (calledHelp) { return 0; } return 2; }
219 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
220 if(format == "phylip") { processPhylip(); }
221 else if(format == "column") { processColumn(); }
223 if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
225 m->mothurOutEndLine();
226 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
227 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
228 m->mothurOutEndLine();
233 catch(exception& e) {
234 m->errorOut(e, "SensSpecCommand", "execute");
239 //***************************************************************************************************************
241 int SensSpecCommand::processPhylip(){
243 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
244 string origCutoff = "";
246 if(cutoff == -1.00) { getCutoff = 1; }
247 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
249 map<string, int> seqMap;
252 InputData input(listFile, "list");
253 ListVector* list = input.getListVector();
254 string lastLabel = list->getLabel();
256 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
257 set<string> processedLabels;
258 set<string> userLabels = labels;
261 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
263 if(m->control_pressed){
264 for (int i = 0; i < outputNames.size(); i++){ m->mothurRemove(outputNames[i]); } delete list; return 0;
267 if(allLines == 1 || labels.count(list->getLabel()) == 1){
269 processedLabels.insert(list->getLabel());
270 userLabels.erase(list->getLabel());
273 fillSeqMap(seqMap, list);
274 process(seqMap, list->getLabel(), getCutoff, origCutoff);
277 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
278 string saveLabel = list->getLabel();
281 list = input.getListVector(lastLabel);
283 processedLabels.insert(list->getLabel());
284 userLabels.erase(list->getLabel());
287 fillSeqMap(seqMap, list);
288 process(seqMap, list->getLabel(), getCutoff, origCutoff);
290 //restore real lastlabel to save below
291 list->setLabel(saveLabel);
294 lastLabel = list->getLabel();
297 list = input.getListVector();
301 //output error messages about any remaining user labels
302 set<string>::iterator it;
303 bool needToRun = false;
304 for (it = userLabels.begin(); it != userLabels.end(); it++) {
305 m->mothurOut("Your file does not include the label " + *it);
306 if (processedLabels.count(lastLabel) != 1) {
307 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
310 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
314 //run last label if you need to
315 if (needToRun == true) {
316 if (list != NULL) { delete list; }
317 list = input.getListVector(lastLabel);
320 fillSeqMap(seqMap, list);
321 process(seqMap, list->getLabel(), getCutoff, origCutoff);
328 catch(exception& e) {
329 m->errorOut(e, "SensSpecCommand", "processPhylip");
334 //***************************************************************************************************************
336 int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
339 for(int i=0;i<list->getNumBins();i++){
341 if (m->control_pressed) { return 0; }
343 string seqList = list->get(i);
344 int seqListLength = seqList.length();
347 //parse bin by name, mapping each name to its otu number
348 for(int j=0;j<seqListLength;j++){
350 if(seqList[j] == ','){
355 seqName += seqList[j];
364 catch(exception& e) {
365 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
369 //***************************************************************************************************************
370 int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
375 for(int i=0;i<list->getNumBins();i++){
377 if (m->control_pressed) { return 0; }
379 vector<string> seqNameVector;
380 string bin = list->get(i);
381 m->splitAtComma(bin, seqNameVector);
383 numSeqs += seqNameVector.size();
385 for(int j=0;j<seqNameVector.size();j++){
386 string seqPairString = "";
387 for(int k=0;k<j;k++){
388 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
389 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
390 seqPairSet.insert(seqPairString);
397 catch(exception& e) {
398 m->errorOut(e, "SensSpecCommand", "fillSeqPairSet");
402 //***************************************************************************************************************
403 int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
406 int lNumSeqs = seqMap.size();
410 m->openInputFile(distFile, phylipFile);
411 phylipFile >> pNumSeqs;
412 if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ }
416 vector<int> otuIndices(lNumSeqs, -1);
424 if(label != "unique"){
426 convert(label, cutoff);
427 if(hard == 0){ cutoff += (0.49 / double(precision)); }
430 origCutoff = "unique";
435 m->mothurOut(label); m->mothurOutEndLine();
437 for(int i=0;i<pNumSeqs;i++){
439 if (m->control_pressed) { return 0; }
441 phylipFile >> seqName;
442 otuIndices[i] = seqMap[seqName];
444 for(int j=0;j<i;j++){
445 phylipFile >> distance;
447 if(distance <= cutoff){
448 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
449 else { falseNegatives++; }
452 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
453 else { trueNegatives++; }
459 outputStatistics(label, origCutoff);
463 catch(exception& e) {
464 m->errorOut(e, "SensSpecCommand", "process");
468 //***************************************************************************************************************
469 int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
471 int numDists = (numSeqs * (numSeqs-1) / 2);
474 m->openInputFile(distFile, columnFile);
475 string seqNameA, seqNameB, seqPairString;
480 trueNegatives = numDists;
484 if(label != "unique"){
486 convert(label, cutoff);
487 if(hard == 0){ cutoff += (0.49 / double(precision)); }
490 origCutoff = "unique";
495 m->mothurOut(label); m->mothurOutEndLine();
498 columnFile >> seqNameA >> seqNameB >> distance;
499 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
500 else { seqPairString = seqNameB + '\t' + seqNameA; }
502 set<string>::iterator it = seqPairSet.find(seqPairString);
504 if(distance <= cutoff){
505 if(it != seqPairSet.end()){
507 seqPairSet.erase(it);
514 else if(it != seqPairSet.end()){
517 seqPairSet.erase(it);
520 m->gobble(columnFile);
522 falsePositives += seqPairSet.size();
524 outputStatistics(label, origCutoff);
529 catch(exception& e) {
530 m->errorOut(e, "SensSpecCommand", "process");
534 //***************************************************************************************************************
536 int SensSpecCommand::processColumn(){
538 string origCutoff = "";
540 if(cutoff == -1.00) { getCutoff = 1; }
541 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
543 set<string> seqPairSet;
546 InputData input(listFile, "list");
547 ListVector* list = input.getListVector();
548 string lastLabel = list->getLabel();
550 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
551 set<string> processedLabels;
552 set<string> userLabels = labels;
555 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
557 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; }
559 if(allLines == 1 || labels.count(list->getLabel()) == 1){
561 processedLabels.insert(list->getLabel());
562 userLabels.erase(list->getLabel());
565 numSeqs = fillSeqPairSet(seqPairSet, list);
566 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
569 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
570 string saveLabel = list->getLabel();
573 list = input.getListVector(lastLabel);
575 processedLabels.insert(list->getLabel());
576 userLabels.erase(list->getLabel());
579 numSeqs = fillSeqPairSet(seqPairSet, list);
580 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
582 //restore real lastlabel to save below
583 list->setLabel(saveLabel);
586 lastLabel = list->getLabel();
589 list = input.getListVector();
593 //output error messages about any remaining user labels
594 set<string>::iterator it;
595 bool needToRun = false;
596 for (it = userLabels.begin(); it != userLabels.end(); it++) {
597 m->mothurOut("Your file does not include the label " + *it);
598 if (processedLabels.count(lastLabel) != 1) {
599 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
602 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
606 //run last label if you need to
607 if (needToRun == true) {
608 if (list != NULL) { delete list; }
609 list = input.getListVector(lastLabel);
612 numSeqs = fillSeqPairSet(seqPairSet, list);
614 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
619 catch(exception& e) {
620 m->errorOut(e, "SensSpecCommand", "processColumn");
625 //***************************************************************************************************************
627 void SensSpecCommand::setUpOutput(){
629 ofstream sensSpecFile;
630 m->openOutputFile(sensSpecFileName, sensSpecFile);
632 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
634 sensSpecFile.close();
636 catch(exception& e) {
637 m->errorOut(e, "SensSpecCommand", "setUpOutput");
642 //***************************************************************************************************************
644 void SensSpecCommand::outputStatistics(string label, string cutoff){
646 double tp = (double) truePositives;
647 double fp = (double) falsePositives;
648 double tn = (double) trueNegatives;
649 double fn = (double) falseNegatives;
653 double pPrime = tp + fp;
654 double nPrime = tn + fn;
656 double sensitivity = tp / p;
657 double specificity = tn / n;
658 double positivePredictiveValue = tp / pPrime;
659 double negativePredictiveValue = tn / nPrime;
660 double falseDiscoveryRate = fp / pPrime;
662 double accuracy = (tp + tn) / (p + n);
663 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
664 double f1Score = 2.0 * tp / (p + pPrime);
667 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
668 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
669 if(p + n == 0) { accuracy = 0; }
670 if(p + pPrime == 0) { f1Score = 0; }
671 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
672 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
674 ofstream sensSpecFile;
675 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
677 sensSpecFile << label << '\t' << cutoff << '\t';
678 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
679 sensSpecFile << setprecision(4);
680 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
681 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
683 sensSpecFile.close();
685 catch(exception& e) {
686 m->errorOut(e, "SensSpecCommand", "outputStatistics");
691 //***************************************************************************************************************