5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::setParameters(){
15 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
17 //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
18 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "SensSpecCommand", "setParameters");
35 //**********************************************************************************************************************
36 string SensSpecCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The sens.spec command....\n";
43 m->errorOut(e, "SensSpecCommand", "getHelpString");
47 //**********************************************************************************************************************
48 string SensSpecCommand::getOutputFileNameTag(string type, string inputName=""){
50 string outputFileName = "";
51 map<string, vector<string> >::iterator it;
53 //is this a type this command creates
54 it = outputTypes.find(type);
55 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
57 if (type == "sensspec") { outputFileName = "sensspec"; }
58 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
60 return outputFileName;
63 m->errorOut(e, "SensSpecCommand", "getOutputFileNameTag");
67 //**********************************************************************************************************************
68 SensSpecCommand::SensSpecCommand(){
70 abort = true; calledHelp = true;
72 vector<string> tempOutNames;
73 outputTypes["sensspec"] = tempOutNames;
76 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
80 //***************************************************************************************************************
82 SensSpecCommand::SensSpecCommand(string option) {
85 abort = false; calledHelp = false;
88 //allow user to run help
89 if(option == "help") { help(); abort = true; calledHelp = true; }
90 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
95 vector<string> myArray = setParameters();
97 OptionParser parser(option);
98 map<string,string> parameters = parser.getParameters();
100 ValidParameters validParameter;
101 map<string,string>::iterator it;
103 //check to make sure all parameters are valid for command
104 for (it = parameters.begin(); it != parameters.end(); it++) {
105 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
108 //initialize outputTypes
109 vector<string> tempOutNames;
110 outputTypes["sensspec"] = tempOutNames;
112 //if the user changes the input directory command factory will send this info to us in the output parameter
113 string inputDir = validParameter.validFile(parameters, "inputdir", false);
114 if (inputDir == "not found"){ inputDir = ""; }
117 it = parameters.find("list");
118 //user has given a template file
119 if(it != parameters.end()){
120 path = m->hasPath(it->second);
121 //if the user has not given a path then, add inputdir. else leave path alone.
122 if (path == "") { parameters["list"] = inputDir + it->second; }
125 it = parameters.find("phylip");
126 //user has given a template file
127 if(it != parameters.end()){
128 path = m->hasPath(it->second);
129 //if the user has not given a path then, add inputdir. else leave path alone.
130 if (path == "") { parameters["phylip"] = inputDir + it->second; }
133 it = parameters.find("column");
134 //user has given a template file
135 if(it != parameters.end()){
136 path = m->hasPath(it->second);
137 //if the user has not given a path then, add inputdir. else leave path alone.
138 if (path == "") { parameters["column"] = inputDir + it->second; }
141 //it = parameters.find("name");
142 //user has given a template file
143 //if(it != parameters.end()){
144 //path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 //if (path == "") { parameters["name"] = inputDir + it->second; }
150 //check for required parameters
151 listFile = validParameter.validFile(parameters, "list", true);
152 if (listFile == "not found") {
153 listFile = m->getListFile();
154 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
155 else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
157 else if (listFile == "not open") { abort = true; }
158 else { m->setListFile(listFile); }
160 phylipfile = validParameter.validFile(parameters, "phylip", true);
161 if (phylipfile == "not found") { phylipfile = ""; }
162 else if (phylipfile == "not open") { abort = true; }
163 else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
165 columnfile = validParameter.validFile(parameters, "column", true);
166 if (columnfile == "not found") { columnfile = ""; }
167 else if (columnfile == "not open") { abort = true; }
168 else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); }
170 if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
171 //give priority to column, then phylip
172 columnfile = m->getColumnFile();
173 if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
175 phylipfile = m->getPhylipFile();
176 if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
178 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
182 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
185 //if the user changes the output directory command factory will send this info to us in the output parameter
186 outputDir = validParameter.validFile(parameters, "outputdir", false);
187 if (outputDir == "not found"){
189 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
192 //check for optional parameter and set defaults
193 // ...at some point should added some additional type checking...
194 temp = validParameter.validFile(parameters, "hard", false);
195 if (temp == "not found"){ hard = 0; }
196 else if(!m->isTrue(temp)) { hard = 0; }
197 else if(m->isTrue(temp)) { hard = 1; }
199 // temp = validParameter.validFile(parameters, "name", true);
200 // if (temp == "not found") { nameFile = ""; }
201 // else if(temp == "not open") { abort = true; }
202 // else { nameFile = temp; }
203 // cout << "name:\t" << nameFile << endl;
205 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
206 m->mothurConvert(temp, cutoff);
207 // cout << cutoff << endl;
209 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
210 m->mothurConvert(temp, precision);
211 // cout << precision << endl;
213 string label = validParameter.validFile(parameters, "label", false);
214 if (label == "not found") { label = ""; }
216 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
217 else { allLines = 1; }
220 sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + getOutputFileNameTag("sensspec");
223 catch(exception& e) {
224 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
228 //***************************************************************************************************************
230 int SensSpecCommand::execute(){
232 if (abort == true) { if (calledHelp) { return 0; } return 2; }
235 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
236 if(format == "phylip") { processPhylip(); }
237 else if(format == "column") { processColumn(); }
239 if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
241 m->mothurOutEndLine();
242 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
243 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
244 m->mothurOutEndLine();
249 catch(exception& e) {
250 m->errorOut(e, "SensSpecCommand", "execute");
255 //***************************************************************************************************************
257 int SensSpecCommand::processPhylip(){
259 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
260 string origCutoff = "";
262 if(cutoff == -1.00) { getCutoff = 1; }
263 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
265 map<string, int> seqMap;
268 InputData input(listFile, "list");
269 ListVector* list = input.getListVector();
270 string lastLabel = list->getLabel();
272 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
273 set<string> processedLabels;
274 set<string> userLabels = labels;
277 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
279 if(m->control_pressed){
280 for (int i = 0; i < outputNames.size(); i++){ m->mothurRemove(outputNames[i]); } delete list; return 0;
283 if(allLines == 1 || labels.count(list->getLabel()) == 1){
285 processedLabels.insert(list->getLabel());
286 userLabels.erase(list->getLabel());
289 fillSeqMap(seqMap, list);
290 process(seqMap, list->getLabel(), getCutoff, origCutoff);
293 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
294 string saveLabel = list->getLabel();
297 list = input.getListVector(lastLabel);
299 processedLabels.insert(list->getLabel());
300 userLabels.erase(list->getLabel());
303 fillSeqMap(seqMap, list);
304 process(seqMap, list->getLabel(), getCutoff, origCutoff);
306 //restore real lastlabel to save below
307 list->setLabel(saveLabel);
310 lastLabel = list->getLabel();
313 list = input.getListVector();
317 //output error messages about any remaining user labels
318 set<string>::iterator it;
319 bool needToRun = false;
320 for (it = userLabels.begin(); it != userLabels.end(); it++) {
321 m->mothurOut("Your file does not include the label " + *it);
322 if (processedLabels.count(lastLabel) != 1) {
323 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
326 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
330 //run last label if you need to
331 if (needToRun == true) {
332 if (list != NULL) { delete list; }
333 list = input.getListVector(lastLabel);
336 fillSeqMap(seqMap, list);
337 process(seqMap, list->getLabel(), getCutoff, origCutoff);
344 catch(exception& e) {
345 m->errorOut(e, "SensSpecCommand", "processPhylip");
350 //***************************************************************************************************************
352 int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
355 for(int i=0;i<list->getNumBins();i++){
357 if (m->control_pressed) { return 0; }
359 string seqList = list->get(i);
360 int seqListLength = seqList.length();
363 //parse bin by name, mapping each name to its otu number
364 for(int j=0;j<seqListLength;j++){
366 if(seqList[j] == ','){
371 seqName += seqList[j];
380 catch(exception& e) {
381 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
385 //***************************************************************************************************************
386 int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
391 for(int i=0;i<list->getNumBins();i++){
393 if (m->control_pressed) { return 0; }
395 vector<string> seqNameVector;
396 string bin = list->get(i);
397 m->splitAtComma(bin, seqNameVector);
399 numSeqs += seqNameVector.size();
401 for(int j=0;j<seqNameVector.size();j++){
402 string seqPairString = "";
403 for(int k=0;k<j;k++){
404 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
405 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
406 seqPairSet.insert(seqPairString);
413 catch(exception& e) {
414 m->errorOut(e, "SensSpecCommand", "fillSeqPairSet");
418 //***************************************************************************************************************
419 int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
422 int lNumSeqs = seqMap.size();
426 m->openInputFile(distFile, phylipFile);
427 phylipFile >> pNumSeqs;
428 if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ }
432 vector<int> otuIndices(lNumSeqs, -1);
440 if(label != "unique"){
442 convert(label, cutoff);
443 if(hard == 0){ cutoff += (0.49 / double(precision)); }
446 origCutoff = "unique";
451 m->mothurOut(label); m->mothurOutEndLine();
453 for(int i=0;i<pNumSeqs;i++){
455 if (m->control_pressed) { return 0; }
457 phylipFile >> seqName;
458 otuIndices[i] = seqMap[seqName];
460 for(int j=0;j<i;j++){
461 phylipFile >> distance;
463 if(distance <= cutoff){
464 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
465 else { falseNegatives++; }
468 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
469 else { trueNegatives++; }
475 outputStatistics(label, origCutoff);
479 catch(exception& e) {
480 m->errorOut(e, "SensSpecCommand", "process");
484 //***************************************************************************************************************
485 int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
487 int numDists = (numSeqs * (numSeqs-1) / 2);
490 m->openInputFile(distFile, columnFile);
491 string seqNameA, seqNameB, seqPairString;
496 trueNegatives = numDists;
500 if(label != "unique"){
502 convert(label, cutoff);
503 if(hard == 0){ cutoff += (0.49 / double(precision)); }
506 origCutoff = "unique";
511 m->mothurOut(label); m->mothurOutEndLine();
514 columnFile >> seqNameA >> seqNameB >> distance;
515 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
516 else { seqPairString = seqNameB + '\t' + seqNameA; }
518 set<string>::iterator it = seqPairSet.find(seqPairString);
520 if(distance <= cutoff){
521 if(it != seqPairSet.end()){
523 seqPairSet.erase(it);
530 else if(it != seqPairSet.end()){
533 seqPairSet.erase(it);
536 m->gobble(columnFile);
538 falsePositives += seqPairSet.size();
540 outputStatistics(label, origCutoff);
545 catch(exception& e) {
546 m->errorOut(e, "SensSpecCommand", "process");
550 //***************************************************************************************************************
552 int SensSpecCommand::processColumn(){
554 string origCutoff = "";
556 if(cutoff == -1.00) { getCutoff = 1; }
557 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
559 set<string> seqPairSet;
562 InputData input(listFile, "list");
563 ListVector* list = input.getListVector();
564 string lastLabel = list->getLabel();
566 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
567 set<string> processedLabels;
568 set<string> userLabels = labels;
571 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
573 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; }
575 if(allLines == 1 || labels.count(list->getLabel()) == 1){
577 processedLabels.insert(list->getLabel());
578 userLabels.erase(list->getLabel());
581 numSeqs = fillSeqPairSet(seqPairSet, list);
582 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
585 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
586 string saveLabel = list->getLabel();
589 list = input.getListVector(lastLabel);
591 processedLabels.insert(list->getLabel());
592 userLabels.erase(list->getLabel());
595 numSeqs = fillSeqPairSet(seqPairSet, list);
596 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
598 //restore real lastlabel to save below
599 list->setLabel(saveLabel);
602 lastLabel = list->getLabel();
605 list = input.getListVector();
609 //output error messages about any remaining user labels
610 set<string>::iterator it;
611 bool needToRun = false;
612 for (it = userLabels.begin(); it != userLabels.end(); it++) {
613 m->mothurOut("Your file does not include the label " + *it);
614 if (processedLabels.count(lastLabel) != 1) {
615 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
618 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
622 //run last label if you need to
623 if (needToRun == true) {
624 if (list != NULL) { delete list; }
625 list = input.getListVector(lastLabel);
628 numSeqs = fillSeqPairSet(seqPairSet, list);
630 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
635 catch(exception& e) {
636 m->errorOut(e, "SensSpecCommand", "processColumn");
641 //***************************************************************************************************************
643 void SensSpecCommand::setUpOutput(){
645 ofstream sensSpecFile;
646 m->openOutputFile(sensSpecFileName, sensSpecFile);
648 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
650 sensSpecFile.close();
652 catch(exception& e) {
653 m->errorOut(e, "SensSpecCommand", "setUpOutput");
658 //***************************************************************************************************************
660 void SensSpecCommand::outputStatistics(string label, string cutoff){
662 double tp = (double) truePositives;
663 double fp = (double) falsePositives;
664 double tn = (double) trueNegatives;
665 double fn = (double) falseNegatives;
669 double pPrime = tp + fp;
670 double nPrime = tn + fn;
672 double sensitivity = tp / p;
673 double specificity = tn / n;
674 double positivePredictiveValue = tp / pPrime;
675 double negativePredictiveValue = tn / nPrime;
676 double falseDiscoveryRate = fp / pPrime;
678 double accuracy = (tp + tn) / (p + n);
679 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
680 double f1Score = 2.0 * tp / (p + pPrime);
683 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
684 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
685 if(p + n == 0) { accuracy = 0; }
686 if(p + pPrime == 0) { f1Score = 0; }
687 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
688 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
690 ofstream sensSpecFile;
691 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
693 sensSpecFile << label << '\t' << cutoff << '\t';
694 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
695 sensSpecFile << setprecision(4);
696 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
697 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
699 sensSpecFile.close();
701 catch(exception& e) {
702 m->errorOut(e, "SensSpecCommand", "outputStatistics");
707 //***************************************************************************************************************