5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::setParameters(){
15 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist);
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip);
17 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn);
18 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
19 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff);
20 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
21 CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
30 m->errorOut(e, "SensSpecCommand", "setParameters");
34 //**********************************************************************************************************************
35 string SensSpecCommand::getHelpString(){
37 string helpString = "";
38 helpString += "The sens.spec command....\n";
42 m->errorOut(e, "SensSpecCommand", "getHelpString");
46 //**********************************************************************************************************************
47 string SensSpecCommand::getOutputPattern(string type) {
51 if (type == "sensspec") { pattern = "[filename],sensspec"; }
52 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
57 m->errorOut(e, "SensSpecCommand", "getOutputPattern");
61 //**********************************************************************************************************************
// Default constructor. Sets abort and calledHelp to true, so execute() on an
// object built this way returns 0 without doing any work, and creates the
// "sensspec" output type with an empty file list.
62 SensSpecCommand::SensSpecCommand(){
64 abort = true; calledHelp = true;
// no output files exist yet: the "sensspec" key is created holding an empty vector
66 vector<string> tempOutNames;
67 outputTypes["sensspec"] = tempOutNames;
// any exception raised above is reported through the shared error handler
70 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
74 //***************************************************************************************************************
// Builds a sens.spec command from the user's option string.
// Handles the "help" and "citation" shortcuts, validates the parameter names,
// resolves the list file and the distance file (phylip or column), and reads
// the hard, cutoff, precision and label options. Every problem found sets
// abort, which makes execute() return without processing anything.
76 SensSpecCommand::SensSpecCommand(string option) {
79 abort = false; calledHelp = false;
82 //allow user to run help
83 if(option == "help") { help(); abort = true; calledHelp = true; }
84 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// names of the parameters this command accepts, used below to validate the user's input
89 vector<string> myArray = setParameters();
91 OptionParser parser(option);
// note: this local map hides the member named parameters (the one setParameters() fills) for the rest of the constructor
92 map<string,string> parameters = parser.getParameters();
94 ValidParameters validParameter;
95 map<string,string>::iterator it;
97 //check to make sure all parameters are valid for command
98 for (it = parameters.begin(); it != parameters.end(); it++) {
99 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
102 //initialize outputTypes
103 vector<string> tempOutNames;
104 outputTypes["sensspec"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
// validFile() answers "not found" when the parameter was not supplied
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("list");
112 //user has given a list file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["list"] = inputDir + it->second; }
119 it = parameters.find("phylip");
120 //user has given a phylip distance file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["phylip"] = inputDir + it->second; }
127 it = parameters.find("column");
128 //user has given a column distance file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["column"] = inputDir + it->second; }
135 //check for required parameters
// list is required: fall back to the session's current list file, otherwise abort
136 listFile = validParameter.validFile(parameters, "list", true);
137 if (listFile == "not found") {
138 listFile = m->getListFile();
139 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
140 else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
// "not open" is the other sentinel validFile() can return; it aborts the command
142 else if (listFile == "not open") { abort = true; }
143 else { m->setListFile(listFile); }
// a supplied phylip file becomes the distance file and fixes the format
145 phylipfile = validParameter.validFile(parameters, "phylip", true);
146 if (phylipfile == "not found") { phylipfile = ""; }
147 else if (phylipfile == "not open") { abort = true; }
148 else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
// same handling for a supplied column file
150 columnfile = validParameter.validFile(parameters, "column", true);
151 if (columnfile == "not found") { columnfile = ""; }
152 else if (columnfile == "not open") { abort = true; }
153 else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); }
155 if ((phylipfile == "") && (columnfile == "")) { //is there a current file available for either of these?
156 //give priority to column, then phylip
157 columnfile = m->getColumnFile();
158 if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
160 phylipfile = m->getPhylipFile();
161 if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
163 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
// supplying both distance files is rejected
167 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
170 //if the user changes the output directory command factory will send this info to us in the output parameter
171 outputDir = validParameter.validFile(parameters, "outputdir", false);
172 if (outputDir == "not found"){
174 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
177 //check for optional parameter and set defaults
178 // ...at some point we should add some additional type checking...
179 temp = validParameter.validFile(parameters, "hard", false);
// NOTE(review): an absent "hard" falls back to 0 here, while setParameters() passes "T" in the
// argument position where cutoff and precision carry the fallbacks used below — confirm which is intended
180 if (temp == "not found"){ hard = 0; }
181 else if(!m->isTrue(temp)) { hard = 0; }
182 else if(m->isTrue(temp)) { hard = 1; }
// -1.00 is the value processPhylip()/processColumn() test for to decide that the cutoff comes from each label
184 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
185 m->mothurConvert(temp, cutoff);
186 // cout << cutoff << endl;
188 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
189 m->mothurConvert(temp, precision);
190 // cout << precision << endl;
// label is either "all" (allLines = 1) or a dash-separated list that is split into labels
192 string label = validParameter.validFile(parameters, "label", false);
193 if (label == "not found") { label = ""; }
195 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
196 else { allLines = 1; }
// output file name: output directory + root of the list file name, expanded through the "sensspec" pattern
199 map<string, string> variables;
200 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listFile));
201 sensSpecFileName = getOutputFileName("sensspec",variables);
204 catch(exception& e) {
205 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
209 //***************************************************************************************************************
211 int SensSpecCommand::execute(){
213 if (abort == true) { if (calledHelp) { return 0; } return 2; }
215 int startTime = time(NULL);
217 //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file.
218 string newListFile = preProcessList();
219 if (newListFile != "") { listFile = newListFile; }
222 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
223 if(format == "phylip") { processPhylip(); }
224 else if(format == "column") { processColumn(); }
226 //remove temp file if created
227 if (newListFile != "") { m->mothurRemove(newListFile); }
229 if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
231 m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine();
233 m->mothurOutEndLine();
234 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
235 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
236 m->mothurOutEndLine();
241 catch(exception& e) {
242 m->errorOut(e, "SensSpecCommand", "execute");
247 //***************************************************************************************************************
249 int SensSpecCommand::processPhylip(){
251 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
252 string origCutoff = "";
254 if(cutoff == -1.00) { getCutoff = 1; }
255 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
257 map<string, int> seqMap;
260 InputData input(listFile, "list");
261 ListVector* list = input.getListVector();
262 string lastLabel = list->getLabel();
264 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
265 set<string> processedLabels;
266 set<string> userLabels = labels;
269 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
271 if(m->control_pressed){
272 for (int i = 0; i < outputNames.size(); i++){ m->mothurRemove(outputNames[i]); } delete list; return 0;
275 if(allLines == 1 || labels.count(list->getLabel()) == 1){
277 processedLabels.insert(list->getLabel());
278 userLabels.erase(list->getLabel());
281 fillSeqMap(seqMap, list);
282 process(seqMap, list->getLabel(), getCutoff, origCutoff);
285 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
286 string saveLabel = list->getLabel();
289 list = input.getListVector(lastLabel);
291 processedLabels.insert(list->getLabel());
292 userLabels.erase(list->getLabel());
295 fillSeqMap(seqMap, list);
296 process(seqMap, list->getLabel(), getCutoff, origCutoff);
298 //restore real lastlabel to save below
299 list->setLabel(saveLabel);
302 lastLabel = list->getLabel();
305 list = input.getListVector();
309 //output error messages about any remaining user labels
310 set<string>::iterator it;
311 bool needToRun = false;
312 for (it = userLabels.begin(); it != userLabels.end(); it++) {
313 m->mothurOut("Your file does not include the label " + *it);
314 if (processedLabels.count(lastLabel) != 1) {
315 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
318 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
322 //run last label if you need to
323 if (needToRun == true) {
324 if (list != NULL) { delete list; }
325 list = input.getListVector(lastLabel);
328 fillSeqMap(seqMap, list);
329 process(seqMap, list->getLabel(), getCutoff, origCutoff);
336 catch(exception& e) {
337 m->errorOut(e, "SensSpecCommand", "processPhylip");
342 //***************************************************************************************************************
344 int SensSpecCommand::fillSeqMap(map<string, int>& seqMap, ListVector*& list){
347 for(int i=0;i<list->getNumBins();i++){
349 if (m->control_pressed) { return 0; }
351 string seqList = list->get(i);
352 int seqListLength = seqList.length();
355 //parse bin by name, mapping each name to its otu number
356 for(int j=0;j<seqListLength;j++){
358 if(seqList[j] == ','){
363 seqName += seqList[j];
372 catch(exception& e) {
373 m->errorOut(e, "SensSpecCommand", "fillSeqMap");
377 //***************************************************************************************************************
378 int SensSpecCommand::fillSeqPairSet(set<string>& seqPairSet, ListVector*& list){
383 for(int i=0;i<list->getNumBins();i++){
385 if (m->control_pressed) { return 0; }
387 vector<string> seqNameVector;
388 string bin = list->get(i);
389 m->splitAtComma(bin, seqNameVector);
391 numSeqs += seqNameVector.size();
393 for(int j=0;j<seqNameVector.size();j++){
394 string seqPairString = "";
395 for(int k=0;k<j;k++){
396 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
397 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
398 seqPairSet.insert(seqPairString);
405 catch(exception& e) {
406 m->errorOut(e, "SensSpecCommand", "fillSeqPairSet");
410 //***************************************************************************************************************
411 int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCutoff, string& origCutoff){
414 int lNumSeqs = seqMap.size();
418 m->openInputFile(distFile, phylipFile);
419 phylipFile >> pNumSeqs;
420 if(pNumSeqs != lNumSeqs){ m->mothurOut("numSeq mismatch!\n"); /*m->control_pressed = true;*/ }
424 vector<int> otuIndices(lNumSeqs, -1);
432 if(label != "unique"){
434 convert(label, cutoff);
435 if(hard == 0){ cutoff += (0.49 / double(precision)); }
438 origCutoff = "unique";
443 m->mothurOut(label); m->mothurOutEndLine();
445 for(int i=0;i<pNumSeqs;i++){
447 if (m->control_pressed) { return 0; }
449 phylipFile >> seqName;
450 otuIndices[i] = seqMap[seqName];
452 for(int j=0;j<i;j++){
453 phylipFile >> distance;
455 if(distance <= cutoff){
456 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
457 else { falseNegatives++; }
460 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
461 else { trueNegatives++; }
467 outputStatistics(label, origCutoff);
471 catch(exception& e) {
472 m->errorOut(e, "SensSpecCommand", "process");
476 //***************************************************************************************************************
477 int SensSpecCommand::process(set<string>& seqPairSet, string label, bool& getCutoff, string& origCutoff, int numSeqs){
479 int numDists = (numSeqs * (numSeqs-1) / 2);
482 m->openInputFile(distFile, columnFile);
483 string seqNameA, seqNameB, seqPairString;
488 trueNegatives = numDists;
492 if(label != "unique"){
494 convert(label, cutoff);
495 if(hard == 0){ cutoff += (0.49 / double(precision)); }
498 origCutoff = "unique";
503 m->mothurOut(label); m->mothurOutEndLine();
506 columnFile >> seqNameA >> seqNameB >> distance;
507 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
508 else { seqPairString = seqNameB + '\t' + seqNameA; }
510 set<string>::iterator it = seqPairSet.find(seqPairString);
512 if(distance <= cutoff){
513 if(it != seqPairSet.end()){
515 seqPairSet.erase(it);
522 else if(it != seqPairSet.end()){
525 seqPairSet.erase(it);
528 m->gobble(columnFile);
530 falsePositives += seqPairSet.size();
532 outputStatistics(label, origCutoff);
537 catch(exception& e) {
538 m->errorOut(e, "SensSpecCommand", "process");
542 //***************************************************************************************************************
544 int SensSpecCommand::processColumn(){
546 string origCutoff = "";
548 if(cutoff == -1.00) { getCutoff = 1; }
549 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
551 set<string> seqPairSet;
554 InputData input(listFile, "list");
555 ListVector* list = input.getListVector();
556 string lastLabel = list->getLabel();
558 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
559 set<string> processedLabels;
560 set<string> userLabels = labels;
563 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
565 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; }
567 if(allLines == 1 || labels.count(list->getLabel()) == 1){
569 processedLabels.insert(list->getLabel());
570 userLabels.erase(list->getLabel());
573 numSeqs = fillSeqPairSet(seqPairSet, list);
574 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
577 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
578 string saveLabel = list->getLabel();
581 list = input.getListVector(lastLabel);
583 processedLabels.insert(list->getLabel());
584 userLabels.erase(list->getLabel());
587 numSeqs = fillSeqPairSet(seqPairSet, list);
588 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
590 //restore real lastlabel to save below
591 list->setLabel(saveLabel);
594 lastLabel = list->getLabel();
597 list = input.getListVector();
601 //output error messages about any remaining user labels
602 set<string>::iterator it;
603 bool needToRun = false;
604 for (it = userLabels.begin(); it != userLabels.end(); it++) {
605 m->mothurOut("Your file does not include the label " + *it);
606 if (processedLabels.count(lastLabel) != 1) {
607 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
610 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
614 //run last label if you need to
615 if (needToRun == true) {
616 if (list != NULL) { delete list; }
617 list = input.getListVector(lastLabel);
620 numSeqs = fillSeqPairSet(seqPairSet, list);
622 process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
627 catch(exception& e) {
628 m->errorOut(e, "SensSpecCommand", "processColumn");
633 //***************************************************************************************************************
635 void SensSpecCommand::setUpOutput(){
637 ofstream sensSpecFile;
638 m->openOutputFile(sensSpecFileName, sensSpecFile);
640 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
642 sensSpecFile.close();
644 catch(exception& e) {
645 m->errorOut(e, "SensSpecCommand", "setUpOutput");
650 //***************************************************************************************************************
652 void SensSpecCommand::outputStatistics(string label, string cutoff){
654 double tp = (double) truePositives;
655 double fp = (double) falsePositives;
656 double tn = (double) trueNegatives;
657 double fn = (double) falseNegatives;
661 double pPrime = tp + fp;
662 double nPrime = tn + fn;
664 double sensitivity = tp / p;
665 double specificity = tn / n;
666 double positivePredictiveValue = tp / pPrime;
667 double negativePredictiveValue = tn / nPrime;
668 double falseDiscoveryRate = fp / pPrime;
670 double accuracy = (tp + tn) / (p + n);
671 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
672 double f1Score = 2.0 * tp / (p + pPrime);
675 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
676 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
677 if(p + n == 0) { accuracy = 0; }
678 if(p + pPrime == 0) { f1Score = 0; }
679 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
680 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
682 ofstream sensSpecFile;
683 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
685 sensSpecFile << label << '\t' << cutoff << '\t';
686 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
687 sensSpecFile << setprecision(4);
688 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
689 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
691 sensSpecFile.close();
693 catch(exception& e) {
694 m->errorOut(e, "SensSpecCommand", "outputStatistics");
698 //***************************************************************************************************************
700 string SensSpecCommand::preProcessList(){
702 set<string> uniqueNames;
703 //get unique names from distance file
704 if (format == "phylip") {
707 m->openInputFile(distFile, phylipFile);
710 phylipFile >> numTest;
712 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
714 m->mothurConvert(numTest, pNumSeqs);
716 phylipFile >> pNumSeqs; m->gobble(phylipFile);
721 for(int i=0;i<pNumSeqs;i++){
723 if (m->control_pressed) { return ""; }
725 phylipFile >> seqName;
726 uniqueNames.insert(seqName);
728 for(int j=0;j<i;j++){
729 phylipFile >> distance;
731 m->gobble(phylipFile);
736 m->openInputFile(distFile, columnFile);
737 string seqNameA, seqNameB;
741 if (m->control_pressed) { return ""; }
742 columnFile >> seqNameA >> seqNameB >> distance;
743 uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB);
744 m->gobble(columnFile);
749 //read list file, if numSeqs > unique names then remove redundant names
750 string newListFile = listFile + ".temp";
752 m->openOutputFile(newListFile, out);
754 m->openInputFile(listFile, in);
756 bool wroteSomething = false;
760 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; }
762 //read in list vector
765 //listfile is already unique
766 if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; }
768 //make a new list vector
770 newList.setLabel(list.getLabel());
773 for (int i = 0; i < list.getNumBins(); i++) {
775 //parse out names that are in accnos file
776 string binnames = list.get(i);
777 vector<string> bnames;
778 m->splitAtComma(binnames, bnames);
780 string newNames = "";
781 for (int i = 0; i < bnames.size(); i++) {
782 string name = bnames[i];
783 //if that name is in the .accnos file, add it
784 if (uniqueNames.count(name) != 0) { newNames += name + ","; }
787 //if there are names in this bin add to new list
788 if (newNames != "") {
789 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
790 newList.push_back(newNames);
794 //print new listvector
795 if (newList.getNumBins() != 0) {
796 wroteSomething = true;
805 if (wroteSomething) { return newListFile; }
808 catch(exception& e) {
809 m->errorOut(e, "SensSpecCommand", "preProcessList");
815 //***************************************************************************************************************