5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
// Declares the options understood by this command and gathers their names.
// Each CommandParameter is constructed and appended to `parameters`; afterwards every
// parameters[i].name is copied into myArray.
// NOTE(review): the embedded line numbers skip, so this listing appears to omit some original
// lines (for example the try opener, the return statement and the closing braces).
13 vector<string> SensSpecCommand::setParameters(){
// "list" is the only entry whose final constructor argument is true - presumably the
// "required" flag; TODO confirm against the CommandParameter constructor.
15 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
// "phylip" and "column" carry the same "PhylipColumn" group strings; the option constructor
// rejects a command line that supplies both of them.
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
17 //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
18 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
// The fourth argument of the next three entries ("-1.00", "100", "F") matches the fallback
// values the option constructor applies when cutoff, precision or hard is not supplied.
20 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect just the parameter names; the option constructor passes this list to
// ValidParameters::isValidParameter.
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error path: the exception `e` is reported through m->errorOut with this class and method name.
31 m->errorOut(e, "SensSpecCommand", "setParameters");
35 //**********************************************************************************************************************
// Builds the help text for the sens.spec command.
// The visible text is a single placeholder sentence; no parameter descriptions are appended
// in the lines shown here.
// NOTE(review): the return statement and try/catch lines are not part of this listing.
36 string SensSpecCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The sens.spec command....\n";
// Error path: the exception `e` is reported through m->errorOut with this class and method name.
43 m->errorOut(e, "SensSpecCommand", "getHelpString");
47 //**********************************************************************************************************************
// Default constructor.
// Marks the object as aborted and as a help invocation, so execute() returns 0 without
// doing any work, and registers an empty output list under the "sensspec" output type.
48 SensSpecCommand::SensSpecCommand(){
50 abort = true; calledHelp = true;
// Register the output type with no file names yet.
52 vector<string> tempOutNames;
53 outputTypes["sensspec"] = tempOutNames;
// Error path: the exception `e` is reported through m->errorOut.
56 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
60 //***************************************************************************************************************
// Option-string constructor: parses `option`, validates the parameters and fills in the
// fields that execute() and the process* methods read (listFile, distFile, format,
// outputDir, hard, cutoff, precision, lineLabel, sensSpecFileName).
// Any validation failure sets abort = true instead of throwing.
// NOTE(review): the embedded line numbers skip, so several original lines (try opener, else
// openers, closing braces, declarations of `path` and `temp`) are not visible here; the
// comments below describe only the statements that are shown.
62 SensSpecCommand::SensSpecCommand(string option) {
65 abort = false; calledHelp = false;
67 //allow user to run help
68 if(option == "help") { help(); abort = true; calledHelp = true; }
69 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts.
74 vector<string> myArray = setParameters();
// Parse the option string into name/value pairs. This local map is named `parameters`,
// the same identifier setParameters() uses for its CommandParameter container.
76 OptionParser parser(option);
77 map<string,string> parameters = parser.getParameters();
79 ValidParameters validParameter;
80 map<string,string>::iterator it;
82 //check to make sure all parameters are valid for command
83 for (it = parameters.begin(); it != parameters.end(); it++) {
84 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
87 //initialize outputTypes
88 vector<string> tempOutNames;
89 outputTypes["sensspec"] = tempOutNames;
91 //if the user changes the input directory command factory will send this info to us in the output parameter
92 string inputDir = validParameter.validFile(parameters, "inputdir", false);
93 if (inputDir == "not found"){ inputDir = ""; }
// For each input-file parameter that was supplied without a path, prefix it with inputDir.
96 it = parameters.find("list");
97 //user has given a list file
98 if(it != parameters.end()){
99 path = m->hasPath(it->second);
100 //if the user has not given a path then, add inputdir. else leave path alone.
101 if (path == "") { parameters["list"] = inputDir + it->second; }
104 it = parameters.find("phylip");
105 //user has given a phylip file
106 if(it != parameters.end()){
107 path = m->hasPath(it->second);
108 //if the user has not given a path then, add inputdir. else leave path alone.
109 if (path == "") { parameters["phylip"] = inputDir + it->second; }
112 it = parameters.find("column");
113 //user has given a column file
114 if(it != parameters.end()){
115 path = m->hasPath(it->second);
116 //if the user has not given a path then, add inputdir. else leave path alone.
117 if (path == "") { parameters["column"] = inputDir + it->second; }
120 //it = parameters.find("name");
121 //user has given a template file
122 //if(it != parameters.end()){
123 //path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 //if (path == "") { parameters["name"] = inputDir + it->second; }
129 //check for required parameters
// list: when not supplied, fall back to the current list file held by `m`; abort when
// there is none or when validFile reports "not open".
130 listFile = validParameter.validFile(parameters, "list", true);
131 if (listFile == "not found") {
132 listFile = m->getListFile();
133 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
134 else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
136 else if (listFile == "not open") { abort = true; }
137 else { m->setListFile(listFile); }
// phylip / column: whichever is supplied becomes distFile and fixes `format`, which
// execute() uses to choose between processPhylip() and processColumn().
139 phylipfile = validParameter.validFile(parameters, "phylip", true);
140 if (phylipfile == "not found") { phylipfile = ""; }
141 else if (phylipfile == "not open") { abort = true; }
142 else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
144 columnfile = validParameter.validFile(parameters, "column", true);
145 if (columnfile == "not found") { columnfile = ""; }
146 else if (columnfile == "not open") { abort = true; }
147 else { distFile = columnfile; format = "column"; m->setColumnFile(columnfile); }
// Neither distance file supplied: try the current column file, then the current phylip file.
// NOTE(review): the else openers between the next statements are not visible in this
// listing; presumably the phylip lookup and the error message are nested fallbacks - confirm.
149 if ((phylipfile == "") && (columnfile == "")) { //is there a current file available for either of these?
150 //give priority to column, then phylip
151 columnfile = m->getColumnFile();
152 if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
154 phylipfile = m->getPhylipFile();
155 if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
157 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
// Both distance files supplied: report the conflict and abort.
161 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
164 //if the user changes the output directory command factory will send this info to us in the output parameter
165 outputDir = validParameter.validFile(parameters, "outputdir", false);
166 if (outputDir == "not found"){
// NOTE(review): this appends to outputDir; presumably a line not shown here resets it to an
// empty string first, otherwise the text "not found" would remain as a prefix - confirm.
168 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
171 //check for optional parameter and set defaults
172 // ...at some point should add some additional type checking...
// hard: 0 when absent or when m->isTrue(temp) is false, 1 otherwise. The last branch tests
// the exact complement of the one before it.
173 temp = validParameter.validFile(parameters, "hard", false);
174 if (temp == "not found"){ hard = 0; }
175 else if(!m->isTrue(temp)) { hard = 0; }
176 else if(m->isTrue(temp)) { hard = 1; }
178 // temp = validParameter.validFile(parameters, "name", true);
179 // if (temp == "not found") { nameFile = ""; }
180 // else if(temp == "not open") { abort = true; }
181 // else { nameFile = temp; }
182 // cout << "name:\t" << nameFile << endl;
// cutoff: defaults to "-1.00"; processPhylip() and processColumn() compare against -1.00 to
// decide whether the cutoff should instead be derived from each list-file label.
184 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
185 convert(temp, cutoff);
186 // cout << cutoff << endl;
// precision: defaults to "100"; used as the divisor in the 0.49 / precision cutoff adjustment.
188 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
189 convert(temp, precision);
190 // cout << precision << endl;
// label: stored in lineLabel, empty when absent. No other use of lineLabel appears in the
// lines shown in this listing.
192 lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; }
// Output file: outputDir + root name of the list file (directory stripped) + ".sensspec".
194 sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + ".sensspec";
// Error path: any std exception is reported through m->errorOut.
197 catch(exception& e) {
198 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
202 //***************************************************************************************************************
// Runs the command: records the output file, dispatches on the distance-file format and
// prints the output file name.
// Returns 0 when the object was built for help/citation, 2 when it aborted for another
// reason. NOTE(review): the return on the success path is not visible in this listing.
204 int SensSpecCommand::execute(){
206 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Register the result file both in the flat name list and under the "sensspec" output type.
209 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
// `format` was set by the option constructor to "phylip" or "column".
210 if(format == "phylip") { processPhylip(); }
211 else if(format == "column") { processColumn(); }
// Report the output file name to the user.
213 m->mothurOutEndLine();
214 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
215 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
216 m->mothurOutEndLine();
// Error path: any std exception is reported through m->errorOut.
221 catch(exception& e) {
222 m->errorOut(e, "SensSpecCommand", "execute");
227 //***************************************************************************************************************
// Compares the OTU assignments in the list file against a phylip-formatted distance file.
// For each label read from the list file, every pair (i, j) with j < i read from the distance
// file is counted as a true/false positive/negative, and the counts are handed to
// outputStatistics().
// NOTE(review): the embedded line numbers skip, so several original lines are not visible
// here (declarations such as getCutoff, label, numOTUs, seqList, seqName, distance, pNumSeqs,
// phylipFile; the statements that fill seqMap; counter resets; else openers; closing braces).
229 void SensSpecCommand::processPhylip(){
231 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
233 ifstream inputListFile;
234 m->openInputFile(listFile, inputListFile);
236 string origCutoff = "";
// A cutoff of -1.00 is the default applied by the option constructor; in that case the
// cutoff is taken from each label instead. Otherwise the user value is remembered as text
// and 0.49 / precision is added to it.
// NOTE(review): this adjustment does not test `hard`, whereas the label-derived adjustment
// further down does - confirm this difference is intended.
238 if(cutoff == -1.00) { getCutoff = 1; }
239 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
// Maps a sequence name to an int; the values are later stored in otuIndices and compared
// for equality. The assignments into this map are on lines not shown here.
244 map<string, int> seqMap;
// One iteration per label line of the list file.
247 while(inputListFile){
248 inputListFile >> label >> numOTUs;
// Each OTU is a single whitespace-delimited token of comma-separated sequence names.
249 for(int i=0;i<numOTUs;i++){
250 inputListFile >> seqList;
251 int seqListLength = seqList.length();
// Character scan: a comma ends the current name, any other character extends it.
253 for(int j=0;j<seqListLength;j++){
255 if(seqList[j] == ','){
260 seqName += seqList[j];
266 m->gobble(inputListFile);
// Number of distinct names collected so far from the list file.
268 int lNumSeqs = seqMap.size();
// The first value read from the distance file is compared with the list-file count.
// NOTE(review): a mismatch is only written to cout; no abort or early exit is visible on
// this line.
272 m->openInputFile(distFile, phylipFile);
273 phylipFile >> pNumSeqs;
274 if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; }
// otuIndices[i] will hold the seqMap value of the i-th sequence in the distance file.
278 vector<int> otuIndices(lNumSeqs, -1);
// Label-derived cutoff: a label other than "unique" is converted to a number, and
// 0.49 / precision is added when hard == 0. Presumably guarded by getCutoff on a line not
// shown - confirm.
286 if(label != "unique"){
288 convert(label, cutoff);
289 if(hard == 0){ cutoff += (0.49 / double(precision)); }
292 origCutoff = "unique";
// Progress output: the current label goes to cout.
297 cout << label << endl;
// Row i: a sequence name followed by i distances (the values for j < i).
299 for(int i=0;i<lNumSeqs;i++){
300 phylipFile >> seqName;
// NOTE(review): map::operator[] inserts a zero-valued entry when seqName is absent from
// seqMap, so a name missing from the list file is not detected here.
301 otuIndices[i] = seqMap[seqName];
303 for(int j=0;j<i;j++){
304 phylipFile >> distance;
// Within the cutoff: same stored index counts as a true positive, different as a false negative.
306 if(distance <= cutoff){
307 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
308 else { falseNegatives++; }
// Presumably the else branch of the distance test (its opener is not shown): same stored
// index counts as a false positive, different as a true negative.
311 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
312 else { trueNegatives++; }
// Append one result row for this label.
318 outputStatistics(label, origCutoff);
320 inputListFile.close();
// Error path: any std exception is reported through m->errorOut.
323 catch(exception& e) {
324 m->errorOut(e, "SensSpecCommand", "processPhylip");
329 //***************************************************************************************************************
// Compares the OTU assignments in the list file against a column-formatted distance file
// (rows of: name, name, distance).
// For each label, every within-OTU pair is stored as a tab-joined key; rows of the distance
// file are then looked up against that set, and the counts are handed to outputStatistics().
// NOTE(review): the embedded line numbers skip, so several original lines are not visible
// here (declarations such as getCutoff, numOTUs, numSeqs, seqName, distance, columnFile;
// counter updates; the loop over the column file; else openers; closing braces).
331 void SensSpecCommand::processColumn(){
333 ifstream inputListFile;
334 m->openInputFile(listFile, inputListFile);
336 string origCutoff = "";
// A cutoff of -1.00 is the default applied by the option constructor; in that case the
// cutoff is taken from each label instead. Otherwise the user value is remembered as text
// and 0.49 / precision is added to it (without testing `hard`).
338 if(cutoff == -1.00) { getCutoff = 1; }
339 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
// Keys of the form nameA<TAB>nameB with the lexicographically smaller name first, one per
// pair of sequences that share an OTU.
341 set<string> seqPairSet;
343 string label, seqList;
// One iteration per label line of the list file.
347 while(inputListFile){
350 inputListFile >> label >> numOTUs;
351 for(int i=0;i<numOTUs;i++){
// Names belonging to the current OTU.
353 vector<string> seqNameVector;
355 inputListFile >> seqList;
356 int seqListLength = seqList.length();
// Character scan: a comma ends the current name, any other character extends it.
358 for(int j=0;j<seqListLength;j++){
360 if(seqList[j] == ','){
361 seqNameVector.push_back(seqName);
365 seqName += seqList[j];
// The final name has no trailing comma, so it is pushed after the scan.
368 seqNameVector.push_back(seqName);
370 numSeqs += seqNameVector.size();
// Insert a key for every unordered pair (k < j) inside this OTU.
372 int numSeqsInOTU = seqNameVector.size();
373 for(int j=0;j<numSeqsInOTU;j++){
374 string seqPairString = "";
375 for(int k=0;k<j;k++){
376 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
377 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
378 seqPairSet.insert(seqPairString);
382 m->gobble(inputListFile);
// Total number of unordered sequence pairs.
// NOTE(review): the product is evaluated in int; presumably it can overflow for large
// sequence counts (roughly above 46000 with a 32-bit int) - confirm the type of numSeqs.
384 int numDists = (numSeqs * (numSeqs-1) / 2);
387 m->openInputFile(distFile, columnFile);
388 string seqNameA, seqNameB, seqPairString;
// Start by treating every pair as a true negative. Presumably the counter is decremented as
// rows are classified, on lines not shown - confirm.
393 trueNegatives = numDists;
// Label-derived cutoff: a label other than "unique" is converted to a number, and
// 0.49 / precision is added when hard == 0. Presumably guarded by getCutoff on a line not
// shown - confirm.
397 if(label != "unique"){
399 convert(label, cutoff);
400 if(hard == 0){ cutoff += (0.49 / double(precision)); }
403 origCutoff = "unique";
// Progress output: the current label goes to cout.
408 cout << label << endl;
// Read one row and build its key with the same ordering rule used for seqPairSet.
411 columnFile >> seqNameA >> seqNameB >> distance;
412 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
413 else { seqPairString = seqNameB + '\t' + seqNameA; }
415 set<string>::iterator it = seqPairSet.find(seqPairString);
// Within the cutoff and clustered together: the key is removed from the set. The matching
// counter updates are on lines not shown here.
417 if(distance <= cutoff){
418 if(it != seqPairSet.end()){
420 seqPairSet.erase(it);
// Beyond the cutoff but clustered together: the key is also removed from the set.
427 else if(it != seqPairSet.end()){
430 seqPairSet.erase(it);
433 m->gobble(columnFile);
// Keys still in the set are within-OTU pairs that were not erased while reading the column
// file; they are all added to falsePositives.
435 falsePositives += seqPairSet.size();
// Append one result row for this label.
437 outputStatistics(label, origCutoff);
// Error path: any std exception is reported through m->errorOut.
440 catch(exception& e) {
441 m->errorOut(e, "SensSpecCommand", "processColumn");
446 //***************************************************************************************************************
// Creates the output file named by sensSpecFileName and writes the tab-separated header row.
// The column order matches the order in which outputStatistics() writes its values.
448 void SensSpecCommand::setUpOutput(){
450 ofstream sensSpecFile;
451 m->openOutputFile(sensSpecFileName, sensSpecFile);
453 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
455 sensSpecFile.close();
// Error path: any std exception is reported through m->errorOut.
457 catch(exception& e) {
458 m->errorOut(e, "SensSpecCommand", "setUpOutput");
463 //***************************************************************************************************************
// Derives the summary statistics from the four counters and appends one tab-separated row
// to the file named by sensSpecFileName.
//   label  - first column of the row.
//   cutoff - second column, written as text. Inside this function the name refers to this
//            string parameter, not to the numeric `cutoff` used by the other methods.
// NOTE(review): `p` and `n` are defined on lines not shown in this listing; presumably
// p = tp + fn and n = fp + tn (the actual positives and negatives) - confirm.
465 void SensSpecCommand::outputStatistics(string label, string cutoff){
// Work in double so the ratios below are floating-point divisions.
467 double tp = (double) truePositives;
468 double fp = (double) falsePositives;
469 double tn = (double) trueNegatives;
470 double fn = (double) falseNegatives;
// Predicted positives and predicted negatives.
474 double pPrime = tp + fp;
475 double nPrime = tn + fn;
// The ratios are computed unconditionally; zero denominators are handled by the guards
// further down, which overwrite the affected values with 0.
477 double sensitivity = tp / p;
478 double specificity = tn / n;
479 double positivePredictiveValue = tp / pPrime;
480 double negativePredictiveValue = tn / nPrime;
481 double falseDiscoveryRate = fp / pPrime;
483 double accuracy = (tp + tn) / (p + n);
// The inline guard on this line repeats part of what the p == 0 and n == 0 guards below do.
484 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
485 double f1Score = 2.0 * tp / (p + pPrime);
// Zero-denominator guards: every statistic whose denominator is 0 is reported as 0.
488 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
489 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
490 if(p + n == 0) { accuracy = 0; }
491 if(p + pPrime == 0) { f1Score = 0; }
492 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
493 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
// Append to the existing file rather than truncating it.
495 ofstream sensSpecFile;
496 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
// Row layout: label, cutoff, tp, tn, fp, fn, then the eight statistics in the same order as
// the header written by setUpOutput().
498 sensSpecFile << label << '\t' << cutoff << '\t';
499 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
// The precision is set after the integer counters and before the floating-point statistics.
500 sensSpecFile << setprecision(4);
501 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
502 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
504 sensSpecFile.close();
// Error path: any std exception is reported through m->errorOut.
506 catch(exception& e) {
507 m->errorOut(e, "SensSpecCommand", "outputStatistics");
512 //***************************************************************************************************************