5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
// Registers the parameters accepted by the sens.spec command and gathers their names.
// Each CommandParameter declared below is pushed onto `parameters`; afterwards the
// `name` field of every entry is copied into a vector<string>.
// NOTE(review): this view of the function is elided (the try/catch scaffolding, the
// return statement and the closing braces are not shown). Presumably myArray is what
// gets returned, given the vector<string> return type - confirm against the full file.
13 vector<string> SensSpecCommand::setParameters(){
// Input files: list, plus phylip and column (both tagged "PhylipColumn").
15 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
16 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
// The name-file parameter is currently disabled.
17 //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
18 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
// Scalar options. The fourth argument looks like the default value ("-1.00", "100", "F"),
// matching the fallbacks applied in the option-parsing constructor - TODO confirm
// against the CommandParameter declaration.
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
21 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
22 CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect just the parameter names.
26 vector<string> myArray;
// NOTE(review): `i` is an int compared against parameters.size(); if size() returns an
// unsigned type this is a signed/unsigned comparison (harmless for a list this short).
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exception path (the enclosing catch line is not visible in this view): report the
// exception together with the class and method name.
31 m->errorOut(e, "SensSpecCommand", "setParameters");
35 //**********************************************************************************************************************
// Builds the help text for the sens.spec command. At present only a single
// placeholder sentence is appended to helpString.
// NOTE(review): the return statement and try/catch scaffolding are elided in this
// view; presumably helpString is returned - confirm against the full file.
36 string SensSpecCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The sens.spec command....\n";
// Exception path: report the exception with class and method context.
43 m->errorOut(e, "SensSpecCommand", "getHelpString");
47 //**********************************************************************************************************************
// Default constructor: creates a command object that will not execute
// (abort and calledHelp are both set to true) and registers an empty list of
// output file names under the "sensspec" output type.
// NOTE(review): presumably used when the command is instantiated only to query
// its metadata - confirm against the callers.
48 SensSpecCommand::SensSpecCommand(){
50 abort = true; calledHelp = true;
52 vector<string> tempOutNames;
53 outputTypes["sensspec"] = tempOutNames;
// Exception path: report the exception with class and method context.
56 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
60 //***************************************************************************************************************
// Option-parsing constructor. Validates the user-supplied option string, resolves the
// list / phylip / column input files (falling back to mothur's "current" files),
// determines the output directory, reads the optional hard / cutoff / precision /
// label settings and derives the name of the .sensspec output file.
// Any validation failure sets abort = true so that execute() does nothing.
// NOTE(review): this view is elided - try/else lines, closing braces and some local
// declarations (e.g. `path`, `temp`) are not shown, so the nesting described in the
// comments below is inferred and should be confirmed against the full file.
62 SensSpecCommand::SensSpecCommand(string option) {
65 abort = false; calledHelp = false;
67 //allow user to run help
68 if(option == "help") { help(); abort = true; calledHelp = true; }
// Names of the parameters this command accepts, used for validation below.
73 vector<string> myArray = setParameters();
// Split the option string into name -> value pairs.
// NOTE(review): this local map has the same name as the `parameters` container that
// setParameters() fills; if that container is a class member it is shadowed from here on.
75 OptionParser parser(option);
76 map<string,string> parameters = parser.getParameters();
78 ValidParameters validParameter;
79 map<string,string>::iterator it;
81 //check to make sure all parameters are valid for command
82 for (it = parameters.begin(); it != parameters.end(); it++) {
83 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
86 //initialize outputTypes
87 vector<string> tempOutNames;
88 outputTypes["sensspec"] = tempOutNames;
90 //if the user changes the input directory command factory will send this info to us in the output parameter
91 string inputDir = validParameter.validFile(parameters, "inputdir", false);
92 if (inputDir == "not found"){ inputDir = ""; }
// For each input-file parameter that was supplied without a path, prefix inputDir.
95 it = parameters.find("list");
96 //user has given a list file
97 if(it != parameters.end()){
98 path = m->hasPath(it->second);
99 //if the user has not given a path then, add inputdir. else leave path alone.
100 if (path == "") { parameters["list"] = inputDir + it->second; }
103 it = parameters.find("phylip");
104 //user has given a phylip file
105 if(it != parameters.end()){
106 path = m->hasPath(it->second);
107 //if the user has not given a path then, add inputdir. else leave path alone.
108 if (path == "") { parameters["phylip"] = inputDir + it->second; }
111 it = parameters.find("column");
112 //user has given a column file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["column"] = inputDir + it->second; }
// Name-file handling is currently disabled.
119 //it = parameters.find("name");
120 //user has given a template file
121 //if(it != parameters.end()){
122 //path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 //if (path == "") { parameters["name"] = inputDir + it->second; }
128 //check for required parameters
// list is required: if it was not given, fall back to mothur's current list file;
// if there is none, or the given file cannot be opened, abort.
129 listFile = validParameter.validFile(parameters, "list", true);
130 if (listFile == "not found") {
131 listFile = m->getListFile();
132 if (listFile != "") { m->mothurOut("Using " + listFile + " as input file for the list parameter."); m->mothurOutEndLine(); }
133 else { m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
135 else if (listFile == "not open") { abort = true; }
// Distance input: a supplied phylip or column file sets distFile and format.
// If both are supplied, column is assigned last, but the ONLY-ONE check below aborts.
137 phylipfile = validParameter.validFile(parameters, "phylip", true);
138 if (phylipfile == "not found") { phylipfile = ""; }
139 else if (phylipfile == "not open") { abort = true; }
140 else { distFile = phylipfile; format = "phylip"; }
142 columnfile = validParameter.validFile(parameters, "column", true);
143 if (columnfile == "not found") { columnfile = ""; }
144 else if (columnfile == "not open") { abort = true; }
145 else { distFile = columnfile; format = "column"; }
// Neither file supplied: try the current column file, then the current phylip file.
// NOTE(review): the else lines between these statements are elided; presumably the
// phylip lookup and the "No valid current files" message are nested else branches,
// and that last branch also aborts - confirm.
147 if ((phylipfile == "") && (columnfile == "")) { //is there a current file available for either of these?
148 //give priority to column, then phylip
149 columnfile = m->getColumnFile();
150 if (columnfile != "") { distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
152 phylipfile = m->getPhylipFile();
153 if (phylipfile != "") { distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
155 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
159 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
162 //if the user changes the output directory command factory will send this info to us in the output parameter
// With no outputdir given, the path portion of the list file is appended to outputDir
// (the statement that resets outputDir first is presumably on an elided line).
163 outputDir = validParameter.validFile(parameters, "outputdir", false);
164 if (outputDir == "not found"){
166 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
169 //check for optional parameter and set defaults
170 // ...at some point should add some additional type checking...
// hard defaults to 0 when absent; otherwise it mirrors m->isTrue(temp).
// NOTE(review): the second and third branches are complementary, so isTrue() may be
// evaluated twice for the same value.
171 temp = validParameter.validFile(parameters, "hard", false);
172 if (temp == "not found"){ hard = 0; }
173 else if(!m->isTrue(temp)) { hard = 0; }
174 else if(m->isTrue(temp)) { hard = 1; }
176 // temp = validParameter.validFile(parameters, "name", true);
177 // if (temp == "not found") { nameFile = ""; }
178 // else if(temp == "not open") { abort = true; }
179 // else { nameFile = temp; }
180 // cout << "name:\t" << nameFile << endl;
// cutoff defaults to -1.00, which processPhylip/processColumn treat as
// "take the cutoff from the list-file label".
182 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
183 convert(temp, cutoff);
184 // cout << cutoff << endl;
// precision defaults to 100; it scales the 0.49/precision adjustment applied to cutoffs.
186 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
187 convert(temp, precision);
188 // cout << precision << endl;
// label defaults to the empty string.
190 lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; }
// Output file: <outputDir><root name of the list file> + ".sensspec".
192 sensSpecFileName = outputDir + m->getRootName(m->getSimpleName(listFile)) + ".sensspec";
195 catch(exception& e) {
196 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
200 //***************************************************************************************************************
// Runs the command. When abort is set it returns immediately: 0 if help was
// requested, 2 otherwise. Otherwise it records the output file name, dispatches to
// processPhylip() or processColumn() according to `format`, and prints the output
// file name through mothurOut.
// NOTE(review): lines are elided in this view; no call to setUpOutput() and no final
// return statement are visible - confirm that the header row is written before
// outputStatistics() appends to the file.
202 int SensSpecCommand::execute(){
204 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Register the output file under the "sensspec" type.
207 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
// `format` was set by the constructor to match the distance file in use.
208 if(format == "phylip") { processPhylip(); }
209 else if(format == "column") { processColumn(); }
211 m->mothurOutEndLine();
212 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
213 m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
214 m->mothurOutEndLine();
219 catch(exception& e) {
220 m->errorOut(e, "SensSpecCommand", "execute");
225 //***************************************************************************************************************
// Scores the OTU assignments in the list file against a phylip-formatted distance
// matrix. For every pair of sequences the distance is compared with the cutoff and the
// pair is counted as a true/false positive/negative depending on whether both
// sequences sit in the same OTU; the counts are then written via outputStatistics().
// NOTE(review): this view is elided - several declarations, else lines, counter resets
// and closing braces are not shown, so the nesting described below is inferred.
227 void SensSpecCommand::processPhylip(){
229 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
231 ifstream inputListFile;
232 m->openInputFile(listFile, inputListFile);
234 string origCutoff = "";
// cutoff == -1.00 is the "not supplied" default: take the cutoff from the label.
// Otherwise keep the user's value as text for the report and add 0.49/precision.
// NOTE(review): unlike the label-derived cutoff further down, this adjustment is not
// guarded by `hard == 0` - confirm that is intended.
236 if(cutoff == -1.00) { getCutoff = 1; }
237 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
// Sequence name -> integer looked up per sequence as its OTU index below.
242 map<string, int> seqMap;
// Each list-file record is: label, number of OTUs, then one comma-separated name
// list per OTU.
245 while(inputListFile){
246 inputListFile >> label >> numOTUs;
247 for(int i=0;i<numOTUs;i++){
248 inputListFile >> seqList;
249 int seqListLength = seqList.length();
// Walk the OTU string character by character, accumulating a name until a comma.
// The statements executed at a comma are elided; presumably the finished name is
// stored in seqMap with OTU index i and seqName is reset - confirm.
251 for(int j=0;j<seqListLength;j++){
253 if(seqList[j] == ','){
258 seqName += seqList[j];
264 m->gobble(inputListFile);
// Compare the sequence count from the list file with the count in the phylip header.
// A mismatch is only printed to cout; no abort is visible in this view.
266 int lNumSeqs = seqMap.size();
270 m->openInputFile(distFile, phylipFile);
271 phylipFile >> pNumSeqs;
272 if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; }
// OTU index of each matrix row, in the order rows are read; -1 until filled in.
276 vector<int> otuIndices(lNumSeqs, -1);
// Label-derived cutoff: a numeric label is converted to the cutoff (plus
// 0.49/precision unless hard is set); "unique" is reported as-is.
// NOTE(review): presumably this only runs when getCutoff is set - the guard is elided.
284 if(label != "unique"){
286 convert(label, cutoff);
287 if(hard == 0){ cutoff += (0.49 / double(precision)); }
290 origCutoff = "unique";
// Progress output goes straight to stdout rather than through mothurOut.
295 cout << label << endl;
// Row i holds a name followed by i distances (to rows 0..i-1), i.e. the matrix is
// read as a lower triangle without the diagonal.
297 for(int i=0;i<lNumSeqs;i++){
298 phylipFile >> seqName;
// NOTE(review): map::operator[] default-inserts 0 for a name that is absent from the
// list file, so such a sequence would silently be treated as a member of OTU 0.
299 otuIndices[i] = seqMap[seqName];
301 for(int j=0;j<i;j++){
302 phylipFile >> distance;
// Within the cutoff: same OTU -> true positive, different OTU -> false negative.
304 if(distance <= cutoff){
305 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
306 else { falseNegatives++; }
// Beyond the cutoff (the else line is elided): same OTU -> false positive,
// different OTU -> true negative.
309 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
310 else { trueNegatives++; }
// Emit one line of statistics for this label.
316 outputStatistics(label, origCutoff);
318 inputListFile.close();
321 catch(exception& e) {
322 m->errorOut(e, "SensSpecCommand", "processPhylip");
327 //***************************************************************************************************************
// Scores the OTU assignments in the list file against a column-formatted distance
// file (one "nameA nameB distance" triple per record). Every pair of sequences that
// share an OTU is stored as a key in seqPairSet; the distance records are then matched
// against that set to build the confusion counts, which are written via
// outputStatistics().
// NOTE(review): this view is elided - several declarations, counter updates, else
// lines and closing braces are not shown, so the nesting described below is inferred.
329 void SensSpecCommand::processColumn(){
331 ifstream inputListFile;
332 m->openInputFile(listFile, inputListFile);
334 string origCutoff = "";
// cutoff == -1.00 is the "not supplied" default: take the cutoff from the label.
// Otherwise keep the user's value as text for the report and add 0.49/precision.
// NOTE(review): unlike the label-derived cutoff further down, this adjustment is not
// guarded by `hard == 0` - confirm that is intended.
336 if(cutoff == -1.00) { getCutoff = 1; }
337 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
// Keys are "<smaller name>\t<larger name>" for each pair placed in the same OTU.
// NOTE(review): the set is declared before the per-label loop and no clear() is
// visible, so pairs left over from one label could carry into the next - confirm
// against the elided lines.
339 set<string> seqPairSet;
341 string label, seqList;
// Each list-file record is: label, number of OTUs, then one comma-separated name
// list per OTU.
345 while(inputListFile){
348 inputListFile >> label >> numOTUs;
349 for(int i=0;i<numOTUs;i++){
351 vector<string> seqNameVector;
353 inputListFile >> seqList;
354 int seqListLength = seqList.length();
// Split the OTU string on commas into seqNameVector (the reset of seqName after each
// push is presumably on an elided line).
356 for(int j=0;j<seqListLength;j++){
358 if(seqList[j] == ','){
359 seqNameVector.push_back(seqName);
363 seqName += seqList[j];
// The last name has no trailing comma.
366 seqNameVector.push_back(seqName);
368 numSeqs += seqNameVector.size();
// Insert every unordered pair within this OTU, smaller name first so that the key is
// independent of the order in which the pair is encountered.
370 int numSeqsInOTU = seqNameVector.size();
371 for(int j=0;j<numSeqsInOTU;j++){
372 string seqPairString = "";
373 for(int k=0;k<j;k++){
374 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
375 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
376 seqPairSet.insert(seqPairString);
380 m->gobble(inputListFile);
// Total number of unordered sequence pairs, n(n-1)/2.
// NOTE(review): if numSeqs is an int (its declaration is elided) the product can
// overflow for roughly 46,000+ sequences - confirm the type.
382 int numDists = (numSeqs * (numSeqs-1) / 2);
385 m->openInputFile(distFile, columnFile);
386 string seqNameA, seqNameB, seqPairString;
// Start by assuming every pair is a true negative; the other counter updates are on
// elided lines.
391 trueNegatives = numDists;
// Label-derived cutoff: a numeric label is converted to the cutoff (plus
// 0.49/precision unless hard is set); "unique" is reported as-is.
// NOTE(review): presumably this only runs when getCutoff is set - the guard is elided.
395 if(label != "unique"){
397 convert(label, cutoff);
398 if(hard == 0){ cutoff += (0.49 / double(precision)); }
401 origCutoff = "unique";
// Progress output goes straight to stdout rather than through mothurOut.
406 cout << label << endl;
// Read one distance record and build its key in the same smaller-name-first form.
409 columnFile >> seqNameA >> seqNameB >> distance;
410 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
411 else { seqPairString = seqNameB + '\t' + seqNameA; }
413 set<string>::iterator it = seqPairSet.find(seqPairString);
// A pair found in the set is erased once it has been seen, whether its distance is
// within the cutoff or beyond it; the matching counter increments are elided.
415 if(distance <= cutoff){
416 if(it != seqPairSet.end()){
418 seqPairSet.erase(it);
425 else if(it != seqPairSet.end()){
428 seqPairSet.erase(it);
431 m->gobble(columnFile);
// Same-OTU pairs that never appeared in the column file are counted as false positives.
433 falsePositives += seqPairSet.size();
// Emit one line of statistics for this label.
435 outputStatistics(label, origCutoff);
438 catch(exception& e) {
439 m->errorOut(e, "SensSpecCommand", "processColumn");
444 //***************************************************************************************************************
// Creates the .sensspec output file (through m->openOutputFile) and writes the
// tab-separated header row. The column order here matches the order in which
// outputStatistics() writes its values.
// NOTE(review): try scaffolding and closing braces are elided in this view.
446 void SensSpecCommand::setUpOutput(){
448 ofstream sensSpecFile;
449 m->openOutputFile(sensSpecFileName, sensSpecFile);
451 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
453 sensSpecFile.close();
455 catch(exception& e) {
456 m->errorOut(e, "SensSpecCommand", "setUpOutput");
461 //***************************************************************************************************************
// Derives the summary statistics from the current truePositives / falsePositives /
// trueNegatives / falseNegatives counters and appends one tab-separated row to the
// .sensspec file.
//   label  - list-file label the counts belong to (written as the first column)
//   cutoff - cutoff as text, written verbatim as the second column
// NOTE(review): the declarations of `p` and `n` are elided in this view; presumably
// p = tp + fn and n = tn + fp, which is consistent with sensitivity = tp / p and
// specificity = tn / n - confirm against the full file.
463 void SensSpecCommand::outputStatistics(string label, string cutoff){
// Work in double so that the ratios below are floating-point divisions.
465 double tp = (double) truePositives;
466 double fp = (double) falsePositives;
467 double tn = (double) trueNegatives;
468 double fn = (double) falseNegatives;
// pPrime: pairs placed in the same OTU; nPrime: pairs placed in different OTUs.
472 double pPrime = tp + fp;
473 double nPrime = tn + fn;
// The ratios are computed unconditionally; a zero denominator yields inf/NaN here
// (floating-point division) and is replaced with 0 by the guards further down.
475 double sensitivity = tp / p;
476 double specificity = tn / n;
477 double positivePredictiveValue = tp / pPrime;
478 double negativePredictiveValue = tn / nPrime;
479 double falseDiscoveryRate = fp / pPrime;
481 double accuracy = (tp + tn) / (p + n);
482 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
483 double f1Score = 2.0 * tp / (p + pPrime);
// Zero-denominator guards: any statistic whose denominator is zero is reported as 0.
486 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
487 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
488 if(p + n == 0) { accuracy = 0; }
489 if(p + pPrime == 0) { f1Score = 0; }
490 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
491 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
// Append to the existing output file.
493 ofstream sensSpecFile;
494 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
// Label, cutoff and the four raw counts are written first; setprecision(4) is set
// only afterwards and so applies to the derived statistics.
496 sensSpecFile << label << '\t' << cutoff << '\t';
497 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
498 sensSpecFile << setprecision(4);
499 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
500 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
502 sensSpecFile.close();
504 catch(exception& e) {
505 m->errorOut(e, "SensSpecCommand", "outputStatistics");
510 //***************************************************************************************************************