5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
13 vector<string> SensSpecCommand::getValidParameters(){
15 string Array[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "SensSpecCommand", "getValidParameters");
24 //**********************************************************************************************************************
25 SensSpecCommand::SensSpecCommand(){
27 abort = true; calledHelp = true;
28 vector<string> tempOutNames;
29 outputTypes["sensspec"] = tempOutNames;
32 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
36 //**********************************************************************************************************************
37 vector<string> SensSpecCommand::getRequiredParameters(){
39 string Array[] = {"list","phylip","column","or"};
40 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44 m->errorOut(e, "SensSpecCommand", "getRequiredParameters");
48 //**********************************************************************************************************************
49 vector<string> SensSpecCommand::getRequiredFiles(){
51 vector<string> myArray;
55 m->errorOut(e, "SensSpecCommand", "getRequiredFiles");
59 //***************************************************************************************************************
61 SensSpecCommand::SensSpecCommand(string option) {
64 abort = false; calledHelp = false;
66 //allow user to run help
67 if(option == "help") { help(); abort = true; calledHelp = true; }
72 //valid paramters for this command
73 string AlignArray[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
75 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
77 OptionParser parser(option);
78 map<string,string> parameters = parser.getParameters();
80 ValidParameters validParameter;
81 map<string,string>::iterator it;
83 //check to make sure all parameters are valid for command
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
88 //initialize outputTypes
89 vector<string> tempOutNames;
90 outputTypes["sensspec"] = tempOutNames;
92 //if the user changes the input directory command factory will send this info to us in the output parameter
93 string inputDir = validParameter.validFile(parameters, "inputdir", false);
94 if (inputDir == "not found"){ inputDir = ""; }
97 it = parameters.find("list");
98 //user has given a template file
99 if(it != parameters.end()){
100 path = m->hasPath(it->second);
101 //if the user has not given a path then, add inputdir. else leave path alone.
102 if (path == "") { parameters["list"] = inputDir + it->second; }
105 it = parameters.find("phylip");
106 //user has given a template file
107 if(it != parameters.end()){
108 path = m->hasPath(it->second);
109 //if the user has not given a path then, add inputdir. else leave path alone.
110 if (path == "") { parameters["phylip"] = inputDir + it->second; }
113 it = parameters.find("column");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["column"] = inputDir + it->second; }
121 it = parameters.find("name");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["name"] = inputDir + it->second; }
130 //check for required parameters
131 listFile = validParameter.validFile(parameters, "list", true);
132 if (listFile == "not found") { m->mothurOut("list is a required parameter for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
133 else if (listFile == "not open") { abort = true; }
135 distFile = validParameter.validFile(parameters, "column", true);
137 if(distFile == "not found") {
138 distFile = validParameter.validFile(parameters, "phylip", true);
141 if(distFile == "not found") { m->mothurOut("either column or phylip are required for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
142 else if (distFile == "not open") { abort = true; }
144 //if the user changes the output directory command factory will send this info to us in the output parameter
145 outputDir = validParameter.validFile(parameters, "outputdir", false);
146 if (outputDir == "not found"){
148 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
151 //check for optional parameter and set defaults
152 // ...at some point should added some additional type checking...
153 temp = validParameter.validFile(parameters, "hard", false);
154 if (temp == "not found"){ hard = 0; }
155 else if(!m->isTrue(temp)) { hard = 0; }
156 else if(m->isTrue(temp)) { hard = 1; }
158 // temp = validParameter.validFile(parameters, "name", true);
159 // if (temp == "not found") { nameFile = ""; }
160 // else if(temp == "not open") { abort = true; }
161 // else { nameFile = temp; }
162 // cout << "name:\t" << nameFile << endl;
164 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
165 convert(temp, cutoff);
166 // cout << cutoff << endl;
168 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
169 convert(temp, precision);
170 // cout << precision << endl;
172 lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; }
174 sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
177 catch(exception& e) {
178 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
183 //**********************************************************************************************************************
185 void SensSpecCommand::help(){
187 m->mothurOut("The sens.spec command reads a fastaFile and creates .....\n");
191 m->mothurOut("Example sens.spec(...).\n");
192 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
193 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
196 catch(exception& e) {
197 m->errorOut(e, "SensSpecCommand", "help");
202 //***************************************************************************************************************
204 SensSpecCommand::~SensSpecCommand(){ /* do nothing */ }
206 //***************************************************************************************************************
208 int SensSpecCommand::execute(){
210 if (abort == true) { if (calledHelp) { return 0; } return 2; }
213 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
214 if(format == "phylip") { processPhylip(); }
215 else if(format == "column") { processColumn(); }
220 catch(exception& e) {
221 m->errorOut(e, "SensSpecCommand", "execute");
226 //***************************************************************************************************************
228 void SensSpecCommand::processPhylip(){
230 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
232 ifstream inputListFile;
233 m->openInputFile(listFile, inputListFile);
235 string origCutoff = "";
237 if(cutoff == -1.00) { getCutoff = 1; }
238 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
243 map<string, int> seqMap;
246 while(inputListFile){
247 inputListFile >> label >> numOTUs;
248 for(int i=0;i<numOTUs;i++){
249 inputListFile >> seqList;
250 int seqListLength = seqList.length();
252 for(int j=0;j<seqListLength;j++){
254 if(seqList[j] == ','){
259 seqName += seqList[j];
265 m->gobble(inputListFile);
267 int lNumSeqs = seqMap.size();
271 m->openInputFile(distFile, phylipFile);
272 phylipFile >> pNumSeqs;
273 if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; }
277 vector<int> otuIndices(lNumSeqs, -1);
285 if(label != "unique"){
287 convert(label, cutoff);
288 if(hard == 0){ cutoff += (0.49 / double(precision)); }
291 origCutoff = "unique";
296 cout << label << endl;
298 for(int i=0;i<lNumSeqs;i++){
299 phylipFile >> seqName;
300 otuIndices[i] = seqMap[seqName];
302 for(int j=0;j<i;j++){
303 phylipFile >> distance;
305 if(distance <= cutoff){
306 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
307 else { falseNegatives++; }
310 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
311 else { trueNegatives++; }
317 outputStatistics(label, origCutoff);
319 inputListFile.close();
322 catch(exception& e) {
323 m->errorOut(e, "SensSpecCommand", "processPhylip");
328 //***************************************************************************************************************
330 void SensSpecCommand::processColumn(){
332 ifstream inputListFile;
333 m->openInputFile(listFile, inputListFile);
335 string origCutoff = "";
337 if(cutoff == -1.00) { getCutoff = 1; }
338 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
340 set<string> seqPairSet;
342 string label, seqList;
346 while(inputListFile){
349 inputListFile >> label >> numOTUs;
350 for(int i=0;i<numOTUs;i++){
352 vector<string> seqNameVector;
354 inputListFile >> seqList;
355 int seqListLength = seqList.length();
357 for(int j=0;j<seqListLength;j++){
359 if(seqList[j] == ','){
360 seqNameVector.push_back(seqName);
364 seqName += seqList[j];
367 seqNameVector.push_back(seqName);
369 numSeqs += seqNameVector.size();
371 int numSeqsInOTU = seqNameVector.size();
372 for(int j=0;j<numSeqsInOTU;j++){
373 string seqPairString = "";
374 for(int k=0;k<j;k++){
375 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
376 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
377 seqPairSet.insert(seqPairString);
381 m->gobble(inputListFile);
383 int numDists = (numSeqs * (numSeqs-1) / 2);
386 m->openInputFile(distFile, columnFile);
387 string seqNameA, seqNameB, seqPairString;
392 trueNegatives = numDists;
396 if(label != "unique"){
398 convert(label, cutoff);
399 if(hard == 0){ cutoff += (0.49 / double(precision)); }
402 origCutoff = "unique";
407 cout << label << endl;
410 columnFile >> seqNameA >> seqNameB >> distance;
411 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
412 else { seqPairString = seqNameB + '\t' + seqNameA; }
414 set<string>::iterator it = seqPairSet.find(seqPairString);
416 if(distance <= cutoff){
417 if(it != seqPairSet.end()){
419 seqPairSet.erase(it);
426 else if(it != seqPairSet.end()){
429 seqPairSet.erase(it);
432 m->gobble(columnFile);
434 falsePositives += seqPairSet.size();
436 outputStatistics(label, origCutoff);
439 catch(exception& e) {
440 m->errorOut(e, "SensSpecCommand", "processColumn");
445 //***************************************************************************************************************
447 void SensSpecCommand::setUpOutput(){
449 ofstream sensSpecFile;
450 m->openOutputFile(sensSpecFileName, sensSpecFile);
452 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
454 sensSpecFile.close();
456 catch(exception& e) {
457 m->errorOut(e, "SensSpecCommand", "setUpOutput");
462 //***************************************************************************************************************
464 void SensSpecCommand::outputStatistics(string label, string cutoff){
466 double tp = (double) truePositives;
467 double fp = (double) falsePositives;
468 double tn = (double) trueNegatives;
469 double fn = (double) falseNegatives;
473 double pPrime = tp + fp;
474 double nPrime = tn + fn;
476 double sensitivity = tp / p;
477 double specificity = tn / n;
478 double positivePredictiveValue = tp / pPrime;
479 double negativePredictiveValue = tn / nPrime;
480 double falseDiscoveryRate = fp / pPrime;
482 double accuracy = (tp + tn) / (p + n);
483 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
484 double f1Score = 2.0 * tp / (p + pPrime);
487 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
488 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
489 if(p + n == 0) { accuracy = 0; }
490 if(p + pPrime == 0) { f1Score = 0; }
491 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
492 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
494 ofstream sensSpecFile;
495 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
497 sensSpecFile << label << '\t' << cutoff << '\t';
498 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
499 sensSpecFile << setprecision(4);
500 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
501 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
503 sensSpecFile.close();
505 catch(exception& e) {
506 m->errorOut(e, "SensSpecCommand", "outputStatistics");
511 //***************************************************************************************************************