5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //**********************************************************************************************************************
// Builds the list of parameter names accepted by the sens.spec command.
// The names come from a fixed array; the element count is derived with sizeof,
// so adding a name to the initializer is the only change needed to extend it.
// NOTE(review): the return statement and the try/catch framing around the
// errorOut call are not visible in this listing - confirm against the full file.
13 vector<string> SensSpecCommand::getValidParameters(){
15 string Array[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
// Copy the array into a vector using the [first, one-past-last) pointer range.
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Report the exception via m->errorOut, tagged with the class and method name.
20 m->errorOut(e, "SensSpecCommand", "getValidParameters");
24 //**********************************************************************************************************************
// Default constructor: registers "sensspec" in outputTypes with an empty
// list of file names. No parameters are parsed here.
25 SensSpecCommand::SensSpecCommand(){
28 //initialize outputTypes
29 vector<string> tempOutNames;
30 outputTypes["sensspec"] = tempOutNames;
// Report the exception via m->errorOut, tagged with the class and method name.
33 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
37 //**********************************************************************************************************************
// Builds the list describing which parameters the command requires.
// NOTE(review): "or" is stored as a literal entry next to the three file
// parameters; presumably it signals to the consumer of this list that the
// names are alternatives - confirm against the caller. The option-parsing
// constructor itself requires list plus one of column/phylip.
38 vector<string> SensSpecCommand::getRequiredParameters(){
40 string Array[] = {"list","phylip","column","or"};
// Copy the array into a vector; the length is computed with sizeof.
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Report the exception via m->errorOut, tagged with the class and method name.
45 m->errorOut(e, "SensSpecCommand", "getRequiredParameters");
49 //**********************************************************************************************************************
// Builds the list of files the command requires. Only an empty vector is
// declared in the visible code; nothing is added to it.
50 vector<string> SensSpecCommand::getRequiredFiles(){
52 vector<string> myArray;
// Report the exception via m->errorOut, tagged with the class and method name.
56 m->errorOut(e, "SensSpecCommand", "getRequiredFiles");
60 //***************************************************************************************************************
// Option-parsing constructor.
// Parses the option string, rejects unknown parameter names, prefixes bare
// input file names with inputdir, then reads the required parameters (list and
// one of column/phylip) and the optional ones (hard, cutoff, precision, label).
// Any failure sets abort to true instead of throwing; execute() returns early
// when abort is set.
// NOTE(review): `path` and `temp` are used below but their declarations are not
// visible in this listing; likewise execute() dispatches on `format`, whose
// assignment is not visible here - confirm against the full file.
62 SensSpecCommand::SensSpecCommand(string option) {
67 //allow user to run help
68 if(option == "help") { help(); abort = true; }
73 //valid parameters for this command
74 string AlignArray[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
76 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
78 OptionParser parser(option);
79 map<string,string> parameters = parser.getParameters();
81 ValidParameters validParameter;
82 map<string,string>::iterator it;
84 //check to make sure all parameters are valid for command
85 for (it = parameters.begin(); it != parameters.end(); it++) {
// One unrecognised parameter is enough to abort the command.
86 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
89 //initialize outputTypes
90 vector<string> tempOutNames;
91 outputTypes["sensspec"] = tempOutNames;
93 //if the user changes the input directory command factory will send this info to us in the output parameter
94 string inputDir = validParameter.validFile(parameters, "inputdir", false);
// "not found" is the sentinel for an absent parameter; fall back to no prefix.
95 if (inputDir == "not found"){ inputDir = ""; }
98 it = parameters.find("list");
99 //user has given a list file
100 if(it != parameters.end()){
101 path = m->hasPath(it->second);
102 //if the user has not given a path then, add inputdir. else leave path alone.
103 if (path == "") { parameters["list"] = inputDir + it->second; }
106 it = parameters.find("phylip");
107 //user has given a phylip distance file
108 if(it != parameters.end()){
109 path = m->hasPath(it->second);
110 //if the user has not given a path then, add inputdir. else leave path alone.
111 if (path == "") { parameters["phylip"] = inputDir + it->second; }
114 it = parameters.find("column");
115 //user has given a column distance file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["column"] = inputDir + it->second; }
122 it = parameters.find("name");
123 //user has given a name file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["name"] = inputDir + it->second; }
131 //check for required parameters
132 listFile = validParameter.validFile(parameters, "list", true);
// "not found": parameter missing (an explicit message is printed).
// "not open": abort without printing anything here.
133 if (listFile == "not found") { m->mothurOut("list is a required parameter for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
134 else if (listFile == "not open") { abort = true; }
// The distance file is looked up as "column" first; "phylip" is only tried
// when no column parameter was supplied.
136 distFile = validParameter.validFile(parameters, "column", true);
138 if(distFile == "not found") {
139 distFile = validParameter.validFile(parameters, "phylip", true);
142 if(distFile == "not found") { m->mothurOut("either column or phylip are required for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
143 else if (distFile == "not open") { abort = true; }
145 //if the user changes the output directory command factory will send this info to us in the output parameter
146 outputDir = validParameter.validFile(parameters, "outputdir", false);
147 if (outputDir == "not found"){
149 outputDir += m->hasPath(listFile); //if user entered a file with a path then preserve it
152 //check for optional parameter and set defaults
153 // ...at some point we should add some additional type checking...
154 temp = validParameter.validFile(parameters, "hard", false);
// hard defaults to 0. The last two branches test complementary conditions,
// so together they behave as a plain if/else on m->isTrue(temp).
155 if (temp == "not found"){ hard = 0; }
156 else if(!m->isTrue(temp)) { hard = 0; }
157 else if(m->isTrue(temp)) { hard = 1; }
159 // temp = validParameter.validFile(parameters, "name", true);
160 // if (temp == "not found") { nameFile = ""; }
161 // else if(temp == "not open") { abort = true; }
162 // else { nameFile = temp; }
163 // cout << "name:\t" << nameFile << endl;
// cutoff defaults to -1.00; processPhylip/processColumn compare against that
// exact value to detect that no cutoff was supplied.
165 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
166 convert(temp, cutoff);
167 // cout << cutoff << endl;
// precision defaults to 100; it is the divisor in the 0.49/precision cutoff padding.
169 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
170 convert(temp, precision);
171 // cout << precision << endl;
173 lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; }
// Output name = list file name with its last extension replaced by ".sensspec".
// If listFile contains no '.', find_last_of returns npos and substr keeps the
// whole name. outputDir is not part of this expression.
175 sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
178 catch(exception& e) {
// Report the exception via m->errorOut, tagged with the class and method name.
179 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
184 //**********************************************************************************************************************
186 void SensSpecCommand::help(){
188 m->mothurOut("The sens.spec command reads a fastaFile and creates .....\n");
192 m->mothurOut("Example sens.spec(...).\n");
193 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
194 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
197 catch(exception& e) {
198 m->errorOut(e, "SensSpecCommand", "help");
203 //***************************************************************************************************************
205 SensSpecCommand::~SensSpecCommand(){ /* do nothing */ }
207 //***************************************************************************************************************
// Runs the command: records the output file name and dispatches to the
// phylip or column implementation according to `format`.
// Returns 0 immediately when the constructor flagged an abort.
// NOTE(review): lines between the visible statements are missing from this
// listing (for example any call that creates the output file and the final
// return) - confirm against the full file.
209 int SensSpecCommand::execute(){
211 if (abort == true) { return 0; }
// Register the output file both in the flat name list and under the "sensspec" type.
214 outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
// In the visible code neither branch runs when format is anything other than
// "phylip" or "column".
215 if(format == "phylip") { processPhylip(); }
216 else if(format == "column") { processColumn(); }
221 catch(exception& e) {
// Report the exception via m->errorOut, tagged with the class and method name.
222 m->errorOut(e, "SensSpecCommand", "execute");
227 //***************************************************************************************************************
// Compares the OTU assignments in the list file against the pairwise distances
// of a phylip-formatted matrix. Every pair (i, j) with j < i is classified as
// true/false positive/negative and the totals are passed to outputStatistics.
// NOTE(review): several statements are missing from this listing (declarations
// of label, numOTUs, seqList, seqName, distance, phylipFile, pNumSeqs; the
// writes into seqMap; the else branches; closing braces), so loop nesting
// cannot be confirmed from here.
229 void SensSpecCommand::processPhylip(){
231 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
233 ifstream inputListFile;
234 m->openInputFile(listFile, inputListFile);
236 string origCutoff = "";
// -1.00 is the default assigned by the constructor when no cutoff was given;
// in that case only the getCutoff flag is set here.
238 if(cutoff == -1.00) { getCutoff = 1; }
// A user-supplied cutoff is remembered as text for the report and then padded
// by 0.49/precision. NOTE(review): this padding is applied regardless of
// `hard`, whereas the label-derived cutoff below is padded only when
// hard == 0 - confirm this is intended.
239 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
// Maps a sequence name to an int (presumably its OTU index; the assignments
// are not visible in this listing).
244 map<string, int> seqMap;
247 while(inputListFile){
// Each list-file record starts with a label followed by the number of OTUs.
248 inputListFile >> label >> numOTUs;
249 for(int i=0;i<numOTUs;i++){
// One whitespace-delimited token per OTU: a comma-separated list of names.
250 inputListFile >> seqList;
251 int seqListLength = seqList.length();
// Split the token on ',' one character at a time, accumulating into seqName.
253 for(int j=0;j<seqListLength;j++){
255 if(seqList[j] == ','){
260 seqName += seqList[j];
266 m->gobble(inputListFile);
268 int lNumSeqs = seqMap.size();
272 m->openInputFile(distFile, phylipFile);
// The first token of the phylip file is read as its sequence count.
273 phylipFile >> pNumSeqs;
// A size mismatch is only reported on stdout; processing continues.
274 if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; }
// Per-row value looked up from seqMap, indexed by row position in the matrix.
278 vector<int> otuIndices(lNumSeqs, -1);
286 if(label != "unique"){
// Derive the cutoff from the list-file label; pad it unless hard is set.
288 convert(label, cutoff);
289 if(hard == 0){ cutoff += (0.49 / double(precision)); }
292 origCutoff = "unique";
297 cout << label << endl;
299 for(int i=0;i<lNumSeqs;i++){
300 phylipFile >> seqName;
// map::operator[] inserts a zero entry for a name absent from the list file,
// so an unknown name is silently given the value 0.
301 otuIndices[i] = seqMap[seqName];
// Row i is followed by i distances (j < i), i.e. a lower-triangle layout is read.
303 for(int j=0;j<i;j++){
304 phylipFile >> distance;
// Within the cutoff: same value in otuIndices -> true positive, otherwise false negative.
306 if(distance <= cutoff){
307 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
308 else { falseNegatives++; }
// Presumably the else branch (distance above the cutoff; the else line is not
// visible): same value -> false positive, otherwise true negative.
311 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
312 else { trueNegatives++; }
318 outputStatistics(label, origCutoff);
320 inputListFile.close();
323 catch(exception& e) {
// Report the exception via m->errorOut, tagged with the class and method name.
324 m->errorOut(e, "SensSpecCommand", "processPhylip");
329 //***************************************************************************************************************
// Compares the OTU assignments in the list file against a column-formatted
// distance file (one "nameA nameB distance" record per read). Pairs that share
// an OTU are stored in seqPairSet and removed as matching distance records are
// seen; the counts are then passed to outputStatistics.
// NOTE(review): several statements are missing from this listing (declarations
// of numOTUs, numSeqs, seqName, distance, columnFile; the counter updates
// inside the distance branches; closing braces), so loop nesting and the exact
// counter updates cannot be confirmed from here.
331 void SensSpecCommand::processColumn(){
333 ifstream inputListFile;
334 m->openInputFile(listFile, inputListFile);
336 string origCutoff = "";
// -1.00 is the default assigned by the constructor when no cutoff was given;
// in that case only the getCutoff flag is set here.
338 if(cutoff == -1.00) { getCutoff = 1; }
// A user-supplied cutoff is remembered as text for the report and then padded
// by 0.49/precision. NOTE(review): this padding is applied regardless of
// `hard`, whereas the label-derived cutoff below is padded only when
// hard == 0 - confirm this is intended.
339 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
// Keys are "nameA<TAB>nameB" with the lexicographically smaller name first.
341 set<string> seqPairSet;
343 string label, seqList;
347 while(inputListFile){
// Each list-file record starts with a label followed by the number of OTUs.
350 inputListFile >> label >> numOTUs;
351 for(int i=0;i<numOTUs;i++){
353 vector<string> seqNameVector;
// One whitespace-delimited token per OTU: a comma-separated list of names.
355 inputListFile >> seqList;
356 int seqListLength = seqList.length();
// Split the token on ',' one character at a time.
358 for(int j=0;j<seqListLength;j++){
360 if(seqList[j] == ','){
361 seqNameVector.push_back(seqName);
365 seqName += seqList[j];
// The last name has no trailing comma, so it is pushed after the loop.
368 seqNameVector.push_back(seqName);
370 numSeqs += seqNameVector.size();
// Insert every unordered pair (k < j) of names from this OTU into the set.
372 int numSeqsInOTU = seqNameVector.size();
373 for(int j=0;j<numSeqsInOTU;j++){
374 string seqPairString = "";
375 for(int k=0;k<j;k++){
376 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
377 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
378 seqPairSet.insert(seqPairString);
382 m->gobble(inputListFile);
// Total number of unordered sequence pairs: n * (n - 1) / 2.
384 int numDists = (numSeqs * (numSeqs-1) / 2);
387 m->openInputFile(distFile, columnFile);
388 string seqNameA, seqNameB, seqPairString;
// Start by counting every pair as a true negative.
// NOTE(review): presumably adjusted as distance records are classified; those
// updates are not visible in this listing.
393 trueNegatives = numDists;
397 if(label != "unique"){
// Derive the cutoff from the list-file label; pad it unless hard is set.
399 convert(label, cutoff);
400 if(hard == 0){ cutoff += (0.49 / double(precision)); }
403 origCutoff = "unique";
408 cout << label << endl;
411 columnFile >> seqNameA >> seqNameB >> distance;
// Build the lookup key with the same smaller-name-first ordering used above.
412 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
413 else { seqPairString = seqNameB + '\t' + seqNameA; }
415 set<string>::iterator it = seqPairSet.find(seqPairString);
// Pair within the cutoff: if it is also a same-OTU pair, drop it from the set.
417 if(distance <= cutoff){
418 if(it != seqPairSet.end()){
420 seqPairSet.erase(it);
// Pair above the cutoff but present in the set (same OTU): also dropped.
427 else if(it != seqPairSet.end()){
430 seqPairSet.erase(it);
433 m->gobble(columnFile);
// Same-OTU pairs that never matched a distance record are added to the
// false positives.
435 falsePositives += seqPairSet.size();
437 outputStatistics(label, origCutoff);
440 catch(exception& e) {
// Report the exception via m->errorOut, tagged with the class and method name.
441 m->errorOut(e, "SensSpecCommand", "processColumn");
446 //***************************************************************************************************************
// Opens sensSpecFileName through m->openOutputFile, writes the tab-separated
// column header and closes the file. outputStatistics later reopens the same
// file through m->openOutputFileAppend to add one data row per call.
448 void SensSpecCommand::setUpOutput(){
450 ofstream sensSpecFile;
451 m->openOutputFile(sensSpecFileName, sensSpecFile);
// Column order must match the order in which outputStatistics writes its fields.
453 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
455 sensSpecFile.close();
457 catch(exception& e) {
// Report the exception via m->errorOut, tagged with the class and method name.
458 m->errorOut(e, "SensSpecCommand", "setUpOutput");
463 //***************************************************************************************************************
// Derives the summary statistics from the four confusion-matrix counters and
// appends one tab-separated row to sensSpecFileName.
//   label  - written as the first column of the row
//   cutoff - written as the second column (text form of the cutoff)
// NOTE(review): `p` and `n` are used below but their declarations are not
// visible in this listing; presumably p = tp + fn and n = fp + tn, the usual
// actual-positive / actual-negative totals - confirm against the full file.
465 void SensSpecCommand::outputStatistics(string label, string cutoff){
// Work in double so the ratios below use floating-point division.
467 double tp = (double) truePositives;
468 double fp = (double) falsePositives;
469 double tn = (double) trueNegatives;
470 double fn = (double) falseNegatives;
// Totals by predicted class: pPrime = tp + fp, nPrime = tn + fn.
474 double pPrime = tp + fp;
475 double nPrime = tn + fn;
// The ratios are computed unconditionally; a zero denominator is corrected
// by the guards further down, which reset the affected values to 0.
477 double sensitivity = tp / p;
478 double specificity = tn / n;
479 double positivePredictiveValue = tp / pPrime;
480 double negativePredictiveValue = tn / nPrime;
481 double falseDiscoveryRate = fp / pPrime;
483 double accuracy = (tp + tn) / (p + n);
// The inline guard on this line repeats what the p == 0 and n == 0 guards
// below already do for matthewsCorrCoef.
484 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
485 double f1Score = 2.0 * tp / (p + pPrime);
// Zero-denominator guards: every statistic whose denominator is 0 is reported as 0.
488 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
489 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
490 if(p + n == 0) { accuracy = 0; }
491 if(p + pPrime == 0) { f1Score = 0; }
492 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
493 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
// Append mode: the header row is written separately by setUpOutput.
495 ofstream sensSpecFile;
496 m->openOutputFileAppend(sensSpecFileName, sensSpecFile);
498 sensSpecFile << label << '\t' << cutoff << '\t';
// The four counters are written from the member values, before setprecision is applied.
499 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
// setprecision(4) affects only the floating-point statistics written after it.
500 sensSpecFile << setprecision(4);
501 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
502 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
504 sensSpecFile.close();
506 catch(exception& e) {
// Report the exception via m->errorOut, tagged with the class and method name.
507 m->errorOut(e, "SensSpecCommand", "outputStatistics");
512 //***************************************************************************************************************