5 * Created by Pat Schloss on 7/6/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "sensspeccommand.h"
12 //***************************************************************************************************************
14 SensSpecCommand::SensSpecCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
25 //valid paramters for this command
26 string AlignArray[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
28 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
30 OptionParser parser(option);
31 map<string,string> parameters = parser.getParameters();
33 ValidParameters validParameter;
34 map<string,string>::iterator it;
36 //check to make sure all parameters are valid for command
37 for (it = parameters.begin(); it != parameters.end(); it++) {
38 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
41 //if the user changes the input directory command factory will send this info to us in the output parameter
42 string inputDir = validParameter.validFile(parameters, "inputdir", false);
43 if (inputDir == "not found"){ inputDir = ""; }
46 it = parameters.find("list");
47 //user has given a template file
48 if(it != parameters.end()){
49 path = hasPath(it->second);
50 //if the user has not given a path then, add inputdir. else leave path alone.
51 if (path == "") { parameters["list"] = inputDir + it->second; }
54 it = parameters.find("phylip");
55 //user has given a template file
56 if(it != parameters.end()){
57 path = hasPath(it->second);
58 //if the user has not given a path then, add inputdir. else leave path alone.
59 if (path == "") { parameters["phylip"] = inputDir + it->second; }
62 it = parameters.find("column");
63 //user has given a template file
64 if(it != parameters.end()){
65 path = hasPath(it->second);
66 //if the user has not given a path then, add inputdir. else leave path alone.
67 if (path == "") { parameters["column"] = inputDir + it->second; }
70 it = parameters.find("name");
71 //user has given a template file
72 if(it != parameters.end()){
73 path = hasPath(it->second);
74 //if the user has not given a path then, add inputdir. else leave path alone.
75 if (path == "") { parameters["name"] = inputDir + it->second; }
79 //check for required parameters
80 listFile = validParameter.validFile(parameters, "list", true);
81 if (listFile == "not found") { m->mothurOut("list is a required parameter for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
82 else if (listFile == "not open") { abort = true; }
84 distFile = validParameter.validFile(parameters, "column", true);
86 if(distFile == "not found") {
87 distFile = validParameter.validFile(parameters, "phylip", true);
90 if(distFile == "not found") { m->mothurOut("either column or phylip are required for the sens.spec command."); m->mothurOutEndLine(); abort = true; }
91 else if (distFile == "not open") { abort = true; }
93 //if the user changes the output directory command factory will send this info to us in the output parameter
94 outputDir = validParameter.validFile(parameters, "outputdir", false);
95 if (outputDir == "not found"){
97 outputDir += hasPath(listFile); //if user entered a file with a path then preserve it
100 //check for optional parameter and set defaults
101 // ...at some point should added some additional type checking...
102 temp = validParameter.validFile(parameters, "hard", false);
103 if (temp == "not found"){ hard = 0; }
104 else if(!isTrue(temp)) { hard = 0; }
105 else if(isTrue(temp)) { hard = 1; }
107 // temp = validParameter.validFile(parameters, "name", true);
108 // if (temp == "not found") { nameFile = ""; }
109 // else if(temp == "not open") { abort = true; }
110 // else { nameFile = temp; }
111 // cout << "name:\t" << nameFile << endl;
113 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
114 convert(temp, cutoff);
115 // cout << cutoff << endl;
117 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
118 convert(temp, precision);
119 // cout << precision << endl;
121 lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; }
123 sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
126 catch(exception& e) {
127 m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
132 //**********************************************************************************************************************
134 void SensSpecCommand::help(){
136 m->mothurOut("The sens.spec command reads a fastaFile and creates .....\n");
140 m->mothurOut("Example sens.spec(...).\n");
141 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
142 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
145 catch(exception& e) {
146 m->errorOut(e, "SensSpecCommand", "help");
151 //***************************************************************************************************************
153 SensSpecCommand::~SensSpecCommand(){ /* do nothing */ }
155 //***************************************************************************************************************
157 int SensSpecCommand::execute(){
159 if (abort == true) { return 0; }
162 if(format == "phylip") { processPhylip(); }
163 else if(format == "column") { processColumn(); }
168 catch(exception& e) {
169 m->errorOut(e, "SensSpecCommand", "execute");
174 //***************************************************************************************************************
176 void SensSpecCommand::processPhylip(){
178 //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
180 ifstream inputListFile;
181 openInputFile(listFile, inputListFile);
183 string origCutoff = "";
185 if(cutoff == -1.00) { getCutoff = 1; }
186 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
191 map<string, int> seqMap;
194 while(inputListFile){
195 inputListFile >> label >> numOTUs;
196 for(int i=0;i<numOTUs;i++){
197 inputListFile >> seqList;
198 int seqListLength = seqList.length();
200 for(int j=0;j<seqListLength;j++){
202 if(seqList[j] == ','){
207 seqName += seqList[j];
213 gobble(inputListFile);
215 int lNumSeqs = seqMap.size();
219 openInputFile(distFile, phylipFile);
220 phylipFile >> pNumSeqs;
221 if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; }
225 vector<int> otuIndices(lNumSeqs, -1);
233 if(label != "unique"){
235 convert(label, cutoff);
236 if(hard == 0){ cutoff += (0.49 / double(precision)); }
239 origCutoff = "unique";
244 cout << label << endl;
246 for(int i=0;i<lNumSeqs;i++){
247 phylipFile >> seqName;
248 otuIndices[i] = seqMap[seqName];
250 for(int j=0;j<i;j++){
251 phylipFile >> distance;
253 if(distance <= cutoff){
254 if(otuIndices[i] == otuIndices[j]) { truePositives++; }
255 else { falseNegatives++; }
258 if(otuIndices[i] == otuIndices[j]) { falsePositives++; }
259 else { trueNegatives++; }
265 outputStatistics(label, origCutoff);
267 inputListFile.close();
270 catch(exception& e) {
271 m->errorOut(e, "SensSpecCommand", "processPhylip");
276 //***************************************************************************************************************
278 void SensSpecCommand::processColumn(){
280 ifstream inputListFile;
281 openInputFile(listFile, inputListFile);
283 string origCutoff = "";
285 if(cutoff == -1.00) { getCutoff = 1; }
286 else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
288 set<string> seqPairSet;
290 string label, seqList;
294 while(inputListFile){
297 inputListFile >> label >> numOTUs;
298 for(int i=0;i<numOTUs;i++){
300 vector<string> seqNameVector;
302 inputListFile >> seqList;
303 int seqListLength = seqList.length();
305 for(int j=0;j<seqListLength;j++){
307 if(seqList[j] == ','){
308 seqNameVector.push_back(seqName);
312 seqName += seqList[j];
315 seqNameVector.push_back(seqName);
317 numSeqs += seqNameVector.size();
319 int numSeqsInOTU = seqNameVector.size();
320 for(int j=0;j<numSeqsInOTU;j++){
321 string seqPairString = "";
322 for(int k=0;k<j;k++){
323 if(seqNameVector[j] < seqNameVector[k]) { seqPairString = seqNameVector[j] + '\t' + seqNameVector[k]; }
324 else { seqPairString = seqNameVector[k] + '\t' + seqNameVector[j]; }
325 seqPairSet.insert(seqPairString);
329 gobble(inputListFile);
331 int numDists = (numSeqs * (numSeqs-1) / 2);
334 openInputFile(distFile, columnFile);
335 string seqNameA, seqNameB, seqPairString;
340 trueNegatives = numDists;
344 if(label != "unique"){
346 convert(label, cutoff);
347 if(hard == 0){ cutoff += (0.49 / double(precision)); }
350 origCutoff = "unique";
355 cout << label << endl;
358 columnFile >> seqNameA >> seqNameB >> distance;
359 if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; }
360 else { seqPairString = seqNameB + '\t' + seqNameA; }
362 set<string>::iterator it = seqPairSet.find(seqPairString);
364 if(distance <= cutoff){
365 if(it != seqPairSet.end()){
367 seqPairSet.erase(it);
374 else if(it != seqPairSet.end()){
377 seqPairSet.erase(it);
382 falsePositives += seqPairSet.size();
384 outputStatistics(label, origCutoff);
387 catch(exception& e) {
388 m->errorOut(e, "SensSpecCommand", "processColumn");
393 //***************************************************************************************************************
395 void SensSpecCommand::setUpOutput(){
397 ofstream sensSpecFile;
398 openOutputFile(sensSpecFileName, sensSpecFile);
400 sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
402 sensSpecFile.close();
404 catch(exception& e) {
405 m->errorOut(e, "SensSpecCommand", "setUpOutput");
410 //***************************************************************************************************************
412 void SensSpecCommand::outputStatistics(string label, string cutoff){
414 double tp = (double) truePositives;
415 double fp = (double) falsePositives;
416 double tn = (double) trueNegatives;
417 double fn = (double) falseNegatives;
421 double pPrime = tp + fp;
422 double nPrime = tn + fn;
424 double sensitivity = tp / p;
425 double specificity = tn / n;
426 double positivePredictiveValue = tp / pPrime;
427 double negativePredictiveValue = tn / nPrime;
428 double falseDiscoveryRate = fp / pPrime;
430 double accuracy = (tp + tn) / (p + n);
431 double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; }
432 double f1Score = 2.0 * tp / (p + pPrime);
435 if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; }
436 if(n == 0) { specificity = 0; matthewsCorrCoef = 0; }
437 if(p + n == 0) { accuracy = 0; }
438 if(p + pPrime == 0) { f1Score = 0; }
439 if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; }
440 if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; }
442 ofstream sensSpecFile;
443 openOutputFileAppend(sensSpecFileName, sensSpecFile);
445 sensSpecFile << label << '\t' << cutoff << '\t';
446 sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
447 sensSpecFile << setprecision(4);
448 sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
449 sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
451 sensSpecFile.close();
453 catch(exception& e) {
454 m->errorOut(e, "SensSpecCommand", "outputStatistics");
459 //***************************************************************************************************************