From: pschloss Date: Wed, 7 Jul 2010 19:36:36 +0000 (+0000) Subject: modified makefile and sensspeccommand files X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=37309a2552995e5f7f2e04be99b808c803472311 modified makefile and sensspeccommand files --- diff --git a/makefile b/makefile index af46fd9..33cce4d 100644 --- a/makefile +++ b/makefile @@ -17,7 +17,6 @@ CYGWIN_BUILD ?= no ifeq ($(strip $(CYGWIN_BUILD)),yes) CXXFLAGS += -mno-cygwin LDFLAGS += -mno-cygwin -#-lpsapi endif 64BIT_VERSION ?= yes @@ -35,8 +34,7 @@ ifeq ($(strip $(USEREADLINE)),yes) CXXFLAGS += -DUSE_READLINE LDFLAGS += \ -lreadline\ - -lncurses\ - -L../readline-6.0 + -lncurses endif USEMPI ?= no @@ -64,6 +62,12 @@ mothur : $(OBJECTS) install : mothur cp mothur ../Release/mothur +%.o : %.cpp %.h + $(COMPILE.cpp) $(OUTPUT_OPTION) $< +%.o : %.cpp %.hpp + $(COMPILE.cpp) $(OUTPUT_OPTION) $< + + clean : @rm -f $(OBJECTS) diff --git a/sensspeccommand.cpp b/sensspeccommand.cpp index 0416dcb..edb1d45 100644 --- a/sensspeccommand.cpp +++ b/sensspeccommand.cpp @@ -23,7 +23,7 @@ SensSpecCommand::SensSpecCommand(string option) { string temp; //valid paramters for this command - string AlignArray[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "outputdir", "inputdir"}; + string AlignArray[] = {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -110,16 +110,18 @@ SensSpecCommand::SensSpecCommand(string option) { // else { nameFile = temp; } // cout << "name:\t" << nameFile << endl; - temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = ""; } + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; } convert(temp, cutoff); - cout << "cutoff:\t" << cutoff << endl; +// cout << cutoff << endl; -// cutoff = 0.0349; + temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } + convert(temp, precision); +// cout << precision << endl; lineLabel = validParameter.validFile(parameters, "label", false); if (lineLabel == "not found") { lineLabel = ""; } + sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec"; } - } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "SensSpecCommand"); @@ -156,13 +158,9 @@ int SensSpecCommand::execute(){ try{ if (abort == true) { return 0; } + setUpOutput(); if(format == "phylip") { processPhylip(); } -// else if(format == "column") { processColumn(seqMap); } - - -// string seqList; -// map seqMap; - + else if(format == "column") { processColumn(); } return 0; @@ -177,9 +175,16 @@ int SensSpecCommand::execute(){ void SensSpecCommand::processPhylip(){ try{ + //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file + ifstream inputListFile; openInputFile(listFile, inputListFile); - + + string origCutoff = ""; + bool getCutoff = 0; + if(cutoff == -1.00) { getCutoff = 1; } + else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } + string label; int numOTUs; @@ -205,112 +210,250 @@ void SensSpecCommand::processPhylip(){ } seqMap[seqName] = i; } + gobble(inputListFile); + + int lNumSeqs = seqMap.size(); + int pNumSeqs = 0; + + ifstream phylipFile; + openInputFile(distFile, phylipFile); + phylipFile >> pNumSeqs; + if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; } + + string seqName; + double distance; + vector otuIndices(lNumSeqs, -1); + + truePositives = 0; + falsePositives = 0; + trueNegatives = 0; + falseNegatives = 0; + + if(getCutoff == 1){ + if(label != "unique"){ + origCutoff = label; + convert(label, cutoff); + if(hard == 0){ cutoff += (0.49 / double(precision)); } + } + else{ + origCutoff = "unique"; + cutoff = 0.0000; + } + } + + cout << label << endl; + + for(int i=0;i> seqName; + otuIndices[i] = seqMap[seqName]; + + for(int j=0;j> distance; + + if(distance <= cutoff){ + if(otuIndices[i] == otuIndices[j]) { truePositives++; } + else { falseNegatives++; } + } + else{ + if(otuIndices[i] == otuIndices[j]) { falsePositives++; } + else { trueNegatives++; } + } + } + } + phylipFile.close(); + + outputStatistics(label, origCutoff); } inputListFile.close(); - - int lNumSeqs = seqMap.size(); - int pNumSeqs = 0; - ifstream phylipFile; - openInputFile(distFile, phylipFile); - phylipFile >> pNumSeqs; - if(pNumSeqs != lNumSeqs){ cout << "numSeq mismatch!" << endl; } + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "processPhylip"); + exit(1); + } +} + +//*************************************************************************************************************** + +void SensSpecCommand::processColumn(){ + try{ + ifstream inputListFile; + openInputFile(listFile, inputListFile); - string seqName; - double distance; - vector otuIndices(lNumSeqs, -1); - - truePositives = 0; - falsePositives = 0; - trueNegatives = 0; - falseNegatives = 0; + string origCutoff = ""; + bool getCutoff = 0; + if(cutoff == -1.00) { getCutoff = 1; } + else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } + set seqPairSet; - for(int i=0;i> seqName; - otuIndices[i] = seqMap[seqName]; + string label, seqList; + int numOTUs; + int numSeqs; + + while(inputListFile){ + numSeqs = 0; - for(int j=0;j> distance; - if(distance <= cutoff){ - if(otuIndices[i] == otuIndices[j]){ - truePositives++; + inputListFile >> label >> numOTUs; + for(int i=0;i seqNameVector; + + inputListFile >> seqList; + int seqListLength = seqList.length(); + string seqName = ""; + for(int j=0;j> seqNameA >> seqNameB >> distance; + if(seqNameA < seqNameB) { seqPairString = seqNameA + '\t' + seqNameB; } + else { seqPairString = seqNameB + '\t' + seqNameA; } + + set::iterator it = seqPairSet.find(seqPairString); + + if(distance <= cutoff){ + if(it != seqPairSet.end()){ + truePositives++; + seqPairSet.erase(it); } else{ - trueNegatives++; + falseNegatives++; } + trueNegatives--; + } + else if(it != seqPairSet.end()){ + falsePositives++; + trueNegatives--; + seqPairSet.erase(it); } + + gobble(columnFile); } + falsePositives += seqPairSet.size(); + + outputStatistics(label, origCutoff); } - phylipFile.close(); + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "processColumn"); + exit(1); + } +} + +//*************************************************************************************************************** + +void SensSpecCommand::setUpOutput(){ + try{ + ofstream sensSpecFile; + openOutputFile(sensSpecFileName, sensSpecFile); - cout << "truePositives:\t" << truePositives << endl; - cout << "trueNegatives:\t" << trueNegatives << endl; - cout << "falsePositives:\t" << falsePositives << endl; - cout << "falseNegatives:\t" << falseNegatives << endl; + sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n"; + + sensSpecFile.close(); } catch(exception& e) { - m->errorOut(e, "SensSpecCommand", "processPhylip"); + m->errorOut(e, "SensSpecCommand", "setUpOutput"); exit(1); } } //*************************************************************************************************************** -//void SensSpecCommand::processColumn(map seqMap){ -// -// truePositives = 0; -// falsePositives = 0; -// trueNegatives = numDists; -// falseNegatives = 0; -// -// ifstream columnFile; -// openInputFile(distFile, columnFile); -// -// string seqNameA, seqNameB, oldSeqNameA; -// int otuA, otuB, oldOTUA; -// double distance; -// -// while(columnFile){ -// columnFile >> seqNameA >> seqNameB >> distance; -// -// if(seqNameA == oldSeqNameA) { otuA = oldOTUA; } -// else { otuA = seqMap[seqNameA]; oldOTUA = otuA; } -// -// otuB = seqMap[seqNameB]; -// -// if(distance <= cutoff){ -// if(otuA == otuB){ -// truePositives++; -// } -// else{ -// falseNegatives++; -// } -// trueNegatives--; -// } -// else{ -// if(otuA == otuB){ -// falsePositives++; -// trueNegatives--; -// } -// } -// -// gobble(columnFile); -// } -// columnFile.close(); -// -// cout << "truePositives:\t" << truePositives << endl; -// cout << "trueNegatives:\t" << trueNegatives << endl; -// cout << "falsePositives:\t" << falsePositives << endl; -// cout << "falseNegatives:\t" << falseNegatives << endl; -//} +void SensSpecCommand::outputStatistics(string label, string cutoff){ + try{ + double tp = (double) truePositives; + double fp = (double) falsePositives; + double tn = (double) trueNegatives; + double fn = (double) falseNegatives; + + double p = tp + fn; + double n = fp + tn; + double pPrime = tp + fp; + double nPrime = tn + fn; + + double sensitivity = tp / p; + double specificity = tn / n; + double positivePredictiveValue = tp / pPrime; + double negativePredictiveValue = tn / nPrime; + double falseDiscoveryRate = fp / pPrime; + + double accuracy = (tp + tn) / (p + n); + double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime); if(p == 0 || n == 0){ matthewsCorrCoef = 0; } + double f1Score = 2.0 * tp / (p + pPrime); + + + if(p == 0) { sensitivity = 0; matthewsCorrCoef = 0; } + if(n == 0) { specificity = 0; matthewsCorrCoef = 0; } + if(p + n == 0) { accuracy = 0; } + if(p + pPrime == 0) { f1Score = 0; } + if(pPrime == 0) { positivePredictiveValue = 0; falseDiscoveryRate = 0; matthewsCorrCoef = 0; } + if(nPrime == 0) { negativePredictiveValue = 0; matthewsCorrCoef = 0; } + + ofstream sensSpecFile; + openOutputFileAppend(sensSpecFileName, sensSpecFile); + + sensSpecFile << label << '\t' << cutoff << '\t'; + sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t'; + sensSpecFile << setprecision(4); + sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t'; + sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl; + + sensSpecFile.close(); + } + catch(exception& e) { + m->errorOut(e, "SensSpecCommand", "outputStatistics"); + exit(1); + } +} //*************************************************************************************************************** diff --git a/sensspeccommand.h b/sensspeccommand.h index 52ab31e..818667a 100644 --- a/sensspeccommand.h +++ b/sensspeccommand.h @@ -24,16 +24,20 @@ public: private: void processPhylip(); -/// void processColumn(map); + void processColumn(); + void setUpOutput(); + void outputStatistics(string, string); - string listFile, distFile, nameFile, outputDir; + string listFile, distFile, nameFile, sensSpecFileName; + string outputDir; string format; -// int numSeqs, numDists; - int truePositives, falsePositives, trueNegatives, falseNegatives; + + long int truePositives, falsePositives, trueNegatives, falseNegatives; bool abort; bool hard; string lineLabel; double cutoff; + int precision; }; #endif \ No newline at end of file