]> git.donarmstrong.com Git - mothur.git/commitdiff
modified makefile and sensspeccommand files
authorpschloss <pschloss>
Wed, 7 Jul 2010 19:36:36 +0000 (19:36 +0000)
committerpschloss <pschloss>
Wed, 7 Jul 2010 19:36:36 +0000 (19:36 +0000)
makefile
sensspeccommand.cpp
sensspeccommand.h

index af46fd97ab6bcd26392de438592e9d03678f3948..33cce4d996c4e9a586be09fa9096366658d1cb0f 100644 (file)
--- a/makefile
+++ b/makefile
@@ -17,7 +17,6 @@ CYGWIN_BUILD ?= no
 ifeq  ($(strip $(CYGWIN_BUILD)),yes)\r
     CXXFLAGS += -mno-cygwin\r
     LDFLAGS += -mno-cygwin \r
-#-lpsapi\r
 endif\r
 \r
 64BIT_VERSION ?= yes\r
@@ -35,8 +34,7 @@ ifeq  ($(strip $(USEREADLINE)),yes)
     CXXFLAGS += -DUSE_READLINE\r
     LDFLAGS += \\r
       -lreadline\\r
-      -lncurses\\r
-      -L../readline-6.0\r
+      -lncurses\r
 endif\r
 \r
 USEMPI ?= no\r
@@ -64,6 +62,12 @@ mothur : $(OBJECTS)
 install : mothur\r
        cp mothur ../Release/mothur\r
 \r
+%.o : %.cpp %.h\r
+       $(COMPILE.cpp) $(OUTPUT_OPTION) $<\r
+%.o : %.cpp %.hpp\r
+       $(COMPILE.cpp) $(OUTPUT_OPTION) $<\r
+\r
+\r
 clean :\r
        @rm -f $(OBJECTS)\r
 \r
index 0416dcb3954442e78a527f596dbca223d6fed332..edb1d452746191a4005e6dab0e1f70876e8b1216 100644 (file)
@@ -23,7 +23,7 @@ SensSpecCommand::SensSpecCommand(string option)  {
                        string temp;
 
                        //valid paramters for this command
-                       string AlignArray[] =  {"list", "phylip", "column", "name", "hard", "label", "cutoff", "outputdir", "inputdir"};
+                       string AlignArray[] =  {"list", "phylip", "column", "name", "hard", "label", "cutoff", "precision", "outputdir", "inputdir"};
                        
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
@@ -110,16 +110,18 @@ SensSpecCommand::SensSpecCommand(string option)  {
 //                     else                                            {       nameFile = temp;        }
 //                     cout << "name:\t" << nameFile << endl;
 
-                       temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = ""; }
+                       temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
                        convert(temp, cutoff);  
-                       cout << "cutoff:\t" << cutoff << endl;
+//                     cout << cutoff << endl;
                        
-//                     cutoff = 0.0349;
+                       temp = validParameter.validFile(parameters, "precision", false);        if (temp == "not found") { temp = "100"; }
+                       convert(temp, precision);  
+//                     cout << precision << endl;
                        
                        lineLabel = validParameter.validFile(parameters, "label", false);       if (lineLabel == "not found") { lineLabel = ""; }
                        
+                       sensSpecFileName = listFile.substr(0,listFile.find_last_of('.')) + ".sensspec";
                }
-               
        }
        catch(exception& e) {
                m->errorOut(e, "SensSpecCommand", "SensSpecCommand");
@@ -156,13 +158,9 @@ int SensSpecCommand::execute(){
        try{
                if (abort == true) { return 0; }
 
+               setUpOutput();
                if(format == "phylip")          {       processPhylip();        }
-//             else if(format == "column")     {       processColumn(seqMap);  }
-               
-               
-//             string seqList;
-//             map<string, int> seqMap;
-               
+               else if(format == "column")     {       processColumn();        }
                
                
                return 0;       
@@ -177,9 +175,16 @@ int SensSpecCommand::execute(){
 
 void SensSpecCommand::processPhylip(){
        try{
+               //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
+               
                ifstream inputListFile;
                openInputFile(listFile, inputListFile);
-
+               
+               string origCutoff = "";
+               bool getCutoff = 0;
+               if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
+               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
+               
                string label;
                int numOTUs;
 
@@ -205,112 +210,250 @@ void SensSpecCommand::processPhylip(){
                                }
                                seqMap[seqName] = i;
                        }
+                       gobble(inputListFile);
+               
+                       int lNumSeqs = seqMap.size();
+                       int pNumSeqs = 0;
+
+                       ifstream phylipFile;
+                       openInputFile(distFile, phylipFile);
+                       phylipFile >> pNumSeqs;
+                       if(pNumSeqs != lNumSeqs){       cout << "numSeq mismatch!" << endl;     }
+                       
+                       string seqName;
+                       double distance;
+                       vector<int> otuIndices(lNumSeqs, -1);
+                               
+                       truePositives = 0;
+                       falsePositives = 0;
+                       trueNegatives = 0;
+                       falseNegatives = 0;
+                       
+                       if(getCutoff == 1){
+                               if(label != "unique"){
+                                       origCutoff = label;
+                                       convert(label, cutoff);
+                                       if(hard == 0){  cutoff += (0.49 / double(precision));   }               
+                               }
+                               else{
+                                       origCutoff = "unique";
+                                       cutoff = 0.0000;
+                               }
+                       }
+                                  
+                       cout << label << endl;
+                       
+                       for(int i=0;i<lNumSeqs;i++){
+                               phylipFile >> seqName;
+                               otuIndices[i] = seqMap[seqName];
+                               
+                               for(int j=0;j<i;j++){
+                                       phylipFile >> distance;
+                                       
+                                       if(distance <= cutoff){
+                                               if(otuIndices[i] == otuIndices[j])      {       truePositives++;        }
+                                               else                                                            {       falseNegatives++;       }
+                                       }
+                                       else{
+                                               if(otuIndices[i] == otuIndices[j])      {       falsePositives++;       }
+                                               else                                                            {       trueNegatives++;        }
+                                       }
+                               }
+                       }
+                       phylipFile.close();
+                       
+                       outputStatistics(label, origCutoff);
                }
                inputListFile.close();
-               
-               int lNumSeqs = seqMap.size();
-               int pNumSeqs = 0;
 
-               ifstream phylipFile;
-               openInputFile(distFile, phylipFile);
-               phylipFile >> pNumSeqs;
-               if(pNumSeqs != lNumSeqs){       cout << "numSeq mismatch!" << endl;     }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "processPhylip");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+void SensSpecCommand::processColumn(){
+       try{            
+               ifstream inputListFile;
+               openInputFile(listFile, inputListFile);
                
-               string seqName;
-               double distance;
-               vector<int> otuIndices(lNumSeqs, -1);
-                       
-               truePositives = 0;
-               falsePositives = 0;
-               trueNegatives = 0;
-               falseNegatives = 0;
+               string origCutoff = "";
+               bool getCutoff = 0;
+               if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
+               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
                
+               set<string> seqPairSet;
                
-               for(int i=0;i<lNumSeqs;i++){
-                       phylipFile >> seqName;
-                       otuIndices[i] = seqMap[seqName];
+               string label, seqList;
+               int numOTUs;
+               int numSeqs;
+               
+               while(inputListFile){
+                       numSeqs = 0;
                        
-                       for(int j=0;j<i;j++){
-                               phylipFile >> distance;
-                               if(distance <= cutoff){
-                                       if(otuIndices[i] == otuIndices[j]){
-                                               truePositives++;
+                       inputListFile >> label >> numOTUs;
+                       for(int i=0;i<numOTUs;i++){
+                               
+                               vector<string> seqNameVector;
+                               
+                               inputListFile >> seqList;
+                               int seqListLength = seqList.length();
+                               string seqName = "";
+                               for(int j=0;j<seqListLength;j++){
+                                       
+                                       if(seqList[j] == ','){
+                                               seqNameVector.push_back(seqName);
+                                               seqName = "";
                                        }
                                        else{
-                                               falseNegatives++;
+                                               seqName += seqList[j];
                                        }
                                }
+                               seqNameVector.push_back(seqName);
+
+                               numSeqs += seqNameVector.size();
+                               
+                               int numSeqsInOTU = seqNameVector.size();
+                               for(int j=0;j<numSeqsInOTU;j++){
+                                       string seqPairString = "";                              
+                                       for(int k=0;k<j;k++){
+                                               if(seqNameVector[j] < seqNameVector[k]) {       seqPairString = seqNameVector[j] + '\t' + seqNameVector[k];     }
+                                               else                                                                    {       seqPairString = seqNameVector[k] + '\t' + seqNameVector[j];     }
+                                               seqPairSet.insert(seqPairString);
+                                       }
+                               }
+                       }
+                       gobble(inputListFile);
+                       
+                       int numDists = (numSeqs * (numSeqs-1) / 2);
+
+                       ifstream columnFile;
+                       openInputFile(distFile, columnFile);
+                       string seqNameA, seqNameB, seqPairString;
+                       double distance;
+                       
+                       truePositives = 0;
+                       falsePositives = 0;
+                       trueNegatives = numDists;
+                       falseNegatives = 0;
+                       
+                       if(getCutoff == 1){
+                               if(label != "unique"){
+                                       origCutoff = label;
+                                       convert(label, cutoff);
+                                       if(hard == 0){  cutoff += (0.49 / double(precision));   }               
+                               }
                                else{
-                                       if(otuIndices[i] == otuIndices[j]){
-                                               falsePositives++;
+                                       origCutoff = "unique";
+                                       cutoff = 0.0000;
+                               }
+                       }
+                       
+                       cout << label << endl;
+                       
+                       while(columnFile){
+                               columnFile >> seqNameA >> seqNameB >> distance;
+                               if(seqNameA < seqNameB) {       seqPairString = seqNameA + '\t' + seqNameB;     }
+                               else                                    {       seqPairString = seqNameB + '\t' + seqNameA;     }
+
+                               set<string>::iterator it = seqPairSet.find(seqPairString);
+                       
+                               if(distance <= cutoff){
+                                       if(it != seqPairSet.end()){
+                                               truePositives++;
+                                               seqPairSet.erase(it);   
                                        }
                                        else{
-                                               trueNegatives++;
+                                               falseNegatives++;
                                        }
+                                       trueNegatives--;
+                               }
+                               else if(it != seqPairSet.end()){        
+                                       falsePositives++;
+                                       trueNegatives--;
+                                       seqPairSet.erase(it);   
                                }
+                               
+                               gobble(columnFile);
                        }
+                       falsePositives += seqPairSet.size();
+                       
+                       outputStatistics(label, origCutoff);
                }
-               phylipFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "processColumn");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+void SensSpecCommand::setUpOutput(){
+       try{            
+               ofstream sensSpecFile;
+               openOutputFile(sensSpecFileName, sensSpecFile);
                
-               cout << "truePositives:\t"      << truePositives        << endl;
-               cout << "trueNegatives:\t"      << trueNegatives        << endl;
-               cout << "falsePositives:\t"     << falsePositives       << endl;
-               cout << "falseNegatives:\t"     << falseNegatives       << endl;
+               sensSpecFile << "label\tcutoff\ttp\ttn\tfp\tfn\tsensitivity\tspecificity\tppv\tnpv\tfdr\taccuracy\tmcc\tf1score\n";
+
+               sensSpecFile.close();
        }
        catch(exception& e) {
-               m->errorOut(e, "SensSpecCommand", "processPhylip");
+               m->errorOut(e, "SensSpecCommand", "setUpOutput");
                exit(1);
        }
 }
 
 //***************************************************************************************************************
 
-//void SensSpecCommand::processColumn(map<string, int> seqMap){
-//     
-//     truePositives = 0;
-//     falsePositives = 0;
-//     trueNegatives = numDists;
-//     falseNegatives = 0;
-//     
-//     ifstream columnFile;
-//     openInputFile(distFile, columnFile);
-//     
-//     string seqNameA, seqNameB, oldSeqNameA;
-//     int otuA, otuB, oldOTUA;
-//     double distance;
-//
-//     while(columnFile){
-//             columnFile >> seqNameA >> seqNameB >> distance;
-//
-//             if(seqNameA == oldSeqNameA)     {       otuA = oldOTUA; }
-//             else                                            {       otuA = seqMap[seqNameA];        oldOTUA = otuA; }
-//             
-//             otuB = seqMap[seqNameB];
-//             
-//             if(distance <= cutoff){
-//                     if(otuA == otuB){
-//                             truePositives++;
-//                     }
-//                     else{
-//                             falseNegatives++;
-//                     }
-//                     trueNegatives--;
-//             }
-//             else{
-//                     if(otuA == otuB){
-//                             falsePositives++;
-//                             trueNegatives--;
-//                     }
-//             }
-//     
-//             gobble(columnFile);
-//     }
-//     columnFile.close();
-//     
-//     cout << "truePositives:\t"      << truePositives        << endl;
-//     cout << "trueNegatives:\t"      << trueNegatives        << endl;
-//     cout << "falsePositives:\t"     << falsePositives       << endl;
-//     cout << "falseNegatives:\t"     << falseNegatives       << endl;
-//}
+void SensSpecCommand::outputStatistics(string label, string cutoff){
+       try{            
+               double tp = (double) truePositives;
+               double fp = (double) falsePositives;
+               double tn = (double) trueNegatives;
+               double fn = (double) falseNegatives;
+               
+               double p = tp + fn;
+               double n = fp + tn;
+               double pPrime = tp + fp;
+               double nPrime = tn + fn;
+               
+               double sensitivity = tp / p;    
+               double specificity = tn / n;
+               double positivePredictiveValue = tp / pPrime;
+               double negativePredictiveValue = tn / nPrime;
+               double falseDiscoveryRate = fp / pPrime;
+               
+               double accuracy = (tp + tn) / (p + n);
+               double matthewsCorrCoef = (tp * tn - fp * fn) / sqrt(p * n * pPrime * nPrime);  if(p == 0 || n == 0){   matthewsCorrCoef = 0;   }
+               double f1Score = 2.0 * tp / (p + pPrime);
+               
+               
+               if(p == 0)                      {       sensitivity = 0;        matthewsCorrCoef = 0;   }
+               if(n == 0)                      {       specificity = 0;        matthewsCorrCoef = 0;   }
+               if(p + n == 0)          {       accuracy = 0;                                                           }
+               if(p + pPrime == 0)     {       f1Score = 0;                                                            }
+               if(pPrime == 0)         {       positivePredictiveValue = 0;    falseDiscoveryRate = 0; matthewsCorrCoef = 0;   }
+               if(nPrime == 0)         {       negativePredictiveValue = 0;    matthewsCorrCoef = 0;                                                   }
+               
+               ofstream sensSpecFile;
+               openOutputFileAppend(sensSpecFileName, sensSpecFile);
+               
+               sensSpecFile << label << '\t' << cutoff << '\t';
+               sensSpecFile << truePositives << '\t' << trueNegatives << '\t' << falsePositives << '\t' << falseNegatives << '\t';
+               sensSpecFile << setprecision(4);
+               sensSpecFile << sensitivity << '\t' << specificity << '\t' << positivePredictiveValue << '\t' << negativePredictiveValue << '\t';
+               sensSpecFile << falseDiscoveryRate << '\t' << accuracy << '\t' << matthewsCorrCoef << '\t' << f1Score << endl;
+               
+               sensSpecFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SensSpecCommand", "outputStatistics");
+               exit(1);
+       }
+}
 
 //***************************************************************************************************************
index 52ab31ea6c96c88edb1b9b360cac7f9e38979a4e..818667a1c52729a2610564334d13225472eaac39 100644 (file)
@@ -24,16 +24,20 @@ public:
        
 private:
        void processPhylip();
-///    void processColumn(map<string, int>);
+       void processColumn();
+       void setUpOutput();
+       void outputStatistics(string, string);
        
-       string listFile, distFile, nameFile, outputDir;
+       string listFile, distFile, nameFile, sensSpecFileName;
+       string outputDir;
        string format;
-//     int numSeqs, numDists;
-       int truePositives, falsePositives, trueNegatives, falseNegatives;
+
+       long int truePositives, falsePositives, trueNegatives, falseNegatives;
        bool abort;
        bool hard;
        string lineLabel;
        double cutoff;
+       int precision;
 };
 
 #endif
\ No newline at end of file