]> git.donarmstrong.com Git - mothur.git/blobdiff - sensspeccommand.cpp
added modify names parameter to set.dir
[mothur.git] / sensspeccommand.cpp
index dfb89b927c2de8dd34dcda801fae80d93281cc63..12786ca41c848d967ffda72e2d06f382881d8eed 100644 (file)
@@ -211,14 +211,25 @@ SensSpecCommand::SensSpecCommand(string option)  {
 int SensSpecCommand::execute(){
        try{
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-
+        
+        int startTime = time(NULL);
+        
+        //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file.
+        string newListFile = preProcessList();
+        if (newListFile != "") { listFile = newListFile; }
+        
                setUpOutput();
                outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
                if(format == "phylip")          {       processPhylip();        }
                else if(format == "column")     {       processColumn();        }
                
+        //remove temp file if created
+        if (newListFile != "") { m->mothurRemove(newListFile); }
+        
                if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
-               
+        
+        m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine();
+        
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                m->mothurOut(sensSpecFileName); m->mothurOutEndLine();  
@@ -232,16 +243,49 @@ int SensSpecCommand::execute(){
                exit(1);
        }
 }
+//***************************************************************************************************************
+// Reports whether the phylip file's first row carries distances after the sequence name.
+// Reads the leading count and the first name, then scans up to the first newline: returns true
+// if an alphanumeric character shows up first, false if the newline or end of file comes first.
+bool SensSpecCommand::testFile(){
+    try{
+        ifstream fileHandle;
+        m->openInputFile(phylipfile, fileHandle);
+
+        string numTest, name;
+        fileHandle >> numTest >> name;
+
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+
+        bool isSquare = false;
+        //get() returns an int so EOF can be told apart from every real byte; storing it in a
+        //char breaks that (0xFF reads as EOF, or EOF never matches when char is unsigned) and
+        //hands isalnum a value outside the range it accepts.
+        int d;
+        while((d = fileHandle.get()) != EOF){
+            if(isalnum(d))  { isSquare = true; break; }
+            if(d == '\n')   { break; }
+        }
+        fileHandle.close();
+
+        return isSquare;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SensSpecCommand", "testFile");
+        exit(1);
+    }
+}
 
 //***************************************************************************************************************
 
 int SensSpecCommand::processPhylip(){
        try{
                //probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
-               string origCutoff = "";
+        square = testFile();
+               string origCutoff = "";
                bool getCutoff = 0;
                if(cutoff == -1.00)     {       getCutoff = 1;                                                                                                                  }
-               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }               
+               else                            {       origCutoff = toString(cutoff);  cutoff += (0.49 / double(precision));   }
                
                map<string, int> seqMap;
                string seqList;
@@ -254,7 +298,6 @@ int SensSpecCommand::processPhylip(){
                set<string> processedLabels;
                set<string> userLabels = labels;
                
-               
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
                        if(m->control_pressed){
@@ -450,6 +493,9 @@ int SensSpecCommand::process(map<string, int>& seqMap, string label, bool& getCu
                                        else                                                            {       trueNegatives++;        }
                                }
                        }
+            
+            if (square) { m->getline(phylipFile); } //get rest of line - redundant distances
+            m->gobble(phylipFile);
                }
                phylipFile.close();
                
@@ -684,6 +730,114 @@ void SensSpecCommand::outputStatistics(string label, string cutoff){
                exit(1);
        }
 }
+//***************************************************************************************************************
+
+// Builds a temporary copy of the list file that keeps only names found in the distance file.
+// Returns the temp file name (listFile + ".temp"), or "" when no temp file is left behind:
+// control_pressed was set, the list already matches the distance file, or nothing was written.
+string SensSpecCommand::preProcessList(){
+    try {
+        set<string> uniqueNames;
+        //get unique names from distance file
+        if (format == "phylip") {
+            ifstream phylipFile;
+            m->openInputFile(distFile, phylipFile);
+            string numTest;
+            int pNumSeqs = 0; //defined value even if mothurConvert leaves it untouched
+            phylipFile >> numTest; m->gobble(phylipFile);
+
+            if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+            else { m->mothurConvert(numTest, pNumSeqs); }
+
+            //each row starts with a sequence name; the rest of the row is skipped
+            string seqName;
+            for(int i=0;i<pNumSeqs;i++){
+                if (m->control_pressed) { return ""; }
+                if (!(phylipFile >> seqName)) { break; } //fewer rows than the header count
+                m->getline(phylipFile);  m->gobble(phylipFile);
+                uniqueNames.insert(seqName);
+            }
+            phylipFile.close();
+        }else {
+            ifstream columnFile;
+            m->openInputFile(distFile, columnFile);
+            string seqNameA, seqNameB;
+            double distance;
+
+            //loop on the extraction itself so a failed read never inserts stale or empty names
+            while(columnFile >> seqNameA >> seqNameB){
+                if (m->control_pressed) { return ""; }
+                columnFile >> distance; //value is not used here, only consumed
+                uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB);
+                m->gobble(columnFile);
+            }
+            columnFile.close();
+        }
+
+        //read list file, if numSeqs > unique names then remove redundant names
+        string newListFile = listFile + ".temp";
+        ofstream out;
+        m->openOutputFile(newListFile, out);
+        ifstream in;
+        m->openInputFile(listFile, in);
+
+        bool wroteSomething = false;
+
+        while(!in.eof()){
+            if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
+
+            //read in list vector
+            ListVector list(in);
+
+            //list file has as many names as the distance file, so there is nothing to remove
+            if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
+
+            //make a new list vector
+            ListVector newList;
+            newList.setLabel(list.getLabel());
+
+            //for each bin
+            for (int i = 0; i < list.getNumBins(); i++) {
+                //parse out names that are in the distance file
+                string binnames = list.get(i);
+                vector<string> bnames;
+                m->splitAtComma(binnames, bnames);
+
+                string newNames = "";
+                for (size_t j = 0; j < bnames.size(); j++) {
+                    const string& name = bnames[j];
+                    //if that name is in the distance file, add it
+                    if (uniqueNames.count(name) != 0) {  newNames += name + ",";  }
+                }
+
+                //if there are names in this bin add to new list
+                if (newNames != "") {
+                    newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+                    newList.push_back(newNames);
+                }
+            }
+
+            //print new listvector
+            if (newList.getNumBins() != 0) {
+                wroteSomething = true;
+                newList.print(out);
+            }
+
+            m->gobble(in);
+        }
+        in.close();
+        out.close();
+
+        if (wroteSomething) { return newListFile; }
+        else { m->mothurRemove(newListFile); }
+
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SensSpecCommand", "preProcessList");
+        exit(1);
+    }
+}
+
 
 //***************************************************************************************************************