git.donarmstrong.com Git - mothur.git/blobdiff - classifyotucommand.cpp
fixed bug with dist.shared subsampling. added mode parameter to dist.shared so...
[mothur.git] / classifyotucommand.cpp
index d065b3a37c5ac41f9c6c3b59eb9ca3832f8697a9..ef41ee1bab8853f6741938005f775076b60723ab 100644 (file)
@@ -182,7 +182,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        else if (refTaxonomy == "not open") { abort = true; }
        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }   
+                       if (namefile == "not open") { namefile = ""; abort = true; }    
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
@@ -206,7 +206,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
                        
                        string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
-                       convert(temp, cutoff); 
+                       m->mothurConvert(temp, cutoff); 
                        
                        temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
                        probs = m->isTrue(temp);
@@ -214,6 +214,11 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        
                        if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
                        
+                       if (namefile == ""){
+                               vector<string> files; files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
@@ -244,7 +249,7 @@ int ClassifyOtuCommand::execute(){
                set<string> processedLabels;
                set<string> userLabels = labels;
                
-               if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }  return 0; }
+               if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0; }
        
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
@@ -252,7 +257,7 @@ int ClassifyOtuCommand::execute(){
                        
                                        m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                        process(list);
-                                       if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                                                                                
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
@@ -267,7 +272,7 @@ int ClassifyOtuCommand::execute(){
                                        process(list);
                                
                                        
-                                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                                                                                
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
@@ -303,12 +308,12 @@ int ClassifyOtuCommand::execute(){
                        process(list);
                        delete list;
                        
-                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                }
                
                delete input;  
                                
-               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
+               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -368,7 +373,7 @@ int ClassifyOtuCommand::readTaxonomyFile() {
                        m->gobble(in);
                        
                        //are there confidence scores, if so remove them
-                       if (tax.find_first_of('(') != -1) {  removeConfidences(tax);    }
+                       if (tax.find_first_of('(') != -1) {  m->removeConfidences(tax); }
                        
                        taxMap[name] = tax;
                        
@@ -451,7 +456,7 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                phylo->assignHeirarchyIDs(0);
                
                TaxNode currentNode = phylo->get(0);
-               
+               int myLevel = 0;        
                //at each level
                while (currentNode.children.size() != 0) { //you still have more to explore
                
@@ -470,6 +475,9 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                                }
                                
                        }
+            
+            //phylotree adds an extra unknown so we want to remove that
+            if (bestChild.name == "unknown") { bestChildSize--; }
                                
                        //is this taxonomy above cutoff
                        int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
@@ -480,6 +488,7 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                                }else{
                                        conTax += bestChild.name + ";";
                                }
+                               myLevel++;
                        }else{ //if no, quit
                                break;
                        }
@@ -488,7 +497,12 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                        currentNode = bestChild;
                }
                
-                               
+               if (myLevel != phylo->getMaxLevel()) {
+                       while (myLevel != phylo->getMaxLevel()) {
+                               conTax += "unclassified;";
+                               myLevel++;
+                       }
+               }               
                if (conTax == "") {  conTax = "no_consensus;";  }
                
                delete phylo;   
@@ -530,7 +544,9 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        taxaSum = new PhyloSummary(groupfile);
                }
                
+
                //for each bin in the list vector
+        string snumBins = toString(processList->getNumBins());
                for (int i = 0; i < processList->getNumBins(); i++) {
                        
                        if (m->control_pressed) { break; }
@@ -541,10 +557,18 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        if (m->control_pressed) { out.close();  return 0; }
                        
                        //output to new names file
-                       out << (i+1) << '\t' << size << '\t' << conTax << endl;
+            string binLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { binLabel += "0"; }
+            }
+            binLabel += sbinNumber;
+
+                       out << binLabel << '\t' << size << '\t' << conTax << endl;
                        
                        string noConfidenceConTax = conTax;
-                       removeConfidences(noConfidenceConTax);
+                       m->removeConfidences(noConfidenceConTax);
                        
                        //add this bins taxonomy to summary
                        if (basis == "sequence") {
@@ -571,31 +595,30 @@ int ClassifyOtuCommand::process(ListVector* processList) {
        }
 }
 /**************************************************************************************************/
-void ClassifyOtuCommand::removeConfidences(string& tax) {
-       try {
-               
-               string taxon;
-               string newTax = "";
+string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {
+       try{
+               string newTax, taxon;
+               int level = 0;
                
+               //keep what you have counting the levels
                while (tax.find_first_of(';') != -1) {
                        //get taxon
-                       taxon = tax.substr(0,tax.find_first_of(';'));
-                       
-                       int pos = taxon.find_first_of('(');
-                       if (pos != -1) {
-                               taxon = taxon.substr(0, pos); //rip off confidence 
-                       }
-                       
-                       taxon += ";";
-                       
+                       taxon = tax.substr(0,tax.find_first_of(';'))+';';
                        tax = tax.substr(tax.find_first_of(';')+1, tax.length());
                        newTax += taxon;
+                       level++;
+               }
+               
+               //add "unclassified" until you reach maxLevel
+               while (level < maxlevel) {
+                       newTax += "unclassified;";
+                       level++;
                }
                
-               tax = newTax;
+               return newTax;
        }
        catch(exception& e) {
-               m->errorOut(e, "ClassifyOtuCommand", "removeConfidences");
+               m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
                exit(1);
        }
 }