git.donarmstrong.com Git - mothur.git/blobdiff - classifyotucommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / classifyotucommand.cpp
index d065b3a37c5ac41f9c6c3b59eb9ca3832f8697a9..00ae690214177d94020069986aedbbb0cb118b9e 100644 (file)
@@ -63,6 +63,27 @@ string ClassifyOtuCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){     //returns the file name tag for output type `type`, or "" after logging an error; inputName is not read in this body
+       try {
+        string outputFileName = "";    //stays empty on either error path below
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates? (looked up in outputTypes)
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }  //type not in outputTypes: log only, control_pressed is not set here
+        else {
+            if (type == "constaxonomy") {  outputFileName =  "cons.taxonomy"; }  //NOTE(review): tag has no leading '.', but the literal it replaces in process() was ".cons.taxonomy" - confirm the separator is added elsewhere
+            else if (type == "taxsummary") {  outputFileName =  "cons.tax.summary"; }  //NOTE(review): same concern, the replaced literal was ".cons.tax.summary"
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }  //type is in outputTypes but has no tag defined here: log and set control_pressed
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag");  //report the exception together with class and function name
+               exit(1);  //then terminate the process
+       }
+}
+//**********************************************************************************************************************
 ClassifyOtuCommand::ClassifyOtuCommand(){      
        try {
                abort = true; calledHelp = true; 
@@ -182,7 +203,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        else if (refTaxonomy == "not open") { abort = true; }
        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }   
+                       if (namefile == "not open") { namefile = ""; abort = true; }    
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
@@ -206,7 +227,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); }
                        
                        string temp = validParameter.validFile(parameters, "cutoff", false);                    if (temp == "not found") { temp = "51"; }
-                       convert(temp, cutoff); 
+                       m->mothurConvert(temp, cutoff); 
                        
                        temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
                        probs = m->isTrue(temp);
@@ -214,6 +235,11 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        
                        if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
                        
+                       if (namefile == ""){
+                               vector<string> files; files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
@@ -229,10 +255,10 @@ int ClassifyOtuCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                //if user gave a namesfile then use it
-               if (namefile != "") {   readNamesFile();        }
+               if (namefile != "") {   m->readNames(namefile, nameMap, true);  }
                
                //read taxonomy file and save in map for easy access in building bin trees
-               readTaxonomyFile();
+               m->readTax(taxfile, taxMap);
                
                if (m->control_pressed) { return 0; }
                
@@ -244,7 +270,7 @@ int ClassifyOtuCommand::execute(){
                set<string> processedLabels;
                set<string> userLabels = labels;
                
-               if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }  return 0; }
+               if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0; }
        
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
@@ -252,7 +278,7 @@ int ClassifyOtuCommand::execute(){
                        
                                        m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                        process(list);
-                                       if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                                                                                
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
@@ -267,7 +293,7 @@ int ClassifyOtuCommand::execute(){
                                        process(list);
                                
                                        
-                                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                                                                                
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
@@ -303,12 +329,12 @@ int ClassifyOtuCommand::execute(){
                        process(list);
                        delete list;
                        
-                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                }
                
                delete input;  
                                
-               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
+               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -322,67 +348,6 @@ int ClassifyOtuCommand::execute(){
                exit(1);
        }
 }
-
-//**********************************************************************************************************************
-int ClassifyOtuCommand::readNamesFile() {
-       try {
-               
-               ifstream inNames;
-               m->openInputFile(namefile, inNames);
-               
-               string name, names;
-       
-               while(!inNames.eof()){
-                       inNames >> name;                        //read from first column  A
-                       inNames >> names;               //read from second column  A,B,C,D
-                       m->gobble(inNames);
-                       
-                       //parse names into vector
-                       vector<string> theseNames;
-                       m->splitAtComma(names, theseNames);
-
-                       for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
-                       
-                       if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
-               }
-               inNames.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifyOtuCommand", "readNamesFile");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int ClassifyOtuCommand::readTaxonomyFile() {
-       try {
-               
-               ifstream in;
-               m->openInputFile(taxfile, in);
-               
-               string name, tax;
-       
-               while(!in.eof()){
-                       in >> name >> tax;              
-                       m->gobble(in);
-                       
-                       //are there confidence scores, if so remove them
-                       if (tax.find_first_of('(') != -1) {  removeConfidences(tax);    }
-                       
-                       taxMap[name] = tax;
-                       
-                       if (m->control_pressed) { in.close(); taxMap.clear(); return 0; }
-               }
-               in.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile");
-               exit(1);
-       }
-}
 //**********************************************************************************************************************
 vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) {
        try{
@@ -451,7 +416,7 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                phylo->assignHeirarchyIDs(0);
                
                TaxNode currentNode = phylo->get(0);
-               
+               int myLevel = 0;        
                //at each level
                while (currentNode.children.size() != 0) { //you still have more to explore
                
@@ -470,6 +435,9 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                                }
                                
                        }
+            
+            //phylotree adds an extra unknown so we want to remove that
+            if (bestChild.name == "unknown") { bestChildSize--; }
                                
                        //is this taxonomy above cutoff
                        int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
@@ -480,6 +448,7 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                                }else{
                                        conTax += bestChild.name + ";";
                                }
+                               myLevel++;
                        }else{ //if no, quit
                                break;
                        }
@@ -488,7 +457,12 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                        currentNode = bestChild;
                }
                
-                               
+               if (myLevel != phylo->getMaxLevel()) {
+                       while (myLevel != phylo->getMaxLevel()) {
+                               conTax += "unclassified;";
+                               myLevel++;
+                       }
+               }               
                if (conTax == "") {  conTax = "no_consensus;";  }
                
                delete phylo;   
@@ -512,12 +486,12 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                if (outputDir == "") { outputDir += m->hasPath(listfile); }
                                
                ofstream out;
-               string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
+               string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("constaxonomy");
                m->openOutputFile(outputFile, out);
                outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
                
                ofstream outSum;
-               string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary";
+               string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("taxsummary");
                m->openOutputFile(outputSumFile, outSum);
                outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
                
@@ -530,7 +504,9 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        taxaSum = new PhyloSummary(groupfile);
                }
                
+
                //for each bin in the list vector
+        string snumBins = toString(processList->getNumBins());
                for (int i = 0; i < processList->getNumBins(); i++) {
                        
                        if (m->control_pressed) { break; }
@@ -541,10 +517,18 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        if (m->control_pressed) { out.close();  return 0; }
                        
                        //output to new names file
-                       out << (i+1) << '\t' << size << '\t' << conTax << endl;
+            string binLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { binLabel += "0"; }
+            }
+            binLabel += sbinNumber;
+
+                       out << binLabel << '\t' << size << '\t' << conTax << endl;
                        
                        string noConfidenceConTax = conTax;
-                       removeConfidences(noConfidenceConTax);
+                       m->removeConfidences(noConfidenceConTax);
                        
                        //add this bins taxonomy to summary
                        if (basis == "sequence") {
@@ -571,31 +555,30 @@ int ClassifyOtuCommand::process(ListVector* processList) {
        }
 }
 /**************************************************************************************************/
-void ClassifyOtuCommand::removeConfidences(string& tax) {
-       try {
-               
-               string taxon;
-               string newTax = "";
+string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) {  //returns the ';'-terminated taxa of tax, followed by "unclassified;" entries while fewer than maxlevel levels are present
+       try{
+               string newTax, taxon;
+               int level = 0;  //number of ';'-terminated taxa copied into newTax so far
                
+               //keep every ';'-terminated taxon you already have, counting the levels as you go
                while (tax.find_first_of(';') != -1) {
                        //get taxon
-                       taxon = tax.substr(0,tax.find_first_of(';'));
-                       
-                       int pos = taxon.find_first_of('(');
-                       if (pos != -1) {
-                               taxon = taxon.substr(0, pos); //rip off confidence 
-                       }
-                       
-                       taxon += ";";
-                       
+                       taxon = tax.substr(0,tax.find_first_of(';'))+';';  //text up to the first ';', with the ';' put back on
                        tax = tax.substr(tax.find_first_of(';')+1, tax.length());
                        newTax += taxon;
+                       level++;
+               }
+               
+               //add "unclassified;" until you reach maxlevel (adds nothing if level is already >= maxlevel)
+               while (level < maxlevel) {
+                       newTax += "unclassified;";
+                       level++;
                }
                
-               tax = newTax;
+               return newTax;  //any text left in tax after its last ';' is not included
        }
        catch(exception& e) {
-               m->errorOut(e, "ClassifyOtuCommand", "removeConfidences");
+               m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds");
                exit(1);
        }
 }