git.donarmstrong.com Git - mothur.git/blobdiff - classifyotucommand.cpp
fixes while testing
[mothur.git] / classifyotucommand.cpp
index 5e7e78744e766d778bb9d6ce04fe244735fc81a3..3469c82c0c1507604e87ea05efc30ca74927e09f 100644 (file)
 #include "classifyotucommand.h"
 #include "phylotree.h"
 
-
+//**********************************************************************************************************************
+vector<string> ClassifyOtuCommand::getValidParameters(){       // Returns the parameter names listed as valid for this command, as a vector of strings.
+       try {
+               string AlignArray[] =  {"list","label","name","taxonomy","cutoff","probs","outputdir","inputdir"};  // fixed list of accepted parameter names
+               vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));  // copy every element; count = total array bytes / bytes per string
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyOtuCommand", "getValidParameters");  // hand the exception to m->errorOut together with the class and method names
+               exit(1);  // ends the program with status 1, so this path never returns a value
+       }
+}
+//**********************************************************************************************************************
+ClassifyOtuCommand::ClassifyOtuCommand(){      // Default constructor: takes no option string; only sets abort and registers the output type.
+       try {
+               abort = true;  // set unconditionally here; NOTE(review): presumably tells execute() not to run - confirm against execute()
+               //initialize outputTypes
+               vector<string> tempOutNames;  // empty list used as the initial value below
+               outputTypes["constaxonomy"] = tempOutNames;  // the "constaxonomy" key starts out with no file names
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");  // hand the exception to m->errorOut together with the class and method names
+               exit(1);  // ends the program with status 1
+       }
+}
+//**********************************************************************************************************************
+vector<string> ClassifyOtuCommand::getRequiredParameters(){    // Returns the names of the parameters listed as required: "list" and "taxonomy".
+       try {
+               string Array[] =  {"list","taxonomy"};  // same two parameters the help text describes as required
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));  // copy every element; count = total array bytes / bytes per string
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyOtuCommand", "getRequiredParameters");  // hand the exception to m->errorOut together with the class and method names
+               exit(1);  // ends the program with status 1, so this path never returns a value
+       }
+}
+//**********************************************************************************************************************
+vector<string> ClassifyOtuCommand::getRequiredFiles(){ // Returns an empty vector: this command lists no required files.
+       try {
+               vector<string> myArray;  // left empty on purpose; nothing is added before it is returned
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClassifyOtuCommand", "getRequiredFiles");  // hand the exception to m->errorOut together with the class and method names
+               exit(1);  // ends the program with status 1, so this path never returns a value
+       }
+}
 //**********************************************************************************************************************
 ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
        try{
@@ -37,6 +84,10 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["constaxonomy"] = tempOutNames;
+               
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -45,7 +96,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                it = parameters.find("list");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["list"] = inputDir + it->second;             }
                                }
@@ -53,7 +104,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                it = parameters.find("name");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
@@ -61,7 +112,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                it = parameters.find("taxonomy");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
@@ -89,7 +140,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        label = validParameter.validFile(parameters, "label", false);                   
                        if (label == "not found") { label = ""; allLines = 1;  }
                        else { 
-                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
                        
@@ -97,7 +148,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        convert(temp, cutoff); 
                        
                        temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
-                       probs = isTrue(temp);
+                       probs = m->isTrue(temp);
                        
                        
                        if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
@@ -114,12 +165,12 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
 
 void ClassifyOtuCommand::help(){
        try {
-               m->mothurOut("The classify.otu command parameters are list, taxonomy, name, cutoff, and label.  The taxonomy and list parameters are required.\n");
+               m->mothurOut("The classify.otu command parameters are list, taxonomy, name, cutoff, label and probs.  The taxonomy and list parameters are required.\n");
                m->mothurOut("The name parameter allows you add a names file with your taxonomy file.\n");
                m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
                m->mothurOut("The default value for label is all labels in your inputfile.\n");
-               m->mothurOut("The cutoff parameter allows you to specify a concensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n");
-               m->mothurOut("The probs parameter shuts off the outputting of the concensus confidence results. The default is true, meaning you want the confidence to be shown.\n");
+               m->mothurOut("The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n");
+               m->mothurOut("The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n");
                m->mothurOut("The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n");
                m->mothurOut("Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
@@ -157,7 +208,7 @@ int ClassifyOtuCommand::execute(){
                set<string> processedLabels;
                set<string> userLabels = labels;
                
-               if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str());  }  return 0; }
+               if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }  return 0; }
        
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
@@ -165,13 +216,13 @@ int ClassifyOtuCommand::execute(){
                        
                                        m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                        process(list);
-                                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
                                                                                
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
                        }
                        
-                       if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                                        string saveLabel = list->getLabel();
                                        
                                        delete list;
@@ -180,7 +231,7 @@ int ClassifyOtuCommand::execute(){
                                        process(list);
                                
                                        
-                                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
                                                                                
                                        processedLabels.insert(list->getLabel());
                                        userLabels.erase(list->getLabel());
@@ -216,12 +267,12 @@ int ClassifyOtuCommand::execute(){
                        process(list);
                        delete list;
                        
-                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
+                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } delete input; delete list; return 0; }
                }
                
                delete input;  
                                
-               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } return 0; }
+               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -241,16 +292,20 @@ int ClassifyOtuCommand::readNamesFile() {
        try {
                
                ifstream inNames;
-               openInputFile(namefile, inNames);
+               m->openInputFile(namefile, inNames);
                
                string name, names;
        
-               while(inNames){
+               while(!inNames.eof()){
                        inNames >> name;                        //read from first column  A
                        inNames >> names;               //read from second column  A,B,C,D
-                       gobble(inNames);
+                       m->gobble(inNames);
                        
-                       nameMap[name] = names;
+                       //parse names into vector
+                       vector<string> theseNames;
+                       m->splitAtComma(names, theseNames);
+
+                       for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
                        
                        if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
                }
@@ -268,13 +323,13 @@ int ClassifyOtuCommand::readTaxonomyFile() {
        try {
                
                ifstream in;
-               openInputFile(taxfile, in);
+               m->openInputFile(taxfile, in);
                
                string name, tax;
        
                while(!in.eof()){
                        in >> name >> tax;              
-                       gobble(in);
+                       m->gobble(in);
                        
                        //are there confidence scores, if so remove them
                        if (tax.find_first_of('(') != -1) {  removeConfidences(tax);    }
@@ -293,7 +348,7 @@ int ClassifyOtuCommand::readTaxonomyFile() {
        }
 }
 //**********************************************************************************************************************
-string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList, int& size) {
+string ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size) {
        try{
                string conTax = "";
                vector<string> names;
@@ -302,48 +357,55 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
 
                //parse names into vector
                string binnames = thisList->get(bin);
-               splitAtComma(binnames, names);
+               m->splitAtComma(binnames, names);
 
                //create a tree containing sequences from this bin
                PhyloTree* phylo = new PhyloTree();
                
                size = 0;
                for (int i = 0; i < names.size(); i++) {
-                       
-                       if (m->control_pressed) { delete phylo; return conTax; }
-                       
-                       //is this sequence in the taxonomy file
-                       it = taxMap.find(names[i]);
-                       
-                       if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
-                               m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the concensus."); m->mothurOutEndLine();
-                       }else{
+       
+                       //if namesfile include the names
+                       if (namefile != "") {
+       
+                               //is this sequence in the name file - namemap maps seqName -> repSeqName
+                               it2 = nameMap.find(names[i]);
                                
-                               //if namesfile include the names
-                               if (namefile != "") {
-                                       //is this sequence in the name file
-                                       it2 = nameMap.find(names[i]);
+                               if (it2 == nameMap.end()) { //this name is not in name file, skip it
+                                       m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
+                               }else{
+                                       
+                                       //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                                       it = taxMap.find(it2->second);
+                       
+                                       if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
                                        
-                                       if (it2 == nameMap.end()) { //this name is not in name file, skip it
-                                               m->mothurOut(names[i] + " is not in your name file.  I will not include it in the concensus."); m->mothurOutEndLine();
+                                               if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
+                                               else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
                                        }else{
-                                               
-                                               vector<string> nameFileNames;
-                                               splitAtComma(it2->second, nameFileNames);
-                                               
-                                               for (int j = 0; j < nameFileNames.size(); j++) {
-                                                       //add seq to tree
-                                                       phylo->addSeqToTree(nameFileNames[j], it->second);
-                                                       size++;
-                                               }
+                               
+                                               //add seq to tree
+                                               phylo->addSeqToTree(names[i], it->second);
+                                               size++;
                                        }
-                                       
+                               }
+                               
+                       }else{
+                               //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                               it = taxMap.find(names[i]);
+               
+                               if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
+                                       m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
                                }else{
                                        //add seq to tree
                                        phylo->addSeqToTree(names[i], it->second);
                                        size++;
                                }
                        }
+
+                       
+                       if (m->control_pressed) { delete phylo; return conTax; }
+                       
                }
                
                //build tree
@@ -371,11 +433,11 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
                        }
                                
                        //is this taxonomy above cutoff
-                       int concensusConfidence = ceil((bestChildSize / (float) size) * 100);
+                       int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
                        
-                       if (concensusConfidence >= cutoff) { //if yes, add it
+                       if (consensusConfidence >= cutoff) { //if yes, add it
                                if (probs) {
-                                       conTax += bestChild.name + "(" + toString(concensusConfidence) + ");";
+                                       conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
                                }else{
                                        conTax += bestChild.name + ";";
                                }
@@ -388,7 +450,7 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
                }
                
                                
-               if (conTax == "") {  conTax = "unclassified;";  }
+               if (conTax == "") {  conTax = "no_consensus;";  }
                
                delete phylo;   
                
@@ -396,7 +458,7 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
                        
        }
        catch(exception& e) {
-               m->errorOut(e, "ClassifyOtuCommand", "findConcensusTaxonomy");
+               m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
                exit(1);
        }
 }
@@ -408,17 +470,20 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                int size;
                
                //create output file
-               if (outputDir == "") { outputDir += hasPath(listfile); }
+               if (outputDir == "") { outputDir += m->hasPath(listfile); }
                                
                ofstream out;
-               string outputFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
-               openOutputFile(outputFile, out);
-               outputNames.push_back(outputFile);
+               string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
+               m->openOutputFile(outputFile, out);
+               outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
+               
+               out << "OTU\tSize\tTaxonomy" << endl;
                
                //for each bin in the list vector
                for (int i = 0; i < processList->getNumBins(); i++) {
-                       conTax  = findConcensusTaxonomy(i, processList, size);
-                       
+       
+                       conTax  = findConsensusTaxonomy(i, processList, size);
+               
                        if (m->control_pressed) { out.close();  return 0; }
                        
                        //output to new names file