git.donarmstrong.com Git - mothur.git/blobdiff - classifyotucommand.cpp
parallelized rarefaction.single
[mothur.git] / classifyotucommand.cpp
index 5e7e78744e766d778bb9d6ce04fe244735fc81a3..d27a88141534c7bf9b8115eaccb55d7966096753 100644 (file)
@@ -45,7 +45,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                it = parameters.find("list");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["list"] = inputDir + it->second;             }
                                }
@@ -53,7 +53,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                it = parameters.find("name");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
@@ -61,7 +61,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                it = parameters.find("taxonomy");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
@@ -89,7 +89,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        label = validParameter.validFile(parameters, "label", false);                   
                        if (label == "not found") { label = ""; allLines = 1;  }
                        else { 
-                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
                        
@@ -97,7 +97,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        convert(temp, cutoff); 
                        
                        temp = validParameter.validFile(parameters, "probs", false);                                    if (temp == "not found"){       temp = "true";                  }
-                       probs = isTrue(temp);
+                       probs = m->isTrue(temp);
                        
                        
                        if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
@@ -114,12 +114,12 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
 
 void ClassifyOtuCommand::help(){
        try {
-               m->mothurOut("The classify.otu command parameters are list, taxonomy, name, cutoff, and label.  The taxonomy and list parameters are required.\n");
+               m->mothurOut("The classify.otu command parameters are list, taxonomy, name, cutoff, label and probs.  The taxonomy and list parameters are required.\n");
                m->mothurOut("The name parameter allows you add a names file with your taxonomy file.\n");
                m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
                m->mothurOut("The default value for label is all labels in your inputfile.\n");
-               m->mothurOut("The cutoff parameter allows you to specify a concensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n");
-               m->mothurOut("The probs parameter shuts off the outputting of the concensus confidence results. The default is true, meaning you want the confidence to be shown.\n");
+               m->mothurOut("The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n");
+               m->mothurOut("The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n");
                m->mothurOut("The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n");
                m->mothurOut("Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
@@ -171,7 +171,7 @@ int ClassifyOtuCommand::execute(){
                                        userLabels.erase(list->getLabel());
                        }
                        
-                       if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                                        string saveLabel = list->getLabel();
                                        
                                        delete list;
@@ -241,16 +241,20 @@ int ClassifyOtuCommand::readNamesFile() {
        try {
                
                ifstream inNames;
-               openInputFile(namefile, inNames);
+               m->openInputFile(namefile, inNames);
                
                string name, names;
        
-               while(inNames){
+               while(!inNames.eof()){
                        inNames >> name;                        //read from first column  A
                        inNames >> names;               //read from second column  A,B,C,D
-                       gobble(inNames);
+                       m->gobble(inNames);
                        
-                       nameMap[name] = names;
+                       //parse names into vector
+                       vector<string> theseNames;
+                       m->splitAtComma(names, theseNames);
+
+                       for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = name;  }
                        
                        if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
                }
@@ -268,13 +272,13 @@ int ClassifyOtuCommand::readTaxonomyFile() {
        try {
                
                ifstream in;
-               openInputFile(taxfile, in);
+               m->openInputFile(taxfile, in);
                
                string name, tax;
        
                while(!in.eof()){
                        in >> name >> tax;              
-                       gobble(in);
+                       m->gobble(in);
                        
                        //are there confidence scores, if so remove them
                        if (tax.find_first_of('(') != -1) {  removeConfidences(tax);    }
@@ -293,7 +297,7 @@ int ClassifyOtuCommand::readTaxonomyFile() {
        }
 }
 //**********************************************************************************************************************
-string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList, int& size) {
+string ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size) {
        try{
                string conTax = "";
                vector<string> names;
@@ -302,48 +306,55 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
 
                //parse names into vector
                string binnames = thisList->get(bin);
-               splitAtComma(binnames, names);
+               m->splitAtComma(binnames, names);
 
                //create a tree containing sequences from this bin
                PhyloTree* phylo = new PhyloTree();
                
                size = 0;
                for (int i = 0; i < names.size(); i++) {
-                       
-                       if (m->control_pressed) { delete phylo; return conTax; }
-                       
-                       //is this sequence in the taxonomy file
-                       it = taxMap.find(names[i]);
-                       
-                       if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
-                               m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the concensus."); m->mothurOutEndLine();
-                       }else{
+       
+                       //if namesfile include the names
+                       if (namefile != "") {
+       
+                               //is this sequence in the name file - namemap maps seqName -> repSeqName
+                               it2 = nameMap.find(names[i]);
                                
-                               //if namesfile include the names
-                               if (namefile != "") {
-                                       //is this sequence in the name file
-                                       it2 = nameMap.find(names[i]);
+                               if (it2 == nameMap.end()) { //this name is not in name file, skip it
+                                       m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
+                               }else{
                                        
-                                       if (it2 == nameMap.end()) { //this name is not in name file, skip it
-                                               m->mothurOut(names[i] + " is not in your name file.  I will not include it in the concensus."); m->mothurOutEndLine();
+                                       //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                                       it = taxMap.find(it2->second);
+                       
+                                       if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
+                                       
+                                               if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
+                                               else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
                                        }else{
-                                               
-                                               vector<string> nameFileNames;
-                                               splitAtComma(it2->second, nameFileNames);
-                                               
-                                               for (int j = 0; j < nameFileNames.size(); j++) {
-                                                       //add seq to tree
-                                                       phylo->addSeqToTree(nameFileNames[j], it->second);
-                                                       size++;
-                                               }
+                               
+                                               //add seq to tree
+                                               phylo->addSeqToTree(names[i], it->second);
+                                               size++;
                                        }
-                                       
+                               }
+                               
+                       }else{
+                               //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+                               it = taxMap.find(names[i]);
+               
+                               if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
+                                       m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
                                }else{
                                        //add seq to tree
                                        phylo->addSeqToTree(names[i], it->second);
                                        size++;
                                }
                        }
+
+                       
+                       if (m->control_pressed) { delete phylo; return conTax; }
+                       
                }
                
                //build tree
@@ -371,11 +382,11 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
                        }
                                
                        //is this taxonomy above cutoff
-                       int concensusConfidence = ceil((bestChildSize / (float) size) * 100);
+                       int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
                        
-                       if (concensusConfidence >= cutoff) { //if yes, add it
+                       if (consensusConfidence >= cutoff) { //if yes, add it
                                if (probs) {
-                                       conTax += bestChild.name + "(" + toString(concensusConfidence) + ");";
+                                       conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
                                }else{
                                        conTax += bestChild.name + ";";
                                }
@@ -388,7 +399,7 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
                }
                
                                
-               if (conTax == "") {  conTax = "unclassified;";  }
+               if (conTax == "") {  conTax = "no_consensus;";  }
                
                delete phylo;   
                
@@ -396,7 +407,7 @@ string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList,
                        
        }
        catch(exception& e) {
-               m->errorOut(e, "ClassifyOtuCommand", "findConcensusTaxonomy");
+               m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
                exit(1);
        }
 }
@@ -408,17 +419,18 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                int size;
                
                //create output file
-               if (outputDir == "") { outputDir += hasPath(listfile); }
+               if (outputDir == "") { outputDir += m->hasPath(listfile); }
                                
                ofstream out;
-               string outputFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
-               openOutputFile(outputFile, out);
+               string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
+               m->openOutputFile(outputFile, out);
                outputNames.push_back(outputFile);
                
                //for each bin in the list vector
                for (int i = 0; i < processList->getNumBins(); i++) {
-                       conTax  = findConcensusTaxonomy(i, processList, size);
-                       
+       
+                       conTax  = findConsensusTaxonomy(i, processList, size);
+               
                        if (m->control_pressed) { out.close();  return 0; }
                        
                        //output to new names file