#include "classifyotucommand.h"
#include "phylotree.h"
-
+//**********************************************************************************************************************
+vector<string> ClassifyOtuCommand::getValidParameters(){ // returns the list of valid parameter names for this command
+ try {
+ string AlignArray[] = {"list","label","name","taxonomy","cutoff","probs","outputdir","inputdir"};
+ vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); // element count = total array bytes / bytes per string
+ return myArray;
+ }
+ catch(exception& e) { // report the failure through m, then end the program with status 1
+ m->errorOut(e, "ClassifyOtuCommand", "getValidParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+ClassifyOtuCommand::ClassifyOtuCommand(){ // default constructor: takes no option string and parses no parameters
+ try {
+ abort = true; // NOTE(review): presumably keeps an option-less instance from being executed - confirm against execute()
+ //initialize outputTypes: register the "constaxonomy" type with an empty list of file names
+ vector<string> tempOutNames;
+ outputTypes["constaxonomy"] = tempOutNames;
+ }
+ catch(exception& e) { // report the failure through m, then end the program with status 1
+ m->errorOut(e, "ClassifyOtuCommand", "ClassifyOtuCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClassifyOtuCommand::getRequiredParameters(){ // returns the required parameter names: "list" and "taxonomy"
+ try {
+ string Array[] = {"list","taxonomy"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // element count = total array bytes / bytes per string
+ return myArray;
+ }
+ catch(exception& e) { // report the failure through m, then end the program with status 1
+ m->errorOut(e, "ClassifyOtuCommand", "getRequiredParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClassifyOtuCommand::getRequiredFiles(){ // returns the required file names; always an empty vector here
+ try {
+ vector<string> myArray; // deliberately left empty: nothing is added before it is returned
+ return myArray;
+ }
+ catch(exception& e) { // report the failure through m, then end the program with status 1
+ m->errorOut(e, "ClassifyOtuCommand", "getRequiredFiles");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
ClassifyOtuCommand::ClassifyOtuCommand(string option) {
try{
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["constaxonomy"] = tempOutNames;
+
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
it = parameters.find("list");
//user has given a list file
if(it != parameters.end()){
- path = hasPath(it->second);
+ path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["list"] = inputDir + it->second; }
}
it = parameters.find("name");
//user has given a name file
if(it != parameters.end()){
- path = hasPath(it->second);
+ path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
it = parameters.find("taxonomy");
//user has given a taxonomy file
if(it != parameters.end()){
- path = hasPath(it->second);
+ path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
label = validParameter.validFile(parameters, "label", false);
if (label == "not found") { label = ""; allLines = 1; }
else {
- if(label != "all") { splitAtDash(label, labels); allLines = 0; }
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
else { allLines = 1; }
}
convert(temp, cutoff);
temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; }
- probs = isTrue(temp);
+ probs = m->isTrue(temp);
if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; }
void ClassifyOtuCommand::help(){
try {
- m->mothurOut("The classify.otu command parameters are list, taxonomy, name, cutoff, and label. The taxonomy and list parameters are required.\n");
+ m->mothurOut("The classify.otu command parameters are list, taxonomy, name, cutoff, label and probs. The taxonomy and list parameters are required.\n");
m->mothurOut("The name parameter allows you add a names file with your taxonomy file.\n");
m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
m->mothurOut("The default value for label is all labels in your inputfile.\n");
- m->mothurOut("The cutoff parameter allows you to specify a concensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n");
- m->mothurOut("The probs parameter shuts off the outputting of the concensus confidence results. The default is true, meaning you want the confidence to be shown.\n");
+ m->mothurOut("The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n");
+ m->mothurOut("The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n");
m->mothurOut("The classify.otu command should be in the following format: classify.otu(taxonomy=yourTaxonomyFile, list=yourListFile, name=yourNamesFile, label=yourLabels).\n");
m->mothurOut("Example classify.otu(taxonomy=abrecovery.silva.full.taxonomy, list=abrecovery.fn.list, label=0.10).\n");
m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
set<string> processedLabels;
set<string> userLabels = labels;
- if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
process(list);
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; }
processedLabels.insert(list->getLabel());
userLabels.erase(list->getLabel());
}
- if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
string saveLabel = list->getLabel();
delete list;
process(list);
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; }
processedLabels.insert(list->getLabel());
userLabels.erase(list->getLabel());
process(list);
delete list;
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; }
}
delete input;
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
try {
ifstream inNames;
- openInputFile(namefile, inNames);
+ m->openInputFile(namefile, inNames);
string name, names;
- while(inNames){
+ while(!inNames.eof()){
inNames >> name; //read from first column A
inNames >> names; //read from second column A,B,C,D
- gobble(inNames);
+ m->gobble(inNames);
- nameMap[name] = names;
+ //parse names into vector
+ vector<string> theseNames;
+ m->splitAtComma(names, theseNames);
+
+ for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; }
if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; }
}
try {
ifstream in;
- openInputFile(taxfile, in);
+ m->openInputFile(taxfile, in);
string name, tax;
while(!in.eof()){
in >> name >> tax;
- gobble(in);
+ m->gobble(in);
//are there confidence scores, if so remove them
if (tax.find_first_of('(') != -1) { removeConfidences(tax); }
}
}
//**********************************************************************************************************************
-string ClassifyOtuCommand::findConcensusTaxonomy(int bin, ListVector* thisList, int& size) {
+string ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size) {
try{
string conTax = "";
vector<string> names;
//parse names into vector
string binnames = thisList->get(bin);
- splitAtComma(binnames, names);
+ m->splitAtComma(binnames, names);
//create a tree containing sequences from this bin
PhyloTree* phylo = new PhyloTree();
size = 0;
for (int i = 0; i < names.size(); i++) {
-
- if (m->control_pressed) { delete phylo; return conTax; }
-
- //is this sequence in the taxonomy file
- it = taxMap.find(names[i]);
-
- if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
- m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the concensus."); m->mothurOutEndLine();
- }else{
+
+ //if namesfile include the names
+ if (namefile != "") {
+
+ //is this sequence in the name file - namemap maps seqName -> repSeqName
+ it2 = nameMap.find(names[i]);
- //if namesfile include the names
- if (namefile != "") {
- //is this sequence in the name file
- it2 = nameMap.find(names[i]);
+ if (it2 == nameMap.end()) { //this name is not in name file, skip it
+ m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine();
+ }else{
+
+ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+ it = taxMap.find(it2->second);
+
+ if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
- if (it2 == nameMap.end()) { //this name is not in name file, skip it
- m->mothurOut(names[i] + " is not in your name file. I will not include it in the concensus."); m->mothurOutEndLine();
+ if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
+ else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
}else{
-
- vector<string> nameFileNames;
- splitAtComma(it2->second, nameFileNames);
-
- for (int j = 0; j < nameFileNames.size(); j++) {
- //add seq to tree
- phylo->addSeqToTree(nameFileNames[j], it->second);
- size++;
- }
+
+ //add seq to tree
+ phylo->addSeqToTree(names[i], it->second);
+ size++;
}
-
+ }
+
+ }else{
+ //is this sequence in the taxonomy file - there is no name file, so look up the sequence name itself
+ it = taxMap.find(names[i]);
+
+ if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
+ m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
}else{
//add seq to tree
phylo->addSeqToTree(names[i], it->second);
size++;
}
}
+
+
+ if (m->control_pressed) { delete phylo; return conTax; }
+
}
//build tree
}
//is this taxonomy above cutoff
- int concensusConfidence = ceil((bestChildSize / (float) size) * 100);
+ int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
- if (concensusConfidence >= cutoff) { //if yes, add it
+ if (consensusConfidence >= cutoff) { //if yes, add it
if (probs) {
- conTax += bestChild.name + "(" + toString(concensusConfidence) + ");";
+ conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
}else{
conTax += bestChild.name + ";";
}
}
- if (conTax == "") { conTax = "unclassified;"; }
+ if (conTax == "") { conTax = "no_consensus;"; }
delete phylo;
}
catch(exception& e) {
- m->errorOut(e, "ClassifyOtuCommand", "findConcensusTaxonomy");
+ m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
exit(1);
}
}
int size;
//create output file
- if (outputDir == "") { outputDir += hasPath(listfile); }
+ if (outputDir == "") { outputDir += m->hasPath(listfile); }
ofstream out;
- string outputFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
- openOutputFile(outputFile, out);
- outputNames.push_back(outputFile);
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy";
+ m->openOutputFile(outputFile, out);
+ outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
+
+ out << "OTU\tSize\tTaxonomy" << endl;
//for each bin in the list vector
for (int i = 0; i < processList->getNumBins(); i++) {
- conTax = findConcensusTaxonomy(i, processList, size);
-
+
+ conTax = findConsensusTaxonomy(i, processList, size);
+
if (m->control_pressed) { out.close(); return 0; }
//output to new names file