X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyotucommand.cpp;h=00ae690214177d94020069986aedbbb0cb118b9e;hb=2ecee16fec29d4c525f740ec19b27962ca09c050;hp=d065b3a37c5ac41f9c6c3b59eb9ca3832f8697a9;hpb=220dc345e493cddc569521111ce32ac4d965ab7f;p=mothur.git diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp index d065b3a..00ae690 100644 --- a/classifyotucommand.cpp +++ b/classifyotucommand.cpp @@ -63,6 +63,27 @@ string ClassifyOtuCommand::getHelpString(){ } } //********************************************************************************************************************** +string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "constaxonomy") { outputFileName = "cons.taxonomy"; } + else if (type == "taxsummary") { outputFileName = "cons.tax.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** ClassifyOtuCommand::ClassifyOtuCommand(){ try { abort = true; calledHelp = true; @@ -182,7 +203,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option) { else if (refTaxonomy == "not open") { abort = true; } namefile = validParameter.validFile(parameters, "name", true); - if (namefile == "not open") { abort = true; } + if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { m->setNameFile(namefile); } @@ -206,7 +227,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option) { if ((basis != "otu") && (basis != "sequence")) { m->mothurOut("Invalid option for basis. basis options are otu and sequence, using otu."); m->mothurOutEndLine(); } string temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "51"; } - convert(temp, cutoff); + m->mothurConvert(temp, cutoff); temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; } probs = m->isTrue(temp); @@ -214,6 +235,11 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option) { if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; } + if (namefile == ""){ + vector files; files.push_back(taxfile); + parser.getNameFile(files); + } + } } catch(exception& e) { @@ -229,10 +255,10 @@ int ClassifyOtuCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } //if user gave a namesfile then use it - if (namefile != "") { readNamesFile(); } + if (namefile != "") { m->readNames(namefile, nameMap, true); } //read taxonomy file and save in map for easy access in building bin trees - readTaxonomyFile(); + m->readTax(taxfile, taxMap); if (m->control_pressed) { return 0; } @@ -244,7 +270,7 @@ int ClassifyOtuCommand::execute(){ set processedLabels; set userLabels = labels; - if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { @@ -252,7 +278,7 @@ int ClassifyOtuCommand::execute(){ m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine(); process(list); - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; } processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); @@ -267,7 +293,7 @@ int ClassifyOtuCommand::execute(){ process(list); - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; } processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); @@ -303,12 +329,12 @@ int ClassifyOtuCommand::execute(){ process(list); delete list; - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete input; delete list; return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; } } delete input; - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -322,67 +348,6 @@ int ClassifyOtuCommand::execute(){ exit(1); } } - -//********************************************************************************************************************** -int ClassifyOtuCommand::readNamesFile() { - try { - - ifstream inNames; - m->openInputFile(namefile, inNames); - - string name, names; - - while(!inNames.eof()){ - inNames >> name; //read from first column A - inNames >> names; //read from second column A,B,C,D - m->gobble(inNames); - - //parse names into vector - vector theseNames; - m->splitAtComma(names, theseNames); - - for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; } - - if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; } - } - inNames.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyOtuCommand", "readNamesFile"); - exit(1); - } -} -//********************************************************************************************************************** -int ClassifyOtuCommand::readTaxonomyFile() { - try { - - ifstream in; - m->openInputFile(taxfile, in); - - string name, tax; - - while(!in.eof()){ - in >> name >> tax; - m->gobble(in); - - //are there confidence scores, if so remove them - if (tax.find_first_of('(') != -1) { removeConfidences(tax); } - - taxMap[name] = tax; - - if (m->control_pressed) { in.close(); taxMap.clear(); return 0; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile"); - exit(1); - } -} //********************************************************************************************************************** vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) { try{ @@ -451,7 +416,7 @@ vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th phylo->assignHeirarchyIDs(0); TaxNode currentNode = phylo->get(0); - + int myLevel = 0; //at each level while (currentNode.children.size() != 0) { //you still have more to explore @@ -470,6 +435,9 @@ vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th } } + + //phylotree adds an extra unknown so we want to remove that + if (bestChild.name == "unknown") { bestChildSize--; } //is this taxonomy above cutoff int consensusConfidence = ceil((bestChildSize / (float) size) * 100); @@ -480,6 +448,7 @@ vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th }else{ conTax += bestChild.name + ";"; } + myLevel++; }else{ //if no, quit break; } @@ -488,7 +457,12 @@ vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th currentNode = bestChild; } - + if (myLevel != phylo->getMaxLevel()) { + while (myLevel != phylo->getMaxLevel()) { + conTax += "unclassified;"; + myLevel++; + } + } if (conTax == "") { conTax = "no_consensus;"; } delete phylo; @@ -512,12 +486,12 @@ int ClassifyOtuCommand::process(ListVector* processList) { if (outputDir == "") { outputDir += m->hasPath(listfile); } ofstream out; - string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("constaxonomy"); m->openOutputFile(outputFile, out); outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); ofstream outSum; - string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary"; + string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("taxsummary"); m->openOutputFile(outputSumFile, outSum); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); @@ -530,7 +504,9 @@ int ClassifyOtuCommand::process(ListVector* processList) { taxaSum = new PhyloSummary(groupfile); } + //for each bin in the list vector + string snumBins = toString(processList->getNumBins()); for (int i = 0; i < processList->getNumBins(); i++) { if (m->control_pressed) { break; } @@ -541,10 +517,18 @@ int ClassifyOtuCommand::process(ListVector* processList) { if (m->control_pressed) { out.close(); return 0; } //output to new names file - out << (i+1) << '\t' << size << '\t' << conTax << endl; + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + + out << binLabel << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; - removeConfidences(noConfidenceConTax); + m->removeConfidences(noConfidenceConTax); //add this bins taxonomy to summary if (basis == "sequence") { @@ -571,31 +555,30 @@ int ClassifyOtuCommand::process(ListVector* processList) { } } /**************************************************************************************************/ -void ClassifyOtuCommand::removeConfidences(string& tax) { - try { - - string taxon; - string newTax = ""; +string ClassifyOtuCommand::addUnclassifieds(string tax, int maxlevel) { + try{ + string newTax, taxon; + int level = 0; + //keep what you have counting the levels while (tax.find_first_of(';') != -1) { //get taxon - taxon = tax.substr(0,tax.find_first_of(';')); - - int pos = taxon.find_first_of('('); - if (pos != -1) { - taxon = taxon.substr(0, pos); //rip off confidence - } - - taxon += ";"; - + taxon = tax.substr(0,tax.find_first_of(';'))+';'; tax = tax.substr(tax.find_first_of(';')+1, tax.length()); newTax += taxon; + level++; + } + + //add "unclassified" until you reach maxLevel + while (level < maxlevel) { + newTax += "unclassified;"; + level++; } - tax = newTax; + return newTax; } catch(exception& e) { - m->errorOut(e, "ClassifyOtuCommand", "removeConfidences"); + m->errorOut(e, "ClassifyOtuCommand", "addUnclassifieds"); exit(1); } }