X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyotucommand.cpp;h=00ae690214177d94020069986aedbbb0cb118b9e;hb=6ede3bf5c0a9eedb23f24577a97da81ab3e1f7df;hp=c8896375b016ef6c45462351a18e8e520bfe75c2;hpb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;p=mothur.git diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp index c889637..00ae690 100644 --- a/classifyotucommand.cpp +++ b/classifyotucommand.cpp @@ -63,6 +63,27 @@ string ClassifyOtuCommand::getHelpString(){ } } //********************************************************************************************************************** +string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "constaxonomy") { outputFileName = "cons.taxonomy"; } + else if (type == "taxsummary") { outputFileName = "cons.tax.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** ClassifyOtuCommand::ClassifyOtuCommand(){ try { abort = true; calledHelp = true; @@ -234,10 +255,10 @@ int ClassifyOtuCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } //if user gave a namesfile then use it - if (namefile != "") { readNamesFile(); } + if (namefile != "") { m->readNames(namefile, nameMap, true); } //read taxonomy file and save in map for easy access in building bin trees - readTaxonomyFile(); + m->readTax(taxfile, taxMap); if (m->control_pressed) { return 0; } @@ -327,67 +348,6 @@ int ClassifyOtuCommand::execute(){ exit(1); } } - -//********************************************************************************************************************** -int ClassifyOtuCommand::readNamesFile() { - try { - - ifstream inNames; - m->openInputFile(namefile, inNames); - - string name, names; - - while(!inNames.eof()){ - inNames >> name; //read from first column A - inNames >> names; //read from second column A,B,C,D - m->gobble(inNames); - - //parse names into vector - vector theseNames; - m->splitAtComma(names, theseNames); - - for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = name; } - - if (m->control_pressed) { inNames.close(); nameMap.clear(); return 0; } - } - inNames.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyOtuCommand", "readNamesFile"); - exit(1); - } -} -//********************************************************************************************************************** -int ClassifyOtuCommand::readTaxonomyFile() { - try { - - ifstream in; - m->openInputFile(taxfile, in); - - string name, tax; - - while(!in.eof()){ - in >> name >> tax; - m->gobble(in); - - //are there confidence scores, if so remove them - if (tax.find_first_of('(') != -1) { m->removeConfidences(tax); } - - taxMap[name] = tax; - - if (m->control_pressed) { in.close(); taxMap.clear(); return 0; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ClassifyOtuCommand", "readTaxonomyFile"); - exit(1); - } -} //********************************************************************************************************************** vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList, int& size, string& conTax) { try{ @@ -475,6 +435,9 @@ vector ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th } } + + //phylotree adds an extra unknown so we want to remove that + if (bestChild.name == "unknown") { bestChildSize--; } //is this taxonomy above cutoff int consensusConfidence = ceil((bestChildSize / (float) size) * 100); @@ -523,12 +486,12 @@ int ClassifyOtuCommand::process(ListVector* processList) { if (outputDir == "") { outputDir += m->hasPath(listfile); } ofstream out; - string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.taxonomy"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("constaxonomy"); m->openOutputFile(outputFile, out); outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); ofstream outSum; - string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".cons.tax.summary"; + string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("taxsummary"); m->openOutputFile(outputSumFile, outSum); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); @@ -541,7 +504,9 @@ int ClassifyOtuCommand::process(ListVector* processList) { taxaSum = new PhyloSummary(groupfile); } + //for each bin in the list vector + string snumBins = toString(processList->getNumBins()); for (int i = 0; i < processList->getNumBins(); i++) { if (m->control_pressed) { break; } @@ -552,7 +517,15 @@ int ClassifyOtuCommand::process(ListVector* processList) { if (m->control_pressed) { out.close(); return 0; } //output to new names file - out << (i+1) << '\t' << size << '\t' << conTax << endl; + string binLabel = "Otu"; + string sbinNumber = toString(i+1); + if (sbinNumber.length() < snumBins.length()) { + int diff = snumBins.length() - sbinNumber.length(); + for (int h = 0; h < diff; h++) { binLabel += "0"; } + } + binLabel += sbinNumber; + + out << binLabel << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; m->removeConfidences(noConfidenceConTax);