X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=classifyotucommand.cpp;h=160928f3aa46906900ac5262f198112fb70fda36;hp=0ada1c8f77da53a27f02658184fa8fc6a0713e4b;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=79a7d3273749b08d4f9f8dfe350c964ff0c4351e diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp index 0ada1c8..160928f 100644 --- a/classifyotucommand.cpp +++ b/classifyotucommand.cpp @@ -15,19 +15,19 @@ //********************************************************************************************************************** vector ClassifyOtuCommand::setParameters(){ try { - CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist); - CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy); - CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy); - CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); - CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ppersample); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis); - CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff); - CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist); + CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,true,true); parameters.push_back(ptaxonomy); + CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter ppersample("persample", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppersample); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "","",false,false); parameters.push_back(pbasis); + CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "","",false,true); parameters.push_back(pcutoff); + CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pprobs); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -42,7 +42,7 @@ vector ClassifyOtuCommand::setParameters(){ string ClassifyOtuCommand::getHelpString(){ try { string helpString = ""; - helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, cutoff, label, basis and probs. The taxonomy and list parameters are required unless you have a valid current file.\n"; + helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, persample, cutoff, label, basis and probs. The taxonomy and list parameters are required unless you have a valid current file.\n"; helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n"; helpString += "The name parameter allows you add a names file with your taxonomy file.\n"; helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n"; @@ -53,6 +53,7 @@ string ClassifyOtuCommand::getHelpString(){ helpString += "Now for basis=otu could give Clostridiales 3 7 6 1 2, where 7 is the number of otus that classified to Clostridiales.\n"; helpString += "6 is the number of otus containing sequences from groupA, 1 is the number of otus containing sequences from groupB, and 2 is the number of otus containing sequences from groupC.\n"; helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n"; + helpString += "The persample parameter allows you to find a consensus taxonomy for each group. Default=f\n"; helpString += "The default value for label is all labels in your inputfile.\n"; helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n"; helpString += "The probs parameter shuts off the outputting of the consensus confidence results. The default is true, meaning you want the confidence to be shown.\n"; @@ -67,25 +68,20 @@ string ClassifyOtuCommand::getHelpString(){ } } //********************************************************************************************************************** -string ClassifyOtuCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string ClassifyOtuCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "constaxonomy") { outputFileName = "cons.taxonomy"; } - else if (type == "taxsummary") { outputFileName = "cons.tax.summary"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "ClassifyOtuCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "constaxonomy") { pattern = "[filename],[distance],cons.taxonomy"; } + else if (type == "taxsummary") { pattern = "[filename],[distance],cons.tax.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "ClassifyOtuCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** ClassifyOtuCommand::ClassifyOtuCommand(){ @@ -297,7 +293,7 @@ int ClassifyOtuCommand::execute(){ if (namefile != "") { m->readNames(namefile, nameMap, true); } if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); groups = groupMap->getNamesOfGroups(); } else { groupMap = NULL; } - if (countfile != "") { ct = new CountTable(); ct->readTable(countfile); if (ct->hasGroupInfo()) { groups = ct->getNamesOfGroups(); } } + if (countfile != "") { ct = new CountTable(); ct->readTable(countfile, true, false); if (ct->hasGroupInfo()) { groups = ct->getNamesOfGroups(); } } else { ct = NULL; } //read taxonomy file and save in map for easy access in building bin trees @@ -532,12 +528,15 @@ int ClassifyOtuCommand::process(ListVector* processList) { if (outputDir == "") { outputDir += m->hasPath(listfile); } ofstream out; - string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("constaxonomy"); + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile)); + variables["[distance]"] = processList->getLabel(); + string outputFile = getOutputFileName("constaxonomy", variables); m->openOutputFile(outputFile, out); outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); ofstream outSum; - string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("taxsummary"); + string outputSumFile = getOutputFileName("taxsummary", variables); m->openOutputFile(outputSumFile, outSum); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); @@ -560,14 +559,15 @@ int ClassifyOtuCommand::process(ListVector* processList) { for (int i = 0; i < groups.size(); i++) { groupIndex[groups[i]] = i; ofstream* temp = new ofstream(); - string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + groups[i] + "." +getOutputFileNameTag("constaxonomy"); + variables["[distance]"] = processList->getLabel() + "." + groups[i]; + string outputFile = getOutputFileName("constaxonomy", variables); m->openOutputFile(outputFile, *temp); (*temp) << "OTU\tSize\tTaxonomy" << endl; outs.push_back(temp); outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile); ofstream* tempSum = new ofstream(); - string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + groups[i] + "." +getOutputFileNameTag("taxsummary"); + string outputSumFile = getOutputFileName("taxsummary", variables); m->openOutputFile(outputSumFile, *tempSum); outSums.push_back(tempSum); outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile); @@ -586,6 +586,7 @@ int ClassifyOtuCommand::process(ListVector* processList) { //for each bin in the list vector string snumBins = toString(processList->getNumBins()); + vector binLabels = processList->getLabels(); for (int i = 0; i < processList->getNumBins(); i++) { if (m->control_pressed) { break; } @@ -598,17 +599,8 @@ int ClassifyOtuCommand::process(ListVector* processList) { names = findConsensusTaxonomy(thisNames, size, conTax); if (m->control_pressed) { break; } - - //output to new names file - string binLabel = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { binLabel += "0"; } - } - binLabel += sbinNumber; - out << binLabel << '\t' << size << '\t' << conTax << endl; + out << binLabels[i] << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; m->removeConfidences(noConfidenceConTax); @@ -616,9 +608,10 @@ int ClassifyOtuCommand::process(ListVector* processList) { //add this bins taxonomy to summary if (basis == "sequence") { for(int j = 0; j < names.size(); j++) { - int numReps = 1; - if (countfile != "") { numReps = ct->getNumSeqs(names[j]); } - for(int k = 0; k < numReps; k++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); } + //int numReps = 1; + //if (countfile != "") { numReps = ct->getNumSeqs(names[j]); } + //for(int k = 0; k < numReps; k++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); } + taxaSum->addSeqToTree(names[j], noConfidenceConTax); } }else { //otu map containsGroup; @@ -682,16 +675,8 @@ int ClassifyOtuCommand::process(ListVector* processList) { if (m->control_pressed) { break; } - //output to new names file - string binLabel = "Otu"; - string sbinNumber = toString(i+1); - if (sbinNumber.length() < snumBins.length()) { - int diff = snumBins.length() - sbinNumber.length(); - for (int h = 0; h < diff; h++) { binLabel += "0"; } - } - binLabel += sbinNumber; - (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl; + (*outs[groupIndex[itParsed->first]]) << binLabels[i] << '\t' << size << '\t' << conTax << endl; string noConfidenceConTax = conTax; m->removeConfidences(noConfidenceConTax);