From 9a808b553f14a25f9c5e94da92a7077a57054e8e Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 25 Feb 2014 11:57:05 -0500 Subject: [PATCH] =?utf8?q?fixed=20bug=20in=20make.biom=20with=20picrust=20?= =?utf8?q?that=20didn=E2=80=99t=20allow=20for=20unclassified=20OTUs.=20fix?= =?utf8?q?ed=20bug=20in=20fastq.info=20that=20quit=20after=20100000=20seqs?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- makebiomcommand.cpp | 49 ++++++++++++++++++++++++++++++------------ parsefastaqcommand.cpp | 1 - 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/makebiomcommand.cpp b/makebiomcommand.cpp index 4c35092..36bda15 100644 --- a/makebiomcommand.cpp +++ b/makebiomcommand.cpp @@ -680,8 +680,17 @@ int MakeBiomCommand::getGreenGenesOTUIDs(vector& lookup, ma for (map::iterator it = labelTaxMap.begin(); it != labelTaxMap.end(); it++) { //maps label -> consensus taxonomy if (m->control_pressed) { break; } + string OTUTaxonomy = it->second; + + //remove confidences + m->removeConfidences(OTUTaxonomy); + + //remove unclassifieds to match template + int thisPos = OTUTaxonomy.find("unclassified;"); + if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos); } + //get list of reference ids that map to this taxonomy - vector referenceIds = phylo.getSeqs(it->second); + vector referenceIds = phylo.getSeqs(OTUTaxonomy); if (m->control_pressed) { break; } @@ -724,6 +733,7 @@ int MakeBiomCommand::getGreenGenesOTUIDs(vector& lookup, ma map newLabelTaxMap; //loop through ggOTUID list combining mothur otus and adjusting labels //ggOTUIDs = 16097 -> + for (map >::iterator itMap = ggOTUIDs.begin(); itMap != ggOTUIDs.end(); itMap++) { if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } @@ -737,14 +747,18 @@ int MakeBiomCommand::getGreenGenesOTUIDs(vector& lookup, ma vector abunds; abunds.resize(lookup.size(), 0); string mergeString = ""; vector boots; boots.resize(scores.size(), 0); + bool scoresNULL = false; for (int j = 0; j < itMap->second.size(); j++) { // - //merge bootstrap scores - vector scores; - vector taxonomies = parseTax(it->second, scores); - for (int i = 0; i < boots.size(); i++) { - float tempScore; m->mothurConvert(scores[i], tempScore); - boots[i] += tempScore; - } + + if (scores[0] != "null") { + //merge bootstrap scores + vector scores; + vector taxonomies = parseTax(it->second, scores); + for (int i = 0; i < boots.size(); i++) { + float tempScore; m->mothurConvert(scores[i], tempScore); + boots[i] += tempScore; + } + }else { scoresNULL = true; } //merge abunds mergeString += (itMap->second)[j] + " "; @@ -757,14 +771,21 @@ int MakeBiomCommand::getGreenGenesOTUIDs(vector& lookup, ma //average scores //add merged otu to new lookup - for (int j = 0; j < boots.size(); j++) { boots[j] /= (float) itMap->second.size(); } - - //assemble new taxomoy string newTaxString = ""; - for (int j = 0; j < boots.size(); j++) { - newTaxString += taxonomies[j] + "(" + toString(boots[j]) + ");"; + if (!scoresNULL) { + for (int j = 0; j < boots.size(); j++) { boots[j] /= (float) itMap->second.size(); } + + //assemble new taxomoy + for (int j = 0; j < boots.size(); j++) { + newTaxString += taxonomies[j] + "(" + toString(boots[j]) + ");"; + } + }else { + //assemble new taxomoy + for (int j = 0; j < taxonomies.size(); j++) { + newTaxString += taxonomies[j] + ";"; + } } - + //set new gg otu id to taxonomy. OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria //find taxonomy of this otu newLabelTaxMap[itMap->first] = newTaxString; diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp index c3bd393..a509ae9 100644 --- a/parsefastaqcommand.cpp +++ b/parsefastaqcommand.cpp @@ -308,7 +308,6 @@ int ParseFastaQCommand::execute(){ } //report progress if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); } - if(count > 100000){ break; } count++; } } -- 2.39.2