+//**********************************************************************************************************************
+// Relabels the OTUs held in lookup with OTU ids taken from the otu map file and
+// merges mothur OTUs that resolve to the same id.
+//
+// lookup       - one abundance vector per group; on success every element is deleted and
+//                replaced by a newly allocated vector holding the merged bins.
+// labelTaxMap  - mothur OTU label -> consensus taxonomy; on success replaced by
+//                gg OTU id -> taxonomy.
+//
+// On success m->currentSharedBinLabels is replaced by the gg OTU ids and the merged
+// table is written to the "shared" output file. If m->control_pressed is set (by the
+// user or by an error reported here) the function returns before lookup, labelTaxMap
+// or the bin labels are modified. The return value is always 0.
+int MakeBiomCommand::getGreenGenesOTUIDs(vector<SharedRAbundVector*>& lookup, map<string, string>& labelTaxMap){
+    try {
+        //read reftaxonomy
+        PhyloTree phylo(referenceTax);
+
+        //read otu map file
+        map<string, string> otuMap = readGGOtuMap(); //maps reference ID -> OTU ID
+
+        if (m->control_pressed) { return 0; }
+
+        //gg OTU id -> mothur OTU labels that resolved to it
+        map<string, vector<string> > ggOTUIDs;
+
+        //loop through otu taxonomies
+        for (map<string, string>::iterator it = labelTaxMap.begin(); it != labelTaxMap.end(); it++) { //maps label -> consensus taxonomy
+            if (m->control_pressed) { break; }
+
+            string OTUTaxonomy = it->second;
+
+            //remove confidences
+            m->removeConfidences(OTUTaxonomy);
+
+            //remove unclassifieds to match template
+            string::size_type thisPos = OTUTaxonomy.find("unclassified;");
+            if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos); }
+
+            //get list of reference ids that map to this taxonomy
+            vector<string> referenceIds = phylo.getSeqs(OTUTaxonomy);
+
+            if (m->control_pressed) { break; }
+
+            //the first reference id present in the otu map decides the gg OTU id
+            bool found = false;
+            string otuID = "";
+            string referenceString = ""; //ids examined so far, only used in the error message
+            for (size_t i = 0; i < referenceIds.size(); i++) {
+                referenceString += referenceIds[i] + " ";
+                map<string, string>::iterator itMap = otuMap.find(referenceIds[i]);
+                if (itMap != otuMap.end()) { //found it
+                    otuID = itMap->second;
+                    found = true;
+                    break; //stop looking
+                }
+            }
+
+            if (found) {
+                //operator[] creates the list the first time this gg OTU id is seen
+                ggOTUIDs[otuID].push_back(it->first); //save mothur OTU label
+            }else { m->mothurOut("[ERROR]: could not find OTUId for " + it->second + ". Its reference sequences are " + referenceString + ".\n"); m->control_pressed = true; }
+        }
+
+        //an unmatched OTU or a user interrupt means the merge cannot be trusted:
+        //stop here so lookup is not replaced and no shared file is written
+        if (m->control_pressed) { return 0; }
+
+        vector<SharedRAbundVector*> newLookup;
+        for (size_t i = 0; i < lookup.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector();
+            temp->setLabel(lookup[i]->getLabel());
+            temp->setGroup(lookup[i]->getGroup());
+            newLookup.push_back(temp);
+        }
+
+        //simple bin label -> index of that bin in the current lookup vectors
+        map<string, int> labelIndex;
+        for (size_t i = 0; i < m->currentSharedBinLabels.size(); i++) { labelIndex[m->getSimpleLabel(m->currentSharedBinLabels[i])] = i; }
+
+        vector<string> newBinLabels;
+        map<string, string> newLabelTaxMap;
+        //loop through ggOTUID list combining mothur otus and adjusting labels
+        //ggOTUIDs = 16097 -> <OTU01, OTU10, OTU22>
+
+        for (map<string, vector<string> >::iterator itMap = ggOTUIDs.begin(); itMap != ggOTUIDs.end(); itMap++) {
+            if (m->control_pressed) { for (size_t j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+
+            vector<string>& members = itMap->second; //never empty: created by a push_back above
+
+            //set new gg otu id to taxonomy. OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria
+            //find taxonomy of this otu (the first member's taxonomy represents the merged otu)
+            map<string, string>::iterator it = labelTaxMap.find(m->getSimpleLabel(members[0]));
+            if (it == labelTaxMap.end()) {
+                m->mothurOut("[ERROR]: could not find taxonomy for " + members[0] + ".\n"); m->control_pressed = true;
+                for (size_t j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
+                return 0;
+            }
+            vector<string> scores;
+            vector<string> taxonomies = parseTax(it->second, scores);
+
+            //no scores at all is treated like "null" scores: taxonomy is emitted without bootstraps
+            bool scoresNULL = (scores.empty() || (scores[0] == "null"));
+
+            //convert the representative's scores once instead of re-parsing them for every member
+            vector<float> memberScores; memberScores.resize(scores.size(), 0);
+            if (!scoresNULL) {
+                for (size_t i = 0; i < scores.size(); i++) { m->mothurConvert(scores[i], memberScores[i]); }
+            }
+
+            //merge/set OTU abundances
+            vector<int> abunds; abunds.resize(lookup.size(), 0);
+            string mergeString = "";
+            vector<float> boots; boots.resize(scores.size(), 0);
+            for (size_t j = 0; j < members.size(); j++) { //<OTU01, OTU10, OTU22>
+
+                //merge bootstrap scores
+                //NOTE(review): every member contributes the first member's scores, so the
+                //average computed below equals those scores - confirm whether each member's
+                //own taxonomy was meant to be averaged instead
+                if (!scoresNULL) {
+                    for (size_t i = 0; i < boots.size(); i++) { boots[i] += memberScores[i]; }
+                }
+
+                //merge abunds
+                mergeString += members[j] + " ";
+                //a label missing from labelIndex is default-inserted by operator[] and resolves to bin 0
+                int binIndex = labelIndex[m->getSimpleLabel(members[j])];
+                for (size_t i = 0; i < lookup.size(); i++) {
+                    abunds[i] += lookup[i]->getAbundance(binIndex);
+                }
+            }
+
+            if (m->debug) { m->mothurOut("[DEBUG]: merging " + mergeString + " for ggOTUid = " + itMap->first + ".\n"); }
+
+            //average scores and assemble new taxonomy
+            string newTaxString = "";
+            if (!scoresNULL) {
+                for (size_t j = 0; j < boots.size(); j++) { boots[j] /= static_cast<float>(members.size()); }
+
+                for (size_t j = 0; j < boots.size(); j++) {
+                    newTaxString += taxonomies[j] + "(" + toString(boots[j]) + ");";
+                }
+            }else {
+                for (size_t j = 0; j < taxonomies.size(); j++) {
+                    newTaxString += taxonomies[j] + ";";
+                }
+            }
+
+            //gg otu id -> merged taxonomy
+            newLabelTaxMap[itMap->first] = newTaxString;
+
+            //add merged otu to new lookup
+            for (size_t j = 0; j < abunds.size(); j++) { newLookup[j]->push_back(abunds[j], newLookup[j]->getGroup()); }
+
+            //saved otu label
+            newBinLabels.push_back(itMap->first);
+        }
+
+        //the merged vectors take over from the originals
+        for (size_t j = 0; j < lookup.size(); j++) { delete lookup[j]; }
+
+        lookup = newLookup;
+        m->currentSharedBinLabels = newBinLabels;
+        labelTaxMap = newLabelTaxMap;
+
+        //without any group there is no label for the file name and nothing to print
+        if (lookup.empty()) { return 0; }
+
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        string outputFileName = getOutputFileName("shared",variables);
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+
+        lookup[0]->printHeaders(out);
+
+        for (size_t i = 0; i < lookup.size(); i++) {
+            out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
+            lookup[i]->print(out);
+        }
+        out.close();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeBiomCommand", "getGreenGenesOTUIDs");
+        exit(1);
+    }
+
+}
+//**********************************************************************************************************************
+// Reads picrustOtuFile and returns a map from reference sequence id to OTU id.
+//
+// Each line is split on whitespace. The second field is used as the OTU id and every
+// field from the second onward (the id itself included) is mapped to it; the first
+// field is never read. Lines with fewer than two fields carry no mapping and are
+// skipped. Reading stops early, returning what was read so far, when
+// m->control_pressed is set.
+// NOTE(review): the id is taken from the second field, not the first - confirm this
+// matches the layout of the otu map file.
+map<string, string> MakeBiomCommand::readGGOtuMap(){
+    try {
+        map<string, string> otuMap;
+
+        ifstream in;
+        m->openInputFile(picrustOtuFile, in);
+
+        //map referenceIDs -> otuIDs
+        //lines look like:
+        //16097 671376 616121 533566 683683 4332909 4434717 772666 611808 695209
+        while(!in.eof()) {
+            if (m->control_pressed) { break; }
+
+            string line = m->getline(in); m->gobble(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+
+            //need at least two fields: indexing pieces[1] on a one-field line would read past the end
+            if (pieces.size() > 1) {
+                const string otuID = pieces[1];
+                for (size_t i = 1; i < pieces.size(); i++) { otuMap[pieces[i]] = otuID; }
+            }
+        }
+        in.close();
+
+        return otuMap;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeBiomCommand", "readGGOtuMap");
+        exit(1);
+    }
+
+}
+//**********************************************************************************************************************
+// Fills sampleMetadata with one entry per element of lookup, in lookup order.
+//
+// Without a metadata file every entry is the string "null". Otherwise the file is read
+// as a header line (first field ignored, remaining fields are the metadata labels)
+// followed by one line per sample: the sample name, then one value per label. Each
+// entry becomes a string of the form {"label":"value", "label":"value"}.
+//
+// A sample line with the wrong number of values, or a group in lookup without a
+// metadata line, is reported and sets m->control_pressed; the function then returns
+// with sampleMetadata only partially filled. The return value is always 0.
+int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
+    try {
+        sampleMetadata.clear();
+        if (metadatafile == "") { for (size_t i = 0; i < lookup.size(); i++) { sampleMetadata.push_back("null"); } }
+        else {
+            ifstream in;
+            m->openInputFile(metadatafile, in);
+
+            vector<string> metadataLabels;
+            map<string, vector<string> > lines; //group name -> metadata values, for the user's groups only
+
+            string headerLine = m->getline(in); m->gobble(in);
+            vector<string> pieces = m->splitWhiteSpace(headerLine);
+
+            //save names of columns you are reading (the first header field labels the sample column)
+            for (size_t i = 1; i < pieces.size(); i++) {
+                metadataLabels.push_back(pieces[i]);
+            }
+            int count = metadataLabels.size();
+
+            vector<string> groups = m->getGroups();
+
+            //read rest of file
+            while (!in.eof()) {
+
+                if (m->control_pressed) { in.close(); return 0; }
+
+                string group = "";
+                in >> group; m->gobble(in);
+
+                string line = m->getline(in); m->gobble(in);
+                vector<string> thisPieces = m->splitWhiteSpaceWithQuotes(line);
+
+                if (static_cast<int>(thisPieces.size()) != count) {
+                    //the message announces quitting, so actually request the abort
+                    m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); m->control_pressed = true;
+                }
+                else if (m->inUsersGroups(group, groups)) { lines[group] = thisPieces; }
+
+                m->gobble(in);
+            }
+            in.close();
+
+            map<string, vector<string> >::iterator it;
+            for (size_t i = 0; i < lookup.size(); i++) {
+
+                if (m->control_pressed) { return 0; }
+
+                it = lines.find(lookup[i]->getGroup());
+
+                if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + lookup[i]->getGroup() + ", quitting.\n"); m->control_pressed = true; }
+                else {
+                    //only lines with exactly one value per label were stored above
+                    vector<string> values = it->second;
+
+                    //entries are joined with ", "; a file without metadata columns yields "{}"
+                    //rather than indexing position size()-1 of an empty vector
+                    string data = "{";
+                    for (size_t j = 0; j < metadataLabels.size(); j++) {
+                        if (j != 0) { data += ", "; }
+                        values[j] = m->removeQuotes(values[j]);
+                        data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\"";
+                    }
+                    data += "}";
+                    sampleMetadata.push_back(data);
+                }
+            }
+        }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeBiomCommand", "getSampleMetaData");
+        exit(1);
+    }
+
+}
+