+ ofstream out;
+ m->openOutputFile(filename, out);
+
+ /*{
+ "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
+ "format": "Biological Observation Matrix 0.9.1",
+ "format_url": "http://biom-format.org",
+ "type": "OTU table",
+ "generated_by": "mothur1.24.0",
+ "date": "Tue Apr 17 13:12:07 2012", */
+
+ ifstream in;
+ m->openInputFile(biomfile, in);
+
+ string matrixFormat = "";
+ int numRows = 0;
+ int numCols = 0;
+ int shapeNumRows = 0;
+ int shapeNumCols = 0;
+ vector<string> otuNames;
+ vector<string> groupNames;
+ map<string, string> fileLines;
+ vector<string> names;
+ int countOpenBrace = 0;
+ int countClosedBrace = 0;
+ int openParen = -1; //account for opening brace
+ int closeParen = 0;
+ bool ignoreCommas = false;
+ bool atComma = false;
+ string line = "";
+ string matrixElementType = "";
+
+ while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
+ if (m->control_pressed) { break; }
+
+ char c = in.get(); m->gobble(in);
+
+ if (c == '[') { countOpenBrace++; }
+ else if (c == ']') { countClosedBrace++; }
+ else if (c == '{') { openParen++; }
+ else if (c == '}') { closeParen++; }
+ else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
+
+ if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
+ else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
+ if (atComma && !ignoreCommas) {
+ if (fileLines.size() == 0) { //clip first {
+ line = line.substr(1);
+ }
+ string tag = getTag(line);
+ fileLines[tag] = line;
+ line = "";
+ atComma = false;
+ ignoreCommas = false;
+
+ }else { line += c; }
+
+ }
+ if (line != "") {
+ line = line.substr(0, line.length()-1);
+ string tag = getTag(line);
+ fileLines[tag] = line;
+ }
+ in.close();
+
+ map<string, string>::iterator it;
+ it = fileLines.find("type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ string type = getTag(thisLine);
+ if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("matrix_type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ matrixFormat = getTag(thisLine);
+ if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("matrix_element_type");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
+ else {
+ string thisLine = it->second;
+ matrixElementType = getTag(thisLine);
+ if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
+ if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("rows");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
+ else {
+ string thisLine = it->second;
+ otuNames = readRows(thisLine, numRows);
+ }
+
+ if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("columns");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
+ else {
+ string thisLine = it->second;
+ //read sample names
+ groupNames = readRows(thisLine, numCols);
+
+ //if users selected groups, then remove the groups not wanted.
+ SharedUtil util;
+ vector<string> Groups = m->getGroups();
+ vector<string> allGroups = groupNames;
+ util.setGroups(Groups, allGroups);
+ m->setGroups(Groups);
+
+ //fill filehandles with necessary ofstreams
+ int i;
+ ofstream* temp;
+ for (i=0; i<Groups.size(); i++) {
+ temp = new ofstream;
+ filehandles[Groups[i]] = temp;
+ }
+
+ //set fileroot
+ fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
+
+ //clears file before we start to write to it below
+ for (int i=0; i<Groups.size(); i++) {
+ m->mothurRemove((fileroot + Groups[i] + ".rabund"));
+ outputNames.push_back((fileroot + Groups[i] + ".rabund"));
+ outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
+ }
+ }
+
+ if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("shape");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
+ else {
+ string thisLine = it->second;
+ getDims(thisLine, shapeNumRows, shapeNumCols);
+
+ //check shape
+ if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
+
+ if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
+ }
+
+ if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
+
+ it = fileLines.find("data");
+ if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
+ else {
+ string thisLine = it->second;
+ m->currentBinLabels = otuNames;
+
+ //read data
+ vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
+
+ m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+ lookup[0]->printHeaders(out);
+ printSharedData(lookup, out);
+ }
+
+ for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
+ out.close();
+
+ if (m->control_pressed) { m->mothurRemove(filename); return 0; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SharedCommand", "createSharedFromBiom");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
+ try {
+
+ vector<SharedRAbundVector*> lookup;
+
+ //creates new sharedRAbunds
+ for (int i = 0; i < groupNames.size(); i++) {
+ SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
+ temp->setLabel("dummy");
+ temp->setGroup(groupNames[i]);
+ lookup.push_back(temp);
+ }
+
+ bool dataStart = false;
+ bool inBrackets = false;
+ string num = "";
+ vector<int> nums;
+ int otuCount = 0;
+ for (int i = 0; i < line.length(); i++) {
+
+ if (m->control_pressed) { return lookup; }
+
+ //look for opening [ to indicate data is starting
+ if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
+ else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
+
+ if (dataStart) {
+ if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
+ else if ((line[i] == ']') && (inBrackets)) {
+ inBrackets = false;
+ int temp;
+ float temp2;
+ if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
+ else { m->mothurConvert(num, temp); }
+ nums.push_back(temp);
+ num = "";
+
+ //save info to vectors
+ if (matrixFormat == "dense") {
+
+ //sanity check
+ if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
+
+ //set abundances for this otu
+ //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
+ for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
+
+ otuCount++;
+ }else {
+ //sanity check
+ if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
+
+ //nums contains [otuNum, sampleNum, abundance]
+ lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
+ }
+ nums.clear();
+ }
+
+ if (inBrackets) {
+ if (line[i] == ',') {
+ int temp;
+ m->mothurConvert(num, temp);
+ nums.push_back(temp);
+ num = "";
+ }else { if (!isspace(line[i])) { num += line[i]; } }
+ }
+ }
+ }
+
+ SharedUtil util;
+
+ bool remove = false;
+ for (int i = 0; i < lookup.size(); i++) {
+ //if this sharedrabund is not from a group the user wants then delete it.
+ if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) {
+ remove = true;
+ delete lookup[i]; lookup[i] = NULL;
+ lookup.erase(lookup.begin()+i);
+ i--;
+ }