5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
11 #include "sharedutilities.h"
12 #include "counttable.h"
14 //********************************************************************************************************************
15 //sorts lowest to highest
// Ordering predicate over SharedRAbundVector pointers: yields true when the value
// returned by left->getGroup() compares less than the one from right->getGroup().
// NOTE(review): no caller is visible in this listing, and the closing brace of the
// function is among the lines missing from it.
16 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
17 return (left->getGroup() < right->getGroup());
19 //**********************************************************************************************************************
// Registers every parameter make.shared accepts on the parameters vector and gathers
// the registered names, in registration order, into myArray.
// NOTE(review): the return statement and the try/catch scaffolding are not visible in
// this listing; presumably myArray is the value returned.
20 vector<string> SharedCommand::setParameters(){
// Input file parameters: biom, list, count and group.
22 CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none","shared",false,false); parameters.push_back(pbiom);
23 CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup","shared",false,false,true); parameters.push_back(plist);
24 CommandParameter pcount("count", "InputTypes", "", "", "none", "GroupCount", "none","",false,false); parameters.push_back(pcount);
25 CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup","",false,false,true); parameters.push_back(pgroup);
// The ordergroup parameter is disabled: its registration is commented out.
26 //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
// Selection options (label, groups) and directory options (inputdir, outputdir).
27 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
28 CommandParameter pgroups("groups", "String", "", "", "", "", "","group",false,false); parameters.push_back(pgroups);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Copy the name of each registered parameter.
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error report naming this class and function (presumably inside the catch handler,
// whose opening line is not visible here).
37 m->errorOut(e, "SharedCommand", "setParameters");
41 //**********************************************************************************************************************
// Assembles the help text for make.shared by appending one sentence per topic
// (purpose, parameter list, count, groups, label) to helpString.
// NOTE(review): the return statement is not visible in this listing; presumably
// helpString is the value returned.
42 string SharedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
46 helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
47 helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
48 helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
49 helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
// Help text for the disabled ordergroup parameter is kept commented out.
50 //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
// Error report naming this class and function (presumably inside the catch handler).
54 m->errorOut(e, "SharedCommand", "getHelpString");
58 //**********************************************************************************************************************
// Selects the output file name pattern for an output type.
//   type - one of shared, rabund or group.
// Any other type is reported as an error and sets m->control_pressed.
// NOTE(review): the declaration of pattern and the return statement are not visible
// in this listing; presumably pattern is the value returned.
59 string SharedCommand::getOutputPattern(string type) {
63 if (type == "shared") { pattern = "[filename],shared"; }
64 else if (type == "rabund") { pattern = "[filename],[group],rabund"; }
65 else if (type == "group") { pattern = "[filename],[group],groups"; }
66 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Error report naming this class and function (presumably inside the catch handler).
71 m->errorOut(e, "SharedCommand", "getOutputPattern");
75 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help requested, and registers
// the three output types (rabund, shared, group) with empty file lists.
76 SharedCommand::SharedCommand(){
78 abort = true; calledHelp = true;
80 //initialize outputTypes
81 vector<string> tempOutNames;
82 outputTypes["rabund"] = tempOutNames;
83 outputTypes["shared"] = tempOutNames;
84 outputTypes["group"] = tempOutNames;
// Error report naming this class and function (presumably inside the catch handler).
87 m->errorOut(e, "SharedCommand", "SharedCommand");
91 //**********************************************************************************************************************
// Parsing constructor: interprets the option string of a make.shared call and fills in
// the input files, the selected groups and the selected labels.
//   option - help, citation, or the parameter text handed to OptionParser.
// A failed parameter or file check sets abort, which execute() tests before doing any work.
// NOTE(review): several structural lines (try, else, closing braces) and some local
// declarations (for example path and temp) are not visible in this listing, so the
// exact nesting of the statements below cannot be confirmed from here.
92 SharedCommand::SharedCommand(string option) {
94 abort = false; calledHelp = false;
97 //allow user to run help
98 if(option == "help") { help(); abort = true; calledHelp = true; }
99 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103 vector<string> myArray = setParameters();
105 OptionParser parser(option);
106 map<string, string> parameters = parser.getParameters();
108 ValidParameters validParameter;
109 map<string, string>::iterator it;
111 //check to make sure all parameters are valid for command
112 for (it = parameters.begin(); it != parameters.end(); it++) {
113 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
116 //if the user changes the input directory command factory will send this info to us in the output parameter
117 string inputDir = validParameter.validFile(parameters, "inputdir", false);
118 if (inputDir == "not found"){ inputDir = ""; }
// For each input file parameter given without a path, prefix it with inputDir.
121 it = parameters.find("list");
122 //user has given a list file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["list"] = inputDir + it->second; }
129 it = parameters.find("group");
130 //user has given a group file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["group"] = inputDir + it->second; }
137 it = parameters.find("count");
138 //user has given a count file
139 if(it != parameters.end()){
140 path = m->hasPath(it->second);
141 //if the user has not given a path then, add inputdir. else leave path alone.
142 if (path == "") { parameters["count"] = inputDir + it->second; }
145 it = parameters.find("biom");
146 //user has given a biom file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["biom"] = inputDir + it->second; }
// Register the three output types with empty file lists.
154 vector<string> tempOutNames;
155 outputTypes["rabund"] = tempOutNames;
156 outputTypes["shared"] = tempOutNames;
157 outputTypes["group"] = tempOutNames;
159 //if the user changes the output directory command factory will send this info to us in the output parameter
160 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
162 //check for required parameters
// validFile answers "not open" when the file cannot be opened (abort) and "not found"
// when the parameter was not supplied (treated as empty).
163 listfile = validParameter.validFile(parameters, "list", true);
164 if (listfile == "not open") { listfile = ""; abort = true; }
165 else if (listfile == "not found") { listfile = ""; }
166 else { m->setListFile(listfile); }
168 biomfile = validParameter.validFile(parameters, "biom", true);
169 if (biomfile == "not open") { biomfile = ""; abort = true; }
170 else if (biomfile == "not found") { biomfile = ""; }
171 else { m->setBiomFile(biomfile); }
// NOTE(review): ordergroup is looked up here although its CommandParameter registration
// is commented out in setParameters, and on "not open" ordergroupfile keeps that
// sentinel text instead of being cleared like the other file members.
173 ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
174 if (ordergroupfile == "not open") { abort = true; }
175 else if (ordergroupfile == "not found") { ordergroupfile = ""; }
177 groupfile = validParameter.validFile(parameters, "group", true);
178 if (groupfile == "not open") { groupfile = ""; abort = true; }
179 else if (groupfile == "not found") { groupfile = ""; }
180 else { m->setGroupFile(groupfile); }
182 countfile = validParameter.validFile(parameters, "count", true);
183 if (countfile == "not open") { countfile = ""; abort = true; }
184 else if (countfile == "not found") { countfile = ""; }
// A usable count file becomes the current count table and must carry group info.
// NOTE(review): presumably these two statements sit in an else branch whose opening
// line, together with the declaration of temp, is not visible here.
186 m->setCountTableFile(countfile);
188 if (!temp.testGroups(countfile)) { m->mothurOut("[ERROR]: Your count file does not have group info, aborting."); m->mothurOutEndLine(); abort=true; }
// Exactly one of list or biom is needed: with neither, fall back to the current files.
191 if ((biomfile == "") && (listfile == "")) {
192 //is there a current file available for either of these?
193 //give priority to list, then biom
194 listfile = m->getListFile();
195 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
197 biomfile = m->getBiomFile();
198 if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter."); m->mothurOutEndLine(); }
200 m->mothurOut("No valid current files. You must provide a list or biom file before you can use the make.shared command."); m->mothurOutEndLine();
205 else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
// A list file needs group information: use the current group file, then the current
// count file, when neither was supplied.
207 if (listfile != "") {
208 if ((groupfile == "") && (countfile == "")) {
209 groupfile = m->getGroupFile();
210 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
212 countfile = m->getCountTableFile();
213 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
215 m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine();
// The groups value is split at dashes into Groups and published through m->setGroups.
223 string groups = validParameter.validFile(parameters, "groups", false);
224 if (groups == "not found") { groups = ""; }
226 m->splitAtDash(groups, Groups);
227 m->setGroups(Groups);
230 //check for optional parameter and set defaults
231 // ...at some point should added some additional type checking...
232 string label = validParameter.validFile(parameters, "label", false);
233 if (label == "not found") { label = ""; }
// Any label other than all is split at dashes into labels with allLines = 0;
// the label all selects every line (allLines = 1).
235 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
236 else { allLines = 1; }
241 catch(exception& e) {
242 m->errorOut(e, "SharedCommand", "SharedCommand");
246 //**********************************************************************************************************************
// Runs make.shared: derives the shared file name from the list or biom input, builds
// the shared file, records the first file of each output type as the current file of
// that type, and prints the names of all output files.
// Returns 0 when aborted after a help request and 2 when aborted for another reason.
// NOTE(review): the return on the normal path and the declarations of itTypes and
// current are not visible in this listing.
248 int SharedCommand::execute(){
251 if (abort == true) { if (calledHelp) { return 0; } return 2; }
253 //getting output filename
254 string filename = "";
255 if (listfile != "") { filename = listfile; }
256 else { filename = biomfile; }
// Without an explicit output directory the output goes next to the input file.
258 if (outputDir == "") { outputDir += m->hasPath(filename); }
260 map<string, string> variables;
261 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
262 filename = getOutputFileName("shared",variables);
263 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
// The list file takes priority over the biom file.
265 if (listfile != "") { createSharedFromListGroup(filename); }
266 else { createSharedFromBiom(filename); }
// When control_pressed is set, every output file recorded so far is removed.
268 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } }
270 //set rabund file as new current rabundfile
272 itTypes = outputTypes.find("rabund");
273 if (itTypes != outputTypes.end()) {
274 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
// Same for the shared file.
277 itTypes = outputTypes.find("shared");
278 if (itTypes != outputTypes.end()) {
279 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
// Same for the group file.
282 itTypes = outputTypes.find("group");
283 if (itTypes != outputTypes.end()) {
284 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
287 m->mothurOutEndLine();
288 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
289 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
290 m->mothurOutEndLine();
294 catch(exception& e) {
295 m->errorOut(e, "SharedCommand", "execute");
299 //**********************************************************************************************************************
// Builds the shared file named filename from the biom file in biomfile.
// Steps visible below: split the biom text into tagged fields (fileLines), validate
// type, matrix_type and matrix_element_type, read the rows (OTU names) and columns
// (group names), check them against shape, parse data with readData, then print the
// headers and the data to the shared file.
// Returns 0 on the visible early exits taken when m->control_pressed is set; each of
// those exits closes out and/or removes filename.
// NOTE(review): many structural lines (try, else, closing braces, the final return)
// and several declarations (out, in, line, numRows, numCols, closeParen, util,
// filehandles, it3, fileroot, i, temp) are not visible in this listing.
// NOTE(review): each missing-field branch below only prints its [ERROR] message; unlike
// the other [ERROR] paths in this function it does not set m->control_pressed in the
// visible statement, so the early-exit checks that follow do not fire for it.
300 int SharedCommand::createSharedFromBiom(string filename) {
303 m->openOutputFile(filename, out);
// The next five lines are sample biom header fields; they end with a block-comment
// terminator, so presumably they belong to a comment whose opening line is not visible.
306 "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
307 "format": "Biological Observation Matrix 0.9.1",
308 "format_url": "http://biom-format.org",
310 "generated_by": "mothur1.24.0",
311 "date": "Tue Apr 17 13:12:07 2012", */
314 m->openInputFile(biomfile, in);
316 string matrixFormat = "";
319 int shapeNumRows = 0;
320 int shapeNumCols = 0;
321 vector<string> otuNames;
322 vector<string> groupNames;
323 map<string, string> fileLines;
324 vector<string> names;
325 int countOpenBrace = 0;
326 int countClosedBrace = 0;
327 int openParen = -1; //account for opening brace
329 bool ignoreCommas = false;
330 bool atComma = false;
332 string matrixElementType = "";
// Character scan: counts brackets and braces and notes a comma seen while commas are
// not being ignored; at such a comma the collected text is stored in fileLines under
// the tag getTag extracts from it.
334 while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
335 if (m->control_pressed) { break; }
337 char c = in.get(); m->gobble(in);
339 if (c == '[') { countOpenBrace++; }
340 else if (c == ']') { countClosedBrace++; }
341 else if (c == '{') { openParen++; }
342 else if (c == '}') { closeParen++; }
343 else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
// NOTE(review): both operands of each && below are the same comparison, so the second
// test is redundant; confirm whether a comparison of openParen with closeParen was intended.
345 if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
346 else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
347 if (atComma && !ignoreCommas) {
348 if (fileLines.size() == 0) { //clip first {
349 line = line.substr(1);
// getTag removes the tag from line, so what is stored is the text after the tag.
351 string tag = getTag(line);
352 fileLines[tag] = line;
355 ignoreCommas = false;
// Last field: drop the final character of line before storing it.
361 line = line.substr(0, line.length()-1);
362 string tag = getTag(line);
363 fileLines[tag] = line;
// type: only OTU table (with or without the space) is accepted.
367 map<string, string>::iterator it;
368 it = fileLines.find("type");
369 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
371 string thisLine = it->second;
372 string type = getTag(thisLine);
373 if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
376 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// matrix_type: sparse or dense.
378 it = fileLines.find("matrix_type");
379 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
381 string thisLine = it->second;
382 matrixFormat = getTag(thisLine);
383 if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
386 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// matrix_element_type: int or float; float only triggers a warning.
388 it = fileLines.find("matrix_element_type");
389 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
391 string thisLine = it->second;
392 matrixElementType = getTag(thisLine);
393 if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
394 if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
397 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// rows: the OTU names.
399 it = fileLines.find("rows");
400 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
402 string thisLine = it->second;
403 otuNames = readRows(thisLine, numRows);
406 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// columns: the group names, parsed with the same helper as the rows.
408 it = fileLines.find("columns");
409 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
411 string thisLine = it->second;
413 groupNames = readRows(thisLine, numCols);
415 //if users selected groups, then remove the groups not wanted.
417 vector<string> Groups = m->getGroups();
418 vector<string> allGroups = groupNames;
419 util.setGroups(Groups, allGroups);
420 m->setGroups(Groups);
422 //fill filehandles with necessary ofstreams
425 for (i=0; i<Groups.size(); i++) {
427 filehandles[Groups[i]] = temp;
431 fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
// One rabund file name per group (fileroot + group + .rabund): any existing file is
// removed and the name is recorded as an output.
433 //clears file before we start to write to it below
434 for (int i=0; i<Groups.size(); i++) {
435 m->mothurRemove((fileroot + Groups[i] + ".rabund"));
436 outputNames.push_back((fileroot + Groups[i] + ".rabund"));
437 outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
441 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// shape: must agree with the number of rows and columns actually read.
443 it = fileLines.find("shape");
444 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
446 string thisLine = it->second;
447 getDims(thisLine, shapeNumRows, shapeNumCols);
450 if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
452 if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
455 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// data: the abundance matrix; the OTU names become the current bin labels first.
457 it = fileLines.find("data");
458 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
460 string thisLine = it->second;
461 m->currentBinLabels = otuNames;
464 vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
// NOTE(review): lookup[0] is used without checking that lookup is non-empty.
466 m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
467 lookup[0]->printHeaders(out);
468 printSharedData(lookup, out);
471 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
474 if (m->control_pressed) { m->mothurRemove(filename); return 0; }
478 catch(exception& e) {
479 m->errorOut(e, "SharedCommand", "createSharedFromBiom");
483 //**********************************************************************************************************************
// Converts the text of the biom data field into one SharedRAbundVector per sample.
//   matrixFormat      - dense selects the dense branch; any other value takes the sparse branch.
//   line              - raw text of the data field, scanned character by character.
//   matrixElementType - float values are converted and then cut to int with an (int) cast;
//                       any other value is converted straight to int.
//   groupNames        - sample names; one vector is created per entry, in this order.
//   numOTUs           - size each new SharedRAbundVector is constructed with.
// Returns the vectors, labelled dummy. Vectors whose group is rejected by
// util.isValidGroup against m->getGroups() are deleted and erased before returning.
// When a row fails its sanity check the error is reported, m->control_pressed is set
// and the function returns at once, so the malformed row is never used as an index
// (the same early return the loop already performs when control_pressed is set).
// NOTE(review): try, closing braces, the final return and the declarations of nums, num,
// otuCount, temp, temp2, util and remove are not visible in this listing.
484 vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
487 vector<SharedRAbundVector*> lookup;
489 //creates new sharedRAbunds
490 for (int i = 0; i < groupNames.size(); i++) {
491 SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
492 temp->setLabel("dummy");
493 temp->setGroup(groupNames[i]);
494 lookup.push_back(temp);
497 bool dataStart = false;
498 bool inBrackets = false;
502 for (int i = 0; i < line.length(); i++) {
504 if (m->control_pressed) { return lookup; }
506 //look for opening [ to indicate data is starting
507 if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
508 else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
// An inner [ opens one row; the matching ] closes it and converts the last number.
511 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
512 else if ((line[i] == ']') && (inBrackets)) {
516 if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
517 else { m->mothurConvert(num, temp); }
518 nums.push_back(temp);
521 //save info to vectors
522 if (matrixFormat == "dense") {
// A dense row must hold one value per sample; stop before nums[j] is indexed otherwise.
525 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; return lookup; }
527 //set abundances for this otu
528 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
529 for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
// A sparse row must hold exactly three values; stop before nums[0..2] are read otherwise.
534 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; return lookup; }
// The sample number indexes lookup and groupNames, so it has to be in range.
if ((nums[1] < 0) || (nums[1] >= (int)lookup.size())) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; return lookup; }
536 //nums contains [otuNum, sampleNum, abundance]
537 lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
// Inside a row a comma ends one number; other non-space characters extend it.
543 if (line[i] == ',') {
545 m->mothurConvert(num, temp);
546 nums.push_back(temp);
548 }else { if (!isspace(line[i])) { num += line[i]; } }
// Drop the vectors of groups the user did not ask for.
556 for (int i = 0; i < lookup.size(); i++) {
557 //if this sharedrabund is not from a group the user wants then delete it.
558 if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) {
560 delete lookup[i]; lookup[i] = NULL;
561 lookup.erase(lookup.begin()+i);
// After removing groups, OTUs that are zero in every remaining group are removed too.
566 if (remove) { eliminateZeroOTUS(lookup); }
571 catch(exception& e) {
572 m->errorOut(e, "SharedCommand", "readData");
576 //**********************************************************************************************************************
// Rebuilds thislookup without the bins whose abundance is zero in every vector.
//   thislookup - in/out; the old vectors are deleted and replaced by newly allocated
//                ones carrying the same label and group.
// m->currentBinLabels is replaced by the labels of the bins that were kept.
// Returns 0 on the visible early exit taken when m->control_pressed is set; that exit
// deletes the new vectors only.
// NOTE(review): thislookup[0] is read without checking that thislookup is non-empty.
// NOTE(review): try, closing braces, the final return, the declaration of allZero and
// the condition guarding the copy step are not visible in this listing.
577 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
// One empty replacement vector per input vector.
580 vector<SharedRAbundVector*> newLookup;
581 for (int i = 0; i < thislookup.size(); i++) {
582 SharedRAbundVector* temp = new SharedRAbundVector();
583 temp->setLabel(thislookup[i]->getLabel());
584 temp->setGroup(thislookup[i]->getGroup());
585 newLookup.push_back(temp);
589 vector<string> newBinLabels;
// The text width of the bin count sets the zero padding of generated labels.
590 string snumBins = toString(thislookup[0]->getNumBins());
591 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
592 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
594 //look at each sharedRabund and make sure they are not all zero
596 for (int j = 0; j < thislookup.size(); j++) {
597 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
600 //if they are not all zero add this bin
602 for (int j = 0; j < thislookup.size(); j++) {
603 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
606 //if there is a bin label use it otherwise make one
// Generated label: Otu followed by the 1-based bin number, zero padded to the width of snumBins.
607 string binLabel = "Otu";
608 string sbinNumber = toString(i+1);
609 if (sbinNumber.length() < snumBins.length()) {
610 int diff = snumBins.length() - sbinNumber.length();
611 for (int h = 0; h < diff; h++) { binLabel += "0"; }
613 binLabel += sbinNumber;
// An existing label for this bin replaces the generated one.
614 if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
616 newBinLabels.push_back(binLabel);
// Release the old vectors and hand the rebuilt set back through the reference.
620 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
622 thislookup = newLookup;
623 m->currentBinLabels = newBinLabels;
628 catch(exception& e) {
629 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
633 //**********************************************************************************************************************
// Reads the two numbers of a bracketed shape list out of line.
//   line         - text containing the bracketed list.
//   shapeNumRows - output; receives the number collected before the comma.
//   shapeNumCols - output; receives the number collected before the closing bracket.
// NOTE(review): try, closing braces, the return statement and the declarations and
// resets of num and inBar are not visible in this listing.
634 int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
640 for (int i = 0; i < line.length(); i++) {
642 //skip ahead to the opening [ of the list; the closing ] ends the second number
643 if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } }
644 else if ((line[i] == ']') && (inBar)) {
646 m->mothurConvert(num, shapeNumCols);
// A comma ends the first number; other non-space characters extend the current number.
651 if (line[i] == ',') {
652 m->mothurConvert(num, shapeNumRows);
654 }else { if (!isspace(line[i])) { num += line[i]; } }
660 catch(exception& e) {
661 m->errorOut(e, "SharedCommand", "getDims");
665 //**********************************************************************************************************************
// Extracts the id of every entry of a biom rows or columns list.
//   line    - text of the field; each entry is a brace-delimited object.
//   numRows - output parameter; the statement that assigns it is not visible in this listing.
// Returns the ids collected in names, with double quotes stripped. When
// m->control_pressed is set the names collected so far are returned.
// NOTE(review): items[0] and items[1] are read without checking how many pieces
// splitAtChar produced.
// NOTE(review): try, closing braces, the final return and the declarations of openParen,
// closeParen, nextRow, end and newName are not visible in this listing.
666 vector<string> SharedCommand::readRows(string line, int& numRows) {
// The next two lines are sample row entries, presumably part of a block comment whose
// opening and closing lines are not visible.
669 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
670 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
673 vector<string> names;
674 int countOpenBrace = 0;
675 int countClosedBrace = 0;
681 for (int i = 0; i < line.length(); i++) {
683 if (m->control_pressed) { return names; }
// Brackets and braces are only counted; any other character is appended to nextRow
// while openParen is non-zero.
685 if (line[i] == '[') { countOpenBrace++; }
686 else if (line[i] == ']') { countClosedBrace++; }
687 else if (line[i] == '{') { openParen++; }
688 else if (line[i] == '}') { closeParen++; }
689 else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info
691 //you have reached the end of the rows info
692 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
693 if ((openParen == closeParen) && (closeParen != 0)) { //process row
// The id is the part after the colon in the first comma-separated piece.
695 vector<string> items;
696 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
697 string part = items[0]; items.clear();
698 m->splitAtChar(part, items, ':'); //split part we want containing the ids
699 string name = items[1];
701 //remove "" if needed
702 int pos = name.find("\"");
703 if (pos != string::npos) {
705 for (int k = 0; k < name.length(); k++) {
706 if (name[k] != '\"') { newName += name[k]; }
710 names.push_back(name);
719 catch(exception& e) {
720 m->errorOut(e, "SharedCommand", "readRows");
724 //**********************************************************************************************************************
725 //designed for things like "type": "OTU table", returns type
// Pulls the next delimited token out of line.
//   line - in/out; once the closing delimiter is found, line is cut down to the text
//          that follows it, so a second call on the same string reads the next token
//          (createSharedFromBiom relies on this to read a tag and then its value).
// The characters between the first and second occurrence of c are collected in tag.
// NOTE(review): the declarations of tag and c, the return statements, try and the
// closing braces are not visible in this listing; presumably c is the double-quote
// character and tag is the value returned.
726 string SharedCommand::getTag(string& line) {
728 bool inQuotes = false;
732 for (int i = 0; i < line.length(); i++) {
734 //the first occurrence of c opens the token, the next one closes it
735 if ((line[i] == c) && (!inQuotes)) { inQuotes = true; }
736 else if ((line[i] == c) && (inQuotes)) {
738 line = line.substr(i+1);
// While inside the token, keep every character except the delimiter itself.
742 if (inQuotes) { if (line[i] != c) { tag += line[i]; } }
747 catch(exception& e) {
// Report this function under its own name so the error message points at the right place.
748 m->errorOut(e, "SharedCommand", "getTag");
752 //**********************************************************************************************************************
753 int SharedCommand::createSharedFromListGroup(string filename) {
756 m->openOutputFile(filename, out);
758 GroupMap* groupMap = NULL;
759 CountTable* countTable = NULL;
760 if (groupfile != "") {
761 groupMap = new GroupMap(groupfile);
763 int groupError = groupMap->readMap();
764 if (groupError == 1) { delete groupMap; return 0; }
765 vector<string> allGroups = groupMap->getNamesOfGroups();
766 m->setAllGroups(allGroups);
768 countTable = new CountTable();
769 countTable->readTable(countfile, true);
772 if (m->control_pressed) { return 0; }
774 pickedGroups = false;
776 //if hte user has not specified any groups then use them all
777 if (Groups.size() == 0) {
778 if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
779 else { Groups = countTable->getNamesOfGroups(); }
780 m->setGroups(Groups);
781 }else { pickedGroups = true; }
783 //fill filehandles with neccessary ofstreams
786 for (i=0; i<Groups.size(); i++) {
788 filehandles[Groups[i]] = temp;
792 fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
793 map<string, string> variables;
794 variables["[filename]"] = fileroot;
795 //clears file before we start to write to it below
796 for (int i=0; i<Groups.size(); i++) {
797 variables["[group]"] = Groups[i];
798 string rabundFIleName = getOutputFileName("rabund",variables);
799 m->mothurRemove(rabundFIleName);
800 outputNames.push_back(rabundFIleName);
801 outputTypes["rabund"].push_back(rabundFIleName);
804 string errorOff = "no error";
806 //if user provided an order file containing the order the shared file should be in read it
807 //if (ordergroupfile != "") { readOrderFile(); }
809 InputData input(listfile, "shared");
810 SharedListVector* SharedList = input.getSharedListVector();
811 string lastLabel = SharedList->getLabel();
812 vector<SharedRAbundVector*> lookup;
814 if (m->control_pressed) {
815 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
816 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
817 out.close(); m->mothurRemove(filename);
818 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
819 string rabundFIleName = getOutputFileName("rabund",variables);
820 m->mothurRemove(rabundFIleName); }
825 vector<string> namesSeqs;
826 int numGroupNames = 0;
827 if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
828 else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
829 int error = ListGroupSameSeqs(namesSeqs, SharedList);
831 if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
832 m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
834 out.close(); m->mothurRemove(filename); //remove blank shared file you made
837 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
838 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
842 if (error == 1) { m->control_pressed = true; }
844 //if user has specified groups make new groupfile for them
845 if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
847 if (m->getNumGroups() < 4) {
848 for (int i = 0; i < m->getNumGroups(); i++) {
849 groups += (m->getGroups())[i] + ".";
851 }else { groups = "merge"; }
852 map<string, string> variables;
853 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
854 variables["[group]"] = groups;
855 string newGroupFile = getOutputFileName("group",variables);
856 outputTypes["group"].push_back(newGroupFile);
857 outputNames.push_back(newGroupFile);
859 m->openOutputFile(newGroupFile, outGroups);
861 vector<string> names = groupMap->getNamesSeqs();
863 for (int i = 0; i < names.size(); i++) {
864 groupName = groupMap->getGroup(names[i]);
865 if (isValidGroup(groupName, m->getGroups())) {
866 outGroups << names[i] << '\t' << groupName << endl;
872 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
873 set<string> processedLabels;
874 set<string> userLabels = labels;
876 while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
877 if (m->control_pressed) {
878 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
879 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
880 out.close(); m->mothurRemove(filename);
881 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
882 string rabundFIleName = getOutputFileName("rabund",variables);
883 m->mothurRemove(rabundFIleName); }
887 if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
889 lookup = SharedList->getSharedRAbundVector();
891 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
892 if (pickedGroups) { //check for otus with no seqs in them
893 eliminateZeroOTUS(lookup);
896 if (m->control_pressed) {
897 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
898 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
899 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
900 out.close(); m->mothurRemove(filename);
901 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
902 string rabundFIleName = getOutputFileName("rabund",variables);
903 m->mothurRemove(rabundFIleName); }
907 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
908 printSharedData(lookup, out); //prints info to the .shared file
909 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
911 processedLabels.insert(SharedList->getLabel());
912 userLabels.erase(SharedList->getLabel());
915 if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
916 string saveLabel = SharedList->getLabel();
919 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
921 lookup = SharedList->getSharedRAbundVector();
922 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
923 if (pickedGroups) { //check for otus with no seqs in them
924 eliminateZeroOTUS(lookup);
928 if (m->control_pressed) {
929 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
930 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
931 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
932 out.close(); m->mothurRemove(filename);
933 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
934 string rabundFIleName = getOutputFileName("rabund",variables);
935 m->mothurRemove(rabundFIleName); }
939 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
940 printSharedData(lookup, out); //prints info to the .shared file
941 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
943 processedLabels.insert(SharedList->getLabel());
944 userLabels.erase(SharedList->getLabel());
946 //restore real lastlabel to save below
947 SharedList->setLabel(saveLabel);
951 lastLabel = SharedList->getLabel();
954 SharedList = input.getSharedListVector(); //get new list vector to process
957 //output error messages about any remaining user labels
958 set<string>::iterator it;
959 bool needToRun = false;
960 for (it = userLabels.begin(); it != userLabels.end(); it++) {
961 if (processedLabels.count(lastLabel) != 1) {
966 //run last label if you need to
967 if (needToRun == true) {
968 if (SharedList != NULL) { delete SharedList; }
969 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
971 lookup = SharedList->getSharedRAbundVector();
972 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
973 if (pickedGroups) { //check for otus with no seqs in them
974 eliminateZeroOTUS(lookup);
977 if (m->control_pressed) {
978 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
979 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
980 out.close(); m->mothurRemove(filename);
981 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
982 string rabundFIleName = getOutputFileName("rabund",variables);
983 m->mothurRemove(rabundFIleName); }
987 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
988 printSharedData(lookup, out); //prints info to the .shared file
989 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
995 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
999 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
1001 if (m->control_pressed) {
1002 m->mothurRemove(filename);
1003 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
1004 string rabundFIleName = getOutputFileName("rabund",variables);
1005 m->mothurRemove(rabundFIleName); }
1011 catch(exception& e) {
1012 m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
1016 //**********************************************************************************************************************
// Writes one row per group for the current label to `out` (the .shared output stream) and
// appends each written group's rabund data to the rabund file named from fileroot and the group.
//   thislookup - one SharedRAbundVector* per group. Taken by value, so the sort below only
//                reorders this function's copy of the pointer vector, not the caller's.
//   out        - stream that receives "label<TAB>group<TAB>" followed by the vector's print() output.
// The names of the groups that were written are collected and passed to m->setGroups().
// NOTE(review): filehandles[...] is dereferenced without checking that the group has an entry;
// this assumes every group name seen here was registered in filehandles beforehand - confirm.
1017 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofstream& out) {
1020 if (order.size() == 0) { //user has not specified an order so do alphabetically
// compareSharedRabunds orders the vectors by ascending group name
1021 sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
// group names in the order they are written; handed to m->setGroups() after the loop
1024 vector<string> Groups;
1026 //write each group's row to the shared stream, then append its rabund data to the group's rabund file
1027 for (int i = 0; i < thislookup.size(); i++) {
1028 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
1029 thislookup[i]->print(out);
1031 Groups.push_back(thislookup[i]->getGroup());
1033 RAbundVector rav = thislookup[i]->getRAbundVector();
// build the rabund file name for this group from fileroot and the group name
1034 map<string, string> variables;
1035 variables["[filename]"] = fileroot;
1036 variables["[group]"] = thislookup[i]->getGroup();
// the group's stream is opened in append mode, written, and closed again on every call
1037 m->openOutputFileAppend(getOutputFileName("rabund",variables), *(filehandles[thislookup[i]->getGroup()]));
1038 rav.print(*(filehandles[thislookup[i]->getGroup()]));
1039 (*(filehandles[thislookup[i]->getGroup()])).close();
1041 m->setGroups(Groups);
// NOTE(review): the code from here on is presumably the branch taken when `order` is not empty - confirm.
1043 //create a map from groupName to each sharedrabund
1044 map<string, SharedRAbundVector*> myMap;
1045 map<string, SharedRAbundVector*>::iterator myIt;
1047 for (int i = 0; i < thislookup.size(); i++) {
1048 myMap[thislookup[i]->getGroup()] = thislookup[i];
// group names in the order they are written; handed to m->setGroups() after the loop
1052 vector<string> Groups;
1054 //loop through ordered list and print the rabund
1055 for (int i = 0; i < order.size(); i++) {
1056 myIt = myMap.find(order[i]);
1058 if(myIt != myMap.end()) { //we found it
1059 out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
1060 (myIt->second)->print(out);
1062 Groups.push_back((myIt->second)->getGroup());
1064 RAbundVector rav = (myIt->second)->getRAbundVector();
// build the rabund file name for this group from fileroot and the group name
1065 map<string, string> variables;
1066 variables["[filename]"] = fileroot;
1067 variables["[group]"] = (myIt->second)->getGroup();
// the group's stream is opened in append mode, written, and closed again on every call
1068 m->openOutputFileAppend(getOutputFileName("rabund",variables), *(filehandles[(myIt->second)->getGroup()]));
1069 rav.print(*(filehandles[(myIt->second)->getGroup()]));
1070 (*(filehandles[(myIt->second)->getGroup()])).close();
// presumably reached only when order[i] has no matching vector in myMap - confirm
1072 m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
1076 m->setGroups(Groups);
1081 catch(exception& e) {
// report the exception together with the class and function name
1082 m->errorOut(e, "SharedCommand", "printSharedData");
1086 //**********************************************************************************************************************
// Cross-checks the sequence names found in the bins of SharedList against the names in
// groupMapsSeqs, and reports every name that appears on only one side via m->mothurOut.
//   groupMapsSeqs - sequence names taken from the group file or count file.
//   SharedList    - list vector whose bins hold comma-separated sequence names.
// Returns 0 immediately if m->control_pressed is set while scanning the bins.
// NOTE(review): the normal return value is presumably an error flag that is set when any
// mismatch is reported - confirm.
1087 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
// copy the names into a set so each list name can be looked up, and erased once matched
1091 set<string> groupNamesSeqs;
1092 for(int i = 0; i < groupMapsSeqs.size(); i++) {
1093 groupNamesSeqs.insert(groupMapsSeqs[i]);
1096 //go through list and if group returns "not found" output it
1097 for (int i = 0; i < SharedList->getNumBins(); i++) {
1098 if (m->control_pressed) { return 0; }
// each bin is a single comma-separated string of sequence names
1100 string names = SharedList->get(i);
1102 vector<string> listNames;
1103 m->splitAtComma(names, listNames);
1105 for (int j = 0; j < listNames.size(); j++) {
// 0 when the list name is absent from the group/count names, 1 when present
1106 int num = groupNamesSeqs.count(listNames[j]);
// presumably reached only when num == 0; the wording depends on whether a group file or a count file was given
1110 if (groupfile != "") {
1111 m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); }
1112 else{ m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your count file. Please correct."); m->mothurOutEndLine(); }
// a matched name is removed, so whatever remains after the scan is missing from the list file
1113 }else { groupNamesSeqs.erase(listNames[j]); }
// NOTE(review): this message always says "groupfile", even when a count file was used - the
// branch above distinguishes the two; consider making the wording consistent.
1117 for (set<string>::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) {
1119 m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct."); m->mothurOutEndLine();
1124 catch(exception& e) {
// report the exception together with the class and function name
1125 m->errorOut(e, "SharedCommand", "ListGroupSameSeqs");
1129 //**********************************************************************************************************************
// Destructor for SharedCommand.
// NOTE(review): confirm whether any members need to be released here; elsewhere in this file
// groupMap, countTable and the filehandles entries are deleted inside the command itself.
1131 SharedCommand::~SharedCommand(){
1136 //**********************************************************************************************************************
// Reads group names from ordergroupfile and appends them, in file order, to the `order` member
// (the list printSharedData uses to decide the output order of groups).
// If m->control_pressed is set while reading, `order` is cleared and reading stops.
// NOTE(review): the value returned on success is presumably 0 - confirm.
1137 int SharedCommand::readOrderFile() {
1143 m->openInputFile(ordergroupfile, in);
// read the next token, then call m->gobble(in) - presumably to skip trailing whitespace (confirm)
1147 in >> thisGroup; m->gobble(in);
1149 order.push_back(thisGroup);
// on interruption, discard whatever was read so far
1151 if (m->control_pressed) { order.clear(); break; }
1157 catch(exception& e) {
// report the exception together with the class and function name
1158 m->errorOut(e, "SharedCommand", "readOrderFile");
1162 //**********************************************************************************************************************
// Returns true when groupname is equal to one of the entries in groups (exact string comparison).
// NOTE(review): presumably returns false when no entry matches - confirm.
//   groupname - group name to look for.
//   groups    - candidate group names; passed by value, so the vector is copied on every call.
1164 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
// linear scan that stops at the first match
1166 for (int i = 0; i < groups.size(); i++) {
1167 if (groupname == groups[i]) { return true; }
1172 catch(exception& e) {
// report the exception together with the class and function name
1173 m->errorOut(e, "SharedCommand", "isValidGroup");
1177 /************************************************************/