5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
11 #include "sharedutilities.h"
12 #include "counttable.h"
14 //********************************************************************************************************************
15 //sorts lowest to highest
// Ordering predicate for SharedRAbundVector pointers: returns true when left's
// getGroup() value compares less than right's, giving ascending order by group.
16 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
17 return (left->getGroup() < right->getGroup());
19 //**********************************************************************************************************************
// Registers the parameters accepted by make.shared (biom, list, count, group,
// label, groups, inputdir, outputdir) by pushing a CommandParameter for each onto
// the member `parameters`, then gathers their names into myArray.
// NOTE(review): the return statement is not visible in this view; presumably
// myArray is what gets returned - confirm.
20 vector<string> SharedCommand::setParameters(){
// Input file parameters.
22 CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none","shared",false,false); parameters.push_back(pbiom);
23 CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup","shared",false,false,true); parameters.push_back(plist);
24 CommandParameter pcount("count", "InputTypes", "", "", "none", "GroupCount", "none","",false,false); parameters.push_back(pcount);
25 CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup","",false,false,true); parameters.push_back(pgroup);
// The ordergroup parameter is disabled: its registration is commented out.
26 //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
// String-valued options.
27 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
28 CommandParameter pgroups("groups", "String", "", "", "", "", "","group",false,false); parameters.push_back(pgroups);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect only the names of everything registered above.
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error reporting; `e` is presumably the caught exception (the catch line is not visible here).
37 m->errorOut(e, "SharedCommand", "setParameters");
41 //**********************************************************************************************************************
// Assembles the user-facing help text for make.shared by appending one sentence
// per parameter to helpString.
// NOTE(review): the return statement is not visible in this view; presumably
// helpString is returned - confirm.
42 string SharedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
46 helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
47 helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
48 helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
49 helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
// Help for the disabled ordergroup parameter is kept commented out alongside it.
50 //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
// Error reporting; `e` is presumably the caught exception (the catch line is not visible here).
54 m->errorOut(e, "SharedCommand", "getHelpString");
58 //**********************************************************************************************************************
// Selects the output filename pattern for an output type.
// type: one of "shared", "rabund" or "group"; any other value logs an error
//       and sets m->control_pressed.
// NOTE(review): the declaration of `pattern` and the return statement are not
// visible in this view; presumably `pattern` is returned - confirm.
59 string SharedCommand::getOutputPattern(string type) {
// "shared" carries two alternatives separated by '-': with and without [distance].
63 if (type == "shared") { pattern = "[filename],shared-[filename],[distance],shared"; }
64 else if (type == "rabund") { pattern = "[filename],[group],rabund"; }
65 else if (type == "group") { pattern = "[filename],[group],groups"; }
66 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Error reporting; `e` is presumably the caught exception (the catch line is not visible here).
71 m->errorOut(e, "SharedCommand", "getOutputPattern");
75 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help already shown
// (abort and calledHelp are both set to true) and seeds outputTypes with empty
// entries for the three output kinds.
76 SharedCommand::SharedCommand(){
78 abort = true; calledHelp = true;
80 //initialize outputTypes
81 vector<string> tempOutNames;
82 outputTypes["rabund"] = tempOutNames;
83 outputTypes["shared"] = tempOutNames;
84 outputTypes["group"] = tempOutNames;
// Error reporting; `e` is presumably the caught exception (the catch line is not visible here).
87 m->errorOut(e, "SharedCommand", "SharedCommand");
91 //**********************************************************************************************************************
// Option-string constructor: handles "help"/"citation", otherwise parses and
// validates the option string, resolves the list/biom/group/count input files
// (falling back to mothur's current files), and reads the groups and label
// selections. Any validation failure sets abort = true.
// NOTE(review): several else-branches and local declarations (e.g. `path`,
// `temp`) sit on lines that are not visible in this view, so the exact nesting
// of the statements below cannot be confirmed from here.
92 SharedCommand::SharedCommand(string option) {
94 abort = false; calledHelp = false;
97 //allow user to run help
98 if(option == "help") { help(); abort = true; calledHelp = true; }
99 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts.
103 vector<string> myArray = setParameters();
// Split the raw option string into name -> value pairs.
105 OptionParser parser(option);
106 map<string, string> parameters = parser.getParameters();
108 ValidParameters validParameter;
109 map<string, string>::iterator it;
111 //check to make sure all parameters are valid for command
112 for (it = parameters.begin(); it != parameters.end(); it++) {
113 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
116 //if the user changes the input directory command factory will send this info to us in the output parameter
117 string inputDir = validParameter.validFile(parameters, "inputdir", false);
118 if (inputDir == "not found"){ inputDir = ""; }
// For each input file given without a path, prefix it with inputDir.
121 it = parameters.find("list");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["list"] = inputDir + it->second; }
129 it = parameters.find("group");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["group"] = inputDir + it->second; }
137 it = parameters.find("count");
138 //user has given a template file
139 if(it != parameters.end()){
140 path = m->hasPath(it->second);
141 //if the user has not given a path then, add inputdir. else leave path alone.
142 if (path == "") { parameters["count"] = inputDir + it->second; }
145 it = parameters.find("biom");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["biom"] = inputDir + it->second; }
// Seed outputTypes with empty entries for the three output kinds.
154 vector<string> tempOutNames;
155 outputTypes["rabund"] = tempOutNames;
156 outputTypes["shared"] = tempOutNames;
157 outputTypes["group"] = tempOutNames;
159 //if the user changes the output directory command factory will send this info to us in the output parameter
160 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
162 //check for required parameters
// For each file parameter: "not open" aborts, "not found" means it was not
// supplied, anything else is recorded as mothur's current file of that type.
163 listfile = validParameter.validFile(parameters, "list", true);
164 if (listfile == "not open") { listfile = ""; abort = true; }
165 else if (listfile == "not found") { listfile = ""; }
166 else { m->setListFile(listfile); }
168 biomfile = validParameter.validFile(parameters, "biom", true);
169 if (biomfile == "not open") { biomfile = ""; abort = true; }
170 else if (biomfile == "not found") { biomfile = ""; }
171 else { m->setBiomFile(biomfile); }
// NOTE(review): "ordergroup" is not registered in setParameters() (its
// CommandParameter is commented out), and unlike the other files the
// "not open" branch here does not reset ordergroupfile to "".
173 ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
174 if (ordergroupfile == "not open") { abort = true; }
175 else if (ordergroupfile == "not found") { ordergroupfile = ""; }
177 groupfile = validParameter.validFile(parameters, "group", true);
178 if (groupfile == "not open") { groupfile = ""; abort = true; }
179 else if (groupfile == "not found") { groupfile = ""; }
180 else { m->setGroupFile(groupfile); }
182 countfile = validParameter.validFile(parameters, "count", true);
183 if (countfile == "not open") { countfile = ""; abort = true; }
184 else if (countfile == "not found") { countfile = ""; }
// A supplied count file becomes the current count table and must carry group info.
186 m->setCountTableFile(countfile);
188 if (!temp.testGroups(countfile)) { m->mothurOut("[ERROR]: Your count file does not have group info, aborting."); m->mothurOutEndLine(); abort=true; }
// Neither list nor biom supplied: fall back to mothur's current files.
191 if ((biomfile == "") && (listfile == "")) {
192 //is there are current file available for either of these?
193 //give priority to list, then biom
194 listfile = m->getListFile();
195 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
197 biomfile = m->getBiomFile();
198 if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter."); m->mothurOutEndLine(); }
200 m->mothurOut("No valid current files. You must provide a list or biom file before you can use the make.shared command."); m->mothurOutEndLine();
// list and biom are mutually exclusive.
205 else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
// The list format needs group information from a group file or a count file;
// fall back to the current group file, then the current count table.
207 if (listfile != "") {
208 if ((groupfile == "") && (countfile == "")) {
209 groupfile = m->getGroupFile();
210 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
212 countfile = m->getCountTableFile();
213 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
215 m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine();
// Optional dash-separated group selection, split into Groups and handed to m.
223 string groups = validParameter.validFile(parameters, "groups", false);
224 if (groups == "not found") { groups = ""; }
226 m->splitAtDash(groups, Groups);
227 m->setGroups(Groups);
230 //check for optional parameter and set defaults
231 // ...at some point should added some additional type checking...
// Optional dash-separated label selection; "all" selects every label (allLines = 1).
232 string label = validParameter.validFile(parameters, "label", false);
233 if (label == "not found") { label = ""; }
235 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
236 else { allLines = 1; }
241 catch(exception& e) {
242 m->errorOut(e, "SharedCommand", "SharedCommand");
246 //**********************************************************************************************************************
// Runs the command: builds the shared file from either the list/group inputs or
// the biom file, records the first rabund/shared/group output as mothur's
// current file of that type, and prints the names of all output files.
// Returns 0 when aborted after help was shown and 2 when aborted for any other
// reason. NOTE(review): the normal-path return is not visible in this view.
248 int SharedCommand::execute(){
251 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// A non-empty listfile selects the list path; otherwise the biom path is used.
253 if (listfile != "") { createSharedFromListGroup(); }
254 else { createSharedFromBiom(); }
// If control_pressed was set, delete every output file produced so far.
256 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } }
258 //set rabund file as new current rabundfile
260 itTypes = outputTypes.find("rabund");
261 if (itTypes != outputTypes.end()) {
262 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
// Same for the first shared output.
265 itTypes = outputTypes.find("shared");
266 if (itTypes != outputTypes.end()) {
267 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
// Same for the first group output.
270 itTypes = outputTypes.find("group");
271 if (itTypes != outputTypes.end()) {
272 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
// Report the output file names, one per line.
275 m->mothurOutEndLine();
276 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
277 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
278 m->mothurOutEndLine();
282 catch(exception& e) {
283 m->errorOut(e, "SharedCommand", "execute");
287 //**********************************************************************************************************************
// Converts the biom file into a shared file.
// Steps visible here: split the file into top-level "tag": value entries, check
// the type / matrix_type / matrix_element_type entries, read the OTU names
// (rows) and group names (columns), verify them against the shape entry, then
// parse the data entry and print headers plus data to the shared output.
// Every early exit on m->control_pressed closes and removes the shared output
// and returns 0.
// NOTE(review): several declarations (out, in, line, numRows, numCols,
// closeParen, filehandles, it3, fileroot, util, ...) and else-branches sit on
// lines not visible in this view, so the exact nesting cannot be confirmed.
288 int SharedCommand::createSharedFromBiom() {
290 //getting output filename
291 string filename = biomfile;
// With no explicit output directory, write next to the biom file.
292 if (outputDir == "") { outputDir += m->hasPath(filename); }
294 map<string, string> variables;
295 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
296 filename = getOutputFileName("shared",variables);
297 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
300 m->openOutputFile(filename, out);
303 "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
304 "format": "Biological Observation Matrix 0.9.1",
305 "format_url": "http://biom-format.org",
307 "generated_by": "mothur1.24.0",
308 "date": "Tue Apr 17 13:12:07 2012", */
311 m->openInputFile(biomfile, in);
// Parser state: bracket/brace counters decide whether a comma ends an entry.
313 string matrixFormat = "";
316 int shapeNumRows = 0;
317 int shapeNumCols = 0;
318 vector<string> otuNames;
319 vector<string> groupNames;
// fileLines maps each tag name to the remainder of its entry.
320 map<string, string> fileLines;
321 vector<string> names;
322 int countOpenBrace = 0;
323 int countClosedBrace = 0;
324 int openParen = -1; //account for opening brace
326 bool ignoreCommas = false;
327 bool atComma = false;
329 string matrixElementType = "";
331 while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
332 if (m->control_pressed) { break; }
// Read one character, then let gobble() advance the stream.
334 char c = in.get(); m->gobble(in);
336 if (c == '[') { countOpenBrace++; }
337 else if (c == ']') { countClosedBrace++; }
338 else if (c == '{') { openParen++; }
339 else if (c == '}') { closeParen++; }
340 else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
// NOTE(review): both operands of each && below are the same comparison, so the
// second is redundant; it may have been meant to compare openParen with
// closeParen - confirm before changing.
342 if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
343 else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
// A comma outside any [...] closes the current entry: store it under its tag.
344 if (atComma && !ignoreCommas) {
345 if (fileLines.size() == 0) { //clip first {
346 line = line.substr(1);
348 string tag = getTag(line);
349 fileLines[tag] = line;
352 ignoreCommas = false;
// Store the final entry after dropping its last character.
358 line = line.substr(0, line.length()-1);
359 string tag = getTag(line);
360 fileLines[tag] = line;
// "type" entry: only "OTU table" (or "OTUtable") is accepted.
364 map<string, string>::iterator it;
365 it = fileLines.find("type");
366 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
368 string thisLine = it->second;
369 string type = getTag(thisLine);
370 if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
373 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// "matrix_type" entry: must be "sparse" or "dense".
375 it = fileLines.find("matrix_type");
376 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
378 string thisLine = it->second;
379 matrixFormat = getTag(thisLine);
380 if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
383 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// "matrix_element_type" entry: must be "int" or "float"; float only warns.
385 it = fileLines.find("matrix_element_type");
386 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
388 string thisLine = it->second;
389 matrixElementType = getTag(thisLine);
390 if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
391 if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
394 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// "rows" entry supplies the OTU names; readRows also fills numRows.
396 it = fileLines.find("rows");
397 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
399 string thisLine = it->second;
400 otuNames = readRows(thisLine, numRows);
403 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// "columns" entry supplies the group names; readRows also fills numCols.
405 it = fileLines.find("columns");
406 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
408 string thisLine = it->second;
410 groupNames = readRows(thisLine, numCols);
412 //if users selected groups, then remove the groups not wanted.
414 vector<string> Groups = m->getGroups();
415 vector<string> allGroups = groupNames;
416 util.setGroups(Groups, allGroups);
417 m->setGroups(Groups);
419 //fill filehandles with neccessary ofstreams
422 for (i=0; i<Groups.size(); i++) {
424 filehandles[Groups[i]] = temp;
428 fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
430 //clears file before we start to write to it below
// NOTE(review): the rabund names here are built by hand as
// fileroot + group + ".rabund", whereas createSharedFromListGroup uses
// getOutputFileName("rabund", ...) - confirm the two are meant to differ.
431 for (int i=0; i<Groups.size(); i++) {
432 m->mothurRemove((fileroot + Groups[i] + ".rabund"));
433 outputNames.push_back((fileroot + Groups[i] + ".rabund"));
434 outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
// From here on, early exits also delete the per-group file handles.
438 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// "shape" entry must agree with the row and column counts that were read.
440 it = fileLines.find("shape");
441 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
443 string thisLine = it->second;
444 getDims(thisLine, shapeNumRows, shapeNumCols);
447 if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
449 if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
452 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// "data" entry: parse the abundances and write them to the shared file.
454 it = fileLines.find("data");
455 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
457 string thisLine = it->second;
// The OTU names become the bin labels used when printing.
458 m->currentSharedBinLabels = otuNames;
461 vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
// NOTE(review): lookup[0] is read without checking that lookup is non-empty;
// readData erases vectors for groups that are not selected - confirm it can
// never come back empty.
463 m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
464 lookup[0]->printHeaders(out);
465 printSharedData(lookup, out);
// Release the per-group file handles.
468 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
471 if (m->control_pressed) { m->mothurRemove(filename); return 0; }
475 catch(exception& e) {
476 m->errorOut(e, "SharedCommand", "createSharedFromBiom");
480 //**********************************************************************************************************************
// Parses the biom "data" entry into one SharedRAbundVector per group.
// matrixFormat:      "dense" means each inner [...] lists one abundance per group
//                    for the current OTU; the other visible branch expects
//                    triples [otuNum, sampleNum, abundance].
// line:              text of the data entry, scanned character by character.
// matrixElementType: "float" values are converted and then cast to int.
// groupNames:        one vector is created per name, labelled "dummy".
// numOTUs:           number of bins each new vector starts with.
// Returns the vectors for the groups accepted by m->getGroups(); vectors for
// other groups are deleted and erased. On m->control_pressed the vectors built
// so far are returned immediately.
// Fix: a failed sanity check used to be followed by the abundance write anyway,
// indexing nums / lookup / groupNames out of range on malformed input. Both
// writes are now guarded, and an out-of-range sample index is reported.
// NOTE(review): declarations of num, nums, temp, temp2, otuCount, remove and
// util, plus some braces and else lines, are not visible in this view.
481 vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
484 vector<SharedRAbundVector*> lookup;
486 //creates new sharedRAbunds
487 for (int i = 0; i < groupNames.size(); i++) {
488 SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
489 temp->setLabel("dummy");
490 temp->setGroup(groupNames[i]);
491 lookup.push_back(temp);
494 bool dataStart = false;
495 bool inBrackets = false;
499 for (int i = 0; i < line.length(); i++) {
501 if (m->control_pressed) { return lookup; }
503 //look for opening [ to indicate data is starting
504 if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
505 else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
// An inner '[' opens one record; the matching ']' closes it and flushes the last number.
508 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
509 else if ((line[i] == ']') && (inBrackets)) {
513 if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
514 else { m->mothurConvert(num, temp); }
515 nums.push_back(temp);
518 //save info to vectors
519 if (matrixFormat == "dense") {
522 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
524 //set abundances for this otu
525 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
// Only write when the record has exactly one value per group, so nums[j] stays in range.
526 if (nums.size() == lookup.size()) { for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); } }
531 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
533 //nums contains [otuNum, sampleNum, abundance]
// Only write a well-formed triple whose sample index addresses an existing vector;
// lookup and groupNames have the same length at this point, so one check covers both.
534 if ((nums.size() == 3) && (nums[1] >= 0) && (nums[1] < (int)lookup.size())) { lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]); } else if (nums.size() == 3) { m->mothurOut("[ERROR]: trouble parsing OTU data. Sample index " + toString(nums[1]) + " is out of range.\n"); m->control_pressed = true; }
// A comma ends the current number; any other non-space character extends it.
540 if (line[i] == ',') {
542 m->mothurConvert(num, temp);
543 nums.push_back(temp);
545 }else { if (!isspace(line[i])) { num += line[i]; } }
// Drop the vectors of groups that are not selected.
553 for (int i = 0; i < lookup.size(); i++) {
554 //if this sharedrabund is not from a group the user wants then delete it.
555 if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) {
557 delete lookup[i]; lookup[i] = NULL;
558 lookup.erase(lookup.begin()+i);
// Removing groups can leave OTUs that are zero in every remaining group.
563 if (remove) { eliminateZeroOTUS(lookup); }
568 catch(exception& e) {
569 m->errorOut(e, "SharedCommand", "readData");
573 //**********************************************************************************************************************
// Rebuilds thislookup without the bins whose abundance is zero in every vector,
// and rebuilds m->currentSharedBinLabels to match the bins that are kept.
// thislookup: replaced in place; the old vectors are deleted and new ones with
//             the same label and group take their place.
// On m->control_pressed the partially built vectors are deleted and 0 is
// returned with thislookup left unchanged.
// NOTE(review): thislookup[0] is read without an emptiness check - confirm
// callers never pass an empty vector. The allZero declaration, the test that
// uses it and the final return are not visible in this view.
574 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
// One empty replacement vector per input vector, same label and group.
577 vector<SharedRAbundVector*> newLookup;
578 for (int i = 0; i < thislookup.size(); i++) {
579 SharedRAbundVector* temp = new SharedRAbundVector();
580 temp->setLabel(thislookup[i]->getLabel());
581 temp->setGroup(thislookup[i]->getGroup());
582 newLookup.push_back(temp);
586 vector<string> newBinLabels;
// Width of the bin count, used to zero-pad generated labels.
587 string snumBins = toString(thislookup[0]->getNumBins());
588 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
589 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
591 //look at each sharedRabund and make sure they are not all zero
593 for (int j = 0; j < thislookup.size(); j++) {
594 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
597 //if they are not all zero add this bin
599 for (int j = 0; j < thislookup.size(); j++) {
600 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
603 //if there is a bin label use it otherwise make one
// Generated form is "Otu" + zero padding + (i+1); an existing label at index i replaces it.
604 string binLabel = "Otu";
605 string sbinNumber = toString(i+1);
606 if (sbinNumber.length() < snumBins.length()) {
607 int diff = snumBins.length() - sbinNumber.length();
608 for (int h = 0; h < diff; h++) { binLabel += "0"; }
610 binLabel += sbinNumber;
611 if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
613 newBinLabels.push_back(binLabel);
// Swap in the filtered vectors and labels, freeing the originals.
617 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
619 thislookup = newLookup;
620 m->currentSharedBinLabels = newBinLabels;
625 catch(exception& e) {
626 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
630 //**********************************************************************************************************************
// Reads the two numbers of the biom "shape" entry.
// line:         text containing a bracketed pair such as [rows, cols].
// shapeNumRows: receives the number converted when a ',' is reached.
// shapeNumCols: receives the number converted when the closing ']' is reached.
// NOTE(review): declarations of num and inBar, the points where num is reset,
// and the return statement are not visible in this view.
631 int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
637 for (int i = 0; i < line.length(); i++) {
639 //you want to ignore any ; until you reach the next '
// The first '[' starts the pair; the matching ']' ends it and yields the column count.
640 if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } }
641 else if ((line[i] == ']') && (inBar)) {
643 m->mothurConvert(num, shapeNumCols);
// A comma ends the row count; other non-space characters extend the current number.
648 if (line[i] == ',') {
649 m->mothurConvert(num, shapeNumRows);
651 }else { if (!isspace(line[i])) { num += line[i]; } }
657 catch(exception& e) {
658 m->errorOut(e, "SharedCommand", "getDims");
662 //**********************************************************************************************************************
// Extracts the "id" values from a biom "rows" or "columns" entry.
// line:    text of the entry, a bracketed list of {...} records.
// numRows: output parameter. NOTE(review): the line that assigns it is not
//          visible in this view; presumably it receives the number of records.
// Returns the collected names; on m->control_pressed it returns what has been
// collected so far.
// NOTE(review): declarations of openParen, closeParen, nextRow, end and newName
// are not visible in this view.
663 vector<string> SharedCommand::readRows(string line, int& numRows) {
666 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
667 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
670 vector<string> names;
671 int countOpenBrace = 0;
672 int countClosedBrace = 0;
678 for (int i = 0; i < line.length(); i++) {
680 if (m->control_pressed) { return names; }
// Track bracket/brace depth; any other character seen once openParen is non-zero is part of the record.
682 if (line[i] == '[') { countOpenBrace++; }
683 else if (line[i] == ']') { countClosedBrace++; }
684 else if (line[i] == '{') { openParen++; }
685 else if (line[i] == '}') { closeParen++; }
686 else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info
688 //you have reached the end of the rows info
689 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
690 if ((openParen == closeParen) && (closeParen != 0)) { //process row
// NOTE(review): items[0] and items[1] are read without checking how many
// pieces the splits produced - confirm a record with no ':' in its first
// field cannot reach this point.
692 vector<string> items;
693 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
694 string part = items[0]; items.clear();
695 m->splitAtChar(part, items, ':'); //split part we want containing the ids
696 string name = items[1];
698 //remove "" if needed
699 int pos = name.find("\"");
700 if (pos != string::npos) {
// Copy every character except double quotes into newName.
702 for (int k = 0; k < name.length(); k++) {
703 if (name[k] != '\"') { newName += name[k]; }
707 names.push_back(name);
716 catch(exception& e) {
717 m->errorOut(e, "SharedCommand", "readRows");
721 //**********************************************************************************************************************
722 //designed for things like "type": "OTU table", returns type
// Pulls the first delimited token out of line and trims line to what follows it.
// line: in/out. When the closing delimiter is found, line is replaced by the
//       text after that delimiter.
// Characters between the opening and closing delimiter `c` are accumulated in tag.
// Fix: the error handler reported the function name as "getInfo"; it now
// reports "getTag" so logged errors point at this function.
// NOTE(review): declarations of tag and c (presumably the double-quote
// character) and the return statement are not visible in this view.
723 string SharedCommand::getTag(string& line) {
725 bool inQuotes = false;
729 for (int i = 0; i < line.length(); i++) {
731 //you want to ignore any ; until you reach the next '
// The first delimiter opens the token; the second closes it.
732 if ((line[i] == c) && (!inQuotes)) { inQuotes = true; }
733 else if ((line[i] == c) && (inQuotes)) {
735 line = line.substr(i+1);
// Inside the token, keep everything except the delimiter itself.
739 if (inQuotes) { if (line[i] != c) { tag += line[i]; } }
744 catch(exception& e) {
745 m->errorOut(e, "SharedCommand", "getTag");
749 //**********************************************************************************************************************
750 int SharedCommand::createSharedFromListGroup() {
753 GroupMap* groupMap = NULL;
754 CountTable* countTable = NULL;
755 if (groupfile != "") {
756 groupMap = new GroupMap(groupfile);
758 int groupError = groupMap->readMap();
759 if (groupError == 1) { delete groupMap; return 0; }
760 vector<string> allGroups = groupMap->getNamesOfGroups();
761 m->setAllGroups(allGroups);
763 countTable = new CountTable();
764 countTable->readTable(countfile, true, false);
767 if (m->control_pressed) { return 0; }
769 pickedGroups = false;
771 //if hte user has not specified any groups then use them all
772 if (Groups.size() == 0) {
773 if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
774 else { Groups = countTable->getNamesOfGroups(); }
775 m->setGroups(Groups);
776 }else { pickedGroups = true; }
780 string filename = "";
782 string filename = listfile;
783 if (outputDir == "") { outputDir += m->hasPath(filename); }
785 map<string, string> variables;
786 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
787 filename = getOutputFileName("shared",variables);
788 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
789 m->openOutputFile(filename, out);
792 //fill filehandles with neccessary ofstreams
795 for (i=0; i<Groups.size(); i++) {
797 filehandles[Groups[i]] = temp;
801 fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
802 map<string, string> variables;
803 variables["[filename]"] = fileroot;
804 //clears file before we start to write to it below
805 for (int i=0; i<Groups.size(); i++) {
806 variables["[group]"] = Groups[i];
807 string rabundFIleName = getOutputFileName("rabund",variables);
808 m->mothurRemove(rabundFIleName);
809 outputNames.push_back(rabundFIleName);
810 outputTypes["rabund"].push_back(rabundFIleName);
813 string errorOff = "no error";
815 //if user provided an order file containing the order the shared file should be in read it
816 //if (ordergroupfile != "") { readOrderFile(); }
818 InputData input(listfile, "shared");
819 SharedListVector* SharedList = input.getSharedListVector();
820 string lastLabel = SharedList->getLabel();
821 vector<SharedRAbundVector*> lookup;
823 if (m->control_pressed) {
824 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
825 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
826 out.close(); if (!pickedGroups) { m->mothurRemove(filename); }
827 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
828 string rabundFIleName = getOutputFileName("rabund",variables);
829 m->mothurRemove(rabundFIleName); }
834 vector<string> namesSeqs;
835 int numGroupNames = 0;
836 if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
837 else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
838 int error = ListGroupSameSeqs(namesSeqs, SharedList);
840 if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
841 m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
843 out.close(); if (!pickedGroups) { m->mothurRemove(filename); } //remove blank shared file you made
846 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
847 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
851 if (error == 1) { m->control_pressed = true; }
853 //if user has specified groups make new groupfile for them
854 if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
856 if (m->getNumGroups() < 4) {
857 for (int i = 0; i < m->getNumGroups()-1; i++) {
858 groups += (m->getGroups())[i] + ".";
860 groups+=(m->getGroups())[m->getNumGroups()-1];
861 }else { groups = "merge"; }
862 map<string, string> variables;
863 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
864 variables["[group]"] = groups;
865 string newGroupFile = getOutputFileName("group",variables);
866 outputTypes["group"].push_back(newGroupFile);
867 outputNames.push_back(newGroupFile);
869 m->openOutputFile(newGroupFile, outGroups);
871 vector<string> names = groupMap->getNamesSeqs();
873 for (int i = 0; i < names.size(); i++) {
874 groupName = groupMap->getGroup(names[i]);
875 if (isValidGroup(groupName, m->getGroups())) {
876 outGroups << names[i] << '\t' << groupName << endl;
882 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
883 set<string> processedLabels;
884 set<string> userLabels = labels;
886 while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
887 if (m->control_pressed) {
888 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
889 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
890 if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
891 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
892 string rabundFIleName = getOutputFileName("rabund",variables);
893 m->mothurRemove(rabundFIleName); }
897 if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
899 lookup = SharedList->getSharedRAbundVector();
901 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
903 if (m->control_pressed) {
904 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
905 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
906 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
907 if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
908 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
909 string rabundFIleName = getOutputFileName("rabund",variables);
910 m->mothurRemove(rabundFIleName); }
914 //if picked groups must split the shared file by label
916 string filename = listfile;
917 if (outputDir == "") { outputDir += m->hasPath(filename); }
919 map<string, string> variables;
920 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
921 variables["[distance]"] = lookup[0]->getLabel();
922 filename = getOutputFileName("shared",variables);
923 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
925 m->openOutputFile(filename, out2);
927 vector<string> savedLabels = m->currentSharedBinLabels;
928 eliminateZeroOTUS(lookup);
929 lookup[0]->printHeaders(out2);
930 printSharedData(lookup, out2);
932 m->currentSharedBinLabels = savedLabels; //restore old labels
935 if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
936 printSharedData(lookup, out); //prints info to the .shared file
938 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
940 processedLabels.insert(SharedList->getLabel());
941 userLabels.erase(SharedList->getLabel());
944 if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
945 string saveLabel = SharedList->getLabel();
948 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
950 lookup = SharedList->getSharedRAbundVector();
951 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
953 if (m->control_pressed) {
954 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
955 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
956 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
957 if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
958 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
959 string rabundFIleName = getOutputFileName("rabund",variables);
960 m->mothurRemove(rabundFIleName); }
964 //if picked groups must split the shared file by label
966 string filename = listfile;
967 if (outputDir == "") { outputDir += m->hasPath(filename); }
969 map<string, string> variables;
970 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
971 variables["[distance]"] = lookup[0]->getLabel();
972 filename = getOutputFileName("shared",variables);
973 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
975 m->openOutputFile(filename, out2);
977 vector<string> savedLabels = m->currentSharedBinLabels;
978 eliminateZeroOTUS(lookup);
979 lookup[0]->printHeaders(out2);
980 printSharedData(lookup, out2);
982 m->currentSharedBinLabels = savedLabels; //restore old labels
985 if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
986 printSharedData(lookup, out); //prints info to the .shared file
989 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
991 processedLabels.insert(SharedList->getLabel());
992 userLabels.erase(SharedList->getLabel());
994 //restore real lastlabel to save below
995 SharedList->setLabel(saveLabel);
999 lastLabel = SharedList->getLabel();
1002 SharedList = input.getSharedListVector(); //get new list vector to process
1005 //output error messages about any remaining user labels
1006 set<string>::iterator it;
1007 bool needToRun = false;
1008 for (it = userLabels.begin(); it != userLabels.end(); it++) {
1009 if (processedLabels.count(lastLabel) != 1) {
1014 //run last label if you need to
1015 if (needToRun == true) {
1016 if (SharedList != NULL) { delete SharedList; }
1017 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
1019 lookup = SharedList->getSharedRAbundVector();
1020 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
1022 if (m->control_pressed) {
1023 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
1024 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
1025 if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
1026 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
1027 string rabundFIleName = getOutputFileName("rabund",variables);
1028 m->mothurRemove(rabundFIleName); }
1032 //if picked groups must split the shared file by label
1034 string filename = listfile;
1035 if (outputDir == "") { outputDir += m->hasPath(filename); }
1037 map<string, string> variables;
1038 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
1039 variables["[distance]"] = lookup[0]->getLabel();
1040 filename = getOutputFileName("shared",variables);
1041 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
1043 m->openOutputFile(filename, out2);
1045 vector<string> savedLabels = m->currentSharedBinLabels;
1046 eliminateZeroOTUS(lookup);
1047 lookup[0]->printHeaders(out2);
1048 printSharedData(lookup, out2);
1050 m->currentSharedBinLabels = savedLabels; //restore old labels
1053 if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
1054 printSharedData(lookup, out); //prints info to the .shared file
1056 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
1060 if (!pickedGroups) { out.close(); }
1062 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1066 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
1068 if (m->control_pressed) {
1069 if (!pickedGroups) { m->mothurRemove(filename); }
1070 for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
1071 string rabundFIleName = getOutputFileName("rabund",variables);
1072 m->mothurRemove(rabundFIleName); }
1078 catch(exception& e) {
1079 m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
1083 //**********************************************************************************************************************
// Writes one row per group in `thislookup` to `out` (label, tab, group, tab, then the vector's own print output)
// and appends each group's RAbundVector to that group's rabund file through `filehandles`.
// Row order: sorted by group name via compareSharedRabunds when `order` is empty; otherwise the sequence
// given in `order`. The groups actually written are handed to m->setGroups().
// `thislookup` is passed by value, so the sort below only reorders this local copy of the pointer vector;
// the SharedRAbundVector objects themselves are shared with the caller.
// NOTE(review): several short lines (try/else/closing braces) are not visible in this excerpt.
1084 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofstream& out) {
1087 if (order.size() == 0) { //user has not specified an order, so sort alphabetically by group name
1088 sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
// collects the group names in the order they are written
1091 vector<string> Groups;
1093 //print one row per group to the shared file and append that group's rabund data
1094 for (int i = 0; i < thislookup.size(); i++) {
1095 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
1096 thislookup[i]->print(out);
1098 Groups.push_back(thislookup[i]->getGroup());
// the per-group rabund file name is built from fileroot + group; the stream is opened in append mode,
// written, and closed again on every call
// NOTE(review): filehandles[...] is dereferenced without a check — if filehandles is a std::map, operator[]
// inserts a null entry for an unknown group; confirm every group has a stream registered beforehand
1100 RAbundVector rav = thislookup[i]->getRAbundVector();
1101 map<string, string> variables;
1102 variables["[filename]"] = fileroot;
1103 variables["[group]"] = thislookup[i]->getGroup();
1104 m->openOutputFileAppend(getOutputFileName("rabund",variables), *(filehandles[thislookup[i]->getGroup()]));
1105 rav.print(*(filehandles[thislookup[i]->getGroup()]));
1106 (*(filehandles[thislookup[i]->getGroup()])).close();
1108 m->setGroups(Groups);
// NOTE(review): what follows appears to be the branch taken when `order` is non-empty
// (the line separating the two branches is not visible in this excerpt)
1110 //create a map from groupName to each sharedrabund
1111 map<string, SharedRAbundVector*> myMap;
1112 map<string, SharedRAbundVector*>::iterator myIt;
1114 for (int i = 0; i < thislookup.size(); i++) {
1115 myMap[thislookup[i]->getGroup()] = thislookup[i];
// only groups that are both listed in `order` and present in thislookup end up in this list
1119 vector<string> Groups;
1121 //loop through ordered list and print the rabund
1122 for (int i = 0; i < order.size(); i++) {
1123 myIt = myMap.find(order[i]);
1125 if(myIt != myMap.end()) { //we found it
1126 out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
1127 (myIt->second)->print(out);
1129 Groups.push_back((myIt->second)->getGroup());
// same append/print/close sequence for the rabund file as in the sorted case above
1131 RAbundVector rav = (myIt->second)->getRAbundVector();
1132 map<string, string> variables;
1133 variables["[filename]"] = fileroot;
1134 variables["[group]"] = (myIt->second)->getGroup();
1135 m->openOutputFileAppend(getOutputFileName("rabund",variables), *(filehandles[(myIt->second)->getGroup()]));
1136 rav.print(*(filehandles[(myIt->second)->getGroup()]));
1137 (*(filehandles[(myIt->second)->getGroup()])).close();
// presumably the else case of the lookup above: an entry of `order` with no matching group is reported and skipped
1139 m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
1143 m->setGroups(Groups);
// any std::exception is passed to m->errorOut together with the class and method name
1148 catch(exception& e) {
1149 m->errorOut(e, "SharedCommand", "printSharedData");
1153 //**********************************************************************************************************************
// Cross-checks the sequence names in `groupMapsSeqs` against the names stored in the bins of `SharedList`
// and reports mismatches in both directions through m->mothurOut.
// Returns 0 immediately if m->control_pressed is set while scanning the bins.
// NOTE(review): the regular return statement and the error flag are not visible in this excerpt —
// presumably a non-zero value is returned when any mismatch was reported; confirm.
1154 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
// copy the names into a set so each lookup below does not have to scan the vector
1158 set<string> groupNamesSeqs;
1159 for(int i = 0; i < groupMapsSeqs.size(); i++) {
1160 groupNamesSeqs.insert(groupMapsSeqs[i]);
1163 //go through every bin of the list and report any sequence name that is missing from the group/count names
1164 for (int i = 0; i < SharedList->getNumBins(); i++) {
1165 if (m->control_pressed) { return 0; }
// each bin is returned as a single string which is split at commas into individual sequence names
1167 string names = SharedList->get(i);
1169 vector<string> listNames;
1170 m->splitAtComma(names, listNames);
1172 for (int j = 0; j < listNames.size(); j++) {
1173 int num = groupNamesSeqs.count(listNames[j]);
// the wording of the error depends on whether a group file was supplied (groupfile non-empty) or a count file
1177 if (groupfile != "") {
1178 m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); }
1179 else{ m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your count file. Please correct."); m->mothurOutEndLine(); }
// names that were found are removed, so whatever is left in the set afterwards never appeared in the list file
// NOTE(review): because matched names are erased, a name repeated in the list would not be found on its
// second occurrence — presumably list names are unique; confirm
1180 }else { groupNamesSeqs.erase(listNames[j]); }
// report every name that was supplied by the caller but never seen in any bin
// NOTE(review): this message always says "groupfile", unlike the branch above which distinguishes
// group file from count file — confirm whether a count-file wording is wanted here
1184 for (set<string>::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) {
1186 m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct."); m->mothurOutEndLine();
// any std::exception is passed to m->errorOut together with the class and method name
1191 catch(exception& e) {
1192 m->errorOut(e, "SharedCommand", "ListGroupSameSeqs");
1196 //**********************************************************************************************************************
// Destructor.
// NOTE(review): the body is not visible in this excerpt. groupMap and countTable are deleted inside
// createSharedFromListGroup, so verify that nothing is released a second time here.
1198 SharedCommand::~SharedCommand(){
1203 //**********************************************************************************************************************
// Reads group names from `ordergroupfile` and appends them to the member `order`,
// which printSharedData uses to decide the row order of its output.
// NOTE(review): the loop header, local declarations and return statement are not visible in this excerpt.
1204 int SharedCommand::readOrderFile() {
1210 m->openInputFile(ordergroupfile, in);
// operator>> reads one whitespace-delimited token per iteration;
// m->gobble presumably consumes the trailing whitespace before the next read — helper not shown, confirm
1214 in >> thisGroup; m->gobble(in);
1216 order.push_back(thisGroup);
// on user interrupt discard everything read so far and leave the loop
1218 if (m->control_pressed) { order.clear(); break; }
// any std::exception is passed to m->errorOut together with the class and method name
1224 catch(exception& e) {
1225 m->errorOut(e, "SharedCommand", "readOrderFile");
1229 //**********************************************************************************************************************
// Returns true when `groupname` is exactly equal to one of the entries of `groups` (linear scan).
// Both arguments are taken by value, so every call copies the whole vector; the group-file writer in
// createSharedFromListGroup calls this once per sequence name.
// NOTE(review): the return for the not-found case is not visible in this excerpt — presumably false.
1231 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
1233 for (int i = 0; i < groups.size(); i++) {
1234 if (groupname == groups[i]) { return true; }
// any std::exception is passed to m->errorOut together with the class and method name
1239 catch(exception& e) {
1240 m->errorOut(e, "SharedCommand", "isValidGroup");
1244 /************************************************************/