5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
11 #include "sharedutilities.h"
12 #include "counttable.h"
14 //********************************************************************************************************************
15 //sorts lowest to highest
16 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
17 return (left->getGroup() < right->getGroup());
19 //**********************************************************************************************************************
20 vector<string> SharedCommand::setParameters(){
22 CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
23 CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
24 CommandParameter pcount("count", "InputTypes", "", "", "", "GroupCount", "",false,false); parameters.push_back(pcount);
25 CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup",false,false); parameters.push_back(pgroup);
26 //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
27 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
28 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "SharedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string SharedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
46 helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
47 helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
48 helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
49 helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
50 //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
54 m->errorOut(e, "SharedCommand", "getHelpString");
58 //**********************************************************************************************************************
59 string SharedCommand::getOutputFileNameTag(string type, string inputName=""){
61 string outputFileName = "";
62 map<string, vector<string> >::iterator it;
64 //is this a type this command creates
65 it = outputTypes.find(type);
66 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
68 if (type == "shared") { outputFileName = "shared"; }
69 else if (type == "rabund") { outputFileName = "rabund"; }
70 else if (type == "group") { outputFileName = "groups"; }
71 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
73 return outputFileName;
76 m->errorOut(e, "SharedCommand", "getOutputFileNameTag");
81 //**********************************************************************************************************************
// Default constructor. Flags the command as aborted and as a help call, then
// registers the three output types this command tracks ("rabund", "shared",
// "group"), each starting with an empty list of file names.
82 SharedCommand::SharedCommand(){
84 abort = true; calledHelp = true;
86 //initialize outputTypes
87 vector<string> tempOutNames;
88 outputTypes["rabund"] = tempOutNames;
89 outputTypes["shared"] = tempOutNames;
90 outputTypes["group"] = tempOutNames;
// hand the exception `e` to m->errorOut, tagged with the class and function name
93 m->errorOut(e, "SharedCommand", "SharedCommand");
97 //**********************************************************************************************************************
// Constructor used when the command is run with an option string.
// "help" and "citation" are answered immediately and flag the command as
// aborted. For any other option string the parameters are parsed, each
// parameter is checked against the names returned by setParameters(), bare
// file names get the input directory prepended, and the list / biom / group /
// count inputs, the groups selection and the label selection are stored in
// members. Problems set `abort`, which makes execute() return without working.
98 SharedCommand::SharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// names of the parameters this command accepts
109 vector<string> myArray = setParameters();
// split the option string into name -> value pairs
111 OptionParser parser(option);
112 map<string, string> parameters = parser.getParameters();
114 ValidParameters validParameter;
115 map<string, string>::iterator it;
117 //check to make sure all parameters are valid for command
118 for (it = parameters.begin(); it != parameters.end(); it++) {
119 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
122 //if the user changes the input directory command factory will send this info to us in the output parameter
123 string inputDir = validParameter.validFile(parameters, "inputdir", false);
124 if (inputDir == "not found"){ inputDir = ""; }
// the four blocks below apply the same rule to list, group, count and biom:
// a value for which m->hasPath returns "" gets inputDir prepended
127 it = parameters.find("list");
128 //user has given a list file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["list"] = inputDir + it->second; }
135 it = parameters.find("group");
136 //user has given a group file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["group"] = inputDir + it->second; }
143 it = parameters.find("count");
144 //user has given a count file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["count"] = inputDir + it->second; }
151 it = parameters.find("biom");
152 //user has given a biom file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["biom"] = inputDir + it->second; }
// register the output types this command tracks, each with an empty file list
160 vector<string> tempOutNames;
161 outputTypes["rabund"] = tempOutNames;
162 outputTypes["shared"] = tempOutNames;
163 outputTypes["group"] = tempOutNames;
165 //if the user changes the output directory command factory will send this info to us in the output parameter
166 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
168 //check for required parameters
// for each input file below the result of validFile is handled the same way:
// "not open" -> clear the name and abort, "not found" -> clear the name,
// anything else -> record it as the current file of that kind
169 listfile = validParameter.validFile(parameters, "list", true);
170 if (listfile == "not open") { listfile = ""; abort = true; }
171 else if (listfile == "not found") { listfile = ""; }
172 else { m->setListFile(listfile); }
174 biomfile = validParameter.validFile(parameters, "biom", true);
175 if (biomfile == "not open") { biomfile = ""; abort = true; }
176 else if (biomfile == "not found") { biomfile = ""; }
177 else { m->setBiomFile(biomfile); }
// NOTE(review): "ordergroup" is still read here although its CommandParameter
// registration is commented out in setParameters() - confirm this is intended.
179 ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
180 if (ordergroupfile == "not open") { abort = true; }
181 else if (ordergroupfile == "not found") { ordergroupfile = ""; }
183 groupfile = validParameter.validFile(parameters, "group", true);
184 if (groupfile == "not open") { groupfile = ""; abort = true; }
185 else if (groupfile == "not found") { groupfile = ""; }
186 else { m->setGroupFile(groupfile); }
188 countfile = validParameter.validFile(parameters, "count", true);
189 if (countfile == "not open") { countfile = ""; abort = true; }
190 else if (countfile == "not found") { countfile = ""; }
191 else { m->setCountTableFile(countfile); }
// neither a list nor a biom file was supplied: fall back to the current files
193 if ((biomfile == "") && (listfile == "")) {
194 //is there a current file available for either of these?
195 //give priority to list, then biom
196 listfile = m->getListFile();
197 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
199 biomfile = m->getBiomFile();
200 if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter."); m->mothurOutEndLine(); }
202 m->mothurOut("No valid current files. You must provide a list or biom file before you can use the make.shared command."); m->mothurOutEndLine();
// list and biom are mutually exclusive inputs
207 else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
// a list file needs group information: use the current group file, then the current count file
209 if (listfile != "") {
210 if ((groupfile == "") && (countfile == "")) {
211 groupfile = m->getGroupFile();
212 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
214 countfile = m->getCountTableFile();
215 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
217 m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine();
// groups: dash-separated selection, split into Groups and stored through m->setGroups
225 string groups = validParameter.validFile(parameters, "groups", false);
226 if (groups == "not found") { groups = ""; }
228 m->splitAtDash(groups, Groups);
229 m->setGroups(Groups);
232 //check for optional parameter and set defaults
233 // ...at some point should added some additional type checking...
234 string label = validParameter.validFile(parameters, "label", false);
235 if (label == "not found") { label = ""; }
// "all" selects every label (allLines = 1); any other value is split at dashes into labels
237 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
238 else { allLines = 1; }
243 catch(exception& e) {
244 m->errorOut(e, "SharedCommand", "SharedCommand");
248 //**********************************************************************************************************************
// Runs make.shared. Builds the shared file name from the input file name,
// creates the shared file from either the list file or the biom file, records
// the first rabund / shared / group output as the current file of that kind
// and prints the names of all output files.
// Returns 0 after a help call and 2 when the command was aborted.
250 int SharedCommand::execute(){
253 if (abort == true) { if (calledHelp) { return 0; } return 2; }
255 //getting output filename
256 string filename = "";
257 if (listfile != "") { filename = listfile; }
258 else { filename = biomfile; }
// no output directory given: use whatever m->hasPath returns for the input file
260 if (outputDir == "") { outputDir += m->hasPath(filename); }
// output name = output directory + root name of the input file + "shared" tag
262 filename = outputDir + m->getRootName(m->getSimpleName(filename));
263 filename = filename + getOutputFileNameTag("shared");
264 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
// a list file takes priority over a biom file as the data source
266 if (listfile != "") { createSharedFromListGroup(filename); }
267 else { createSharedFromBiom(filename); }
// NOTE(review): this line removes every output file when control_pressed is set
// but does not return; the statements below still record those names as current
// files and print them - confirm whether an early return is intended.
269 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } }
271 //set rabund file as new current rabundfile
273 itTypes = outputTypes.find("rabund");
274 if (itTypes != outputTypes.end()) {
275 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
// same for the first shared file
278 itTypes = outputTypes.find("shared");
279 if (itTypes != outputTypes.end()) {
280 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
// same for the first group file
283 itTypes = outputTypes.find("group");
284 if (itTypes != outputTypes.end()) {
285 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
// list every output file for the user
288 m->mothurOutEndLine();
289 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
290 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
291 m->mothurOutEndLine();
295 catch(exception& e) {
296 m->errorOut(e, "SharedCommand", "execute");
300 //**********************************************************************************************************************
// Builds the shared file `filename` from the biom file named by `biomfile`.
// The biom text is first cut into top-level pieces, one per tag, stored in
// fileLines under the tag name with the tag itself already consumed by getTag.
// The pieces "type", "matrix_type", "matrix_element_type", "rows", "columns",
// "shape" and "data" are then looked up in that order, checked, and the
// abundances returned by readData are written with printSharedData.
// On the early exits visible here the function closes `out`, removes
// `filename` and returns 0.
301 int SharedCommand::createSharedFromBiom(string filename) {
304 m->openOutputFile(filename, out);
307 "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
308 "format": "Biological Observation Matrix 0.9.1",
309 "format_url": "http://biom-format.org",
311 "generated_by": "mothur1.24.0",
312 "date": "Tue Apr 17 13:12:07 2012", */
315 m->openInputFile(biomfile, in);
317 string matrixFormat = "";
320 int shapeNumRows = 0;
321 int shapeNumCols = 0;
322 vector<string> otuNames;
323 vector<string> groupNames;
// tag name -> remainder of that tag's text
324 map<string, string> fileLines;
325 vector<string> names;
326 int countOpenBrace = 0;
327 int countClosedBrace = 0;
328 int openParen = -1; //account for opening brace
330 bool ignoreCommas = false;
331 bool atComma = false;
333 string matrixElementType = "";
335 while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
336 if (m->control_pressed) { break; }
338 char c = in.get(); m->gobble(in);
// track bracket and brace depth; a comma only counts as a separator when commas are not being ignored
340 if (c == '[') { countOpenBrace++; }
341 else if (c == ']') { countClosedBrace++; }
342 else if (c == '{') { openParen++; }
343 else if (c == '}') { closeParen++; }
344 else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
// NOTE(review): in both conditions below the two operands of && are the same
// comparison, so the second test is redundant - possibly a brace comparison
// was intended; confirm.
346 if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
347 else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
// a separating comma ends the current piece: store it under its tag
348 if (atComma && !ignoreCommas) {
349 if (fileLines.size() == 0) { //clip first {
350 line = line.substr(1);
352 string tag = getTag(line);
353 fileLines[tag] = line;
356 ignoreCommas = false;
// drop the final character of the remaining text, then store it under its tag
362 line = line.substr(0, line.length()-1);
363 string tag = getTag(line);
364 fileLines[tag] = line;
368 map<string, string>::iterator it;
// type: only "OTU table" / "OTUtable" is accepted
// NOTE(review): here and for the tags below, the "not provided" line only
// prints the message; it does not set m->control_pressed - confirm a missing
// tag is meant to let the run continue.
369 it = fileLines.find("type");
370 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
372 string thisLine = it->second;
373 string type = getTag(thisLine);
374 if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
377 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// matrix_type: "sparse" or "dense"
379 it = fileLines.find("matrix_type");
380 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
382 string thisLine = it->second;
383 matrixFormat = getTag(thisLine);
384 if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
387 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// matrix_element_type: "int" or "float"; float only triggers a warning
389 it = fileLines.find("matrix_element_type");
390 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
392 string thisLine = it->second;
393 matrixElementType = getTag(thisLine);
394 if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
395 if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
398 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// rows: the ids read here become the OTU names; readRows also counts them in numRows
400 it = fileLines.find("rows");
401 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
403 string thisLine = it->second;
404 otuNames = readRows(thisLine, numRows);
407 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// columns: parsed with the same reader; the ids become the group names, counted in numCols
409 it = fileLines.find("columns");
410 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
412 string thisLine = it->second;
414 groupNames = readRows(thisLine, numCols);
416 //if users selected groups, then remove the groups not wanted.
418 vector<string> Groups = m->getGroups();
419 vector<string> allGroups = groupNames;
420 util.setGroups(Groups, allGroups);
421 m->setGroups(Groups);
423 //fill filehandles with necessary ofstreams
426 for (i=0; i<Groups.size(); i++) {
428 filehandles[Groups[i]] = temp;
432 fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
434 //clears file before we start to write to it below
435 for (int i=0; i<Groups.size(); i++) {
436 m->mothurRemove((fileroot + Groups[i] + ".rabund"));
437 outputNames.push_back((fileroot + Groups[i] + ".rabund"));
438 outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
// from here on an early exit also deletes every stored filehandle
442 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// shape: the declared dimensions must match the number of rows and columns actually read
444 it = fileLines.find("shape");
445 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
447 string thisLine = it->second;
448 getDims(thisLine, shapeNumRows, shapeNumCols);
451 if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
453 if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
456 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// data: parse the abundances and write them to the shared file
458 it = fileLines.find("data");
459 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
461 string thisLine = it->second;
// the OTU names become the bin labels used when printing
462 m->currentBinLabels = otuNames;
465 vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
// NOTE(review): lookup[0] is read without checking that readData returned at least one vector
467 m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
468 lookup[0]->printHeaders(out);
469 printSharedData(lookup, out);
472 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
475 if (m->control_pressed) { m->mothurRemove(filename); return 0; }
479 catch(exception& e) {
480 m->errorOut(e, "SharedCommand", "createSharedFromBiom");
484 //**********************************************************************************************************************
485 vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
488 vector<SharedRAbundVector*> lookup;
490 //creates new sharedRAbunds
491 for (int i = 0; i < groupNames.size(); i++) {
492 SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
493 temp->setLabel("dummy");
494 temp->setGroup(groupNames[i]);
495 lookup.push_back(temp);
498 bool dataStart = false;
499 bool inBrackets = false;
503 for (int i = 0; i < line.length(); i++) {
505 if (m->control_pressed) { return lookup; }
507 //look for opening [ to indicate data is starting
508 if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
509 else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
512 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
513 else if ((line[i] == ']') && (inBrackets)) {
517 if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
518 else { m->mothurConvert(num, temp); }
519 nums.push_back(temp);
522 //save info to vectors
523 if (matrixFormat == "dense") {
526 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
528 //set abundances for this otu
529 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
530 for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
535 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
537 //nums contains [otuNum, sampleNum, abundance]
538 lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
544 if (line[i] == ',') {
546 m->mothurConvert(num, temp);
547 nums.push_back(temp);
549 }else { if (!isspace(line[i])) { num += line[i]; } }
557 for (int i = 0; i < lookup.size(); i++) {
558 //if this sharedrabund is not from a group the user wants then delete it.
559 if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) {
561 delete lookup[i]; lookup[i] = NULL;
562 lookup.erase(lookup.begin()+i);
567 if (remove) { eliminateZeroOTUS(lookup); }
572 catch(exception& e) {
573 m->errorOut(e, "SharedCommand", "readData");
577 //**********************************************************************************************************************
578 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
581 vector<SharedRAbundVector*> newLookup;
582 for (int i = 0; i < thislookup.size(); i++) {
583 SharedRAbundVector* temp = new SharedRAbundVector();
584 temp->setLabel(thislookup[i]->getLabel());
585 temp->setGroup(thislookup[i]->getGroup());
586 newLookup.push_back(temp);
590 vector<string> newBinLabels;
591 string snumBins = toString(thislookup[0]->getNumBins());
592 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
593 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
595 //look at each sharedRabund and make sure they are not all zero
597 for (int j = 0; j < thislookup.size(); j++) {
598 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
601 //if they are not all zero add this bin
603 for (int j = 0; j < thislookup.size(); j++) {
604 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
607 //if there is a bin label use it otherwise make one
608 string binLabel = "Otu";
609 string sbinNumber = toString(i+1);
610 if (sbinNumber.length() < snumBins.length()) {
611 int diff = snumBins.length() - sbinNumber.length();
612 for (int h = 0; h < diff; h++) { binLabel += "0"; }
614 binLabel += sbinNumber;
615 if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
617 newBinLabels.push_back(binLabel);
621 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
623 thislookup = newLookup;
624 m->currentBinLabels = newBinLabels;
629 catch(exception& e) {
630 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
634 //**********************************************************************************************************************
635 int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
641 for (int i = 0; i < line.length(); i++) {
643 //you want to ignore any ; until you reach the next '
644 if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } }
645 else if ((line[i] == ']') && (inBar)) {
647 m->mothurConvert(num, shapeNumCols);
652 if (line[i] == ',') {
653 m->mothurConvert(num, shapeNumRows);
655 }else { if (!isspace(line[i])) { num += line[i]; } }
661 catch(exception& e) {
662 m->errorOut(e, "SharedCommand", "getDims");
666 //**********************************************************************************************************************
667 vector<string> SharedCommand::readRows(string line, int& numRows) {
670 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
671 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
674 vector<string> names;
675 int countOpenBrace = 0;
676 int countClosedBrace = 0;
682 for (int i = 0; i < line.length(); i++) {
684 if (m->control_pressed) { return names; }
686 if (line[i] == '[') { countOpenBrace++; }
687 else if (line[i] == ']') { countClosedBrace++; }
688 else if (line[i] == '{') { openParen++; }
689 else if (line[i] == '}') { closeParen++; }
690 else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info
692 //you have reached the end of the rows info
693 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
694 if ((openParen == closeParen) && (closeParen != 0)) { //process row
696 vector<string> items;
697 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
698 string part = items[0]; items.clear();
699 m->splitAtChar(part, items, ':'); //split part we want containing the ids
700 string name = items[1];
702 //remove "" if needed
703 int pos = name.find("\"");
704 if (pos != string::npos) {
706 for (int k = 0; k < name.length(); k++) {
707 if (name[k] != '\"') { newName += name[k]; }
711 names.push_back(name);
720 catch(exception& e) {
721 m->errorOut(e, "SharedCommand", "readRows");
725 //**********************************************************************************************************************
726 //designed for things like "type": "OTU table", returns type
727 string SharedCommand::getTag(string& line) {
729 bool inQuotes = false;
733 for (int i = 0; i < line.length(); i++) {
735 //you want to ignore any ; until you reach the next '
736 if ((line[i] == c) && (!inQuotes)) { inQuotes = true; }
737 else if ((line[i] == c) && (inQuotes)) {
739 line = line.substr(i+1);
743 if (inQuotes) { if (line[i] != c) { tag += line[i]; } }
748 catch(exception& e) {
749 m->errorOut(e, "SharedCommand", "getInfo");
753 //**********************************************************************************************************************
754 int SharedCommand::createSharedFromListGroup(string filename) {
757 m->openOutputFile(filename, out);
759 GroupMap* groupMap = NULL;
760 CountTable* countTable = NULL;
761 if (groupfile != "") {
762 groupMap = new GroupMap(groupfile);
764 int groupError = groupMap->readMap();
765 if (groupError == 1) { delete groupMap; return 0; }
766 vector<string> allGroups = groupMap->getNamesOfGroups();
767 m->setAllGroups(allGroups);
769 countTable = new CountTable();
770 countTable->readTable(countfile);
773 if (m->control_pressed) { return 0; }
775 pickedGroups = false;
777 //if the user has not specified any groups then use them all
778 if (Groups.size() == 0) {
779 if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
780 else { Groups = countTable->getNamesOfGroups(); }
781 m->setGroups(Groups);
782 }else { pickedGroups = true; }
784 //fill filehandles with necessary ofstreams
787 for (i=0; i<Groups.size(); i++) {
789 filehandles[Groups[i]] = temp;
793 fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
795 //clears file before we start to write to it below
796 for (int i=0; i<Groups.size(); i++) {
797 m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
798 outputNames.push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
799 outputTypes["rabund"].push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
802 string errorOff = "no error";
804 //if user provided an order file containing the order the shared file should be in read it
805 //if (ordergroupfile != "") { readOrderFile(); }
807 InputData input(listfile, "shared");
808 SharedListVector* SharedList = input.getSharedListVector();
809 string lastLabel = SharedList->getLabel();
810 vector<SharedRAbundVector*> lookup;
812 if (m->control_pressed) {
813 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
814 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
815 out.close(); m->mothurRemove(filename);
816 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
821 vector<string> namesSeqs;
822 int numGroupNames = 0;
823 if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
824 else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
825 int error = ListGroupSameSeqs(namesSeqs, SharedList);
827 if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
828 m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
830 out.close(); m->mothurRemove(filename); //remove blank shared file you made
833 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
834 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
838 if (error == 1) { m->control_pressed = true; }
840 //if user has specified groups make new groupfile for them
841 if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
843 if (m->getNumGroups() < 4) {
844 for (int i = 0; i < m->getNumGroups(); i++) {
845 groups += (m->getGroups())[i] + ".";
847 }else { groups = "merge"; }
849 string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + getOutputFileNameTag("group");
850 outputTypes["group"].push_back(newGroupFile);
851 outputNames.push_back(newGroupFile);
853 m->openOutputFile(newGroupFile, outGroups);
855 vector<string> names = groupMap->getNamesSeqs();
857 for (int i = 0; i < names.size(); i++) {
858 groupName = groupMap->getGroup(names[i]);
859 if (isValidGroup(groupName, m->getGroups())) {
860 outGroups << names[i] << '\t' << groupName << endl;
866 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
867 set<string> processedLabels;
868 set<string> userLabels = labels;
870 while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
871 if (m->control_pressed) {
872 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
873 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
874 out.close(); m->mothurRemove(filename);
875 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
879 if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
881 lookup = SharedList->getSharedRAbundVector();
883 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
884 if (pickedGroups) { //check for otus with no seqs in them
885 eliminateZeroOTUS(lookup);
888 if (m->control_pressed) {
889 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
890 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
891 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
892 out.close(); m->mothurRemove(filename);
893 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
897 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
898 printSharedData(lookup, out); //prints info to the .shared file
899 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
901 processedLabels.insert(SharedList->getLabel());
902 userLabels.erase(SharedList->getLabel());
905 if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
906 string saveLabel = SharedList->getLabel();
909 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
911 lookup = SharedList->getSharedRAbundVector();
912 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
913 if (pickedGroups) { //check for otus with no seqs in them
914 eliminateZeroOTUS(lookup);
918 if (m->control_pressed) {
919 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
920 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
921 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
922 out.close(); m->mothurRemove(filename);
923 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
927 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
928 printSharedData(lookup, out); //prints info to the .shared file
929 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
931 processedLabels.insert(SharedList->getLabel());
932 userLabels.erase(SharedList->getLabel());
934 //restore real lastlabel to save below
935 SharedList->setLabel(saveLabel);
939 lastLabel = SharedList->getLabel();
942 SharedList = input.getSharedListVector(); //get new list vector to process
945 //output error messages about any remaining user labels
946 set<string>::iterator it;
947 bool needToRun = false;
948 for (it = userLabels.begin(); it != userLabels.end(); it++) {
949 if (processedLabels.count(lastLabel) != 1) {
954 //run last label if you need to
955 if (needToRun == true) {
956 if (SharedList != NULL) { delete SharedList; }
957 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
959 lookup = SharedList->getSharedRAbundVector();
960 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
961 if (pickedGroups) { //check for otus with no seqs in them
962 eliminateZeroOTUS(lookup);
965 if (m->control_pressed) {
966 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
967 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
968 out.close(); m->mothurRemove(filename);
969 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
973 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
974 printSharedData(lookup, out); //prints info to the .shared file
975 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
981 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
985 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
987 if (m->control_pressed) {
988 m->mothurRemove(filename);
989 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
995 catch(exception& e) {
996 m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
1000 //**********************************************************************************************************************
// Writes one row per group to the .shared stream `out` and appends each group's
// rabund vector to that group's own rabund file.
//
// thislookup : the SharedRAbundVectors to print. Taken by value, so the sort below
//              reorders only this local copy of the pointer vector.
// out        : stream that receives "label <tab> group <tab>" followed by whatever
//              the vector's own print() emits.
//
// Row order: sorted by group name (compareSharedRabunds) when `order` is empty,
// otherwise the sequence listed in `order`. In both paths the names of the groups
// that were actually written are passed to m->setGroups().
1001 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofstream& out) {
1004 if (order.size() == 0) { //user has not specified an order so do alphabetically
1005 sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
// collects the group names in the order they are written
1008 vector<string> Groups;
1010 //initialize bin values
1011 for (int i = 0; i < thislookup.size(); i++) {
1012 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
1013 thislookup[i]->print(out);
1015 Groups.push_back(thislookup[i]->getGroup());
// write the same data as an RAbundVector to the per-group file: open with
// openOutputFileAppend, print, then close the handle again so it is not left open
// NOTE(review): filehandles[...] is dereferenced without a lookup check; if it is a
// std::map, an unknown group would insert a null entry - confirm every group is registered.
1017 RAbundVector rav = thislookup[i]->getRAbundVector();
1018 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[thislookup[i]->getGroup()]));
1019 rav.print(*(filehandles[thislookup[i]->getGroup()]));
1020 (*(filehandles[thislookup[i]->getGroup()])).close();
1022 m->setGroups(Groups);
1024 //create a map from groupName to each sharedrabund
1025 map<string, SharedRAbundVector*> myMap;
1026 map<string, SharedRAbundVector*>::iterator myIt;
1028 for (int i = 0; i < thislookup.size(); i++) {
1029 myMap[thislookup[i]->getGroup()] = thislookup[i];
// collects the group names in the order they are written
1033 vector<string> Groups;
1035 //loop through ordered list and print the rabund
// only groups named in `order` are printed; a group present in thislookup but
// absent from `order` is never looked up and therefore not written
1036 for (int i = 0; i < order.size(); i++) {
1037 myIt = myMap.find(order[i]);
1039 if(myIt != myMap.end()) { //we found it
1040 out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
1041 (myIt->second)->print(out);
1043 Groups.push_back((myIt->second)->getGroup());
// same per-group rabund output as in the unordered path above
1045 RAbundVector rav = (myIt->second)->getRAbundVector();
1046 m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[(myIt->second)->getGroup()]));
1047 rav.print(*(filehandles[(myIt->second)->getGroup()]));
1048 (*(filehandles[(myIt->second)->getGroup()])).close();
// NOTE(review): presumably the not-found branch for an `order` entry with no
// matching group in myMap - confirm against the full file
1050 m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
1054 m->setGroups(Groups);
// any std::exception is reported through m->errorOut, tagged with class and method name
1059 catch(exception& e) {
1060 m->errorOut(e, "SharedCommand", "printSharedData");
1064 //**********************************************************************************************************************
// Cross-checks the sequence names of the group file against those of the list file
// and logs an [ERROR] line for every name that appears in only one of the two.
//
// groupMapsSeqs : sequence names taken from the group file.
// SharedList    : list vector whose bins are comma-separated sequence names
//                 (each bin is split with m->splitAtComma).
//
// Returns 0 immediately when m->control_pressed is set. `error` is set to 1 when a
// list name is missing from the group file.
// NOTE(review): the declaration of `error` and the final return are not visible here -
// presumably the function returns `error` (non-zero on mismatch); confirm.
1065 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
// copy the group-file names into a set for fast membership tests
1069 set<string> groupNamesSeqs;
1070 for(int i = 0; i < groupMapsSeqs.size(); i++) {
1071 groupNamesSeqs.insert(groupMapsSeqs[i]);
1074 //go through list and if group returns "not found" output it
1075 for (int i = 0; i < SharedList->getNumBins(); i++) {
1076 if (m->control_pressed) { return 0; }
1078 string names = SharedList->get(i);
1080 vector<string> listNames;
1081 m->splitAtComma(names, listNames);
1083 for (int j = 0; j < listNames.size(); j++) {
1084 int num = groupNamesSeqs.count(listNames[j]);
// a matched name is erased so that, after the loop, the set holds only group-file
// names that never occurred in the list file. Side effect: a name that occurs twice
// in the list file is reported as missing on its second occurrence.
1086 if (num == 0) { error = 1; m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); }
1087 else { groupNamesSeqs.erase(listNames[j]); }
// whatever is left in the set was in the group file but not in the list file
1091 for (set<string>::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) {
1093 m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct."); m->mothurOutEndLine();
// any std::exception is reported through m->errorOut, tagged with class and method name
1098 catch(exception& e) {
1099 m->errorOut(e, "SharedCommand", "ListGroupSameSeqs");
1103 //**********************************************************************************************************************
// Destructor for SharedCommand.
1105 SharedCommand::~SharedCommand(){
1110 //**********************************************************************************************************************
// Reads group names from `ordergroupfile` and appends them to `order`, the list
// printSharedData consults to decide the row order of its output.
//
// Names are extracted with operator>>, so they are whitespace-delimited.
// If m->control_pressed is set while reading, `order` is cleared and reading stops.
// NOTE(review): the loop header, the declarations of `in` / `thisGroup` and the
// return value are not visible here - confirm against the full file.
1111 int SharedCommand::readOrderFile() {
1117 m->openInputFile(ordergroupfile, in);
// read one name, then let m->gobble consume what follows it in the stream
1121 in >> thisGroup; m->gobble(in);
1123 order.push_back(thisGroup);
// on interrupt, discard the partial order rather than leave a truncated list behind
1125 if (m->control_pressed) { order.clear(); break; }
// any std::exception is reported through m->errorOut, tagged with class and method name
1131 catch(exception& e) {
1132 m->errorOut(e, "SharedCommand", "readOrderFile");
1136 //**********************************************************************************************************************
// Returns true when `groupname` is exactly equal to one of the entries in `groups`
// (linear scan, string operator==).
// `groups` is passed by value, so the vector is copied on every call.
// NOTE(review): the fall-through return is not visible here - presumably false
// when no entry matches; confirm.
1138 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
1140 for (int i = 0; i < groups.size(); i++) {
1141 if (groupname == groups[i]) { return true; }
// any std::exception is reported through m->errorOut, tagged with class and method name
1146 catch(exception& e) {
1147 m->errorOut(e, "SharedCommand", "isValidGroup");
1151 /************************************************************/