5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
11 #include "sharedutilities.h"
12 #include "counttable.h"
14 //********************************************************************************************************************
// Comparison helper for ordering SharedRAbundVector pointers by group.
// Returns true when left->getGroup() compares less than right->getGroup().
// Neither pointer is checked for null before it is dereferenced.
15 //sorts lowest to highest
16 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
17 return (left->getGroup() < right->getGroup());
19 //**********************************************************************************************************************
// Registers the parameters understood by this command and collects their names.
// Each CommandParameter built below is appended to the parameters container;
// the names are then copied, in registration order, into myArray.
// NOTE(review): the meaning of the positional CommandParameter arguments is
// defined outside this file - confirm against the CommandParameter declaration.
// NOTE(review): presumably myArray is the value handed back to the caller - confirm.
20 vector<string> SharedCommand::setParameters(){
// input file parameters: biom, list, count and group
22 CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
23 CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
24 CommandParameter pcount("count", "InputTypes", "", "", "", "GroupCount", "",false,false); parameters.push_back(pcount);
25 CommandParameter pgroup("group", "InputTypes", "", "", "none", "GroupCount", "ListGroup",false,false); parameters.push_back(pgroup);
// ordergroup is not registered: its declaration is left commented out
26 //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
// free-text selections
27 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
28 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
// directory overrides
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// gather the name of every registered parameter
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// failures are reported through errorOut, tagged with the class and function name
37 m->errorOut(e, "SharedCommand", "setParameters");
41 //**********************************************************************************************************************
// Builds the help text for the make.shared command.
// The text is assembled by appending one sentence per statement to helpString;
// every literal appended below already ends with a newline.
42 string SharedCommand::getHelpString(){
44 string helpString = "";
// what the command does and which inputs it needs
45 helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
46 helpString += "The make.shared command parameters are list, group, biom, groups, count and label. list and group or count are required unless a current file is available or you provide a biom file.\n";
// one sentence per optional parameter
47 helpString += "The count parameter allows you to provide a count file containing the group info for the list file.\n";
48 helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
49 helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
// help for the ordergroup parameter is kept commented out, matching its commented-out registration
50 //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
// failures are reported through errorOut, tagged with the class and function name
54 m->errorOut(e, "SharedCommand", "getHelpString");
58 //**********************************************************************************************************************
// Translates an output type name into the tag used when naming that output file.
//   type      - output type to look up (shared, rabund or group)
//   inputName - not referenced by any statement of this function shown here
// Returns outputFileName: shared -> shared, rabund -> rabund, group -> groups
// (note the plural). It keeps its initial empty value when no branch assigns it.
59 string SharedCommand::getOutputFileNameTag(string type, string inputName=""){
61 string outputFileName = "";
62 map<string, vector<string> >::iterator it;
64 //is this a type this command creates
65 it = outputTypes.find(type);
// a type missing from outputTypes is reported with a message
66 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// known types map to their tag; any other value is an error that also raises control_pressed
68 if (type == "shared") { outputFileName = "shared"; }
69 else if (type == "rabund") { outputFileName = "rabund"; }
70 else if (type == "group") { outputFileName = "groups"; }
71 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
73 return outputFileName;
// failures are reported through errorOut, tagged with the class and function name
76 m->errorOut(e, "SharedCommand", "getOutputFileNameTag");
81 //**********************************************************************************************************************
// Default constructor. Marks the object as aborted with help already called,
// and registers the three output types (rabund, shared, group), each starting
// with an empty list of file names.
82 SharedCommand::SharedCommand(){
// with both flags set, execute() returns 0 straight away
84 abort = true; calledHelp = true;
86 //initialize outputTypes
87 vector<string> tempOutNames;
88 outputTypes["rabund"] = tempOutNames;
89 outputTypes["shared"] = tempOutNames;
90 outputTypes["group"] = tempOutNames;
// failures are reported through errorOut, tagged with the class and function name
93 m->errorOut(e, "SharedCommand", "SharedCommand");
97 //**********************************************************************************************************************
// Constructs the command from a user supplied option string.
//   option - either help, citation, or the parameter text handed to OptionParser
// Validates the parameter names, resolves the input files (list, biom, group, count),
// falls back to the current files kept by m when none are given, and reads the
// groups and label selections. Any problem found sets abort so that execute()
// does no work.
98 SharedCommand::SharedCommand(string option) {
100 abort = false; calledHelp = false;
103 //allow user to run help
104 if(option == "help") { help(); abort = true; calledHelp = true; }
105 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// names of the parameters this command accepts
109 vector<string> myArray = setParameters();
111 OptionParser parser(option);
112 map<string, string> parameters = parser.getParameters();
114 ValidParameters validParameter;
115 map<string, string>::iterator it;
117 //check to make sure all parameters are valid for command
118 for (it = parameters.begin(); it != parameters.end(); it++) {
119 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
122 //if the user changes the input directory command factory will send this info to us in the output parameter
123 string inputDir = validParameter.validFile(parameters, "inputdir", false);
124 if (inputDir == "not found"){ inputDir = ""; }
// the four blocks below apply the same rule to list, group, count and biom:
// a file name given without a path gets inputDir put in front of it
127 it = parameters.find("list");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["list"] = inputDir + it->second; }
135 it = parameters.find("group");
136 //user has given a template file
137 if(it != parameters.end()){
138 path = m->hasPath(it->second);
139 //if the user has not given a path then, add inputdir. else leave path alone.
140 if (path == "") { parameters["group"] = inputDir + it->second; }
143 it = parameters.find("count");
144 //user has given a template file
145 if(it != parameters.end()){
146 path = m->hasPath(it->second);
147 //if the user has not given a path then, add inputdir. else leave path alone.
148 if (path == "") { parameters["count"] = inputDir + it->second; }
151 it = parameters.find("biom");
152 //user has given a template file
153 if(it != parameters.end()){
154 path = m->hasPath(it->second);
155 //if the user has not given a path then, add inputdir. else leave path alone.
156 if (path == "") { parameters["biom"] = inputDir + it->second; }
// register the output types, each with an empty list of file names
160 vector<string> tempOutNames;
161 outputTypes["rabund"] = tempOutNames;
162 outputTypes["shared"] = tempOutNames;
163 outputTypes["group"] = tempOutNames;
165 //if the user changes the output directory command factory will send this info to us in the output parameter
166 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
// validFile signals problems through the sentinel strings tested below:
// not open aborts the command, not found just leaves the name empty,
// and a usable file is also recorded as the current file of its kind
168 //check for required parameters
169 listfile = validParameter.validFile(parameters, "list", true);
170 if (listfile == "not open") { listfile = ""; abort = true; }
171 else if (listfile == "not found") { listfile = ""; }
172 else { m->setListFile(listfile); }
174 biomfile = validParameter.validFile(parameters, "biom", true);
175 if (biomfile == "not open") { biomfile = ""; abort = true; }
176 else if (biomfile == "not found") { biomfile = ""; }
177 else { m->setBiomFile(biomfile); }
// NOTE(review): ordergroup is still looked up here although setParameters() does not
// register it, and on not open the name is not cleared the way the other files are.
179 ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
180 if (ordergroupfile == "not open") { abort = true; }
181 else if (ordergroupfile == "not found") { ordergroupfile = ""; }
183 groupfile = validParameter.validFile(parameters, "group", true);
184 if (groupfile == "not open") { groupfile = ""; abort = true; }
185 else if (groupfile == "not found") { groupfile = ""; }
186 else { m->setGroupFile(groupfile); }
188 countfile = validParameter.validFile(parameters, "count", true);
189 if (countfile == "not open") { countfile = ""; abort = true; }
190 else if (countfile == "not found") { countfile = ""; }
// a count file is only accepted when testGroups() confirms it carries group info
// NOTE(review): temp is presumably a CountTable (counttable.h is included) - confirm
192 m->setCountTableFile(countfile);
194 if (!temp.testGroups(countfile)) { m->mothurOut("[ERROR]: Your count file does not have group info, aborting."); m->mothurOutEndLine(); abort=true; }
// neither list nor biom supplied: fall back to the current files kept by m
197 if ((biomfile == "") && (listfile == "")) {
198 //is there are current file available for either of these?
199 //give priority to list, then biom
200 listfile = m->getListFile();
201 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
203 biomfile = m->getBiomFile();
204 if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter."); m->mothurOutEndLine(); }
206 m->mothurOut("No valid current files. You must provide a list or biom file before you can use the make.shared command."); m->mothurOutEndLine();
// supplying both list and biom is rejected
211 else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
// the list route also needs group information, taken from a group file or a count file
213 if (listfile != "") {
214 if ((groupfile == "") && (countfile == "")) {
215 groupfile = m->getGroupFile();
216 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
218 countfile = m->getCountTableFile();
219 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
221 m->mothurOut("You need to provide a groupfile or countfile if you are going to use the list format."); m->mothurOutEndLine();
// groups: dash separated names, split into Groups and published through m->setGroups
229 string groups = validParameter.validFile(parameters, "groups", false);
230 if (groups == "not found") { groups = ""; }
232 m->splitAtDash(groups, Groups);
233 m->setGroups(Groups);
236 //check for optional parameter and set defaults
237 // ...at some point should added some additional type checking...
238 string label = validParameter.validFile(parameters, "label", false);
239 if (label == "not found") { label = ""; }
// the literal value all selects every label (allLines = 1); anything else is split at dashes into labels
241 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
242 else { allLines = 1; }
// failures are reported through errorOut, tagged with the class and function name
247 catch(exception& e) {
248 m->errorOut(e, "SharedCommand", "SharedCommand");
252 //**********************************************************************************************************************
// Runs the command: derives the name of the shared file, builds it from either the
// list file or the biom file, records the first output of each type as the current
// file of that type, and prints the names of all output files.
// Returns 0 when help was requested and 2 when the command was aborted for another reason.
254 int SharedCommand::execute(){
257 if (abort == true) { if (calledHelp) { return 0; } return 2; }
259 //getting output filename
260 string filename = "";
261 if (listfile != "") { filename = listfile; }
262 else { filename = biomfile; }
// without an explicit output directory the directory of the input file is used
264 if (outputDir == "") { outputDir += m->hasPath(filename); }
// output name = output directory + root name of the input file + the tag for shared
266 filename = outputDir + m->getRootName(m->getSimpleName(filename));
267 filename = filename + getOutputFileNameTag("shared");
268 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
// the list file takes priority over the biom file
270 if (listfile != "") { createSharedFromListGroup(filename); }
271 else { createSharedFromBiom(filename); }
// on interrupt every output file recorded so far is removed
// NOTE(review): no return follows this cleanup on the line itself, so the current-file
// updates below may still run with names of files that were just removed - confirm.
273 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } }
275 //set rabund file as new current rabundfile
277 itTypes = outputTypes.find("rabund");
278 if (itTypes != outputTypes.end()) {
279 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
// same for the shared file
282 itTypes = outputTypes.find("shared");
283 if (itTypes != outputTypes.end()) {
284 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
// and for the group file
287 itTypes = outputTypes.find("group");
288 if (itTypes != outputTypes.end()) {
289 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
// list every output file for the user
292 m->mothurOutEndLine();
293 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
294 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
295 m->mothurOutEndLine();
// failures are reported through errorOut, tagged with the class and function name
299 catch(exception& e) {
300 m->errorOut(e, "SharedCommand", "execute");
304 //**********************************************************************************************************************
// Converts the biom file named by biomfile into a shared file.
//   filename - path of the shared file to write
// The biom text is first cut into top level fields stored in fileLines under their
// tag. The fields type, matrix_type, matrix_element_type, rows, columns, shape and
// data are then checked in that order. Whenever control_pressed is set, the output
// stream is closed, the partial file is removed and 0 is returned.
305 int SharedCommand::createSharedFromBiom(string filename) {
308 m->openOutputFile(filename, out);
// sample of the header fields found at the start of a biom file
311 "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
312 "format": "Biological Observation Matrix 0.9.1",
313 "format_url": "http://biom-format.org",
315 "generated_by": "mothur1.24.0",
316 "date": "Tue Apr 17 13:12:07 2012", */
319 m->openInputFile(biomfile, in);
321 string matrixFormat = "";
324 int shapeNumRows = 0;
325 int shapeNumCols = 0;
326 vector<string> otuNames;
327 vector<string> groupNames;
// tag -> remaining text of that field
328 map<string, string> fileLines;
329 vector<string> names;
330 int countOpenBrace = 0;
331 int countClosedBrace = 0;
332 int openParen = -1; //account for opening brace
334 bool ignoreCommas = false;
335 bool atComma = false;
337 string matrixElementType = "";
339 while (!in.eof()) { //split file by tags, so each "line" will have something like "id":"/Users/SarahsWork/Desktop/release/final.tx.1.subsample.1.pick.shared-1"
340 if (m->control_pressed) { break; }
// read one character, then skip the whitespace that follows it
342 char c = in.get(); m->gobble(in);
// track bracket and brace depth; a comma only counts as a field separator while commas are not ignored
344 if (c == '[') { countOpenBrace++; }
345 else if (c == ']') { countClosedBrace++; }
346 else if (c == '{') { openParen++; }
347 else if (c == '}') { closeParen++; }
348 else if ((!ignoreCommas) && (c == ',')) { atComma = true; }
// commas are ignored while the square brackets are unbalanced
// NOTE(review): both halves of each condition below are identical; the second half may
// have been meant to compare openParen with closeParen - confirm.
350 if ((countOpenBrace != countClosedBrace) && (countOpenBrace != countClosedBrace)) { ignoreCommas = true; }
351 else if ((countOpenBrace == countClosedBrace) && (countOpenBrace == countClosedBrace)) { ignoreCommas = false; }
// a separating comma completes one field: getTag() strips the tag from the front of
// line and the remainder is stored under that tag
352 if (atComma && !ignoreCommas) {
353 if (fileLines.size() == 0) { //clip first {
354 line = line.substr(1);
356 string tag = getTag(line);
357 fileLines[tag] = line;
360 ignoreCommas = false;
// the pending text loses its last character and is stored the same way
// NOTE(review): presumably this handles the final field once the loop has ended - confirm
366 line = line.substr(0, line.length()-1);
367 string tag = getTag(line);
368 fileLines[tag] = line;
// type: only OTU table (with or without the space) is accepted
372 map<string, string>::iterator it;
373 it = fileLines.find("type");
374 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a type provided.\n"); }
376 string thisLine = it->second;
377 string type = getTag(thisLine);
378 if ((type != "OTU table") && (type != "OTUtable")) { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
381 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// matrix_type: sparse or dense
383 it = fileLines.find("matrix_type");
384 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_type provided.\n"); }
386 string thisLine = it->second;
387 matrixFormat = getTag(thisLine);
388 if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
391 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// matrix_element_type: int or float; float is accepted with a warning
393 it = fileLines.find("matrix_element_type");
394 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a matrix_element_type provided.\n"); }
396 string thisLine = it->second;
397 matrixElementType = getTag(thisLine);
398 if ((matrixElementType != "int") && (matrixElementType != "float")) { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid biom matrix_element_type for mothur. Types allowed are int and float.\n"); m->control_pressed = true; }
399 if (matrixElementType == "float") { m->mothurOut("[WARNING]: the shared file only uses integers, any float values will be rounded down to the nearest integer.\n"); }
402 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// rows: readRows() yields the OTU names and reports their count through numRows
404 it = fileLines.find("rows");
405 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a rows provided.\n"); }
407 string thisLine = it->second;
408 otuNames = readRows(thisLine, numRows);
411 if (m->control_pressed) { out.close(); m->mothurRemove(filename); return 0; }
// columns: parsed with the same helper, giving the group names and numCols
413 it = fileLines.find("columns");
414 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a columns provided.\n"); }
416 string thisLine = it->second;
418 groupNames = readRows(thisLine, numCols);
420 //if users selected groups, then remove the groups not wanted.
422 vector<string> Groups = m->getGroups();
423 vector<string> allGroups = groupNames;
424 util.setGroups(Groups, allGroups);
425 m->setGroups(Groups);
427 //fill filehandles with necessary ofstreams
430 for (i=0; i<Groups.size(); i++) {
432 filehandles[Groups[i]] = temp;
// prefix shared by the rabund file names of all groups
436 fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
438 //clears file before we start to write to it below
439 for (int i=0; i<Groups.size(); i++) {
440 m->mothurRemove((fileroot + Groups[i] + ".rabund"));
441 outputNames.push_back((fileroot + Groups[i] + ".rabund"));
442 outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
// from here on the interrupt cleanup also deletes the objects stored in filehandles
446 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// shape: the declared dimensions must match what rows and columns contained
448 it = fileLines.find("shape");
449 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a shape provided.\n"); }
451 string thisLine = it->second;
452 getDims(thisLine, shapeNumRows, shapeNumCols);
455 if (shapeNumCols != numCols) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true; }
457 if (shapeNumRows != numRows) { m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true; }
460 if (m->control_pressed) { for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; } out.close(); m->mothurRemove(filename); return 0; }
// data: readData() builds one vector per group; headers and data are then written to out
462 it = fileLines.find("data");
463 if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
465 string thisLine = it->second;
466 m->currentBinLabels = otuNames;
469 vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
// NOTE(review): lookup[0] is used without first checking that lookup is not empty
471 m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
472 lookup[0]->printHeaders(out);
473 printSharedData(lookup, out);
// release the objects stored in filehandles
476 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
479 if (m->control_pressed) { m->mothurRemove(filename); return 0; }
// failures are reported through errorOut, tagged with the class and function name
483 catch(exception& e) {
484 m->errorOut(e, "SharedCommand", "createSharedFromBiom");
488 //**********************************************************************************************************************
// Parses the data field of a biom file into one SharedRAbundVector per sample.
//   matrixFormat      - dense or sparse; selects how each bracketed row is applied
//   line              - text of the data field
//   matrixElementType - float values are converted and then cast to int
//   groupNames        - sample names; one vector is created per entry
//   numOTUs           - number of bins each vector is created with
// Returns lookup. Vectors whose group fails util.isValidGroup against m->getGroups()
// are deleted and erased before returning. A row that fails its sanity check sets
// control_pressed and is not applied.
489 vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, string matrixElementType, vector<string>& groupNames, int numOTUs) {
492 vector<SharedRAbundVector*> lookup;
494 //creates new sharedRAbunds
495 for (int i = 0; i < groupNames.size(); i++) {
496 SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
// every vector gets the same placeholder label
497 temp->setLabel("dummy");
498 temp->setGroup(groupNames[i]);
499 lookup.push_back(temp);
502 bool dataStart = false;
503 bool inBrackets = false;
// scan the text one character at a time
507 for (int i = 0; i < line.length(); i++) {
// stop as soon as an error or interrupt has been flagged
509 if (m->control_pressed) { return lookup; }
511 //look for opening [ to indicate data is starting
512 if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
513 else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
// an inner [ opens one row; its closing ] converts the pending number and applies the row
516 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
517 else if ((line[i] == ']') && (inBrackets)) {
521 if (matrixElementType == "float") { m->mothurConvert(num, temp2); temp = (int)temp2; }
522 else { m->mothurConvert(num, temp); }
523 nums.push_back(temp);
526 //save info to vectors
527 if (matrixFormat == "dense") {
// sanity check: a dense row must hold exactly one value per sample
530 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
532 //set abundances for this otu
533 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
// only apply the row when the check above passed; otherwise nums[j] could be read past its end
534 if (nums.size() == lookup.size()) { for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); } }
// sanity check: a sparse entry is exactly three numbers
539 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
541 //nums contains [otuNum, sampleNum, abundance]
// only apply the entry when the check above passed, so that nums[0], nums[1] and nums[2] exist
// NOTE(review): the OTU and sample indices themselves are still not range checked
542 if (nums.size() == 3) { lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]); }
// a comma ends the current number; any other non-space character extends it
548 if (line[i] == ',') {
550 m->mothurConvert(num, temp);
551 nums.push_back(temp);
553 }else { if (!isspace(line[i])) { num += line[i]; } }
// drop the vectors of groups that are not among m->getGroups()
561 for (int i = 0; i < lookup.size(); i++) {
562 //if this sharedrabund is not from a group the user wants then delete it.
563 if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) {
565 delete lookup[i]; lookup[i] = NULL;
// NOTE(review): erase shifts the later elements down by one; confirm the index is stepped back
566 lookup.erase(lookup.begin()+i);
// bins left with only zero abundances are removed when remove is set
// NOTE(review): remove is presumably set when a vector was dropped above - confirm
571 if (remove) { eliminateZeroOTUS(lookup); }
// failures are reported through errorOut, tagged with the class and function name
576 catch(exception& e) {
577 m->errorOut(e, "SharedCommand", "readData");
581 //**********************************************************************************************************************
// Rebuilds thislookup without the bins whose abundance is zero in every vector.
//   thislookup - in/out; the old vectors are deleted and replaced by the rebuilt ones
// m->currentBinLabels is replaced by the labels collected for the rebuilt vectors.
// Returns 0 when interrupted through control_pressed, and also when thislookup is empty.
582 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
// nothing to filter; this also keeps thislookup[0] below from being read on an empty vector
if (thislookup.empty()) { return 0; }
// one empty replacement per input vector, carrying over label and group
585 vector<SharedRAbundVector*> newLookup;
586 for (int i = 0; i < thislookup.size(); i++) {
587 SharedRAbundVector* temp = new SharedRAbundVector();
588 temp->setLabel(thislookup[i]->getLabel());
589 temp->setGroup(thislookup[i]->getGroup());
590 newLookup.push_back(temp);
594 vector<string> newBinLabels;
// text form of the total bin count; its length is the width generated labels are padded to
595 string snumBins = toString(thislookup[0]->getNumBins());
596 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
// on interrupt the partly built replacements are released
597 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
599 //look at each sharedRabund and make sure they are not all zero
601 for (int j = 0; j < thislookup.size(); j++) {
602 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
605 //if they are not all zero add this bin
607 for (int j = 0; j < thislookup.size(); j++) {
608 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
611 //if there is a bin label use it otherwise make one
// generated form: Otu followed by the bin number (starting at 1), zero padded
612 string binLabel = "Otu";
613 string sbinNumber = toString(i+1);
614 if (sbinNumber.length() < snumBins.length()) {
615 int diff = snumBins.length() - sbinNumber.length();
616 for (int h = 0; h < diff; h++) { binLabel += "0"; }
618 binLabel += sbinNumber;
// an existing label for this bin replaces the generated one
619 if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
621 newBinLabels.push_back(binLabel);
// release the old vectors and hand the rebuilt ones back through the reference
625 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
627 thislookup = newLookup;
628 m->currentBinLabels = newBinLabels;
// failures are reported through errorOut, tagged with the class and function name
633 catch(exception& e) {
634 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
638 //**********************************************************************************************************************
// Reads the two numbers of a bracketed pair from line.
//   line         - text containing the pair
//   shapeNumRows - out; receives the number converted when a comma is reached
//   shapeNumCols - out; receives the number converted when the closing ] is reached
// Characters that are not whitespace are accumulated in num until one of those two
// delimiters is met.
639 int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
645 for (int i = 0; i < line.length(); i++) {
// NOTE(review): the next comment mentions characters this loop never tests; it looks
// carried over from another parser.
647 //you want to ignore any ; until you reach the next '
// the opening [ switches collection on and is itself skipped
648 if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } }
// the closing ] ends the second number
649 else if ((line[i] == ']') && (inBar)) {
651 m->mothurConvert(num, shapeNumCols);
// the comma ends the first number
656 if (line[i] == ',') {
657 m->mothurConvert(num, shapeNumRows);
659 }else { if (!isspace(line[i])) { num += line[i]; } }
// failures are reported through errorOut, tagged with the class and function name
665 catch(exception& e) {
666 m->errorOut(e, "SharedCommand", "getDims");
670 //**********************************************************************************************************************
// Extracts the id of every entry in a rows or columns field of a biom file.
//   line    - text of the field
//   numRows - out parameter
//             NOTE(review): presumably receives the number of names read - confirm
// Returns names, one entry per brace delimited object, with double quotes removed.
671 vector<string> SharedCommand::readRows(string line, int& numRows) {
// sample of the text being parsed
674 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
675 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
678 vector<string> names;
679 int countOpenBrace = 0;
680 int countClosedBrace = 0;
686 for (int i = 0; i < line.length(); i++) {
688 if (m->control_pressed) { return names; }
// brackets and braces are only counted; every other character is added to the
// pending row text while openParen is not zero
690 if (line[i] == '[') { countOpenBrace++; }
691 else if (line[i] == ']') { countClosedBrace++; }
692 else if (line[i] == '{') { openParen++; }
693 else if (line[i] == '}') { closeParen++; }
694 else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info
696 //you have reached the end of the rows info
697 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
698 if ((openParen == closeParen) && (closeParen != 0)) { //process row
// NOTE(review): items[0] and items[1] are read without checking how many pieces the split produced
700 vector<string> items;
701 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
702 string part = items[0]; items.clear();
703 m->splitAtChar(part, items, ':'); //split part we want containing the ids
704 string name = items[1];
706 //remove "" if needed
707 int pos = name.find("\"");
708 if (pos != string::npos) {
// copy every character except the double quotes
710 for (int k = 0; k < name.length(); k++) {
711 if (name[k] != '\"') { newName += name[k]; }
715 names.push_back(name);
// failures are reported through errorOut, tagged with the class and function name
724 catch(exception& e) {
725 m->errorOut(e, "SharedCommand", "readRows");
729 //**********************************************************************************************************************
// Extracts the text found between the first two occurrences of the delimiter c in line.
//   line - in/out; once the closing delimiter is found, line is cut down to the text
//          after it, so a second call on the same string yields the next delimited value
// NOTE(review): c is presumably the double quote character - confirm.
730 //designed for things like "type": "OTU table", returns type
731 string SharedCommand::getTag(string& line) {
733 bool inQuotes = false;
737 for (int i = 0; i < line.length(); i++) {
739 //the first occurrence of c opens the tag and the next one closes it
740 if ((line[i] == c) && (!inQuotes)) { inQuotes = true; }
741 else if ((line[i] == c) && (inQuotes)) {
// keep only what follows the closing delimiter
743 line = line.substr(i+1);
// between the delimiters every character other than c is appended to tag
747 if (inQuotes) { if (line[i] != c) { tag += line[i]; } }
752 catch(exception& e) {
// report under the name of this function (was getInfo, unlike every other handler in this
// file, each of which passes its own function name)
753 m->errorOut(e, "SharedCommand", "getTag");
757 //**********************************************************************************************************************
758 int SharedCommand::createSharedFromListGroup(string filename) {
761 m->openOutputFile(filename, out);
763 GroupMap* groupMap = NULL;
764 CountTable* countTable = NULL;
765 if (groupfile != "") {
766 groupMap = new GroupMap(groupfile);
768 int groupError = groupMap->readMap();
769 if (groupError == 1) { delete groupMap; return 0; }
770 vector<string> allGroups = groupMap->getNamesOfGroups();
771 m->setAllGroups(allGroups);
773 countTable = new CountTable();
774 countTable->readTable(countfile);
777 if (m->control_pressed) { return 0; }
779 pickedGroups = false;
781 //if hte user has not specified any groups then use them all
782 if (Groups.size() == 0) {
783 if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
784 else { Groups = countTable->getNamesOfGroups(); }
785 m->setGroups(Groups);
786 }else { pickedGroups = true; }
788 //fill filehandles with neccessary ofstreams
791 for (i=0; i<Groups.size(); i++) {
793 filehandles[Groups[i]] = temp;
797 fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
799 //clears file before we start to write to it below
800 for (int i=0; i<Groups.size(); i++) {
801 m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
802 outputNames.push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
803 outputTypes["rabund"].push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
806 string errorOff = "no error";
808 //if user provided an order file containing the order the shared file should be in read it
809 //if (ordergroupfile != "") { readOrderFile(); }
811 InputData input(listfile, "shared");
812 SharedListVector* SharedList = input.getSharedListVector();
813 string lastLabel = SharedList->getLabel();
814 vector<SharedRAbundVector*> lookup;
816 if (m->control_pressed) {
817 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
818 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
819 out.close(); m->mothurRemove(filename);
820 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
825 vector<string> namesSeqs;
826 int numGroupNames = 0;
827 if (m->groupMode == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
828 else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
829 int error = ListGroupSameSeqs(namesSeqs, SharedList);
831 if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
832 m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
834 out.close(); m->mothurRemove(filename); //remove blank shared file you made
837 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
838 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
842 if (error == 1) { m->control_pressed = true; }
844 //if user has specified groups make new groupfile for them
845 if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
847 if (m->getNumGroups() < 4) {
848 for (int i = 0; i < m->getNumGroups(); i++) {
849 groups += (m->getGroups())[i] + ".";
851 }else { groups = "merge"; }
853 string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + getOutputFileNameTag("group");
854 outputTypes["group"].push_back(newGroupFile);
855 outputNames.push_back(newGroupFile);
857 m->openOutputFile(newGroupFile, outGroups);
859 vector<string> names = groupMap->getNamesSeqs();
861 for (int i = 0; i < names.size(); i++) {
862 groupName = groupMap->getGroup(names[i]);
863 if (isValidGroup(groupName, m->getGroups())) {
864 outGroups << names[i] << '\t' << groupName << endl;
870 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
871 set<string> processedLabels;
872 set<string> userLabels = labels;
874 while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
875 if (m->control_pressed) {
876 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
877 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
878 out.close(); m->mothurRemove(filename);
879 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
883 if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
885 lookup = SharedList->getSharedRAbundVector();
887 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
888 if (pickedGroups) { //check for otus with no seqs in them
889 eliminateZeroOTUS(lookup);
892 if (m->control_pressed) {
893 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
894 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
895 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
896 out.close(); m->mothurRemove(filename);
897 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
901 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
902 printSharedData(lookup, out); //prints info to the .shared file
903 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
905 processedLabels.insert(SharedList->getLabel());
906 userLabels.erase(SharedList->getLabel());
909 if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
910 string saveLabel = SharedList->getLabel();
913 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
915 lookup = SharedList->getSharedRAbundVector();
916 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
917 if (pickedGroups) { //check for otus with no seqs in them
918 eliminateZeroOTUS(lookup);
922 if (m->control_pressed) {
923 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
924 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
925 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
926 out.close(); m->mothurRemove(filename);
927 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
931 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
932 printSharedData(lookup, out); //prints info to the .shared file
933 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
935 processedLabels.insert(SharedList->getLabel());
936 userLabels.erase(SharedList->getLabel());
938 //restore real lastlabel to save below
939 SharedList->setLabel(saveLabel);
943 lastLabel = SharedList->getLabel();
946 SharedList = input.getSharedListVector(); //get new list vector to process
949 //output error messages about any remaining user labels
950 set<string>::iterator it;
951 bool needToRun = false;
952 for (it = userLabels.begin(); it != userLabels.end(); it++) {
953 if (processedLabels.count(lastLabel) != 1) {
958 //run last label if you need to
959 if (needToRun == true) {
960 if (SharedList != NULL) { delete SharedList; }
961 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
963 lookup = SharedList->getSharedRAbundVector();
964 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
965 if (pickedGroups) { //check for otus with no seqs in them
966 eliminateZeroOTUS(lookup);
969 if (m->control_pressed) {
970 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
971 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
972 out.close(); m->mothurRemove(filename);
973 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
977 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
978 printSharedData(lookup, out); //prints info to the .shared file
979 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
985 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
989 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
991 if (m->control_pressed) {
992 m->mothurRemove(filename);
993 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
999 catch(exception& e) {
1000 m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
1004 //**********************************************************************************************************************
// Writes the shared data for one label.
//   thislookup - one SharedRAbundVector* per group. Taken by value, so the sort below
//                reorders only this function's copy of the pointer vector.
//   out        - stream for the .shared file. Each group written contributes
//                "label<TAB>group<TAB>" followed by whatever SharedRAbundVector::print() emits.
// For every group written, that group's RAbundVector is also printed to the file
// fileroot + group + "." + getOutputFileNameTag("rabund") through filehandles[group],
// and the names of the groups actually written are passed to m->setGroups().
// NOTE(review): this listing omits some lines of the function (the embedded numbering
// skips), so closing braces, the else keywords and any short statements are not shown;
// confirm branch boundaries against the full file.
1005 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofstream& out) {
// Case 1: `order` is empty, so groups are written sorted by group name
// (compareSharedRabunds compares getGroup()).
1008 if (order.size() == 0) { //user has not specified an order so do alphabetically
1009 sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
// Names of the groups written during this call.
1012 vector<string> Groups;
1014 //write each group's line to the shared file and print its rabund vector to the group's rabund file
1015 for (int i = 0; i < thislookup.size(); i++) {
1016 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
1017 thislookup[i]->print(out);
1019 Groups.push_back(thislookup[i]->getGroup());
// The group's stream is opened with m->openOutputFileAppend and closed again right after
// the write, so it is not left open between calls.
// NOTE(review): filehandles is indexed with operator[]; if it is a std::map, an unknown
// group name would insert a null entry that is then dereferenced. Presumably every group
// was registered before this call - TODO confirm.
1021 RAbundVector rav = thislookup[i]->getRAbundVector();
1022 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[thislookup[i]->getGroup()]));
1023 rav.print(*(filehandles[thislookup[i]->getGroup()]));
1024 (*(filehandles[thislookup[i]->getGroup()])).close();
1026 m->setGroups(Groups);
// Case 2 (presumably the else branch; the else line is not shown): groups are written in
// the sequence given by `order`. Vectors are indexed by group name first; if two vectors
// carried the same group name, the later one would replace the earlier one in myMap.
1028 //create a map from groupName to each sharedrabund
1029 map<string, SharedRAbundVector*> myMap;
1030 map<string, SharedRAbundVector*>::iterator myIt;
1032 for (int i = 0; i < thislookup.size(); i++) {
1033 myMap[thislookup[i]->getGroup()] = thislookup[i];
// Names of the groups written during this call.
1037 vector<string> Groups;
// Only names listed in `order` are looked up, so a group present in thislookup but
// absent from `order` is not written by this loop.
1039 //loop through ordered list and print the rabund
1040 for (int i = 0; i < order.size(); i++) {
1041 myIt = myMap.find(order[i]);
1043 if(myIt != myMap.end()) { //we found it
1044 out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
1045 (myIt->second)->print(out);
1047 Groups.push_back((myIt->second)->getGroup());
// Same per-group rabund write as in case 1 (open via openOutputFileAppend, print, close).
1049 RAbundVector rav = (myIt->second)->getRAbundVector();
1050 m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[(myIt->second)->getGroup()]));
1051 rav.print(*(filehandles[(myIt->second)->getGroup()]));
1052 (*(filehandles[(myIt->second)->getGroup()])).close();
// Presumably the not-found branch: an `order` entry with no matching group is reported and skipped.
1054 m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
1058 m->setGroups(Groups);
// Any std::exception raised above is reported through m->errorOut with this function's name.
1063 catch(exception& e) {
1064 m->errorOut(e, "SharedCommand", "printSharedData");
1068 //**********************************************************************************************************************
// Cross-checks the sequence names of the group/count file against the names in a list vector
// and prints an [ERROR] line for each mismatch it finds.
//   groupMapsSeqs - sequence names known to the group file or count table.
//   SharedList    - list vector whose bins hold comma-separated sequence names.
// Returns 0 immediately if m->control_pressed is set while scanning the bins.
// NOTE(review): this listing omits some lines of the function (the embedded numbering
// skips), including the condition that guards the error messages and the final return;
// confirm both against the full file.
1069 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
// Copy the names into a set so membership tests do not scan the vector.
1073 set<string> groupNamesSeqs;
1074 for(int i = 0; i < groupMapsSeqs.size(); i++) {
1075 groupNamesSeqs.insert(groupMapsSeqs[i]);
1078 //go through list and if group returns "not found" output it
1079 for (int i = 0; i < SharedList->getNumBins(); i++) {
1080 if (m->control_pressed) { return 0; }
// Each bin is a comma-separated string of sequence names.
1082 string names = SharedList->get(i);
1084 vector<string> listNames;
1085 m->splitAtComma(names, listNames);
1087 for (int j = 0; j < listNames.size(); j++) {
// num is 0 or 1: whether this list name is still present in the set.
1088 int num = groupNamesSeqs.count(listNames[j]);
// Presumably reached when num == 0 (the guarding line is not shown). The wording depends
// on whether a group file or a count file was supplied.
1092 if (groupfile != "") {
1093 m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); }
1094 else{ m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your count file. Please correct."); m->mothurOutEndLine(); }
// Matched names are removed, so whatever remains in the set afterwards never appeared in the list.
// NOTE(review): because of this erase, a name occurring twice in the list would no longer
// be found on its second occurrence - presumably list names are unique; verify.
1095 }else { groupNamesSeqs.erase(listNames[j]); }
// Report names that are in the group/count data but were never seen in the list.
// NOTE(review): this message always says "groupfile", unlike the messages above, which
// switch to "count file" when groupfile is empty.
1099 for (set<string>::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) {
1101 m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct."); m->mothurOutEndLine();
// Any std::exception raised above is reported through m->errorOut with this function's name.
1106 catch(exception& e) {
1107 m->errorOut(e, "SharedCommand", "ListGroupSameSeqs");
1111 //**********************************************************************************************************************
// Destructor for SharedCommand. Its body is not visible in this listing.
1113 SharedCommand::~SharedCommand(){
1118 //**********************************************************************************************************************
// Reads group names from ordergroupfile and appends them to `order`, in file sequence.
// NOTE(review): this listing omits some lines of the function (the embedded numbering
// skips): the declarations of `in` and `thisGroup`, the loop header, and the return
// statement are not shown; confirm them against the full file.
1119 int SharedCommand::readOrderFile() {
1125 m->openInputFile(ordergroupfile, in);
// One token is extracted per iteration with operator>>; m->gobble(in) is then called
// before the next read (presumably to skip trailing whitespace - TODO confirm).
1129 in >> thisGroup; m->gobble(in);
1131 order.push_back(thisGroup);
// On user interrupt the partially read order is discarded and reading stops.
1133 if (m->control_pressed) { order.clear(); break; }
// Any std::exception raised above is reported through m->errorOut with this function's name.
1139 catch(exception& e) {
1140 m->errorOut(e, "SharedCommand", "readOrderFile");
1144 //**********************************************************************************************************************
// Tells whether groupname is one of the entries of groups.
//   groupname - name to look for; compared with ==, so the match must be exact.
//   groups    - candidate names. Both parameters are taken by value, so each call copies
//               them.
// Returns true at the first equal entry. The no-match return is not visible in this
// listing (presumably false - confirm against the full file).
1146 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
// Linear scan of the candidates.
1148 for (int i = 0; i < groups.size(); i++) {
1149 if (groupname == groups[i]) { return true; }
// Any std::exception raised above is reported through m->errorOut with this function's name.
1154 catch(exception& e) {
1155 m->errorOut(e, "SharedCommand", "isValidGroup");
1159 /************************************************************/