5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
11 #include "sharedutilities.h"
13 //********************************************************************************************************************
14 //sorts lowest to highest
15 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
16 return (left->getGroup() < right->getGroup());
18 //**********************************************************************************************************************
19 vector<string> SharedCommand::setParameters(){
21 CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
22 CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
23 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "ListGroup",false,false); parameters.push_back(pgroup);
24 //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
25 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
26 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "SharedCommand", "setParameters");
39 //**********************************************************************************************************************
40 string SharedCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
44 helpString += "The make.shared command parameters are list, group, biom, groups and label. list and group are required unless a current file is available or you provide a biom file.\n";
45 helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
46 helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
47 //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
51 m->errorOut(e, "SharedCommand", "getHelpString");
55 //**********************************************************************************************************************
// Returns the file-name tag used for one of this command's output types:
//   "shared" -> "shared", "rabund" -> "rabund", "group" -> "groups".
// type      : output type to look up; it is first checked against outputTypes
//             and an unregistered type is reported through m->mothurOut.
// inputName : not read in the visible lines of this function.
// A type that has no tag defined here is reported and sets m->control_pressed.
// The returned string stays empty when no tag was assigned.
56 string SharedCommand::getOutputFileNameTag(string type, string inputName=""){
58 string outputFileName = "";
59 map<string, vector<string> >::iterator it;
61 //is this a type this command creates
62 it = outputTypes.find(type);
63 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
65 if (type == "shared") { outputFileName = "shared"; }
66 else if (type == "rabund") { outputFileName = "rabund"; }
// note: the "group" type maps to the plural tag "groups"
67 else if (type == "group") { outputFileName = "groups"; }
68 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
70 return outputFileName;
73 m->errorOut(e, "SharedCommand", "getOutputFileNameTag");
78 //**********************************************************************************************************************
79 SharedCommand::SharedCommand(){
81 abort = true; calledHelp = true;
83 //initialize outputTypes
84 vector<string> tempOutNames;
85 outputTypes["rabund"] = tempOutNames;
86 outputTypes["shared"] = tempOutNames;
87 outputTypes["group"] = tempOutNames;
90 m->errorOut(e, "SharedCommand", "SharedCommand");
94 //**********************************************************************************************************************
// Constructs the command from a make.shared option string.
// Visible steps: handle "help"/"citation", validate parameter names, prepend
// inputdir to file parameters given without a path, resolve the list / biom /
// group files (falling back to mothur's current files), then parse the
// groups and label options. Any failure sets abort = true.
95 SharedCommand::SharedCommand(string option) {
97 abort = false; calledHelp = false;
100 //allow user to run help
101 if(option == "help") { help(); abort = true; calledHelp = true; }
102 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// names of the parameters this command accepts
106 vector<string> myArray = setParameters();
108 OptionParser parser(option);
// local name -> value map of what the user typed; this is a different object
// from the `parameters` container that setParameters() fills with push_back
109 map<string, string> parameters = parser.getParameters();
111 ValidParameters validParameter;
112 map<string, string>::iterator it;
114 //check to make sure all parameters are valid for command
115 for (it = parameters.begin(); it != parameters.end(); it++) {
116 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
119 //if the user changes the input directory command factory will send this info to us in the output parameter
120 string inputDir = validParameter.validFile(parameters, "inputdir", false);
121 if (inputDir == "not found"){ inputDir = ""; }
// list file: prepend inputDir when the user supplied a bare file name
124 it = parameters.find("list");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["list"] = inputDir + it->second; }
// group file: same treatment as list
132 it = parameters.find("group");
133 //user has given a template file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["group"] = inputDir + it->second; }
// the ordergroup handling below starts a block comment, i.e. it is disabled
140 /*it = parameters.find("ordergroup");
141 //user has given a template file
142 if(it != parameters.end()){
143 path = m->hasPath(it->second);
144 //if the user has not given a path then, add inputdir. else leave path alone.
145 if (path == "") { parameters["ordergroup"] = inputDir + it->second; }
// biom file: same treatment as list
148 it = parameters.find("biom");
149 //user has given a template file
150 if(it != parameters.end()){
151 path = m->hasPath(it->second);
152 //if the user has not given a path then, add inputdir. else leave path alone.
153 if (path == "") { parameters["biom"] = inputDir + it->second; }
// register the output types this command can produce, each with no files yet
157 vector<string> tempOutNames;
158 outputTypes["rabund"] = tempOutNames;
159 outputTypes["shared"] = tempOutNames;
160 outputTypes["group"] = tempOutNames;
162 //if the user changes the output directory command factory will send this info to us in the output parameter
163 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
165 //check for required parameters
// for each file below: "not open" aborts, "not found" leaves the name empty,
// anything else is stored and (for list/biom/group) made mothur's current file
166 listfile = validParameter.validFile(parameters, "list", true);
167 if (listfile == "not open") { listfile = ""; abort = true; }
168 else if (listfile == "not found") { listfile = ""; }
169 else { m->setListFile(listfile); }
171 biomfile = validParameter.validFile(parameters, "biom", true);
172 if (biomfile == "not open") { biomfile = ""; abort = true; }
173 else if (biomfile == "not found") { biomfile = ""; }
174 else { m->setBiomFile(biomfile); }
// NOTE(review): "ordergroup" is still read here although its CommandParameter
// is commented out in setParameters() and its help text is disabled
176 ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
177 if (ordergroupfile == "not open") { abort = true; }
178 else if (ordergroupfile == "not found") { ordergroupfile = ""; }
180 groupfile = validParameter.validFile(parameters, "group", true);
181 if (groupfile == "not open") { groupfile = ""; abort = true; }
182 else if (groupfile == "not found") { groupfile = ""; }
183 else { m->setGroupFile(groupfile); }
// neither input given: fall back to mothur's current list file, then biom file
185 if ((biomfile == "") && (listfile == "")) {
186 //is there are current file available for either of these?
187 //give priority to list, then biom
188 listfile = m->getListFile();
189 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
191 biomfile = m->getBiomFile();
192 if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter."); m->mothurOutEndLine(); }
194 m->mothurOut("No valid current files. You must provide a list or biom file before you can use the make.shared command."); m->mothurOutEndLine();
// both inputs given: the two formats are mutually exclusive
199 else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
// the list format also needs a group file; try the current one if none was given
201 if (listfile != "") {
202 if (groupfile == "") {
203 groupfile = m->getGroupFile();
204 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
// NOTE(review): the message below misspells "groupfile" as "groupfle"
206 m->mothurOut("You need to provide a groupfle if you are going to use the list format."); m->mothurOutEndLine();
// groups option: dash-separated names, stored in Groups and handed to m
213 string groups = validParameter.validFile(parameters, "groups", false);
214 if (groups == "not found") { groups = ""; }
216 m->splitAtDash(groups, Groups);
217 m->setGroups(Groups);
220 //check for optional parameter and set defaults
221 // ...at some point should added some additional type checking...
222 string label = validParameter.validFile(parameters, "label", false);
223 if (label == "not found") { label = ""; }
// label option: "all" selects every label, otherwise split the dash-separated list
225 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
226 else { allLines = 1; }
231 catch(exception& e) {
232 m->errorOut(e, "SharedCommand", "SharedCommand");
236 //**********************************************************************************************************************
238 int SharedCommand::execute(){
241 if (abort == true) { if (calledHelp) { return 0; } return 2; }
243 //getting output filename
244 string filename = "";
245 if (listfile != "") { filename = listfile; }
246 else { filename = biomfile; }
248 if (outputDir == "") { outputDir += m->hasPath(filename); }
250 filename = outputDir + m->getRootName(m->getSimpleName(filename));
251 filename = filename + getOutputFileNameTag("shared");
252 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
254 if (listfile != "") { createSharedFromListGroup(filename); }
255 else { createSharedFromBiom(filename); }
257 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } }
259 //set rabund file as new current rabundfile
261 itTypes = outputTypes.find("rabund");
262 if (itTypes != outputTypes.end()) {
263 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
266 itTypes = outputTypes.find("shared");
267 if (itTypes != outputTypes.end()) {
268 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
271 itTypes = outputTypes.find("group");
272 if (itTypes != outputTypes.end()) {
273 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
276 m->mothurOutEndLine();
277 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
278 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
279 m->mothurOutEndLine();
283 catch(exception& e) {
284 m->errorOut(e, "SharedCommand", "execute");
288 //**********************************************************************************************************************
// Builds the shared file `filename` from biomfile.
// The biom file is read line by line; each line's leading quoted tag selects
// the handling: matrix_type and matrix_element_type are validated, "rows"
// yields the OTU names, "columns" yields the group names, "shape" is checked
// against the counts actually read, and "data" is parsed and printed.
// Validation failures are reported and set m->control_pressed.
289 int SharedCommand::createSharedFromBiom(string filename) {
292 m->openOutputFile(filename, out);
// the next lines are a sample of a biom header (they end with a comment terminator)
295 "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
296 "format": "Biological Observation Matrix 0.9.1",
297 "format_url": "http://biom-format.org",
299 "generated_by": "mothur1.24.0",
300 "date": "Tue Apr 17 13:12:07 2012", */
303 m->openInputFile(biomfile, in);
305 m->getline(in); m->gobble(in); //grab first '{'
307 string matrixFormat = "";
310 int shapeNumRows = 0;
311 int shapeNumCols = 0;
312 vector<string> otuNames;
313 vector<string> groupNames;
316 if (m->control_pressed) { break; }
318 string line = m->getline(in); m->gobble(in);
// getTag returns the first quoted token and removes it from line, so a
// second getTag call on the same line yields the value that follows the tag
320 string tag = getTag(line);
323 //check to make sure this is an OTU table
324 string type = getTag(line);
325 if (type != "OTU table") { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
326 }else if (tag == "matrix_type") {
327 //get type and check type
328 matrixFormat = getTag(line);
329 if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
330 }else if (tag == "matrix_element_type") {
331 //get type and check type
332 string matrixElementType = getTag(line);
333 if (matrixElementType != "int") { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid matrix_element_type for mothur. Only type allowed is int.\n"); m->control_pressed = true; }
334 }else if (tag == "rows") {
// rows of the biom table are the OTUs
336 otuNames = readRows(line, in, numRows);
337 }else if (tag == "columns") {
// columns of the biom table are the groups; the same parser is reused
339 groupNames = readRows(line, in, numCols);
341 //if users selected groups, then remove the groups not wanted.
343 vector<string> Groups = m->getGroups();
344 vector<string> allGroups = groupNames;
345 util.setGroups(Groups, allGroups);
346 m->setGroups(Groups);
348 //fill filehandles with neccessary ofstreams
351 for (i=0; i<Groups.size(); i++) {
353 filehandles[Groups[i]] = temp;
// rabund outputs are named <outputDir><input root><group>.rabund
357 fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
359 //clears file before we start to write to it below
360 for (int i=0; i<Groups.size(); i++) {
361 m->mothurRemove((fileroot + Groups[i] + ".rabund"));
362 outputNames.push_back((fileroot + Groups[i] + ".rabund"));
363 outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
366 }else if (tag == "shape") {
367 getDims(line, shapeNumRows, shapeNumCols);
// the declared shape must agree with the rows/columns counted while reading
370 if (shapeNumCols != numCols) {
371 m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true;
374 if (shapeNumRows != numRows) {
375 m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true;
377 }else if (tag == "data") {
// the OTU names become the bin labels before the data is read and printed
378 m->currentBinLabels = otuNames;
381 vector<SharedRAbundVector*> lookup = readData(matrixFormat, line, in, groupNames, otuNames.size());
// NOTE(review): lookup[0] is used without a visible check that lookup is non-empty
383 m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
384 lookup[0]->printHeaders(out);
385 printSharedData(lookup, out);
393 catch(exception& e) {
394 m->errorOut(e, "SharedCommand", "createSharedFromBiom");
398 //**********************************************************************************************************************
// Parses the biom "data" matrix into one SharedRAbundVector per group.
// matrixFormat : "dense" rows hold one abundance per sample for the current
//                OTU; otherwise each entry is an [otuNum, sampleNum, abundance]
//                triple.
// line         : remainder of the line that carried the "data" tag; parsed first.
// in           : biom stream; parsing continues from it character by character.
// groupNames   : one vector is created per name, in this order.
// numOTUs      : size passed to each SharedRAbundVector constructor.
// Returns the vectors, minus those whose group is not in m->getGroups().
// Parse problems are reported and set m->control_pressed.
399 vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, ifstream& in, vector<string>& groupNames, int numOTUs) {
402 vector<SharedRAbundVector*> lookup;
404 //creates new sharedRAbunds
405 for (int i = 0; i < groupNames.size(); i++) {
406 SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
// every vector is given the placeholder label "dummy"
407 temp->setLabel("dummy");
408 temp->setGroup(groupNames[i]);
409 lookup.push_back(temp);
// dataStart: the outer [ of the matrix has been seen
// inBrackets: currently inside one inner [ ... ] row/entry
412 bool dataStart = false;
413 bool inBrackets = false;
// pass 1: consume whatever part of the matrix is on `line`
417 for (int i = 0; i < line.length(); i++) {
419 if (m->control_pressed) { return lookup; }
421 //look for opening [ to indicate data is starting
422 if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
423 else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
426 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
427 else if ((line[i] == ']') && (inBrackets)) {
// closing ] of a row/entry: convert the pending number text and store it
430 m->mothurConvert(num, temp);
431 nums.push_back(temp);
434 //save info to vectors
435 if (matrixFormat == "dense") {
438 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
440 //set abundances for this otu
441 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
442 for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
447 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
449 //nums contains [otuNum, sampleNum, abundance]
// NOTE(review): nums[1] indexes lookup and groupNames with no visible range check
450 lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
// a comma ends the current number; any other non-space character extends it
456 if (line[i] == ',') {
458 m->mothurConvert(num, temp);
459 nums.push_back(temp);
461 }else { if (!isspace(line[i])) { num += line[i]; } }
466 //same as above just reading from file.
// pass 2: identical state machine, fed from the stream one character at a time
469 char c = in.get(); m->gobble(in);
471 if (m->control_pressed) { return lookup; }
473 //look for opening [ to indicate data is starting
474 if ((c == '[') && (!dataStart)) { dataStart = true; c = in.get(); if (in.eof()) { break; } }
475 else if ((c == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
478 if ((c == '[') && (!inBrackets)) { inBrackets = true; c = in.get(); if (in.eof()) { break; } }
479 else if ((c == ']') && (inBrackets)) {
482 m->mothurConvert(num, temp);
483 nums.push_back(temp);
486 //save info to vectors
487 if (matrixFormat == "dense") {
490 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
492 //set abundances for this otu
493 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
494 for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
499 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
501 //nums contains [otuNum, sampleNum, abundance]
// NOTE(review): same unchecked nums[1] indexing as in the first pass
502 lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
510 m->mothurConvert(num, temp);
511 nums.push_back(temp);
513 }else { if (!isspace(c)) { num += c; } }
// drop the vectors of groups the user did not ask for
521 for (int i = 0; i < lookup.size(); i++) {
522 //if this sharedrabund is not from a group the user wants then delete it.
523 if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) {
525 delete lookup[i]; lookup[i] = NULL;
// NOTE(review): erasing while indexing forward - confirm the index is adjusted after the erase
526 lookup.erase(lookup.begin()+i);
// presumably `remove` is set when a vector was dropped above - not visible here, verify
531 if (remove) { eliminateZeroOTUS(lookup); }
536 catch(exception& e) {
537 m->errorOut(e, "SharedCommand", "readData");
541 //**********************************************************************************************************************
542 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
545 vector<SharedRAbundVector*> newLookup;
546 for (int i = 0; i < thislookup.size(); i++) {
547 SharedRAbundVector* temp = new SharedRAbundVector();
548 temp->setLabel(thislookup[i]->getLabel());
549 temp->setGroup(thislookup[i]->getGroup());
550 newLookup.push_back(temp);
554 vector<string> newBinLabels;
555 string snumBins = toString(thislookup[0]->getNumBins());
556 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
557 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
559 //look at each sharedRabund and make sure they are not all zero
561 for (int j = 0; j < thislookup.size(); j++) {
562 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
565 //if they are not all zero add this bin
567 for (int j = 0; j < thislookup.size(); j++) {
568 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
571 //if there is a bin label use it otherwise make one
572 string binLabel = "Otu";
573 string sbinNumber = toString(i+1);
574 if (sbinNumber.length() < snumBins.length()) {
575 int diff = snumBins.length() - sbinNumber.length();
576 for (int h = 0; h < diff; h++) { binLabel += "0"; }
578 binLabel += sbinNumber;
579 if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
581 newBinLabels.push_back(binLabel);
585 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
587 thislookup = newLookup;
588 m->currentBinLabels = newBinLabels;
593 catch(exception& e) {
594 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
598 //**********************************************************************************************************************
// Reads the two numbers of a bracketed pair such as [rows, cols] from `line`.
// The text collected up to the comma is converted into shapeNumRows and the
// text collected up to the closing ] is converted into shapeNumCols;
// whitespace characters are skipped. Conversion is done by m->mothurConvert.
599 int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
605 for (int i = 0; i < line.length(); i++) {
607 //skip everything up to the opening [ ; step past it, stopping if the line ends there
608 if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } }
609 else if ((line[i] == ']') && (inBar)) {
// closing ] : whatever was collected since the comma is the column count
611 m->mothurConvert(num, shapeNumCols);
// comma : whatever was collected so far is the row count
616 if (line[i] == ',') {
617 m->mothurConvert(num, shapeNumRows);
619 }else { if (!isspace(line[i])) { num += line[i]; } }
625 catch(exception& e) {
626 m->errorOut(e, "SharedCommand", "getDims");
630 //**********************************************************************************************************************
// Collects the id of every {...} object in a biom "rows" or "columns" array.
// line : remainder of the line that carried the tag; scanned first.
// in   : biom stream; scanning continues from it one character at a time
//        until the square brackets seen so far are balanced.
// Returns the ids in file order. numRows is not touched in the visible lines
// of this function - presumably it is set to the number of names; verify.
// For each object, the text between the braces is split at ',' and the first
// piece is split at ':'; the part after the ':' is taken as the name.
631 vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows) {
// the next two lines are sample row entries from a biom file
634 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
635 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
638 vector<string> names;
// [ ] are counted to find the end of the array, { } to find the end of one object
639 int countOpenBrace = 0;
640 int countClosedBrace = 0;
// pass 1: scan the text already read into `line`
646 for (int i = 0; i < line.length(); i++) {
648 if (m->control_pressed) { return names; }
650 if (line[i] == '[') { countOpenBrace++; }
651 else if (line[i] == ']') { countClosedBrace++; }
652 else if (line[i] == '{') { openParen++; }
653 else if (line[i] == '}') { closeParen++; }
654 else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info
656 //you have reached the end of the rows info
657 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
658 if ((openParen == closeParen) && (closeParen != 0)) { //process row
660 vector<string> items;
661 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
// NOTE(review): items[0] and items[1] are read without a visible size check
662 string part = items[0]; items.clear();
663 m->splitAtChar(part, items, ':'); //split part we want containing the ids
664 string name = items[1];
666 //remove "" if needed
667 int pos = name.find("\"");
668 if (pos != string::npos) {
670 for (int k = 0; k < name.length(); k++) {
671 if (name[k] != '\"') { newName += name[k]; }
675 names.push_back(name);
// pass 2: same scan, fed from the stream
686 if (m->control_pressed) { break; }
688 char c = in.get(); m->gobble(in);
690 if (c == '[') { countOpenBrace++; }
691 else if (c == ']') { countClosedBrace++; }
692 else if (c == '{') { openParen++; }
693 else if (c == '}') { closeParen++; }
694 else if (openParen != 0) { nextRow += c; } //you are reading the row info
697 //you have reached the end of the rows info
698 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
699 if ((openParen == closeParen) && (closeParen != 0)) { //process row
701 vector<string> items;
702 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
// NOTE(review): same unchecked items[0] / items[1] access as in the first pass
703 string part = items[0]; items.clear();
704 m->splitAtChar(part, items, ':'); //split part we want containing the ids
705 string name = items[1];
707 //remove "" if needed
708 int pos = name.find("\"");
709 if (pos != string::npos) {
711 for (int k = 0; k < name.length(); k++) {
712 if (name[k] != '\"') { newName += name[k]; }
716 names.push_back(name);
726 catch(exception& e) {
727 m->errorOut(e, "SharedCommand", "readRows");
731 //**********************************************************************************************************************
732 //designed for things like "type": "OTU table", returns map type -> OTU table
733 string SharedCommand::getTag(string& line) {
735 bool inQuotes = false;
739 for (int i = 0; i < line.length(); i++) {
741 //you want to ignore any ; until you reach the next '
742 if ((line[i] == c) && (!inQuotes)) { inQuotes = true; }
743 else if ((line[i] == c) && (inQuotes)) {
745 line = line.substr(i+1);
749 if (inQuotes) { if (line[i] != c) { tag += line[i]; } }
754 catch(exception& e) {
755 m->errorOut(e, "SharedCommand", "getInfo");
759 //**********************************************************************************************************************
// Builds the shared file `filename` from listfile and groupfile.
// Visible steps: read the group map, decide which groups to use, register one
// rabund output per group, check that the list and group files describe the
// same sequences, optionally write a reduced group file when the user picked
// groups, then walk the list file and print shared data for every requested
// label. Whenever m->control_pressed is seen, the allocated objects are
// released and the shared and rabund files are removed.
760 int SharedCommand::createSharedFromListGroup(string filename) {
763 m->openOutputFile(filename, out);
765 GroupMap* groupMap = new GroupMap(groupfile);
// a readMap() result of 1 ends the function early
767 int groupError = groupMap->readMap();
768 if (groupError == 1) { delete groupMap; return 0; }
769 vector<string> allGroups = groupMap->getNamesOfGroups();
770 m->setAllGroups(allGroups);
772 pickedGroups = false;
774 //if the user has not specified any groups then use them all
775 if (Groups.size() == 0) {
776 Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
777 }else { pickedGroups = true; }
779 //fill filehandles with neccessary ofstreams
782 for (i=0; i<Groups.size(); i++) {
784 filehandles[Groups[i]] = temp;
// rabund outputs are named <outputDir><list root><group>.<rabund tag>
788 fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
790 //clears file before we start to write to it below
791 for (int i=0; i<Groups.size(); i++) {
792 m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
793 outputNames.push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
794 outputTypes["rabund"].push_back((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund")));
797 string errorOff = "no error";
799 //if user provided an order file containing the order the shared file should be in read it
800 //if (ordergroupfile != "") { readOrderFile(); }
// read the first list vector; its label seeds lastLabel
802 InputData input(listfile, "shared");
803 SharedListVector* SharedList = input.getSharedListVector();
804 string lastLabel = SharedList->getLabel();
805 vector<SharedRAbundVector*> lookup;
807 if (m->control_pressed) {
808 delete SharedList; delete groupMap;
809 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
810 out.close(); m->mothurRemove(filename);
811 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
// compare the sequence names of the group file with those of the list file
816 vector<string> groupMapNamesSeqs = groupMap->getNamesSeqs();
817 int error = ListGroupSameSeqs(groupMapNamesSeqs, SharedList);
819 if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
820 m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
823 m->mothurRemove(filename); //remove blank shared file you made
825 createMisMatchFile(SharedList, groupMap);
828 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
832 delete SharedList; delete groupMap;
// a mismatch reported by ListGroupSameSeqs is turned into a cancel request
837 if (error == 1) { m->control_pressed = true; }
839 //if user has specified groups make new groupfile for them
840 if (pickedGroups) { //make new group file
// file name infix: the group names joined with '.' when there are fewer than
// four groups, otherwise the fixed word "merge"
842 if (m->getNumGroups() < 4) {
843 for (int i = 0; i < m->getNumGroups(); i++) {
844 groups += (m->getGroups())[i] + ".";
846 }else { groups = "merge"; }
848 string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + getOutputFileNameTag("group");
849 outputTypes["group"].push_back(newGroupFile);
850 outputNames.push_back(newGroupFile);
852 m->openOutputFile(newGroupFile, outGroups);
// write "name <tab> group" for every sequence whose group is one of the chosen groups
854 vector<string> names = groupMap->getNamesSeqs();
856 for (int i = 0; i < names.size(); i++) {
857 groupName = groupMap->getGroup(names[i]);
858 if (isValidGroup(groupName, m->getGroups())) {
859 outGroups << names[i] << '\t' << groupName << endl;
865 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels: labels already written; userLabels: requested labels still outstanding
866 set<string> processedLabels;
867 set<string> userLabels = labels;
// main loop: continue while list vectors remain and either all labels are
// wanted or some requested label is still outstanding
869 while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
870 if (m->control_pressed) {
871 delete SharedList; delete groupMap;
872 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
873 out.close(); m->mothurRemove(filename);
874 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
// case 1: the current label is wanted - convert it and print it
878 if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
880 lookup = SharedList->getSharedRAbundVector();
882 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
883 if (pickedGroups) { //check for otus with no seqs in them
884 eliminateZeroOTUS(lookup);
887 if (m->control_pressed) {
888 delete SharedList; delete groupMap;
889 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
890 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
891 out.close(); m->mothurRemove(filename);
892 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
896 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
897 printSharedData(lookup, out); //prints info to the .shared file
898 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
900 processedLabels.insert(SharedList->getLabel());
901 userLabels.erase(SharedList->getLabel());
// case 2: m->anyLabelsToProcess returned true for the current label and the
// previous label has not been written yet - go back and process lastLabel
904 if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
905 string saveLabel = SharedList->getLabel();
908 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
910 lookup = SharedList->getSharedRAbundVector();
911 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
912 if (pickedGroups) { //check for otus with no seqs in them
913 eliminateZeroOTUS(lookup);
917 if (m->control_pressed) {
918 delete SharedList; delete groupMap;
919 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
920 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
921 out.close(); m->mothurRemove(filename);
922 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
926 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
927 printSharedData(lookup, out); //prints info to the .shared file
928 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
930 processedLabels.insert(SharedList->getLabel());
931 userLabels.erase(SharedList->getLabel());
933 //restore real lastlabel to save below
934 SharedList->setLabel(saveLabel);
// remember this label, then advance to the next list vector
938 lastLabel = SharedList->getLabel();
941 SharedList = input.getSharedListVector(); //get new list vector to process
944 //output error messages about any remaining user labels
945 set<string>::iterator it;
946 bool needToRun = false;
947 for (it = userLabels.begin(); it != userLabels.end(); it++) {
948 if (processedLabels.count(lastLabel) != 1) {
953 //run last label if you need to
954 if (needToRun == true) {
955 if (SharedList != NULL) { delete SharedList; }
956 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
958 lookup = SharedList->getSharedRAbundVector();
959 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
960 if (pickedGroups) { //check for otus with no seqs in them
961 eliminateZeroOTUS(lookup);
964 if (m->control_pressed) {
966 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
967 out.close(); m->mothurRemove(filename);
968 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
972 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
973 printSharedData(lookup, out); //prints info to the .shared file
974 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
980 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
// final cancel check: remove the shared file and every per-group rabund file
986 if (m->control_pressed) {
987 m->mothurRemove(filename);
988 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + "." + getOutputFileNameTag("rabund"))); }
994 catch(exception& e) {
995 m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
999 //**********************************************************************************************************************
1000 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofstream& out) {
1003 if (order.size() == 0) { //user has not specified an order so do aplabetically
1004 sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
1007 vector<string> Groups;
1009 //initialize bin values
1010 for (int i = 0; i < thislookup.size(); i++) {
1011 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
1012 thislookup[i]->print(out);
1014 Groups.push_back(thislookup[i]->getGroup());
1016 RAbundVector rav = thislookup[i]->getRAbundVector();
1017 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[thislookup[i]->getGroup()]));
1018 rav.print(*(filehandles[thislookup[i]->getGroup()]));
1019 (*(filehandles[thislookup[i]->getGroup()])).close();
1021 m->setGroups(Groups);
1023 //create a map from groupName to each sharedrabund
1024 map<string, SharedRAbundVector*> myMap;
1025 map<string, SharedRAbundVector*>::iterator myIt;
1027 for (int i = 0; i < thislookup.size(); i++) {
1028 myMap[thislookup[i]->getGroup()] = thislookup[i];
1032 vector<string> Groups;
1034 //loop through ordered list and print the rabund
1035 for (int i = 0; i < order.size(); i++) {
1036 myIt = myMap.find(order[i]);
1038 if(myIt != myMap.end()) { //we found it
1039 out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
1040 (myIt->second)->print(out);
1042 Groups.push_back((myIt->second)->getGroup());
1044 RAbundVector rav = (myIt->second)->getRAbundVector();
1045 m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + "." + getOutputFileNameTag("rabund"), *(filehandles[(myIt->second)->getGroup()]));
1046 rav.print(*(filehandles[(myIt->second)->getGroup()]));
1047 (*(filehandles[(myIt->second)->getGroup()])).close();
1049 m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
1053 m->setGroups(Groups);
1058 catch(exception& e) {
1059 m->errorOut(e, "SharedCommand", "printSharedData");
1063 //**********************************************************************************************************************
1064 int SharedCommand::createMisMatchFile(SharedListVector* SharedList, GroupMap* groupMap) {
1066 ofstream outMisMatch;
1067 string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(listfile));
1069 //you have sequences in your list file that are not in your group file
1070 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) {
1071 outputMisMatchName += "missing.group";
1072 m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
1074 m->openOutputFile(outputMisMatchName, outMisMatch);
1076 set<string> listNames;
1077 set<string>::iterator itList;
1079 //go through list and if group returns "not found" output it
1080 for (int i = 0; i < SharedList->getNumBins(); i++) {
1081 if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
1083 string names = SharedList->get(i);
1085 vector<string> binNames;
1086 m->splitAtComma(names, binNames);
1088 for (int j = 0; j < binNames.size(); j++) {
1089 string name = binNames[j];
1090 string group = groupMap->getGroup(name);
1092 if(group == "not found") { outMisMatch << name << endl; }
1094 itList = listNames.find(name);
1095 if (itList != listNames.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
1096 else { listNames.insert(name); }
1100 outMisMatch.close();
1103 }else {//you have sequences in your group file that are not in you list file
1105 outputMisMatchName += "missing.name";
1106 m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
1108 map<string, string> namesInList;
1109 map<string, string>::iterator itList;
1111 //go through listfile and get names
1112 for (int i = 0; i < SharedList->getNumBins(); i++) {
1113 if (m->control_pressed) { return 0; }
1116 string names = SharedList->get(i);
1118 vector<string> binNames;
1119 m->splitAtComma(names, binNames);
1121 for (int j = 0; j < binNames.size(); j++) {
1123 string name = binNames[j];
1125 itList = namesInList.find(name);
1126 if (itList != namesInList.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
1128 namesInList[name] = name;
1133 //get names of sequences in groupfile
1134 vector<string> seqNames = groupMap->getNamesSeqs();
1136 map<string, string>::iterator itMatch;
1138 m->openOutputFile(outputMisMatchName, outMisMatch);
1140 //loop through names in seqNames and if they aren't in namesIn list output them
1141 for (int i = 0; i < seqNames.size(); i++) {
1142 if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
1144 itMatch = namesInList.find(seqNames[i]);
1146 if (itMatch == namesInList.end()) {
1148 outMisMatch << seqNames[i] << endl;
1151 outMisMatch.close();
1156 catch(exception& e) {
1157 m->errorOut(e, "SharedCommand", "createMisMatchFile");
1161 //**********************************************************************************************************************
1162 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
1166 set<string> groupNamesSeqs;
1167 for(int i = 0; i < groupMapsSeqs.size(); i++) {
1168 groupNamesSeqs.insert(groupMapsSeqs[i]);
1171 //go through list and if group returns "not found" output it
1172 for (int i = 0; i < SharedList->getNumBins(); i++) {
1173 if (m->control_pressed) { return 0; }
1175 string names = SharedList->get(i);
1177 vector<string> listNames;
1178 m->splitAtComma(names, listNames);
1180 for (int j = 0; j < listNames.size(); j++) {
1181 int num = groupNamesSeqs.count(listNames[j]);
1183 if (num == 0) { error = 1; m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); }
1184 else { groupNamesSeqs.erase(listNames[j]); }
1188 for (set<string>::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) {
1190 m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct."); m->mothurOutEndLine();
1195 catch(exception& e) {
1196 m->errorOut(e, "SharedCommand", "ListGroupSameSeqs");
1200 //**********************************************************************************************************************
//Destructor for SharedCommand.
SharedCommand::~SharedCommand(){
1207 //**********************************************************************************************************************
1208 int SharedCommand::readOrderFile() {
1214 m->openInputFile(ordergroupfile, in);
1218 in >> thisGroup; m->gobble(in);
1220 order.push_back(thisGroup);
1222 if (m->control_pressed) { order.clear(); break; }
1228 catch(exception& e) {
1229 m->errorOut(e, "SharedCommand", "readOrderFile");
1233 //**********************************************************************************************************************
1235 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
1237 for (int i = 0; i < groups.size(); i++) {
1238 if (groupname == groups[i]) { return true; }
1243 catch(exception& e) {
1244 m->errorOut(e, "SharedCommand", "isValidGroup");
1248 /************************************************************/