5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "sharedcommand.h"
11 #include "sharedutilities.h"
13 //********************************************************************************************************************
14 //sorts lowest to highest
// Ordering predicate for SharedRAbundVector pointers: returns true when left's
// group (getGroup()) compares less than right's. printSharedData hands this to
// sort() when no explicit group order is set.
15 inline bool compareSharedRabunds(SharedRAbundVector* left, SharedRAbundVector* right){
16 return (left->getGroup() < right->getGroup());
18 //**********************************************************************************************************************
// Declares the parameters accepted by make.shared (biom, list, group, label,
// groups, inputdir, outputdir), appends each one to `parameters`, and collects
// their names into myArray.
// NOTE(review): the try/catch scaffolding and the return statement are not visible
// in this excerpt; presumably myArray is what gets returned -- confirm.
19 vector<string> SharedCommand::setParameters(){
21 CommandParameter pbiom("biom", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "none",false,false); parameters.push_back(pbiom);
22 CommandParameter plist("list", "InputTypes", "", "", "BiomListGroup", "BiomListGroup", "ListGroup",false,false); parameters.push_back(plist);
23 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "ListGroup",false,false); parameters.push_back(pgroup);
// ordergroup is disabled here, although the option constructor still asks validFile for it.
24 //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup);
25 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
26 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect just the parameter names, in registration order.
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error path: report the exception with this class and function name.
35 m->errorOut(e, "SharedCommand", "setParameters");
39 //**********************************************************************************************************************
// Builds the help text for make.shared by appending one sentence group per line
// to helpString.
// NOTE(review): the return statement and try/catch lines are not visible in this
// excerpt; presumably helpString is returned -- confirm.
40 string SharedCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The make.shared command reads a list and group file or a biom file and creates a shared file. If a list and group are provided a rabund file is created for each group.\n";
44 helpString += "The make.shared command parameters are list, group, biom, groups and label. list and group are required unless a current file is available or you provide a biom file.\n";
45 helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n";
46 helpString += "The label parameter is only valid with the list and group option and allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n";
// Help for the disabled ordergroup parameter is kept commented out.
47 //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n";
// Error path: report the exception with this class and function name.
51 m->errorOut(e, "SharedCommand", "getHelpString");
55 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help called, and
// registers the three output types (rabund, shared, group) with empty file lists.
56 SharedCommand::SharedCommand(){
// abort and calledHelp both start true, so execute() returns 0 without doing work.
58 abort = true; calledHelp = true;
60 //initialize outputTypes
61 vector<string> tempOutNames;
62 outputTypes["rabund"] = tempOutNames;
63 outputTypes["shared"] = tempOutNames;
64 outputTypes["group"] = tempOutNames;
// Error path: report the exception with this class and function name.
67 m->errorOut(e, "SharedCommand", "SharedCommand");
71 //**********************************************************************************************************************
// Option-string constructor: handles help/citation requests, validates the
// supplied parameter names against setParameters(), prefixes inputdir onto
// path-less file names, and records the list/biom/group files, the groups and
// the labels to process. abort is set when a parameter is invalid, a file cannot
// be opened, or both list and biom are supplied.
// NOTE(review): many structural lines (try, else branches, closing braces, the
// declaration of `path`) are not visible in this excerpt, so the nesting described
// in the comments below is inferred -- confirm against the full file.
72 SharedCommand::SharedCommand(string option) {
74 abort = false; calledHelp = false;
77 //allow user to run help
78 if(option == "help") { help(); abort = true; calledHelp = true; }
79 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83 vector<string> myArray = setParameters();
85 OptionParser parser(option);
// Note: this local map has the same name as the `parameters` that setParameters() fills.
86 map<string, string> parameters = parser.getParameters();
88 ValidParameters validParameter;
89 map<string, string>::iterator it;
91 //check to make sure all parameters are valid for command
92 for (it = parameters.begin(); it != parameters.end(); it++) {
93 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
96 //if the user changes the input directory command factory will send this info to us in the output parameter
97 string inputDir = validParameter.validFile(parameters, "inputdir", false);
98 if (inputDir == "not found"){ inputDir = ""; }
101 it = parameters.find("list");
102 //user has given a list file
103 if(it != parameters.end()){
104 path = m->hasPath(it->second);
105 //if the user has not given a path then, add inputdir. else leave path alone.
106 if (path == "") { parameters["list"] = inputDir + it->second; }
109 it = parameters.find("group");
110 //user has given a group file
111 if(it != parameters.end()){
112 path = m->hasPath(it->second);
113 //if the user has not given a path then, add inputdir. else leave path alone.
114 if (path == "") { parameters["group"] = inputDir + it->second; }
117 /*it = parameters.find("ordergroup");
118 //user has given a template file
119 if(it != parameters.end()){
120 path = m->hasPath(it->second);
121 //if the user has not given a path then, add inputdir. else leave path alone.
122 if (path == "") { parameters["ordergroup"] = inputDir + it->second; }
125 it = parameters.find("biom");
126 //user has given a biom file
127 if(it != parameters.end()){
128 path = m->hasPath(it->second);
129 //if the user has not given a path then, add inputdir. else leave path alone.
130 if (path == "") { parameters["biom"] = inputDir + it->second; }
135 //if the user changes the output directory command factory will send this info to us in the output parameter
136 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
138 //check for required parameters
// validFile reports problems through the sentinel strings "not open" and "not found";
// a usable file is also stored as the current file of its type.
139 listfile = validParameter.validFile(parameters, "list", true);
140 if (listfile == "not open") { listfile = ""; abort = true; }
141 else if (listfile == "not found") { listfile = ""; }
142 else { m->setListFile(listfile); }
144 biomfile = validParameter.validFile(parameters, "biom", true);
145 if (biomfile == "not open") { biomfile = ""; abort = true; }
146 else if (biomfile == "not found") { biomfile = ""; }
147 else { m->setBiomFile(biomfile); }
// NOTE(review): ordergroup is still read here although its registration in
// setParameters() is commented out; unlike the other files, the "not open" case
// does not reset ordergroupfile to "".
149 ordergroupfile = validParameter.validFile(parameters, "ordergroup", true);
150 if (ordergroupfile == "not open") { abort = true; }
151 else if (ordergroupfile == "not found") { ordergroupfile = ""; }
153 groupfile = validParameter.validFile(parameters, "group", true);
154 if (groupfile == "not open") { groupfile = ""; abort = true; }
155 else if (groupfile == "not found") { groupfile = ""; }
156 else { m->setGroupFile(groupfile); }
// Neither list nor biom was supplied: fall back to the current files.
158 if ((biomfile == "") && (listfile == "")) {
159 //is there a current file available for either of these?
160 //give priority to list, then biom
161 listfile = m->getListFile();
162 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
164 biomfile = m->getBiomFile();
165 if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter."); m->mothurOutEndLine(); }
167 m->mothurOut("No valid current files. You must provide a list or biom file before you can use the make.shared command."); m->mothurOutEndLine();
// list and biom are mutually exclusive inputs.
172 else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom."); m->mothurOutEndLine(); abort = true; }
// The list format also needs a group file; try the current group file if none was given.
174 if (listfile != "") {
175 if (groupfile == "") {
176 groupfile = m->getGroupFile();
177 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
179 m->mothurOut("You need to provide a groupfle if you are going to use the list format."); m->mothurOutEndLine();
// groups is a dash-separated list; the split result is stored in Groups and passed to m->setGroups.
186 string groups = validParameter.validFile(parameters, "groups", false);
187 if (groups == "not found") { groups = ""; }
189 m->splitAtDash(groups, Groups);
190 m->setGroups(Groups);
193 //check for optional parameter and set defaults
194 // ...at some point should add some additional type checking...
195 string label = validParameter.validFile(parameters, "label", false);
196 if (label == "not found") { label = ""; }
// "all" selects every label (allLines = 1); anything else is split at dashes into labels.
198 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
199 else { allLines = 1; }
204 catch(exception& e) {
205 m->errorOut(e, "SharedCommand", "SharedCommand");
209 //**********************************************************************************************************************
// Runs make.shared: derives the shared-file name from the list or biom file,
// builds it with createSharedFromListGroup or createSharedFromBiom, then stores
// the first rabund/shared/group output as the current file of that type and
// prints all output file names.
// Returns 0 when help was requested and 2 when the command was aborted for any
// other reason; the normal-path return is on a line not visible in this excerpt.
211 int SharedCommand::execute(){
214 if (abort == true) { if (calledHelp) { return 0; } return 2; }
216 //getting output filename
217 string filename = "";
218 if (listfile != "") { filename = listfile; }
219 else { filename = biomfile; }
// With no outputdir given, write next to the input file.
221 if (outputDir == "") { outputDir += m->hasPath(filename); }
223 filename = outputDir + m->getRootName(m->getSimpleName(filename));
224 filename = filename + "shared";
225 outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
227 if (listfile != "") { createSharedFromListGroup(filename); }
228 else { createSharedFromBiom(filename); }
// On interruption, delete every output file created so far.
230 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } }
232 //set rabund file as new current rabundfile
// (`current` and `itTypes` are declared on a line not visible in this excerpt.)
234 itTypes = outputTypes.find("rabund");
235 if (itTypes != outputTypes.end()) {
236 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
// Same for the shared file.
239 itTypes = outputTypes.find("shared");
240 if (itTypes != outputTypes.end()) {
241 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
// Same for the group file.
244 itTypes = outputTypes.find("group");
245 if (itTypes != outputTypes.end()) {
246 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
249 m->mothurOutEndLine();
250 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
251 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
252 m->mothurOutEndLine();
256 catch(exception& e) {
257 m->errorOut(e, "SharedCommand", "execute");
261 //**********************************************************************************************************************
// Converts the biom file into a shared file written to `filename`.
// The biom file is read one line at a time and each line is dispatched on the
// tag returned by getTag(): the table type, matrix_type and matrix_element_type
// are validated, rows/columns supply the OTU and group names, shape is
// cross-checked against the counts read, and data is turned into
// SharedRAbundVectors that are printed to the shared file.
// Validation failures are logged and flagged by setting m->control_pressed.
// NOTE(review): the loop header, several declarations (out, in, numRows, numCols,
// util, temp, i) and the return are on lines not visible in this excerpt.
262 int SharedCommand::createSharedFromBiom(string filename) {
265 m->openOutputFile(filename, out);
268 "id":"/Users/SarahsWork/Desktop/release/temp.job2.shared-unique",
269 "format": "Biological Observation Matrix 0.9.1",
270 "format_url": "http://biom-format.org",
272 "generated_by": "mothur1.24.0",
273 "date": "Tue Apr 17 13:12:07 2012", */
276 m->openInputFile(biomfile, in);
278 m->getline(in); m->gobble(in); //grab first '{'
280 string matrixFormat = "";
283 int shapeNumRows = 0;
284 int shapeNumCols = 0;
285 vector<string> otuNames;
286 vector<string> groupNames;
289 if (m->control_pressed) { break; }
291 string line = m->getline(in); m->gobble(in);
// getTag cuts the consumed tag off the front of line, so calling it again on the
// same line yields the value that follows the tag.
293 string tag = getTag(line);
296 //check to make sure this is an OTU table
297 string type = getTag(line);
298 if (type != "OTU table") { m->mothurOut("[ERROR]: " + type + " is not a valid biom type for mothur. Only type allowed is OTU table.\n"); m->control_pressed = true; }
299 }else if (tag == "matrix_type") {
300 //get type and check type
301 matrixFormat = getTag(line);
302 if ((matrixFormat != "sparse") && (matrixFormat != "dense")) { m->mothurOut("[ERROR]: " + matrixFormat + " is not a valid biom matrix_type for mothur. Types allowed are sparse and dense.\n"); m->control_pressed = true; }
303 }else if (tag == "matrix_element_type") {
304 //get type and check type
305 string matrixElementType = getTag(line);
306 if (matrixElementType != "int") { m->mothurOut("[ERROR]: " + matrixElementType + " is not a valid matrix_element_type for mothur. Only type allowed is int.\n"); m->control_pressed = true; }
307 }else if (tag == "rows") {
// rows holds the OTU entries; readRows returns their ids.
309 otuNames = readRows(line, in, numRows);
310 }else if (tag == "columns") {
// columns holds the sample entries, which become the group names.
312 groupNames = readRows(line, in, numCols);
314 //if users selected groups, then remove the groups not wanted.
316 vector<string> Groups = m->getGroups();
317 vector<string> allGroups = groupNames;
318 util.setGroups(Groups, allGroups);
319 m->setGroups(Groups);
321 //fill filehandles with necessary ofstreams
324 for (i=0; i<Groups.size(); i++) {
326 filehandles[Groups[i]] = temp;
330 fileroot = outputDir + m->getRootName(m->getSimpleName(biomfile));
332 //clears file before we start to write to it below
// Each group's rabund file is also registered as an output.
333 for (int i=0; i<Groups.size(); i++) {
334 m->mothurRemove((fileroot + Groups[i] + ".rabund"));
335 outputNames.push_back((fileroot + Groups[i] + ".rabund"));
336 outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
339 }else if (tag == "shape") {
340 getDims(line, shapeNumRows, shapeNumCols);
// NOTE(review): these checks compare against numRows/numCols as read so far, so
// they presumably rely on rows and columns appearing before shape -- confirm.
343 if (shapeNumCols != numCols) {
344 m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumCols) + " columns, but I only read " + toString(numCols) + " columns.\n"); m->control_pressed = true;
347 if (shapeNumRows != numRows) {
348 m->mothurOut("[ERROR]: shape indicates " + toString(shapeNumRows) + " rows, but I only read " + toString(numRows) + " rows.\n"); m->control_pressed = true;
350 }else if (tag == "data") {
// The OTU names become the current bin labels before the data is read.
351 m->currentBinLabels = otuNames;
354 vector<SharedRAbundVector*> lookup = readData(matrixFormat, line, in, groupNames, otuNames.size());
// NOTE(review): lookup[0] assumes readData returned at least one vector.
356 m->mothurOutEndLine(); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
357 lookup[0]->printHeaders(out);
358 printSharedData(lookup, out);
366 catch(exception& e) {
367 m->errorOut(e, "SharedCommand", "createSharedFromBiom");
371 //**********************************************************************************************************************
// Parses the biom "data" section into one SharedRAbundVector per group.
//   matrixFormat - "dense" (one bracketed list per OTU, one abundance per sample)
//                  or otherwise treated as sparse triples [otuNum, sampleNum, abundance]
//   line         - remainder of the line that carried the data tag; parsed first
//   in           - biom input stream; parsing continues from it with the same logic
//   groupNames   - sample names, one vector is created per entry
//   numOTUs      - number of bins each vector is created with
// Returns the vectors; after parsing, vectors whose group is not in m->getGroups()
// are deleted, and eliminateZeroOTUS is called when `remove` is set.
// Parse errors are logged and flagged by setting m->control_pressed.
// NOTE(review): declarations of num, nums, temp, otuCount, util and remove, the loop
// over the stream, and the final return are on lines not visible in this excerpt.
372 vector<SharedRAbundVector*> SharedCommand::readData(string matrixFormat, string line, ifstream& in, vector<string>& groupNames, int numOTUs) {
375 vector<SharedRAbundVector*> lookup;
377 //creates new sharedRAbunds
378 for (int i = 0; i < groupNames.size(); i++) {
379 SharedRAbundVector* temp = new SharedRAbundVector(numOTUs); //sets all abunds to 0
// Every vector gets the placeholder label "dummy".
380 temp->setLabel("dummy");
381 temp->setGroup(groupNames[i]);
382 lookup.push_back(temp);
// dataStart: the outer '[' has been seen; inBrackets: currently inside one inner [...] entry.
385 bool dataStart = false;
386 bool inBrackets = false;
// Pass 1: consume whatever data is on the rest of the tag line.
390 for (int i = 0; i < line.length(); i++) {
392 if (m->control_pressed) { return lookup; }
394 //look for opening [ to indicate data is starting
395 if ((line[i] == '[') && (!dataStart)) { dataStart = true; i++; if (!(i < line.length())) { break; } }
396 else if ((line[i] == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
399 if ((line[i] == '[') && (!inBrackets)) { inBrackets = true; i++; if (!(i < line.length())) { break; } }
400 else if ((line[i] == ']') && (inBrackets)) {
// Closing bracket of an entry: convert the pending number text and store it.
403 m->mothurConvert(num, temp);
404 nums.push_back(temp);
407 //save info to vectors
408 if (matrixFormat == "dense") {
411 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
413 //set abundances for this otu
414 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
// NOTE(review): nums[j] is read for every lookup entry even when the size check
// above failed, unless a guard sits on a line not visible here -- confirm.
415 for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
420 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
422 //nums contains [otuNum, sampleNum, abundance]
// NOTE(review): nums[0..2] and lookup[nums[1]] are indexed without a visible
// bounds check, even after the size error above -- confirm a guard exists.
423 lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
// A comma ends one number inside the current entry.
429 if (line[i] == ',') {
431 m->mothurConvert(num, temp);
432 nums.push_back(temp);
// Any other non-whitespace character is accumulated into the pending number text.
434 }else { if (!isspace(line[i])) { num += line[i]; } }
439 //same as above just reading from file.
442 char c = in.get(); m->gobble(in);
444 if (m->control_pressed) { return lookup; }
446 //look for opening [ to indicate data is starting
447 if ((c == '[') && (!dataStart)) { dataStart = true; c = in.get(); if (in.eof()) { break; } }
448 else if ((c == ']') && dataStart && (!inBrackets)) { break; } //we are done reading data
451 if ((c == '[') && (!inBrackets)) { inBrackets = true; c = in.get(); if (in.eof()) { break; } }
452 else if ((c == ']') && (inBrackets)) {
455 m->mothurConvert(num, temp);
456 nums.push_back(temp);
459 //save info to vectors
460 if (matrixFormat == "dense") {
463 if (nums.size() != lookup.size()) { m->mothurOut("[ERROR]: trouble parsing OTU data. OTU " + toString(otuCount) + " causing errors.\n"); m->control_pressed = true; }
465 //set abundances for this otu
466 //nums contains [abundSample0, abundSample1, abundSample2, ...] for current OTU
467 for (int j = 0; j < lookup.size(); j++) { lookup[j]->set(otuCount, nums[j], groupNames[j]); }
472 if (nums.size() != 3) { m->mothurOut("[ERROR]: trouble parsing OTU data.\n"); m->control_pressed = true; }
474 //nums contains [otuNum, sampleNum, abundance]
475 lookup[nums[1]]->set(nums[0], nums[2], groupNames[nums[1]]);
483 m->mothurConvert(num, temp);
484 nums.push_back(temp);
486 }else { if (!isspace(c)) { num += c; } }
// Drop the vectors for groups the user did not ask for.
// NOTE(review): erase() shifts later elements down by one; unless i is decremented
// on a line not visible here, the element following an erased one is skipped.
494 for (int i = 0; i < lookup.size(); i++) {
495 //if this sharedrabund is not from a group the user wants then delete it.
496 if (util.isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) {
498 delete lookup[i]; lookup[i] = NULL;
499 lookup.erase(lookup.begin()+i);
// After removing groups, some OTUs may be empty in every remaining group.
504 if (remove) { eliminateZeroOTUS(lookup); }
509 catch(exception& e) {
510 m->errorOut(e, "SharedCommand", "readData");
514 //**********************************************************************************************************************
// Rebuilds thislookup so that it contains only the bins where at least one
// group has a non-zero abundance, and rewrites m->currentBinLabels to match.
// The old vectors are deleted and thislookup is replaced by the new ones.
// If m->control_pressed is set mid-way, the partially built vectors are deleted
// and 0 is returned, leaving thislookup untouched.
// NOTE(review): thislookup[0] is dereferenced, so the caller must pass at least one
// vector. The declaration of allZero, the test guarding the copy step and the
// final return are on lines not visible in this excerpt.
515 int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
// One empty replacement vector per input vector, with the same label and group.
518 vector<SharedRAbundVector*> newLookup;
519 for (int i = 0; i < thislookup.size(); i++) {
520 SharedRAbundVector* temp = new SharedRAbundVector();
521 temp->setLabel(thislookup[i]->getLabel());
522 temp->setGroup(thislookup[i]->getGroup());
523 newLookup.push_back(temp);
527 vector<string> newBinLabels;
// snumBins gives the digit count used to zero-pad generated labels.
528 string snumBins = toString(thislookup[0]->getNumBins());
529 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
530 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
532 //look at each sharedRabund and make sure they are not all zero
534 for (int j = 0; j < thislookup.size(); j++) {
535 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
538 //if they are not all zero add this bin
540 for (int j = 0; j < thislookup.size(); j++) {
541 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
544 //if there is a bin label use it otherwise make one
// Generated form: "Otu" + zero padding + (i+1), padded to the width of the bin count.
545 string binLabel = "Otu";
546 string sbinNumber = toString(i+1);
547 if (sbinNumber.length() < snumBins.length()) {
548 int diff = snumBins.length() - sbinNumber.length();
549 for (int h = 0; h < diff; h++) { binLabel += "0"; }
551 binLabel += sbinNumber;
// An existing label for this bin replaces the generated one.
552 if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
554 newBinLabels.push_back(binLabel);
// Free the old vectors, then hand the filtered ones back through the reference.
558 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
560 thislookup = newLookup;
561 m->currentBinLabels = newBinLabels;
566 catch(exception& e) {
567 m->errorOut(e, "SharedCommand", "eliminateZeroOTUS");
571 //**********************************************************************************************************************
// Extracts the two numbers of a bracketed pair in line: the text before the
// comma is converted into shapeNumRows and the text before the closing ']'
// into shapeNumCols. Whitespace is ignored.
// NOTE(review): the declarations of num and inBar, any reset of num between the
// two values, and the return are on lines not visible in this excerpt.
572 int SharedCommand::getDims(string line, int& shapeNumRows, int& shapeNumCols) {
578 for (int i = 0; i < line.length(); i++) {
580 //an opening [ starts the dimension list; step past it
581 if ((line[i] == '[') && (!inBar)) { inBar = true; i++; if (!(i < line.length())) { break; } }
582 else if ((line[i] == ']') && (inBar)) {
// Closing bracket: the pending text is the column count.
584 m->mothurConvert(num, shapeNumCols);
// Comma: the pending text is the row count.
589 if (line[i] == ',') {
590 m->mothurConvert(num, shapeNumRows);
592 }else { if (!isspace(line[i])) { num += line[i]; } }
598 catch(exception& e) {
599 m->errorOut(e, "SharedCommand", "getDims");
603 //**********************************************************************************************************************
// Reads a biom "rows" or "columns" array and returns the id of every entry.
// Square brackets delimit the whole array and curly braces delimit one entry;
// an entry is processed once its braces balance, and reading stops once the
// square brackets balance. The rest of `line` is scanned first, then the same
// logic continues character by character from `in`.
// The two sample lines below show the kind of entry being parsed.
// NOTE(review): the declarations of openParen, closeParen, nextRow, end and newName,
// the code that resets the counters after each row, the update of numRows and the
// return are on lines not visible in this excerpt.
604 vector<string> SharedCommand::readRows(string line, ifstream& in, int& numRows) {
607 {"id":"Otu01", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Porphyromonadaceae", "unclassified"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
608 {"id":"Otu02", "metadata":{"taxonomy":["Bacteria", "Bacteroidetes", "Bacteroidia", "Bacteroidales", "Rikenellaceae", "Alistipes"], "bootstrap":[100, 100, 100, 100, 100, 100]}},
611 vector<string> names;
612 int countOpenBrace = 0;
613 int countClosedBrace = 0;
619 for (int i = 0; i < line.length(); i++) {
621 if (m->control_pressed) { return names; }
// Despite the names, the *Brace counters track [ ] and the *Paren counters track { }.
623 if (line[i] == '[') { countOpenBrace++; }
624 else if (line[i] == ']') { countClosedBrace++; }
625 else if (line[i] == '{') { openParen++; }
626 else if (line[i] == '}') { closeParen++; }
627 else if (openParen != 0) { nextRow += line[i]; } //you are reading the row info
629 //you have reached the end of the rows info
630 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
631 if ((openParen == closeParen) && (closeParen != 0)) { //process row
633 vector<string> items;
634 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
// NOTE(review): items[0] and items[1] are read without checking the split results.
635 string part = items[0]; items.clear();
636 m->splitAtChar(part, items, ':'); //split part we want containing the ids
637 string name = items[1];
639 //remove "" if needed
640 int pos = name.find("\"");
641 if (pos != string::npos) {
643 for (int k = 0; k < name.length(); k++) {
644 if (name[k] != '\"') { newName += name[k]; }
// NOTE(review): name is what gets stored; presumably newName is copied into it on a
// line not visible here -- confirm.
648 names.push_back(name);
// Same processing, continuing from the input stream.
659 if (m->control_pressed) { break; }
661 char c = in.get(); m->gobble(in);
663 if (c == '[') { countOpenBrace++; }
664 else if (c == ']') { countClosedBrace++; }
665 else if (c == '{') { openParen++; }
666 else if (c == '}') { closeParen++; }
667 else if (openParen != 0) { nextRow += c; } //you are reading the row info
670 //you have reached the end of the rows info
671 if ((countOpenBrace == countClosedBrace) && (countClosedBrace != 0)) { end = true; break; }
672 if ((openParen == closeParen) && (closeParen != 0)) { //process row
674 vector<string> items;
675 m->splitAtChar(nextRow, items, ','); //parse by comma, will return junk for metadata but we aren't using that anyway
676 string part = items[0]; items.clear();
677 m->splitAtChar(part, items, ':'); //split part we want containing the ids
678 string name = items[1];
680 //remove "" if needed
681 int pos = name.find("\"");
682 if (pos != string::npos) {
684 for (int k = 0; k < name.length(); k++) {
685 if (name[k] != '\"') { newName += name[k]; }
689 names.push_back(name);
699 catch(exception& e) {
700 m->errorOut(e, "SharedCommand", "readRows");
704 //**********************************************************************************************************************
705 //designed for things like "type": "OTU table": the first call returns type, a second call on the same line returns OTU table
// Collects the characters between the first two occurrences of the delimiter c
// and, on the closing delimiter, cuts everything up to and including it off the
// front of line (line is passed by reference, so the caller sees the change).
// NOTE(review): the declarations of tag and c and the return statements are on
// lines not visible in this excerpt; c is presumably the double-quote character.
706 string SharedCommand::getTag(string& line) {
708 bool inQuotes = false;
712 for (int i = 0; i < line.length(); i++) {
714 //the first occurrence of c opens the tag, the next one closes it
715 if ((line[i] == c) && (!inQuotes)) { inQuotes = true; }
716 else if ((line[i] == c) && (inQuotes)) {
718 line = line.substr(i+1);
// While inside the delimiters, keep every character except the delimiter itself.
722 if (inQuotes) { if (line[i] != c) { tag += line[i]; } }
727 catch(exception& e) {
// NOTE(review): the function name reported here is "getInfo", not "getTag".
728 m->errorOut(e, "SharedCommand", "getInfo");
732 //**********************************************************************************************************************
// Builds the shared file `filename` (plus one rabund file per group) from the
// list and group files.
// Steps visible here: read the group map, choose the groups (all groups when the
// user named none), register the per-group rabund outputs, check that list and
// group files agree, optionally write a reduced group file when groups were
// picked, then walk the list file label by label and print each selected label
// through printSharedData. When groups were picked, empty OTUs are removed first.
// Every m->control_pressed check frees what has been allocated and removes the
// files written so far.
// NOTE(review): many lines (declarations of out, i, temp, it3, groups, outGroups,
// groupName; several else branches, deletes and returns) are not visible in this
// excerpt, so the exact nesting is inferred -- confirm against the full file.
733 int SharedCommand::createSharedFromListGroup(string filename) {
736 m->openOutputFile(filename, out);
738 GroupMap* groupMap = new GroupMap(groupfile);
740 int groupError = groupMap->readMap();
// A readMap result of 1 ends the function early with return value 0.
741 if (groupError == 1) { delete groupMap; return 0; }
742 vector<string> allGroups = groupMap->getNamesOfGroups();
743 m->setAllGroups(allGroups);
745 pickedGroups = false;
747 //if the user has not specified any groups then use them all
748 if (Groups.size() == 0) {
749 Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups);
750 }else { pickedGroups = true; }
752 //fill filehandles with necessary ofstreams
755 for (i=0; i<Groups.size(); i++) {
757 filehandles[Groups[i]] = temp;
761 fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
763 //clears file before we start to write to it below
764 for (int i=0; i<Groups.size(); i++) {
765 m->mothurRemove((fileroot + Groups[i] + ".rabund"));
766 outputNames.push_back((fileroot + Groups[i] + ".rabund"));
767 outputTypes["rabund"].push_back((fileroot + Groups[i] + ".rabund"));
770 string errorOff = "no error";
772 //if user provided an order file containing the order the shared file should be in read it
773 //if (ordergroupfile != "") { readOrderFile(); }
// Read the first list vector; lastLabel tracks the previously seen label.
775 InputData input(listfile, "shared");
776 SharedListVector* SharedList = input.getSharedListVector();
777 string lastLabel = SharedList->getLabel();
778 vector<SharedRAbundVector*> lookup;
780 if (m->control_pressed) {
781 delete SharedList; delete groupMap;
782 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
783 out.close(); m->mothurRemove(filename);
784 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
// Compare the sequence names of the group map with those of the list file.
789 vector<string> groupMapNamesSeqs = groupMap->getNamesSeqs();
790 int error = ListGroupSameSeqs(groupMapNamesSeqs, SharedList);
792 if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error
793 m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine();
796 m->mothurRemove(filename); //remove blank shared file you made
// Write out which names are missing so the user can fix the inputs.
798 createMisMatchFile(SharedList, groupMap);
801 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
805 delete SharedList; delete groupMap;
// A name mismatch is treated like an interruption from here on.
810 if (error == 1) { m->control_pressed = true; }
812 //if user has specified groups make new groupfile for them
813 if (pickedGroups) { //make new group file
// With fewer than four groups the file name lists them; otherwise it uses "merge".
815 if (m->getNumGroups() < 4) {
816 for (int i = 0; i < m->getNumGroups(); i++) {
817 groups += (m->getGroups())[i] + ".";
819 }else { groups = "merge"; }
821 string newGroupFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + groups + "groups";
822 outputTypes["group"].push_back(newGroupFile);
823 outputNames.push_back(newGroupFile);
825 m->openOutputFile(newGroupFile, outGroups);
827 vector<string> names = groupMap->getNamesSeqs();
// Keep only the sequences whose group is one of the selected groups.
829 for (int i = 0; i < names.size(); i++) {
830 groupName = groupMap->getGroup(names[i]);
831 if (isValidGroup(groupName, m->getGroups())) {
832 outGroups << names[i] << '\t' << groupName << endl;
838 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
839 set<string> processedLabels;
840 set<string> userLabels = labels;
// Main loop: continue while there are list vectors and either all labels are
// wanted or requested labels remain.
842 while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
843 if (m->control_pressed) {
844 delete SharedList; delete groupMap;
845 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
846 out.close(); m->mothurRemove(filename);
847 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
// Case 1: the current label is one that should be processed.
851 if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
853 lookup = SharedList->getSharedRAbundVector();
855 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
856 if (pickedGroups) { //check for otus with no seqs in them
857 eliminateZeroOTUS(lookup);
860 if (m->control_pressed) {
861 delete SharedList; delete groupMap;
862 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
863 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
864 out.close(); m->mothurRemove(filename);
865 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
869 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
870 printSharedData(lookup, out); //prints info to the .shared file
871 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
873 processedLabels.insert(SharedList->getLabel());
874 userLabels.erase(SharedList->getLabel());
// Case 2: a requested label was passed without being present, so the previous
// label (lastLabel) is processed in its place.
877 if ((m->anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) == true) && (processedLabels.count(lastLabel) != 1)) {
878 string saveLabel = SharedList->getLabel();
881 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
883 lookup = SharedList->getSharedRAbundVector();
884 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
885 if (pickedGroups) { //check for otus with no seqs in them
886 eliminateZeroOTUS(lookup);
890 if (m->control_pressed) {
891 delete SharedList; delete groupMap;
892 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
893 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
894 out.close(); m->mothurRemove(filename);
895 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
899 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
900 printSharedData(lookup, out); //prints info to the .shared file
901 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
903 processedLabels.insert(SharedList->getLabel());
904 userLabels.erase(SharedList->getLabel());
906 //restore real lastlabel to save below
907 SharedList->setLabel(saveLabel);
// Remember this label and move on to the next list vector.
911 lastLabel = SharedList->getLabel();
914 SharedList = input.getSharedListVector(); //get new list vector to process
917 //output error messages about any remaining user labels
918 set<string>::iterator it;
919 bool needToRun = false;
920 for (it = userLabels.begin(); it != userLabels.end(); it++) {
921 if (processedLabels.count(lastLabel) != 1) {
926 //run last label if you need to
927 if (needToRun == true) {
928 if (SharedList != NULL) { delete SharedList; }
929 SharedList = input.getSharedListVector(lastLabel); //get new list vector to process
931 lookup = SharedList->getSharedRAbundVector();
932 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
933 if (pickedGroups) { //check for otus with no seqs in them
934 eliminateZeroOTUS(lookup);
937 if (m->control_pressed) {
939 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
940 out.close(); m->mothurRemove(filename);
941 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
945 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
946 printSharedData(lookup, out); //prints info to the .shared file
947 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
// Final pass over the per-group file handles (loop body not visible in this excerpt).
953 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
// Last interruption check: remove the shared file and every rabund file.
959 if (m->control_pressed) {
960 m->mothurRemove(filename);
961 for (int i=0; i<Groups.size(); i++) { m->mothurRemove((fileroot + Groups[i] + ".rabund")); }
967 catch(exception& e) {
968 m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
972 //**********************************************************************************************************************
// Writes one line per group to the shared file `out` (label, group, then the
// vector's own print output) and appends the matching RAbundVector to that
// group's rabund file. Afterwards m->setGroups receives the groups that were
// actually printed.
// thislookup is taken by value, so the sort below reorders only the local copy
// of the pointer vector.
// With an empty `order`, groups are printed sorted by group name; otherwise they
// are printed in the sequence given by `order`, and a message is logged for any
// entry of `order` that has no data.
// NOTE(review): the else lines separating the two branches are not visible in
// this excerpt.
973 void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup, ofstream& out) {
976 if (order.size() == 0) { //user has not specified an order so do alphabetically
977 sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
980 vector<string> Groups;
982 //initialize bin values
983 for (int i = 0; i < thislookup.size(); i++) {
984 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
985 thislookup[i]->print(out);
987 Groups.push_back(thislookup[i]->getGroup());
// The group's rabund file is opened in append mode, written and closed again for every vector.
989 RAbundVector rav = thislookup[i]->getRAbundVector();
990 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
991 rav.print(*(filehandles[thislookup[i]->getGroup()]));
992 (*(filehandles[thislookup[i]->getGroup()])).close();
994 m->setGroups(Groups);
996 //create a map from groupName to each sharedrabund
997 map<string, SharedRAbundVector*> myMap;
998 map<string, SharedRAbundVector*>::iterator myIt;
1000 for (int i = 0; i < thislookup.size(); i++) {
1001 myMap[thislookup[i]->getGroup()] = thislookup[i];
1005 vector<string> Groups;
1007 //loop through ordered list and print the rabund
1008 for (int i = 0; i < order.size(); i++) {
1009 myIt = myMap.find(order[i]);
1011 if(myIt != myMap.end()) { //we found it
1012 out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
1013 (myIt->second)->print(out);
1015 Groups.push_back((myIt->second)->getGroup());
1017 RAbundVector rav = (myIt->second)->getRAbundVector();
1018 m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
1019 rav.print(*(filehandles[(myIt->second)->getGroup()]));
1020 (*(filehandles[(myIt->second)->getGroup()])).close();
// A group named in order but absent from thislookup is reported and skipped.
1022 m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine();
1026 m->setGroups(Groups);
1031 catch(exception& e) {
1032 m->errorOut(e, "SharedCommand", "printSharedData");
1036 //**********************************************************************************************************************
1037 int SharedCommand::createMisMatchFile(SharedListVector* SharedList, GroupMap* groupMap) {
1039 ofstream outMisMatch;
1040 string outputMisMatchName = outputDir + m->getRootName(m->getSimpleName(listfile));
1042 //you have sequences in your list file that are not in your group file
1043 if (SharedList->getNumSeqs() > groupMap->getNumSeqs()) {
1044 outputMisMatchName += "missing.group";
1045 m->mothurOut("For a list of names that are in your list file and not in your group file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
1047 m->openOutputFile(outputMisMatchName, outMisMatch);
1049 set<string> listNames;
1050 set<string>::iterator itList;
1052 //go through list and if group returns "not found" output it
1053 for (int i = 0; i < SharedList->getNumBins(); i++) {
1054 if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
1056 string names = SharedList->get(i);
1058 vector<string> binNames;
1059 m->splitAtComma(names, binNames);
1061 for (int j = 0; j < binNames.size(); j++) {
1062 string name = binNames[j];
1063 string group = groupMap->getGroup(name);
1065 if(group == "not found") { outMisMatch << name << endl; }
1067 itList = listNames.find(name);
1068 if (itList != listNames.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
1069 else { listNames.insert(name); }
1073 outMisMatch.close();
1076 }else {//you have sequences in your group file that are not in you list file
1078 outputMisMatchName += "missing.name";
1079 m->mothurOut("For a list of names that are in your group file and not in your list file, please refer to " + outputMisMatchName + "."); m->mothurOutEndLine();
1081 map<string, string> namesInList;
1082 map<string, string>::iterator itList;
1084 //go through listfile and get names
1085 for (int i = 0; i < SharedList->getNumBins(); i++) {
1086 if (m->control_pressed) { return 0; }
1089 string names = SharedList->get(i);
1091 vector<string> binNames;
1092 m->splitAtComma(names, binNames);
1094 for (int j = 0; j < binNames.size(); j++) {
1096 string name = binNames[j];
1098 itList = namesInList.find(name);
1099 if (itList != namesInList.end()) { m->mothurOut(name + " is in your list file more than once. Sequence names must be unique. please correct."); m->mothurOutEndLine(); }
1101 namesInList[name] = name;
1106 //get names of sequences in groupfile
1107 vector<string> seqNames = groupMap->getNamesSeqs();
1109 map<string, string>::iterator itMatch;
1111 m->openOutputFile(outputMisMatchName, outMisMatch);
1113 //loop through names in seqNames and if they aren't in namesIn list output them
1114 for (int i = 0; i < seqNames.size(); i++) {
1115 if (m->control_pressed) { outMisMatch.close(); m->mothurRemove(outputMisMatchName); return 0; }
1117 itMatch = namesInList.find(seqNames[i]);
1119 if (itMatch == namesInList.end()) {
1121 outMisMatch << seqNames[i] << endl;
1124 outMisMatch.close();
1129 catch(exception& e) {
1130 m->errorOut(e, "SharedCommand", "createMisMatchFile");
1134 //**********************************************************************************************************************
1135 int SharedCommand::ListGroupSameSeqs(vector<string>& groupMapsSeqs, SharedListVector* SharedList) {
1139 set<string> groupNamesSeqs;
1140 for(int i = 0; i < groupMapsSeqs.size(); i++) {
1141 groupNamesSeqs.insert(groupMapsSeqs[i]);
1144 //go through list and if group returns "not found" output it
1145 for (int i = 0; i < SharedList->getNumBins(); i++) {
1146 if (m->control_pressed) { return 0; }
1148 string names = SharedList->get(i);
1150 vector<string> listNames;
1151 m->splitAtComma(names, listNames);
1153 for (int j = 0; j < listNames.size(); j++) {
1154 int num = groupNamesSeqs.count(listNames[j]);
1156 if (num == 0) { error = 1; m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); }
1157 else { groupNamesSeqs.erase(listNames[j]); }
1161 for (set<string>::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) {
1163 m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct."); m->mothurOutEndLine();
1168 catch(exception& e) {
1169 m->errorOut(e, "SharedCommand", "ListGroupSameSeqs");
1173 //**********************************************************************************************************************
1175 SharedCommand::~SharedCommand(){
1180 //**********************************************************************************************************************
1181 int SharedCommand::readOrderFile() {
1187 m->openInputFile(ordergroupfile, in);
1191 in >> thisGroup; m->gobble(in);
1193 order.push_back(thisGroup);
1195 if (m->control_pressed) { order.clear(); break; }
1201 catch(exception& e) {
1202 m->errorOut(e, "SharedCommand", "readOrderFile");
1206 //**********************************************************************************************************************
1208 bool SharedCommand::isValidGroup(string groupname, vector<string> groups) {
1210 for (int i = 0; i < groups.size(); i++) {
1211 if (groupname == groups[i]) { return true; }
1216 catch(exception& e) {
1217 m->errorOut(e, "SharedCommand", "isValidGroup");
1221 /************************************************************/