]> git.donarmstrong.com Git - mothur.git/blob - getsharedotucommand.cpp
fixes while testing 1.33.0
[mothur.git] / getsharedotucommand.cpp
1 /*
2  *  getsharedotucommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/22/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getsharedotucommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> GetSharedOTUCommand::setParameters(){    
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "sharedFasta", "none", "none","fasta",false,false); parameters.push_back(pfasta);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "groupList","",false,false,true); parameters.push_back(pgroup);
18                 CommandParameter plist("list", "InputTypes", "", "", "sharedList", "sharedList", "groupList","sharedseq",false,false,true); parameters.push_back(plist);
19         CommandParameter pshared("shared", "InputTypes", "", "", "sharedList-sharedFasta", "sharedList", "none","sharedseq",false,false,true); parameters.push_back(pshared);
20                 CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "","",false,false); parameters.push_back(poutput);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22                 CommandParameter puniquegroups("uniquegroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(puniquegroups);
23                 CommandParameter psharedgroups("sharedgroups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(psharedgroups);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "GetSharedOTUCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string GetSharedOTUCommand::getHelpString(){    
38         try {
39                 string helpString = "";
40                 helpString += "The get.sharedseqs command parameters are list, group, shared, label, uniquegroups, sharedgroups, output and fasta.  The list and group or shared parameters are required, unless you have valid current files.\n";
41                 helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
42                 helpString += "The uniquegroups and sharedgroups parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
43                 helpString += "If you enter your groups under the uniquegroups parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
44                 helpString += "If you enter your groups under the sharedgroups parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
45                 helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group or shared file.\n";
46                 helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified. It can only be used with a list and group file not the shared file input.\n";
47                 helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
48                 helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
49                 helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
50                 helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, uniquegroups=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
51                 helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=amazon.groups, uniquegroups=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
52                 helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
53                 helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
54                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
55                 return helpString;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "GetSharedOTUCommand", "getHelpString");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 string GetSharedOTUCommand::getOutputPattern(string type) {
64     try {
65         string pattern = "";
66
67         if (type == "fasta")            {   pattern =  "[filename],[distance],[group],shared.fasta";   }
68         else if (type == "accnos")      {   pattern =  "[filename],[distance],[group],accnos";         }
69         else if (type == "sharedseqs")  {   pattern =  "[filename],[distance],[group],shared.seqs";    }
70         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
71         
72         return pattern;
73     }
74     catch(exception& e) {
75         m->errorOut(e, "GetSharedOTUCommand", "getOutputPattern");
76         exit(1);
77     }
78 }
79 //**********************************************************************************************************************
80 GetSharedOTUCommand::GetSharedOTUCommand(){     
81         try {
82                 abort = true; calledHelp = true;
83                 setParameters();
84                 vector<string> tempOutNames;
85                 outputTypes["fasta"] = tempOutNames;
86                 outputTypes["accnos"] = tempOutNames;
87                 outputTypes["sharedseqs"] = tempOutNames;
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "GetSharedOTUCommand", "GetSharedOTUCommand");
91                 exit(1);
92         }
93 }
94 //**********************************************************************************************************************
95 GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
96         try {
97         
98                 abort = false; calledHelp = false;   
99                 unique = true;
100                 allLines = 1;
101                 
102                 //allow user to run help
103                 if(option == "help") { help(); abort = true; calledHelp = true; }
104                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
105                 
106                 else {
107                         vector<string> myArray = setParameters();
108                         
109                         OptionParser parser(option);
110                         map<string,string> parameters = parser.getParameters();
111                         
112                         ValidParameters validParameter;
113                         map<string,string>::iterator it;
114                         
115                         //check to make sure all parameters are valid for command
116                         for (it = parameters.begin(); it != parameters.end(); it++) { 
117                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
118                         }
119                         
120                         //initialize outputTypes
121                         vector<string> tempOutNames;
122                         outputTypes["fasta"] = tempOutNames;
123                         outputTypes["accnos"] = tempOutNames;
124                         outputTypes["sharedseqs"] = tempOutNames;
125                         
126                         //if the user changes the output directory command factory will send this info to us in the output parameter 
127                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
128                         
129                         //if the user changes the input directory command factory will send this info to us in the output parameter 
130                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
131                         if (inputDir == "not found"){   inputDir = "";          }
132                         else {
133                                 string path;
134                                 it = parameters.find("fasta");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
140                                 }
141                                 
142                                 it = parameters.find("list");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
148                                 }
149                 
150                 it = parameters.find("shared");
151                                 //user has given a template file
152                                 if(it != parameters.end()){
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
156                                 }
157                                 
158                                 it = parameters.find("group");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
164                                 }
165                         }
166
167                         
168                         //check for required parameters
169                         listfile = validParameter.validFile(parameters, "list", true);
170                         if (listfile == "not open") { abort = true; }
171                         else if (listfile == "not found") { listfile = "";                      }
172             else {  format = "list";    m->setListFile(listfile); }
173                         
174                         groupfile = validParameter.validFile(parameters, "group", true);
175                         if (groupfile == "not open") { abort = true; }  
176                         else if (groupfile == "not found") { groupfile = ""; }
177                         else { m->setGroupFile(groupfile); }
178             
179             sharedfile = validParameter.validFile(parameters, "shared", true);
180                         if (sharedfile == "not open") { abort = true; }
181                         else if (sharedfile == "not found") { sharedfile = ""; }
182                         else { m->setSharedFile(sharedfile); }
183             
184             fastafile = validParameter.validFile(parameters, "fasta", true);
185                         if (fastafile == "not open") { abort = true; }
186                         else if (fastafile == "not found") {  fastafile = "";  }
187                         else { m->setFastaFile(fastafile); }
188
189             
190             if ((sharedfile == "") && (listfile == "")) { //look for currents
191                 //is there are current file available for either of these?
192                                 //give priority to shared, then list
193                                 sharedfile = m->getSharedFile();
194                                 if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
195                                 else {
196                                         listfile = m->getListFile();
197                                         if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
198                                         else {
199                                                 m->mothurOut("No valid current files. You must provide a shared or list file."); m->mothurOutEndLine();
200                                                 abort = true;
201                                         }
202                                 }
203             }else if ((sharedfile != "") && (listfile != "")) {
204                 m->mothurOut("You may enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true;
205             }
206                         
207             if (listfile != "") {
208                                 if (groupfile == ""){
209                                         groupfile = m->getGroupFile();
210                                         if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
211                                         else { m->mothurOut("You need to provide a group file if you are going to use the list format."); m->mothurOutEndLine(); abort = true;
212                                         }
213                                 }
214                         }
215
216                         if ((sharedfile != "") && (fastafile != "")) { m->mothurOut("You cannot use the fasta file with the shared file."); m->mothurOutEndLine(); abort = true; }
217             
218                         //check for optional parameter and set defaults
219                         // ...at some point should added some additional type checking...
220                         label = validParameter.validFile(parameters, "label", false);                   
221                         if (label == "not found") { label = ""; }
222                         else { 
223                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
224                                 else { allLines = 1;  }
225                         }
226                         
227                         output = validParameter.validFile(parameters, "output", false);                 
228                         if (output == "not found") { output = ""; }
229                         else if (output == "default") { output = ""; }
230                         
231                         groups = validParameter.validFile(parameters, "uniquegroups", false);                   
232                         if (groups == "not found") { groups = ""; }
233                         else { 
234                                 userGroups = "unique." + groups;
235                                 m->splitAtDash(groups, Groups);
236                 if (Groups.size() > 4) {  userGroups = "unique.selected_groups"; } //if too many groups then the filename becomes too big.
237                         }
238                         
239                         groups = validParameter.validFile(parameters, "sharedgroups", false);                   
240                         if (groups == "not found") { groups = "";  }
241                         else { 
242                                 userGroups = groups;
243                                 m->splitAtDash(groups, Groups);
244                 if (Groups.size() > 4) {  userGroups = "selected_groups"; } //if too many groups then the filename becomes too big.
245                                 unique = false;
246                         }
247                         
248         }
249
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "GetSharedOTUCommand", "GetSharedOTUCommand");
253                 exit(1);
254         }
255 }
256 //**********************************************************************************************************************
257
258 int GetSharedOTUCommand::execute(){
259         try {
260                 
261                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
262                 
263         if ( sharedfile != "") { runShared(); }
264         else {
265             m->setGroups(Groups);
266             groupMap = new GroupMap(groupfile);
267             int error = groupMap->readMap();
268             if (error == 1) { delete groupMap; return 0; }
269             
270             if (m->control_pressed) { delete groupMap; return 0; }
271             
272             if (Groups.size() == 0) {
273                 Groups = groupMap->getNamesOfGroups();
274                 
275                 //make string for outputfile name
276                 userGroups = "unique.";
277                 for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
278                 userGroups = userGroups.substr(0, userGroups.length()-1);
279                 if (Groups.size() > 4) {  userGroups = "unique.selected_groups"; } //if too many groups then the filename becomes too big.
280             }else{
281                 //sanity check for group names
282                 SharedUtil util;
283                 vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
284                 util.setGroups(Groups, namesOfGroups);
285                 groupMap->setNamesOfGroups(namesOfGroups);
286             }
287         
288             //put groups in map to find easier
289             for(int i = 0; i < Groups.size(); i++) {
290                 groupFinder[Groups[i]] = Groups[i];
291             }
292             
293             if (fastafile != "") {
294                 ifstream inFasta;
295                 m->openInputFile(fastafile, inFasta);
296                 
297                 while(!inFasta.eof()) {
298                     if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
299                     
300                     Sequence seq(inFasta); m->gobble(inFasta);
301                     if (seq.getName() != "") {  seqs.push_back(seq);   }
302                 }
303                 inFasta.close();
304             }
305             
306             ListVector* lastlist = NULL;
307             string lastLabel = "";
308             
309             //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
310             set<string> processedLabels;
311             set<string> userLabels = labels;
312             
313             ifstream in;
314             m->openInputFile(listfile, in);
315             
316             //as long as you are not at the end of the file or done wih the lines you want
317             while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
318                 
319                 if (m->control_pressed) { 
320                     if (lastlist != NULL) {             delete lastlist;        }
321                     for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); }  outputTypes.clear();
322                     delete groupMap; return 0;
323                 }
324                 
325                 list = new ListVector(in);
326                 
327                 if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
328                     m->mothurOut(list->getLabel()); 
329                     process(list);
330                     
331                     processedLabels.insert(list->getLabel());
332                     userLabels.erase(list->getLabel());
333                 }
334                 
335                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
336                         string saveLabel = list->getLabel();
337                         
338                         m->mothurOut(lastlist->getLabel()); 
339                         process(lastlist);
340                         
341                         processedLabels.insert(lastlist->getLabel());
342                         userLabels.erase(lastlist->getLabel());
343                         
344                         //restore real lastlabel to save below
345                         list->setLabel(saveLabel);
346                 }
347
348                 lastLabel = list->getLabel();
349                 
350                 if (lastlist != NULL) {         delete lastlist;        }
351                 lastlist = list;                        
352             }
353             
354             in.close();
355             
356             //output error messages about any remaining user labels
357             set<string>::iterator it;
358             bool needToRun = false;
359             for (it = userLabels.begin(); it != userLabels.end(); it++) {  
360                 m->mothurOut("Your file does not include the label " + *it); 
361                 if (processedLabels.count(lastLabel) != 1) {
362                     m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
363                     needToRun = true;
364                 }else {
365                     m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
366                 }
367             }
368             
369             //run last label if you need to
370             if (needToRun == true)  {
371                     m->mothurOut(lastlist->getLabel()); 
372                     process(lastlist);
373                         
374                     processedLabels.insert(lastlist->getLabel());
375                     userLabels.erase(lastlist->getLabel());
376             }
377             
378
379             //reset groups parameter
380             m->clearGroups();  
381             
382             if (lastlist != NULL) {             delete lastlist;        }
383             
384             if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
385                 }
386                 //set fasta file as new current fastafile
387                 string current = "";
388                 itTypes = outputTypes.find("fasta");
389                 if (itTypes != outputTypes.end()) {
390                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
391                 }
392                 
393                 if (output == "accnos") {
394                         itTypes = outputTypes.find("accnos");
395                         if (itTypes != outputTypes.end()) {
396                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
397                         }
398                 }
399                 
400                 m->mothurOutEndLine();
401                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
402                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
403                 m->mothurOutEndLine();
404
405
406                 return 0;
407         }
408
409         catch(exception& e) {
410                 m->errorOut(e, "GetSharedOTUCommand", "execute");
411                 exit(1);
412         }
413 }
414 /***********************************************************/
415 int GetSharedOTUCommand::process(ListVector* shared) {
416         try {
417                 
418                 map<string, string> fastaMap;
419                 
420                 ofstream outNames;
421                 string outputFileNames;
422                 
423                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
424         map<string, string> variables;
425         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
426         variables["[distance]"] = shared->getLabel();
427         variables["[group]"] = userGroups;
428                 if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); }
429                 else { outputFileNames = getOutputFileName("accnos", variables); }
430         
431                 m->openOutputFile(outputFileNames, outNames);
432                 
433                 bool wroteSomething = false;
434                 int num = 0;
435                                 
436                 //go through each bin, find out if shared
437         vector<string> binLabels = shared->getLabels();
438                 for (int i = 0; i < shared->getNumBins(); i++) {
439                         if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
440                         
441                         bool uniqueOTU = true;
442                         
443                         map<string, int> atLeastOne;
444                         for (int f = 0; f < Groups.size(); f++) {
445                                 atLeastOne[Groups[f]] = 0;
446                         }
447                         
448                         vector<string> namesOfSeqsInThisBin;
449                         
450                         string names = shared->get(i); 
451             vector<string> binNames;
452             m->splitAtComma(names, binNames);
453                         for(int j = 0; j < binNames.size(); j++) {
454                                 string name = binNames[j];
455                                 
456                                 //find group
457                                 string seqGroup = groupMap->getGroup(name);
458                                 if (output != "accnos") {
459                                         namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + binLabels[i]));
460                                 }else {  namesOfSeqsInThisBin.push_back(name);  }
461                                 
462                                 if (seqGroup == "not found") { m->mothurOut(name + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1);  }
463                                 
464                                 //is this seq in one of hte groups we care about
465                                 it = groupFinder.find(seqGroup);
466                                 if (it == groupFinder.end()) {  uniqueOTU = false;  } //you have a sequence from a group you don't want
467                                 else {  atLeastOne[seqGroup]++;  }
468                         }
469                         
470                         //make sure you have at least one seq from each group you want
471                         bool sharedByAll = true;
472                         map<string, int>::iterator it2;
473                         for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) {
474                                 if (it2->second == 0) {  sharedByAll = false;   }
475                         }
476                         
477                         //if the user wants unique bins and this is unique then print
478                         //or this the user wants shared bins and this bin is shared then print
479                         if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) {
480                                 
481                                 wroteSomething = true;
482                                 num++;
483                                 
484                                 //output list of names 
485                                 for (int j = 0; j < namesOfSeqsInThisBin.size(); j++) {
486                                         outNames << namesOfSeqsInThisBin[j] << endl;
487                                         
488                                         if (fastafile != "") { 
489                                                 if (output != "accnos") {
490                                                         string seqName = namesOfSeqsInThisBin[j].substr(0,namesOfSeqsInThisBin[j].find_last_of('|'));
491                                                         seqName = seqName.substr(0,seqName.find_last_of('|'));
492                                                         fastaMap[seqName] = namesOfSeqsInThisBin[j];  //fastaMap needs to contain just the seq name for output later
493                                                 }else {
494                                                         fastaMap[namesOfSeqsInThisBin[j]] = namesOfSeqsInThisBin[j];
495                                                 }
496                                         }
497                                 }
498                         }
499                 }
500                 
501                 outNames.close();
502                 
503                 if (!wroteSomething) {
504                         m->mothurRemove(outputFileNames);
505                         string outputString = "\t" + toString(num) + " - No otus shared by groups";
506                         
507                         string groupString = "";
508                         for (int h = 0; h < Groups.size(); h++) {
509                                 groupString += "  " + Groups[h];
510                         }
511                         
512                         outputString += groupString + ".";
513                         m->mothurOut(outputString); m->mothurOutEndLine();
514                 }else { 
515                         m->mothurOut("\t" + toString(num)); m->mothurOutEndLine(); 
516                         outputNames.push_back(outputFileNames);
517                         if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); }
518                         else { outputTypes["accnos"].push_back(outputFileNames); }
519                 }
520                 
521                 //if fasta file provided output new fasta file
522                 if ((fastafile != "") && wroteSomething) {
523                         if (outputDir == "") { outputDir += m->hasPath(fastafile); }
524             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
525                         string outputFileFasta = getOutputFileName("fasta", variables);
526                         ofstream outFasta;
527                         m->openOutputFile(outputFileFasta, outFasta);
528                         outputNames.push_back(outputFileFasta); outputTypes["fasta"].push_back(outputFileFasta);
529                         
530                         for (int k = 0; k < seqs.size(); k++) {
531                                 if (m->control_pressed) { outFasta.close(); return 0; }
532                         
533                                 //if this is a sequence we want, output it
534                                 it = fastaMap.find(seqs[k].getName());
535                                 if (it != fastaMap.end()) {
536                                 
537                                         if (output != "accnos") {
538                                                 outFasta << ">" << it->second << endl;
539                                         }else {
540                                                 outFasta << ">" << it->first << endl;
541                                         }
542                                         
543                                         outFasta << seqs[k].getAligned() << endl;
544                                 }
545                         }
546                         
547                         outFasta.close();
548                 }
549                 
550                 return 0;
551                 
552         }
553         catch(exception& e) {
554                 m->errorOut(e, "GetSharedOTUCommand", "process");
555                 exit(1);
556         }
557 }
558 /***********************************************************/
559 int GetSharedOTUCommand::runShared() {
560         try {
561         InputData input(sharedfile, "sharedfile");
562                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
563                 string lastLabel = lookup[0]->getLabel();
564         
565         if (Groups.size() == 0) {
566             Groups = m->getGroups();
567             
568             //make string for outputfile name
569             userGroups = "unique.";
570             for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
571             userGroups = userGroups.substr(0, userGroups.length()-1);
572             if (Groups.size() > 4) {  userGroups = "unique.selected_groups"; } //if too many groups then the filename becomes too big.
573         }else {
574             //sanity check for group names
575             SharedUtil util;
576             vector<string> allGroups = m->getAllGroups();
577             util.setGroups(Groups, allGroups);
578         }
579         
580         //put groups in map to find easier
581         for(int i = 0; i < Groups.size(); i++) {
582             groupFinder[Groups[i]] = Groups[i];
583         }
584
585                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
586                 set<string> processedLabels;
587                 set<string> userLabels = labels;
588         
589                 //as long as you are not at the end of the file or done wih the lines you want
590                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
591                         if (m->control_pressed) {
592                 outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); }
593                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
594                         }
595             
596             
597                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
598                                 m->mothurOut(lookup[0]->getLabel()); 
599                                 process(lookup);
600                                 
601                                 processedLabels.insert(lookup[0]->getLabel());
602                                 userLabels.erase(lookup[0]->getLabel());
603                         }
604                         
605                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
606                 string saveLabel = lookup[0]->getLabel();
607                 
608                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
609                 lookup = input.getSharedRAbundVectors(lastLabel);
610                 
611                 m->mothurOut(lookup[0]->getLabel()); 
612                 process(lookup);
613                 
614                 processedLabels.insert(lookup[0]->getLabel());
615                 userLabels.erase(lookup[0]->getLabel());
616                 
617                 //restore real lastlabel to save below
618                 lookup[0]->setLabel(saveLabel);
619                         }
620                         
621                         lastLabel = lookup[0]->getLabel();
622             
623                         //get next line to process
624                         //prevent memory leak
625                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
626                         lookup = input.getSharedRAbundVectors();
627                 }
628                 
629                 if (m->control_pressed) {
630             outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
631             for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
632         }
633         
634                 //output error messages about any remaining user labels
635                 set<string>::iterator it;
636                 bool needToRun = false;
637                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
638                         m->mothurOut("Your file does not include the label " + *it);
639                         if (processedLabels.count(lastLabel) != 1) {
640                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
641                                 needToRun = true;
642                         }else {
643                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
644                         }
645                 }
646                 
647                 //run last label if you need to
648                 if (needToRun == true)  {
649             for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) { delete lookup[i];       } }
650             lookup = input.getSharedRAbundVectors(lastLabel);
651             
652             m->mothurOut(lookup[0]->getLabel()); 
653             process(lookup);
654             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
655                 }
656         
657                 //reset groups parameter
658                 m->clearGroups();  
659                 
660                 return 0;
661
662     }
663         catch(exception& e) {
664                 m->errorOut(e, "GetSharedOTUCommand", "runShared");
665                 exit(1);
666         }
667 }
668 /***********************************************************/
669 int GetSharedOTUCommand::process(vector<SharedRAbundVector*>& lookup) {
670         try {
671                 
672                 string outputFileNames;
673                 if (outputDir == "") { outputDir += m->hasPath(sharedfile); }
674         map<string, string> variables;
675         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
676         variables["[distance]"] = lookup[0]->getLabel();
677         variables["[group]"] = userGroups;
678                 if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); }
679                 else { outputFileNames = getOutputFileName("accnos", variables); }
680         
681         ofstream outNames;
682                 m->openOutputFile(outputFileNames, outNames);
683                 
684                 bool wroteSomething = false;
685                 int num = 0;
686         
687                 //go through each bin, find out if shared
688                 for (int i = 0; i < lookup[0]->getNumBins(); i++) {
689                         if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
690                         
691                         bool uniqueOTU = true;
692                         map<string, int> atLeastOne;
693                         for (int f = 0; f < Groups.size(); f++) {  atLeastOne[Groups[f]] = 0;  }
694                         
695                         set<string> namesOfGroupsInThisBin;
696                         
697                         for(int j = 0; j < lookup.size(); j++) {
698                                 string seqGroup = lookup[j]->getGroup();
699                 string name = m->currentSharedBinLabels[i];
700                                 
701                 if (lookup[j]->getAbundance(i) != 0) {
702                     if (output != "accnos") {
703                         namesOfGroupsInThisBin.insert(name + "|" + seqGroup + "|" + toString(lookup[j]->getAbundance(i)));
704                     }else {  namesOfGroupsInThisBin.insert(name);       }
705                     
706                     //is this seq in one of the groups we care about
707                     it = groupFinder.find(seqGroup);
708                     if (it == groupFinder.end()) {  uniqueOTU = false;  } //you have sequences from a group you don't want
709                     else {  atLeastOne[seqGroup]++;  }
710                                 }
711                         }
712                         
713                         //make sure you have at least one seq from each group you want
714                         bool sharedByAll = true;
715                         map<string, int>::iterator it2;
716                         for (it2 = atLeastOne.begin(); it2 != atLeastOne.end(); it2++) {
717                                 if (it2->second == 0) {  sharedByAll = false;   }
718                         }
719                         
720                         //if the user wants unique bins and this is unique then print
721                         //or this the user wants shared bins and this bin is shared then print
722                         if ((unique && uniqueOTU && sharedByAll) || (!unique && sharedByAll)) {
723                                 
724                                 wroteSomething = true;
725                                 num++;
726                                 
727                                 //output list of names
728                                 for (set<string>::iterator itNames = namesOfGroupsInThisBin.begin(); itNames != namesOfGroupsInThisBin.end(); itNames++) {
729                                         outNames << (*itNames) << endl;
730                                 }
731                         }
732                 }
733                 outNames.close();
734                 
735                 if (!wroteSomething) {
736                         m->mothurRemove(outputFileNames);
737                         string outputString = "\t" + toString(num) + " - No otus shared by groups";
738                         
739                         string groupString = "";
740                         for (int h = 0; h < Groups.size(); h++) {
741                                 groupString += "  " + Groups[h];
742                         }
743                         
744                         outputString += groupString + ".";
745                         m->mothurOut(outputString); m->mothurOutEndLine();
746                 }else {
747                         m->mothurOut("\t" + toString(num)); m->mothurOutEndLine();
748                         outputNames.push_back(outputFileNames);
749                         if (output != "accnos") { outputTypes["sharedseqs"].push_back(outputFileNames); }
750                         else { outputTypes["accnos"].push_back(outputFileNames); }
751                 }
752                 
753                 return 0;
754         }
755         catch(exception& e) {
756                 m->errorOut(e, "GetSharedOTUCommand", "process");
757                 exit(1);
758         }
759 }
760
761 //**********************************************************************************************************************