]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::getValidParameters(){ 
14         try {
15                 string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SplitAbundCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SplitAbundCommand::SplitAbundCommand(){ 
26         try {
27                 abort = true; calledHelp = true; 
28                 vector<string> tempOutNames;
29                 outputTypes["list"] = tempOutNames;
30                 outputTypes["name"] = tempOutNames;
31                 outputTypes["accnos"] = tempOutNames;
32                 outputTypes["group"] = tempOutNames;
33                 outputTypes["fasta"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SplitAbundCommand::getRequiredParameters(){      
42         try {
43                 string Array[] =  {"fasta","list","name","or"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SplitAbundCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SplitAbundCommand::getRequiredFiles(){   
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SplitAbundCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 SplitAbundCommand::SplitAbundCommand(string option)  {
65         try {
66                 abort = false; calledHelp = false;   
67                 allLines = 1;
68                         
69                 //allow user to run help
70                 if(option == "help") { help(); abort = true; calledHelp = true; }
71                 
72                 else {
73                         //valid paramters for this command
74                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
75                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string, string> parameters = parser.getParameters();
79                         
80                         ValidParameters validParameter;
81                         map<string, string>::iterator it;
82                 
83                         //check to make sure all parameters are valid for command
84                         for (it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         
88                         //initialize outputTypes
89                         vector<string> tempOutNames;
90                         outputTypes["list"] = tempOutNames;
91                         outputTypes["name"] = tempOutNames;
92                         outputTypes["accnos"] = tempOutNames;
93                         outputTypes["group"] = tempOutNames;
94                         outputTypes["fasta"] = tempOutNames;                    
95                                                                                                 
96                         //if the user changes the input directory command factory will send this info to us in the output parameter 
97                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
98                         if (inputDir == "not found"){   inputDir = "";          }
99                         else {
100                                 string path;
101                                 it = parameters.find("list");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
107                                 }
108                                 
109                                 it = parameters.find("group");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
115                                 }
116                                 
117                                 it = parameters.find("fasta");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
123                                 }
124                                 
125                                 it = parameters.find("name");
126                                 //user has given a template file
127                                 if(it != parameters.end()){ 
128                                         path = m->hasPath(it->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
131                                 }
132
133                         }
134
135                         
136                         //if the user changes the output directory command factory will send this info to us in the output parameter 
137                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
138
139                         //check for required parameters
140                         listfile = validParameter.validFile(parameters, "list", true);
141                         if (listfile == "not open") { abort = true; }
142                         else if (listfile == "not found") { listfile = ""; }
143                         else{ inputFile = listfile; }   
144                         
145                         namefile = validParameter.validFile(parameters, "name", true);
146                         if (namefile == "not open") { abort = true; }
147                         else if (namefile == "not found") { namefile = ""; }    
148                         else{ inputFile = namefile; }   
149                 
150                         fastafile = validParameter.validFile(parameters, "fasta", true);
151                         if (fastafile == "not open") { abort = true; }
152                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
153                         
154                         groupfile = validParameter.validFile(parameters, "group", true);
155                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
156                         else if (groupfile == "not found") { groupfile = ""; }
157                         else {  
158                                 groupMap = new GroupMap(groupfile);
159                                 
160                                 int error = groupMap->readMap();
161                                 if (error == 1) { abort = true; }
162         
163                         }
164                         
165                         groups = validParameter.validFile(parameters, "groups", false);         
166                         if (groups == "not found") { groups = ""; }
167                         else if (groups == "all") { 
168                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
169                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
170                         }else { 
171                                 m->splitAtDash(groups, Groups);
172                         }
173                         
174                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
175                         
176                         //do you have all files needed
177                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
178                         
179                         //check for optional parameter and set defaults
180                         // ...at some point should added some additional type checking...
181                         label = validParameter.validFile(parameters, "label", false);                   
182                         if (label == "not found") { label = "";  allLines = 1; }
183                         else { 
184                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
185                                 else { allLines = 1;  }
186                         }
187                         
188                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
189                         accnos = m->isTrue(temp); 
190                         
191                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
192                         convert(temp, cutoff); 
193
194                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
195
196                 }
197
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
201                 exit(1);
202         }
203 }
204 //**********************************************************************************************************************
205 void SplitAbundCommand::help(){
206         try {
207                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
208                 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
209                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
210                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
211                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
212                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
213                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
214                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
215                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
216                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
217                 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
218                 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
219                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
220
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "SplitAbundCommand", "help");
224                 exit(1);
225         }
226 }
227 //**********************************************************************************************************************
228 SplitAbundCommand::~SplitAbundCommand(){ 
229         if (groupfile != "") {  delete groupMap;  } 
230 }
231 //**********************************************************************************************************************
232 int SplitAbundCommand::execute(){
233         try {
234         
235                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
236                 
237                 if (listfile != "") { //you are using a listfile to determine abundance
238                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
239                         
240                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
241                         set<string> processedLabels;
242                         set<string> userLabels = labels;        
243                         
244                         input = new InputData(listfile, "list");
245                         list = input->getListVector();
246                         string lastLabel = list->getLabel();
247                         
248                         //do you have a namefile or do we need to similate one?
249                         if (namefile != "") {  readNamesFile();         }
250                         else                            { createNameMap(list);  }
251                         
252                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
253                         
254                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
255                         
256                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
257                                 
258                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
259                                                 
260                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
261                                                 splitList(list);
262                                                                                         
263                                                 processedLabels.insert(list->getLabel());
264                                                 userLabels.erase(list->getLabel());
265                                 }
266                                 
267                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
268                                                 string saveLabel = list->getLabel();
269                                                 
270                                                 delete list;
271                                                 list = input->getListVector(lastLabel); //get new list vector to process
272                                                 
273                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
274                                                 splitList(list);
275                                                 
276                                                 processedLabels.insert(list->getLabel());
277                                                 userLabels.erase(list->getLabel());
278                                                 
279                                                 //restore real lastlabel to save below
280                                                 list->setLabel(saveLabel);
281                                 }
282                                 
283                         
284                                 lastLabel = list->getLabel();
285                                         
286                                 delete list;
287                                 list = input->getListVector(); //get new list vector to process
288                         }
289                         
290                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
291                         
292                         //output error messages about any remaining user labels
293                         set<string>::iterator it;
294                         bool needToRun = false;
295                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
296                                 m->mothurOut("Your file does not include the label " + *it); 
297                                 if (processedLabels.count(lastLabel) != 1) {
298                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
299                                         needToRun = true;
300                                 }else {
301                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
302                                 }
303
304                         }
305                         
306                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
307                         
308                         //run last label if you need to
309                         if (needToRun == true)  {
310                                 if (list != NULL) {     delete list;    }
311                                 list = input->getListVector(lastLabel); //get new list vector to process
312                                 
313                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
314                                 splitList(list);                
315                                 
316                                 delete list;
317                         }
318                         
319                         delete input;
320                         
321                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
322                                                                         
323                 }else { //you are using the namefile to determine abundance
324                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
325                         
326                         splitNames(); 
327                         writeNames();
328                         
329                         string tag = "";
330                         if (groupfile != "")                            {  parseGroup(tag);             }
331                         if (accnos)                                                     {  writeAccnos(tag);    }
332                         if (fastafile != "")                            {  parseFasta(tag);             }
333                 }
334                 
335                 //set fasta file as new current fastafile
336                 string current = "";
337                 itTypes = outputTypes.find("fasta");
338                 if (itTypes != outputTypes.end()) {
339                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
340                 }
341                 
342                 itTypes = outputTypes.find("name");
343                 if (itTypes != outputTypes.end()) {
344                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
345                 }
346                 
347                 itTypes = outputTypes.find("group");
348                 if (itTypes != outputTypes.end()) {
349                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
350                 }
351                 
352                 itTypes = outputTypes.find("list");
353                 if (itTypes != outputTypes.end()) {
354                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
355                 }
356                 
357                 itTypes = outputTypes.find("accnos");
358                 if (itTypes != outputTypes.end()) {
359                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
360                 }
361                 
362                 m->mothurOutEndLine();
363                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
364                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
365                 m->mothurOutEndLine();
366                 
367                 return 0;
368         }
369         catch(exception& e) {
370                 m->errorOut(e, "SplitAbundCommand", "execute");
371                 exit(1);
372         }
373 }
374 /**********************************************************************************************************************/
375 int SplitAbundCommand::splitList(ListVector* thisList) {
376         try {
377                 rareNames.clear();
378                 abundNames.clear();
379                 
380                 //get rareNames and abundNames
381                 for (int i = 0; i < thisList->getNumBins(); i++) {
382                         if (m->control_pressed) { return 0; }
383                         
384                         string bin = thisList->get(i);
385                                                 
386                         vector<string> names;
387                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
388                         int size = names.size();
389                                 
390                         if (size <= cutoff) {
391                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
392                         }else{
393                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
394                         }
395                 }//end for
396
397                 
398                 string tag = thisList->getLabel() + ".";
399                 
400                 writeList(thisList, tag);
401                 
402                 if (groupfile != "")                            {  parseGroup(tag);             }
403                 if (accnos)                                                     {  writeAccnos(tag);    }
404                 if (fastafile != "")                            {  parseFasta(tag);             }
405                 
406                 return 0;
407
408         }
409         catch(exception& e) {
410                 m->errorOut(e, "SplitAbundCommand", "splitList");
411                 exit(1);
412         }
413 }
414 /**********************************************************************************************************************/
415 int SplitAbundCommand::writeList(ListVector* thisList, string tag) { 
416         try {
417                 
418                 map<string, ofstream*> filehandles;
419                 
420                 if (Groups.size() == 0) {
421                         SAbundVector* sabund = new SAbundVector();
422                         *sabund = thisList->getSAbundVector();
423                 
424                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
425                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
426                         int numRareBins = 0;
427                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
428                                 if (i > cutoff) { break; }
429                                 numRareBins += sabund->get(i);
430                         }
431                         int numAbundBins = thisList->getNumBins() - numRareBins;
432                         delete sabund;
433
434                         ofstream aout;
435                         ofstream rout;
436                         
437                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
438                         m->openOutputFile(rare, rout);
439                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
440                         
441                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
442                         m->openOutputFile(abund, aout);
443                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
444
445                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
446                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
447
448                         for (int i = 0; i < thisList->getNumBins(); i++) {
449                                 if (m->control_pressed) { break; }
450                         
451                                 string bin = list->get(i); 
452                         
453                                 int size = m->getNumNames(bin);
454                         
455                                 if (size <= cutoff) {  rout << bin << '\t';  }
456                                 else                            {  aout << bin << '\t'; }
457                         }
458                         
459                         if (rareNames.size() != 0)      { rout << endl; }
460                         if (abundNames.size() != 0) { aout << endl; }
461                         
462                         rout.close();
463                         aout.close();
464                         
465                 }else{ //parse names by abundance and group
466                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
467                         ofstream* temp;
468                         ofstream* temp2;
469                         //map<string, bool> wroteFile;
470                         map<string, ofstream*> filehandles;
471                         map<string, ofstream*>::iterator it3;
472
473                         for (int i=0; i<Groups.size(); i++) {
474                                 temp = new ofstream;
475                                 filehandles[Groups[i]+".rare"] = temp;
476                                 temp2 = new ofstream;
477                                 filehandles[Groups[i]+".abund"] = temp2;
478                                 
479                                 m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
480                                 m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
481                                 outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
482                                 outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
483                         }
484                         
485                         map<string, string> groupVector;
486                         map<string, string>::iterator itGroup;
487                         map<string, int> groupNumBins;
488                 
489                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
490                                 groupNumBins[it3->first] = 0;
491                                 groupVector[it3->first] = "";
492                         }
493                 
494                         for (int i = 0; i < thisList->getNumBins(); i++) {
495                                 if (m->control_pressed) { break; }
496                         
497                                 map<string, string> groupBins;
498                                 string bin = list->get(i); 
499                         
500                                 vector<string> names;
501                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
502                         
503                                 //parse bin into list of sequences in each group
504                                 for (int j = 0; j < names.size(); j++) {
505                                         string rareAbund;
506                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
507                                                 rareAbund = ".rare";
508                                         }else{ //you are a abund name
509                                                 rareAbund = ".abund";
510                                         }
511                                         
512                                         string group = groupMap->getGroup(names[j]);
513                                 
514                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
515                                                 itGroup = groupBins.find(group+rareAbund);
516                                                 if(itGroup == groupBins.end()) {
517                                                         groupBins[group+rareAbund] = names[j];  //add first name
518                                                         groupNumBins[group+rareAbund]++;
519                                                 }else{ //add another name
520                                                         groupBins[group+rareAbund] +=  "," + names[j];
521                                                 }
522                                         }else if(group == "not found") {
523                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
524                                         }
525                                 }
526                         
527                         
528                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
529                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
530                                 }
531                         }
532                         
533                         //end list vector
534                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
535                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
536                                 (*(filehandles[it3->first])).close();
537                                 delete it3->second;
538                         }
539                 }
540                 
541                 return 0;
542
543         }
544         catch(exception& e) {
545                 m->errorOut(e, "SplitAbundCommand", "writeList");
546                 exit(1);
547         }
548 }
549 /**********************************************************************************************************************/
550 int SplitAbundCommand::splitNames() { //namefile
551         try {
552                 
553                 rareNames.clear();
554                 abundNames.clear();     
555                         
556                 //open input file
557                 ifstream in;
558                 m->openInputFile(namefile, in);
559                 
560                 while (!in.eof()) {
561                         if (m->control_pressed) { break; }
562                         
563                         string firstCol, secondCol;
564                         in >> firstCol >> secondCol; m->gobble(in);
565                         
566                         nameMap[firstCol] = secondCol;
567                         
568                         int size = m->getNumNames(secondCol);
569                                 
570                         if (size <= cutoff) {
571                                 rareNames.insert(firstCol); 
572                         }else{
573                                 abundNames.insert(firstCol); 
574                         }
575                 }
576                 in.close();
577                                 
578                 return 0;
579
580         }
581         catch(exception& e) {
582                 m->errorOut(e, "SplitAbundCommand", "splitNames");
583                 exit(1);
584         }
585 }
586 /**********************************************************************************************************************/
587 int SplitAbundCommand::readNamesFile() { 
588         try {
589                 //open input file
590                 ifstream in;
591                 m->openInputFile(namefile, in);
592                 
593                 while (!in.eof()) {
594                         if (m->control_pressed) { break; }
595                         
596                         string firstCol, secondCol;
597                         in >> firstCol >> secondCol; m->gobble(in);
598                         
599                         nameMap[firstCol] = secondCol;
600                 }
601                 in.close();
602                                 
603                 return 0;
604
605         }
606         catch(exception& e) {
607                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
608                 exit(1);
609         }
610 }
611 /**********************************************************************************************************************/
612 int SplitAbundCommand::createNameMap(ListVector* thisList) {
613         try {
614                 
615                 if (thisList != NULL) {
616                         for (int i = 0; i < thisList->getNumBins(); i++) {
617                                 if (m->control_pressed) { return 0; }
618                                 
619                                 string bin = thisList->get(i);
620                                                         
621                                 vector<string> names;
622                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
623                                 
624                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
625                         }//end for
626                 }
627                 
628                 return 0;
629         }
630         catch(exception& e) {
631                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
632                 exit(1);
633         }
634 }
635 /**********************************************************************************************************************/
636 int SplitAbundCommand::writeNames() { //namefile
637         try {
638                 
639                 map<string, ofstream*> filehandles;
640
641                 if (Groups.size() == 0) {
642                         ofstream aout;
643                         ofstream rout;
644                         
645                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
646                         m->openOutputFile(rare, rout);
647                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
648                         
649                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
650                         m->openOutputFile(abund, aout);
651                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
652                         
653                         if (rareNames.size() != 0) {
654                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
655                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
656                                 }
657                         }
658                         rout.close();
659                         
660                         if (abundNames.size() != 0) {
661                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
662                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
663                                 }
664                         }
665                         aout.close();
666                         
667                 }else{ //parse names by abundance and group
668                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
669                         ofstream* temp;
670                         ofstream* temp2;
671                         map<string, ofstream*> filehandles;
672                         map<string, ofstream*>::iterator it3;
673
674                         for (int i=0; i<Groups.size(); i++) {
675                                 temp = new ofstream;
676                                 filehandles[Groups[i]+".rare"] = temp;
677                                 temp2 = new ofstream;
678                                 filehandles[Groups[i]+".abund"] = temp2;
679                                 
680                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
681                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
682                         }
683                         
684                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
685                                 vector<string> names;
686                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
687                                 
688                                 string rareAbund;
689                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
690                                                 rareAbund = ".rare";
691                                 }else{ //you are a abund name
692                                                 rareAbund = ".abund";
693                                 }
694                                 
695                                 map<string, string> outputStrings;
696                                 map<string, string>::iterator itout;
697                                 for (int i = 0; i < names.size(); i++) {
698                                         
699                                         string group = groupMap->getGroup(names[i]);
700                                         
701                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
702                                                 itout = outputStrings.find(group+rareAbund);
703                                                 if (itout == outputStrings.end()) {  
704                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
705                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
706                                         }else if(group == "not found") {
707                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
708                                         }
709                                 }
710                                 
711                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
712                         }
713                         
714                         
715                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
716                                 (*(filehandles[it3->first])).close();
717                                 outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
718                                 delete it3->second;
719                         }
720                 }
721                                 
722                 return 0;
723
724         }
725         catch(exception& e) {
726                 m->errorOut(e, "SplitAbundCommand", "writeNames");
727                 exit(1);
728         }
729 }
730 /**********************************************************************************************************************/
731 //just write the unique names - if a namesfile is given
732 int SplitAbundCommand::writeAccnos(string tag) { 
733         try {
734                 
735                 map<string, ofstream*> filehandles;
736                 
737                 if (Groups.size() == 0) {
738                         ofstream aout;
739                         ofstream rout;
740                         
741                         
742                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
743                         m->openOutputFile(rare, rout);
744                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
745                         
746                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
747                                 rout << (*itRare) << endl;
748                         }
749                         rout.close();
750                 
751                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
752                         m->openOutputFile(abund, aout);
753                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
754                         
755                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
756                                 aout << (*itAbund) << endl;
757                         }
758                         aout.close();
759                         
760                 }else{ //parse names by abundance and group
761                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
762                         ofstream* temp;
763                         ofstream* temp2;
764                         map<string, ofstream*> filehandles;
765                         map<string, ofstream*>::iterator it3;
766                         
767                         for (int i=0; i<Groups.size(); i++) {
768                                 temp = new ofstream;
769                                 filehandles[Groups[i]+".rare"] = temp;
770                                 temp2 = new ofstream;
771                                 filehandles[Groups[i]+".abund"] = temp2;
772                                 
773                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
774                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
775                         }
776                         
777                         //write rare
778                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
779                                         string group = groupMap->getGroup(*itRare);
780                                         
781                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
782                                                 *(filehandles[group+".rare"]) << *itRare << endl;
783                                         }
784                         }
785                                 
786                         //write abund   
787                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
788                                         string group = groupMap->getGroup(*itAbund);
789                                         
790                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
791                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
792                                         }
793                         }
794                         
795                         //close files
796                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
797                                 (*(filehandles[it3->first])).close();
798                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
799                                 delete it3->second;
800                         }
801                 }
802                                 
803                 return 0;
804
805         }
806         catch(exception& e) {
807                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
808                 exit(1);
809         }
810 }
811 /**********************************************************************************************************************/
812 int SplitAbundCommand::parseGroup(string tag) { //namefile
813         try {
814                 
815                 map<string, ofstream*> filehandles;
816         
817                 if (Groups.size() == 0) {
818                         ofstream aout;
819                         ofstream rout;
820                         
821                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
822                         m->openOutputFile(rare, rout);
823                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
824                 
825                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
826                         m->openOutputFile(abund, aout);
827                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
828                         
829                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
830                                 vector<string> names;
831                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
832                                 
833                                 for (int i = 0; i < names.size(); i++) {
834                                 
835                                         string group = groupMap->getGroup(names[i]);
836                                 
837                                         if (group == "not found") { 
838                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
839                                         }else {
840                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
841                                                         rout << names[i] << '\t' << group << endl;
842                                                 }else{ //you are a abund name
843                                                         aout << names[i] << '\t' << group << endl;
844                                                 }
845                                         }
846                                 }
847                         }
848                         
849                         rout.close(); 
850                         aout.close(); 
851
852                 }else{ //parse names by abundance and group
853                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
854                         ofstream* temp;
855                         ofstream* temp2;
856                         map<string, ofstream*> filehandles;
857                         map<string, ofstream*>::iterator it3;
858
859                         for (int i=0; i<Groups.size(); i++) {
860                                 temp = new ofstream;
861                                 filehandles[Groups[i]+".rare"] = temp;
862                                 temp2 = new ofstream;
863                                 filehandles[Groups[i]+".abund"] = temp2;
864                                 
865                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
866                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
867                         }
868                         
869                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
870                                 vector<string> names;
871                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
872                                 
873                                 string rareAbund;
874                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
875                                         rareAbund = ".rare";
876                                 }else{ //you are a abund name
877                                         rareAbund = ".abund";
878                                 }
879                                 
880                                 for (int i = 0; i < names.size(); i++) {
881                                 
882                                         string group = groupMap->getGroup(names[i]);
883                                                                         
884                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
885                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
886                                         }
887                                 }
888                         }
889                         
890                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
891                                 (*(filehandles[it3->first])).close();
892                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
893                                 delete it3->second;
894                         }
895                 }
896                                 
897                 return 0;
898
899         }
900         catch(exception& e) {
901                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
902                 exit(1);
903         }
904 }
905 /**********************************************************************************************************************/
906 int SplitAbundCommand::parseFasta(string tag) { //namefile
907         try {
908                 
909                 map<string, ofstream*> filehandles;
910                 
911                 if (Groups.size() == 0) {
912                         ofstream aout;
913                         ofstream rout;
914                         
915                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
916                         m->openOutputFile(rare, rout);
917                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
918                 
919                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
920                         m->openOutputFile(abund, aout);
921                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
922                 
923                         //open input file
924                         ifstream in;
925                         m->openInputFile(fastafile, in);
926         
927                         while (!in.eof()) {
928                                 if (m->control_pressed) { break; }
929                 
930                                 Sequence seq(in); m->gobble(in);
931                                 
932                                 if (seq.getName() != "") { 
933                                         
934                                         map<string, string>::iterator itNames;
935                                         
936                                         itNames = nameMap.find(seq.getName());
937                                         
938                                         if (itNames == nameMap.end()) {
939                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
940                                         }else{
941                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
942                                                         seq.printSequence(rout);
943                                                 }else{ //you are a abund name
944                                                         seq.printSequence(aout);
945                                                 }
946                                         }
947                                 }
948                         }
949                         in.close();
950                         rout.close(); 
951                         aout.close(); 
952
953                 }else{ //parse names by abundance and group
954                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
955                         ofstream* temp;
956                         ofstream* temp2;
957                         map<string, ofstream*> filehandles;
958                         map<string, ofstream*>::iterator it3;
959
960                         for (int i=0; i<Groups.size(); i++) {
961                                 temp = new ofstream;
962                                 filehandles[Groups[i]+".rare"] = temp;
963                                 temp2 = new ofstream;
964                                 filehandles[Groups[i]+".abund"] = temp2;
965                                 
966                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
967                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
968                         }
969                         
970                         //open input file
971                         ifstream in;
972                         m->openInputFile(fastafile, in);
973         
974                         while (!in.eof()) {
975                                 if (m->control_pressed) { break; }
976                 
977                                 Sequence seq(in); m->gobble(in);
978                                 
979                                 if (seq.getName() != "") { 
980                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
981                                         
982                                         if (itNames == nameMap.end()) {
983                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
984                                         }else{
985                                                 vector<string> names;
986                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
987                                 
988                                                 string rareAbund;
989                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
990                                                         rareAbund = ".rare";
991                                                 }else{ //you are a abund name
992                                                         rareAbund = ".abund";
993                                                 }
994                                 
995                                                 for (int i = 0; i < names.size(); i++) {
996                                 
997                                                         string group = groupMap->getGroup(seq.getName());
998                                         
999                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1000                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
1001                                                         }else if(group == "not found") {
1002                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1003                                                         }
1004                                                 }
1005                                         }
1006                                 }
1007                         }
1008                         in.close();
1009                         
1010                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1011                                 (*(filehandles[it3->first])).close();
1012                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
1013                                 delete it3->second;
1014                         }
1015                 }
1016                                 
1017                 return 0;
1018
1019         }
1020         catch(exception& e) {
1021                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1022                 exit(1);
1023         }
1024 }
1025 /**********************************************************************************************************************/
1026