]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option)  {
14         try {
15                 abort = false;
16                 allLines = 1;
17                         
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string, string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                                                 
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("list");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = m->hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
48                                 }
49                                 
50                                 it = parameters.find("group");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = m->hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
56                                 }
57                                 
58                                 it = parameters.find("fasta");
59                                 //user has given a template file
60                                 if(it != parameters.end()){ 
61                                         path = m->hasPath(it->second);
62                                         //if the user has not given a path then, add inputdir. else leave path alone.
63                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
64                                 }
65                                 
66                                 it = parameters.find("name");
67                                 //user has given a template file
68                                 if(it != parameters.end()){ 
69                                         path = m->hasPath(it->second);
70                                         //if the user has not given a path then, add inputdir. else leave path alone.
71                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
72                                 }
73
74                         }
75
76                         
77                         //if the user changes the output directory command factory will send this info to us in the output parameter 
78                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
79
80                         //check for required parameters
81                         listfile = validParameter.validFile(parameters, "list", true);
82                         if (listfile == "not open") { abort = true; }
83                         else if (listfile == "not found") { listfile = ""; }
84                         else{ inputFile = listfile; }   
85                         
86                         namefile = validParameter.validFile(parameters, "name", true);
87                         if (namefile == "not open") { abort = true; }
88                         else if (namefile == "not found") { namefile = ""; }    
89                         else{ inputFile = namefile; }   
90                 
91                         fastafile = validParameter.validFile(parameters, "fasta", true);
92                         if (fastafile == "not open") { abort = true; }
93                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
94                         
95                         groupfile = validParameter.validFile(parameters, "group", true);
96                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
97                         else if (groupfile == "not found") { groupfile = ""; }
98                         else {  
99                                 groupMap = new GroupMap(groupfile);
100                                 
101                                 int error = groupMap->readMap();
102                                 if (error == 1) { abort = true; }
103         
104                         }
105                         
106                         groups = validParameter.validFile(parameters, "groups", false);         
107                         if (groups == "not found") { groups = ""; }
108                         else if (groups == "all") { 
109                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
110                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
111                         }else { 
112                                 m->splitAtDash(groups, Groups);
113                         }
114                         
115                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
116                         
117                         //do you have all files needed
118                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
119                         
120                         //check for optional parameter and set defaults
121                         // ...at some point should added some additional type checking...
122                         label = validParameter.validFile(parameters, "label", false);                   
123                         if (label == "not found") { label = "";  allLines = 1; }
124                         else { 
125                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
126                                 else { allLines = 1;  }
127                         }
128                         
129                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
130                         accnos = m->isTrue(temp); 
131                         
132                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
133                         convert(temp, cutoff); 
134
135                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
136
137                 }
138
139         }
140         catch(exception& e) {
141                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
142                 exit(1);
143         }
144 }
145 //**********************************************************************************************************************
146 void SplitAbundCommand::help(){
147         try {
148                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
149                 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
150                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
156                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
157                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
158                 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159                 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
161
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "SplitAbundCommand", "help");
165                 exit(1);
166         }
167 }
168 //**********************************************************************************************************************
169 SplitAbundCommand::~SplitAbundCommand(){ 
170         if (groupfile != "") {  delete groupMap;  } 
171 }
172 //**********************************************************************************************************************
173 int SplitAbundCommand::execute(){
174         try {
175         
176                 if (abort == true) {    return 0;       }
177                 
178                 if (listfile != "") { //you are using a listfile to determine abundance
179                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
180                 
181                         //remove old files so you can append later....
182                         string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
183                         if (Groups.size() == 0) {
184                                 remove((fileroot + "rare.list").c_str());
185                                 remove((fileroot + "abund.list").c_str());
186                                 
187                                 outputNames.push_back((fileroot + "rare.list"));
188                                 outputNames.push_back((fileroot + "abund.list"));
189                         }else{
190                                 for (int i=0; i<Groups.size(); i++) {
191                                         remove((fileroot + Groups[i] + ".rare.list").c_str());
192                                         remove((fileroot + Groups[i] + ".abund.list").c_str());
193                                         
194                                         outputNames.push_back((fileroot + Groups[i] + ".rare.list"));
195                                         outputNames.push_back((fileroot + Groups[i] + ".abund.list"));
196                                 }
197                         }
198                         
199                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
200                         set<string> processedLabels;
201                         set<string> userLabels = labels;        
202                         
203                         input = new InputData(listfile, "list");
204                         list = input->getListVector();
205                         string lastLabel = list->getLabel();
206                         
207                         //do you have a namefile or do we need to similate one?
208                         if (namefile != "") {  readNamesFile();         }
209                         else                            { createNameMap(list);  }
210                         
211                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
212                         
213                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
214                         
215                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
216                                 
217                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
218                                                 
219                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
220                                                 splitList(list);
221                                                                                         
222                                                 processedLabels.insert(list->getLabel());
223                                                 userLabels.erase(list->getLabel());
224                                 }
225                                 
226                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
227                                                 string saveLabel = list->getLabel();
228                                                 
229                                                 delete list;
230                                                 list = input->getListVector(lastLabel); //get new list vector to process
231                                                 
232                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
233                                                 splitList(list);
234                                                 
235                                                 processedLabels.insert(list->getLabel());
236                                                 userLabels.erase(list->getLabel());
237                                                 
238                                                 //restore real lastlabel to save below
239                                                 list->setLabel(saveLabel);
240                                 }
241                                 
242                         
243                                 lastLabel = list->getLabel();
244                                         
245                                 delete list;
246                                 list = input->getListVector(); //get new list vector to process
247                         }
248                         
249                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
250                         
251                         //output error messages about any remaining user labels
252                         set<string>::iterator it;
253                         bool needToRun = false;
254                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
255                                 m->mothurOut("Your file does not include the label " + *it); 
256                                 if (processedLabels.count(lastLabel) != 1) {
257                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
258                                         needToRun = true;
259                                 }else {
260                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
261                                 }
262
263                         }
264                         
265                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
266                         
267                         //run last label if you need to
268                         if (needToRun == true)  {
269                                 if (list != NULL) {     delete list;    }
270                                 list = input->getListVector(lastLabel); //get new list vector to process
271                                 
272                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
273                                 splitList(list);                
274                                 
275                                 delete list;
276                         }
277                         
278                         delete input;
279                         
280                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
281                                                                         
282                 }else { //you are using the namefile to determine abundance
283                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
284                         
285                         splitNames(); 
286                         writeNames();
287                         
288                         string tag = "";
289                         if (groupfile != "")                            {  parseGroup(tag);             }
290                         if (accnos)                                                     {  writeAccnos(tag);    }
291                         if (fastafile != "")                            {  parseFasta(tag);             }
292                 }
293
294                 m->mothurOutEndLine();
295                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
296                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
297                 m->mothurOutEndLine();
298                 
299                 return 0;
300         }
301         catch(exception& e) {
302                 m->errorOut(e, "SplitAbundCommand", "execute");
303                 exit(1);
304         }
305 }
306 /**********************************************************************************************************************/
307 int SplitAbundCommand::splitList(ListVector* thisList) {
308         try {
309                 rareNames.clear();
310                 abundNames.clear();
311                 
312                 //get rareNames and abundNames
313                 for (int i = 0; i < thisList->getNumBins(); i++) {
314                         if (m->control_pressed) { return 0; }
315                         
316                         string bin = thisList->get(i);
317                                                 
318                         vector<string> names;
319                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
320                         int size = names.size();
321                                 
322                         if (size <= cutoff) {
323                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
324                         }else{
325                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
326                         }
327                 }//end for
328
329                 writeList(thisList);
330                 
331                 string tag = thisList->getLabel() + ".";
332                 if (groupfile != "")                            {  parseGroup(tag);             }
333                 if (accnos)                                                     {  writeAccnos(tag);    }
334                 if (fastafile != "")                            {  parseFasta(tag);             }
335                 
336                 return 0;
337
338         }
339         catch(exception& e) {
340                 m->errorOut(e, "SplitAbundCommand", "splitList");
341                 exit(1);
342         }
343 }
344 /**********************************************************************************************************************/
345 int SplitAbundCommand::writeList(ListVector* thisList) { 
346         try {
347                 
348                 map<string, ofstream*> filehandles;
349                 
350                 if (Groups.size() == 0) {
351                         SAbundVector* sabund = new SAbundVector();
352                         *sabund = thisList->getSAbundVector();
353                 
354                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
355                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
356                         int numRareBins = 0;
357                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
358                                 if (i > cutoff) { break; }
359                                 numRareBins += sabund->get(i);
360                         }
361                         int numAbundBins = thisList->getNumBins() - numRareBins;
362                         delete sabund;
363
364                         ofstream aout;
365                         ofstream rout;
366                         
367                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile))  + "rare.list";
368                         m->openOutputFileAppend(rare, rout);
369                         outputNames.push_back(rare);
370                         
371                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile))  + "abund.list";
372                         m->openOutputFileAppend(abund, aout);
373                         outputNames.push_back(abund);
374
375                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
376                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
377
378                         for (int i = 0; i < thisList->getNumBins(); i++) {
379                                 if (m->control_pressed) { break; }
380                         
381                                 string bin = list->get(i); 
382                         
383                                 int size = m->getNumNames(bin);
384                         
385                                 if (size <= cutoff) {  rout << bin << '\t';  }
386                                 else                            {  aout << bin << '\t'; }
387                         }
388                         
389                         if (rareNames.size() != 0)      { rout << endl; }
390                         if (abundNames.size() != 0) { aout << endl; }
391                         
392                         rout.close();
393                         aout.close();
394                         
395                 }else{ //parse names by abundance and group
396                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
397                         ofstream* temp;
398                         ofstream* temp2;
399                         //map<string, bool> wroteFile;
400                         map<string, ofstream*> filehandles;
401                         map<string, ofstream*>::iterator it3;
402
403                         for (int i=0; i<Groups.size(); i++) {
404                                 temp = new ofstream;
405                                 filehandles[Groups[i]+".rare"] = temp;
406                                 temp2 = new ofstream;
407                                 filehandles[Groups[i]+".abund"] = temp2;
408                                 
409                                 m->openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
410                                 m->openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
411                         }
412                         
413                         map<string, string> groupVector;
414                         map<string, string>::iterator itGroup;
415                         map<string, int> groupNumBins;
416                 
417                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
418                                 groupNumBins[it3->first] = 0;
419                                 groupVector[it3->first] = "";
420                         }
421                 
422                         for (int i = 0; i < thisList->getNumBins(); i++) {
423                                 if (m->control_pressed) { break; }
424                         
425                                 map<string, string> groupBins;
426                                 string bin = list->get(i); 
427                         
428                                 vector<string> names;
429                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
430                         
431                                 //parse bin into list of sequences in each group
432                                 for (int j = 0; j < names.size(); j++) {
433                                         string rareAbund;
434                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
435                                                 rareAbund = ".rare";
436                                         }else{ //you are a abund name
437                                                 rareAbund = ".abund";
438                                         }
439                                         
440                                         string group = groupMap->getGroup(names[j]);
441                                 
442                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
443                                                 itGroup = groupBins.find(group+rareAbund);
444                                                 if(itGroup == groupBins.end()) {
445                                                         groupBins[group+rareAbund] = names[j];  //add first name
446                                                         groupNumBins[group+rareAbund]++;
447                                                 }else{ //add another name
448                                                         groupBins[group+rareAbund] +=  "," + names[j];
449                                                 }
450                                         }else if(group == "not found") {
451                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
452                                         }
453                                 }
454                         
455                         
456                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
457                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
458                                 }
459                         }
460                         
461                         //end list vector
462                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
463                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
464                                 (*(filehandles[it3->first])).close();
465                                 delete it3->second;
466                         }
467                 }
468                 
469                 return 0;
470
471         }
472         catch(exception& e) {
473                 m->errorOut(e, "SplitAbundCommand", "writeList");
474                 exit(1);
475         }
476 }
477 /**********************************************************************************************************************/
478 int SplitAbundCommand::splitNames() { //namefile
479         try {
480                 
481                 rareNames.clear();
482                 abundNames.clear();     
483                         
484                 //open input file
485                 ifstream in;
486                 m->openInputFile(namefile, in);
487                 
488                 while (!in.eof()) {
489                         if (m->control_pressed) { break; }
490                         
491                         string firstCol, secondCol;
492                         in >> firstCol >> secondCol; m->gobble(in);
493                         
494                         nameMap[firstCol] = secondCol;
495                         
496                         int size = m->getNumNames(secondCol);
497                                 
498                         if (size <= cutoff) {
499                                 rareNames.insert(firstCol); 
500                         }else{
501                                 abundNames.insert(firstCol); 
502                         }
503                 }
504                 in.close();
505                                 
506                 return 0;
507
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "SplitAbundCommand", "splitNames");
511                 exit(1);
512         }
513 }
514 /**********************************************************************************************************************/
515 int SplitAbundCommand::readNamesFile() { 
516         try {
517                 //open input file
518                 ifstream in;
519                 m->openInputFile(namefile, in);
520                 
521                 while (!in.eof()) {
522                         if (m->control_pressed) { break; }
523                         
524                         string firstCol, secondCol;
525                         in >> firstCol >> secondCol; m->gobble(in);
526                         
527                         nameMap[firstCol] = secondCol;
528                 }
529                 in.close();
530                                 
531                 return 0;
532
533         }
534         catch(exception& e) {
535                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
536                 exit(1);
537         }
538 }
539 /**********************************************************************************************************************/
540 int SplitAbundCommand::createNameMap(ListVector* thisList) {
541         try {
542                 
543                 if (thisList != NULL) {
544                         for (int i = 0; i < thisList->getNumBins(); i++) {
545                                 if (m->control_pressed) { return 0; }
546                                 
547                                 string bin = thisList->get(i);
548                                                         
549                                 vector<string> names;
550                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
551                                 
552                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
553                         }//end for
554                 }
555                 
556                 return 0;
557         }
558         catch(exception& e) {
559                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
560                 exit(1);
561         }
562 }
563 /**********************************************************************************************************************/
564 int SplitAbundCommand::writeNames() { //namefile
565         try {
566                 
567                 map<string, ofstream*> filehandles;
568
569                 if (Groups.size() == 0) {
570                         ofstream aout;
571                         ofstream rout;
572                         
573                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
574                         m->openOutputFile(rare, rout);
575                         outputNames.push_back(rare);
576                         
577                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
578                         m->openOutputFile(abund, aout);
579                         outputNames.push_back(abund);
580                         
581                         if (rareNames.size() != 0) {
582                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
583                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
584                                 }
585                         }
586                         rout.close();
587                         
588                         if (abundNames.size() != 0) {
589                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
590                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
591                                 }
592                         }
593                         aout.close();
594                         
595                 }else{ //parse names by abundance and group
596                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
597                         ofstream* temp;
598                         ofstream* temp2;
599                         map<string, ofstream*> filehandles;
600                         map<string, ofstream*>::iterator it3;
601
602                         for (int i=0; i<Groups.size(); i++) {
603                                 temp = new ofstream;
604                                 filehandles[Groups[i]+".rare"] = temp;
605                                 temp2 = new ofstream;
606                                 filehandles[Groups[i]+".abund"] = temp2;
607                                 
608                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
609                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
610                         }
611                         
612                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
613                                 vector<string> names;
614                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
615                                 
616                                 string rareAbund;
617                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
618                                                 rareAbund = ".rare";
619                                 }else{ //you are a abund name
620                                                 rareAbund = ".abund";
621                                 }
622                                 
623                                 map<string, string> outputStrings;
624                                 map<string, string>::iterator itout;
625                                 for (int i = 0; i < names.size(); i++) {
626                                         
627                                         string group = groupMap->getGroup(names[i]);
628                                         
629                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
630                                                 itout = outputStrings.find(group+rareAbund);
631                                                 if (itout == outputStrings.end()) {  
632                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
633                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
634                                         }else if(group == "not found") {
635                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
636                                         }
637                                 }
638                                 
639                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
640                         }
641                         
642                         
643                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
644                                 (*(filehandles[it3->first])).close();
645                                 outputNames.push_back(fileroot + it3->first + ".names");  
646                                 delete it3->second;
647                         }
648                 }
649                                 
650                 return 0;
651
652         }
653         catch(exception& e) {
654                 m->errorOut(e, "SplitAbundCommand", "writeNames");
655                 exit(1);
656         }
657 }
658 /**********************************************************************************************************************/
659 //just write the unique names - if a namesfile is given
660 int SplitAbundCommand::writeAccnos(string tag) { 
661         try {
662                 
663                 map<string, ofstream*> filehandles;
664                 
665                 if (Groups.size() == 0) {
666                         ofstream aout;
667                         ofstream rout;
668                         
669                         
670                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
671                         m->openOutputFile(rare, rout);
672                         outputNames.push_back(rare);
673                         
674                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
675                                 rout << (*itRare) << endl;
676                         }
677                         rout.close();
678                 
679                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
680                         m->openOutputFile(abund, aout);
681                         outputNames.push_back(abund);
682                         
683                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
684                                 aout << (*itAbund) << endl;
685                         }
686                         aout.close();
687                         
688                 }else{ //parse names by abundance and group
689                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
690                         ofstream* temp;
691                         ofstream* temp2;
692                         map<string, ofstream*> filehandles;
693                         map<string, ofstream*>::iterator it3;
694                         
695                         for (int i=0; i<Groups.size(); i++) {
696                                 temp = new ofstream;
697                                 filehandles[Groups[i]+".rare"] = temp;
698                                 temp2 = new ofstream;
699                                 filehandles[Groups[i]+".abund"] = temp2;
700                                 
701                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
702                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
703                         }
704                         
705                         //write rare
706                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
707                                         string group = groupMap->getGroup(*itRare);
708                                         
709                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
710                                                 *(filehandles[group+".rare"]) << *itRare << endl;
711                                         }
712                         }
713                                 
714                         //write abund   
715                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
716                                         string group = groupMap->getGroup(*itAbund);
717                                         
718                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
719                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
720                                         }
721                         }
722                         
723                         //close files
724                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
725                                 (*(filehandles[it3->first])).close();
726                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  
727                                 delete it3->second;
728                         }
729                 }
730                                 
731                 return 0;
732
733         }
734         catch(exception& e) {
735                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
736                 exit(1);
737         }
738 }
739 /**********************************************************************************************************************/
740 int SplitAbundCommand::parseGroup(string tag) { //namefile
741         try {
742                 
743                 map<string, ofstream*> filehandles;
744         
745                 if (Groups.size() == 0) {
746                         ofstream aout;
747                         ofstream rout;
748                         
749                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
750                         m->openOutputFile(rare, rout);
751                         outputNames.push_back(rare);
752                 
753                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
754                         m->openOutputFile(abund, aout);
755                         outputNames.push_back(abund);
756                         
757                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
758                                 vector<string> names;
759                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
760                                 
761                                 for (int i = 0; i < names.size(); i++) {
762                                 
763                                         string group = groupMap->getGroup(names[i]);
764                                 
765                                         if (group == "not found") { 
766                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
767                                         }else {
768                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
769                                                         rout << names[i] << '\t' << group << endl;
770                                                 }else{ //you are a abund name
771                                                         aout << names[i] << '\t' << group << endl;
772                                                 }
773                                         }
774                                 }
775                         }
776                         
777                         rout.close(); 
778                         aout.close(); 
779
780                 }else{ //parse names by abundance and group
781                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
782                         ofstream* temp;
783                         ofstream* temp2;
784                         map<string, ofstream*> filehandles;
785                         map<string, ofstream*>::iterator it3;
786
787                         for (int i=0; i<Groups.size(); i++) {
788                                 temp = new ofstream;
789                                 filehandles[Groups[i]+".rare"] = temp;
790                                 temp2 = new ofstream;
791                                 filehandles[Groups[i]+".abund"] = temp2;
792                                 
793                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
794                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
795                         }
796                         
797                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
798                                 vector<string> names;
799                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
800                                 
801                                 string rareAbund;
802                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
803                                         rareAbund = ".rare";
804                                 }else{ //you are a abund name
805                                         rareAbund = ".abund";
806                                 }
807                                 
808                                 for (int i = 0; i < names.size(); i++) {
809                                 
810                                         string group = groupMap->getGroup(names[i]);
811                                                                         
812                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
813                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
814                                         }
815                                 }
816                         }
817                         
818                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
819                                 (*(filehandles[it3->first])).close();
820                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  
821                                 delete it3->second;
822                         }
823                 }
824                                 
825                 return 0;
826
827         }
828         catch(exception& e) {
829                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
830                 exit(1);
831         }
832 }
833 /**********************************************************************************************************************/
834 int SplitAbundCommand::parseFasta(string tag) { //namefile
835         try {
836                 
837                 map<string, ofstream*> filehandles;
838                 
839                 if (Groups.size() == 0) {
840                         ofstream aout;
841                         ofstream rout;
842                         
843                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
844                         m->openOutputFile(rare, rout);
845                         outputNames.push_back(rare);
846                 
847                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
848                         m->openOutputFile(abund, aout);
849                         outputNames.push_back(abund);
850                 
851                         //open input file
852                         ifstream in;
853                         m->openInputFile(fastafile, in);
854         
855                         while (!in.eof()) {
856                                 if (m->control_pressed) { break; }
857                 
858                                 Sequence seq(in); m->gobble(in);
859                                 
860                                 if (seq.getName() != "") { 
861                                         
862                                         map<string, string>::iterator itNames;
863                                         
864                                         itNames = nameMap.find(seq.getName());
865                                         
866                                         if (itNames == nameMap.end()) {
867                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
868                                         }else{
869                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
870                                                         seq.printSequence(rout);
871                                                 }else{ //you are a abund name
872                                                         seq.printSequence(aout);
873                                                 }
874                                         }
875                                 }
876                         }
877                         in.close();
878                         rout.close(); 
879                         aout.close(); 
880
881                 }else{ //parse names by abundance and group
882                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
883                         ofstream* temp;
884                         ofstream* temp2;
885                         map<string, ofstream*> filehandles;
886                         map<string, ofstream*>::iterator it3;
887
888                         for (int i=0; i<Groups.size(); i++) {
889                                 temp = new ofstream;
890                                 filehandles[Groups[i]+".rare"] = temp;
891                                 temp2 = new ofstream;
892                                 filehandles[Groups[i]+".abund"] = temp2;
893                                 
894                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
895                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
896                         }
897                         
898                         //open input file
899                         ifstream in;
900                         m->openInputFile(fastafile, in);
901         
902                         while (!in.eof()) {
903                                 if (m->control_pressed) { break; }
904                 
905                                 Sequence seq(in); m->gobble(in);
906                                 
907                                 if (seq.getName() != "") { 
908                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
909                                         
910                                         if (itNames == nameMap.end()) {
911                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
912                                         }else{
913                                                 vector<string> names;
914                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
915                                 
916                                                 string rareAbund;
917                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
918                                                         rareAbund = ".rare";
919                                                 }else{ //you are a abund name
920                                                         rareAbund = ".abund";
921                                                 }
922                                 
923                                                 for (int i = 0; i < names.size(); i++) {
924                                 
925                                                         string group = groupMap->getGroup(seq.getName());
926                                         
927                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
928                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
929                                                         }else if(group == "not found") {
930                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
931                                                         }
932                                                 }
933                                         }
934                                 }
935                         }
936                         in.close();
937                         
938                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
939                                 (*(filehandles[it3->first])).close();
940                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  
941                                 delete it3->second;
942                         }
943                 }
944                                 
945                 return 0;
946
947         }
948         catch(exception& e) {
949                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
950                 exit(1);
951         }
952 }
953 /**********************************************************************************************************************/
954