]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
added oldfasta and column parameter to dist.seqs so you can append distances to an...
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option)  {
14         try {
15                 abort = false;
16                 allLines = 1;
17                         
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string, string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                                                 
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("list");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
48                                 }
49                                 
50                                 it = parameters.find("group");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
56                                 }
57                                 
58                                 it = parameters.find("fasta");
59                                 //user has given a template file
60                                 if(it != parameters.end()){ 
61                                         path = hasPath(it->second);
62                                         //if the user has not given a path then, add inputdir. else leave path alone.
63                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
64                                 }
65                                 
66                                 it = parameters.find("name");
67                                 //user has given a template file
68                                 if(it != parameters.end()){ 
69                                         path = hasPath(it->second);
70                                         //if the user has not given a path then, add inputdir. else leave path alone.
71                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
72                                 }
73
74                         }
75
76                         
77                         //if the user changes the output directory command factory will send this info to us in the output parameter 
78                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
79
80                         //check for required parameters
81                         listfile = validParameter.validFile(parameters, "list", true);
82                         if (listfile == "not open") { abort = true; }
83                         else if (listfile == "not found") { listfile = ""; }
84                         else{ inputFile = listfile; }   
85                         
86                         namefile = validParameter.validFile(parameters, "name", true);
87                         if (namefile == "not open") { abort = true; }
88                         else if (namefile == "not found") { namefile = ""; }    
89                         else{ inputFile = namefile; }   
90                 
91                         fastafile = validParameter.validFile(parameters, "fasta", true);
92                         if (fastafile == "not open") { abort = true; }
93                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
94                         
95                         groupfile = validParameter.validFile(parameters, "group", true);
96                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
97                         else if (groupfile == "not found") { groupfile = ""; }
98                         else {  
99                                 groupMap = new GroupMap(groupfile);
100                                 
101                                 int error = groupMap->readMap();
102                                 if (error == 1) { abort = true; }
103         
104                         }
105                         
106                         groups = validParameter.validFile(parameters, "groups", false);         
107                         if (groups == "not found") { groups = ""; }
108                         else if (groups == "all") { 
109                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
110                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
111                         }else { 
112                                 splitAtDash(groups, Groups);
113                         }
114                         
115                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
116                         
117                         //do you have all files needed
118                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
119                         
120                         //check for optional parameter and set defaults
121                         // ...at some point should added some additional type checking...
122                         label = validParameter.validFile(parameters, "label", false);                   
123                         if (label == "not found") { label = "";  allLines = 1; }
124                         else { 
125                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
126                                 else { allLines = 1;  }
127                         }
128                         
129                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
130                         accnos = isTrue(temp); 
131                         
132                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
133                         convert(temp, cutoff); 
134
135                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
136
137                 }
138
139         }
140         catch(exception& e) {
141                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
142                 exit(1);
143         }
144 }
145 //**********************************************************************************************************************
146 void SplitAbundCommand::help(){
147         try {
148                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
149                 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
150                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
156                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
157                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
158                 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159                 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
161
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "SplitAbundCommand", "help");
165                 exit(1);
166         }
167 }
168 //**********************************************************************************************************************
169 SplitAbundCommand::~SplitAbundCommand(){ 
170         if (groupfile != "") {  delete groupMap;  } 
171 }
172 //**********************************************************************************************************************
173 int SplitAbundCommand::execute(){
174         try {
175         
176                 if (abort == true) {    return 0;       }
177                 
178                 if (listfile != "") { //you are using a listfile to determine abundance
179                         if (outputDir == "") { outputDir = hasPath(listfile); }
180                 
181                         //remove old files so you can append later....
182                         string fileroot = outputDir + getRootName(getSimpleName(listfile));
183                         if (Groups.size() == 0) {
184                                 remove((fileroot + "rare.list").c_str());
185                                 remove((fileroot + "abund.list").c_str());
186                                 
187                                 wroteListFile["rare"] = false;
188                                 wroteListFile["abund"] = false;
189                         }else{
190                                 for (int i=0; i<Groups.size(); i++) {
191                                         remove((fileroot + Groups[i] + ".rare.list").c_str());
192                                         remove((fileroot + Groups[i] + ".abund.list").c_str());
193                         
194                                         wroteListFile[(Groups[i] + ".rare")] = false;
195                                         wroteListFile[(Groups[i] + ".abund")] = false;
196                                 }
197                         }
198                         
199                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
200                         set<string> processedLabels;
201                         set<string> userLabels = labels;        
202                         
203                         input = new InputData(listfile, "list");
204                         list = input->getListVector();
205                         string lastLabel = list->getLabel();
206                         
207                         //do you have a namefile or do we need to similate one?
208                         if (namefile != "") {  readNamesFile();         }
209                         else                            { createNameMap(list);  }
210                         
211                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
212                         
213                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
214                         
215                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
216                                 
217                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
218                                                 
219                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
220                                                 splitList(list);
221                                                                                         
222                                                 processedLabels.insert(list->getLabel());
223                                                 userLabels.erase(list->getLabel());
224                                 }
225                                 
226                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
227                                                 string saveLabel = list->getLabel();
228                                                 
229                                                 delete list;
230                                                 list = input->getListVector(lastLabel); //get new list vector to process
231                                                 
232                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
233                                                 splitList(list);
234                                                 
235                                                 processedLabels.insert(list->getLabel());
236                                                 userLabels.erase(list->getLabel());
237                                                 
238                                                 //restore real lastlabel to save below
239                                                 list->setLabel(saveLabel);
240                                 }
241                                 
242                         
243                                 lastLabel = list->getLabel();
244                                         
245                                 delete list;
246                                 list = input->getListVector(); //get new list vector to process
247                         }
248                         
249                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
250                         
251                         //output error messages about any remaining user labels
252                         set<string>::iterator it;
253                         bool needToRun = false;
254                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
255                                 m->mothurOut("Your file does not include the label " + *it); 
256                                 if (processedLabels.count(lastLabel) != 1) {
257                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
258                                         needToRun = true;
259                                 }else {
260                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
261                                 }
262
263                         }
264                         
265                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
266                         
267                         //run last label if you need to
268                         if (needToRun == true)  {
269                                 if (list != NULL) {     delete list;    }
270                                 list = input->getListVector(lastLabel); //get new list vector to process
271                                 
272                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
273                                 splitList(list);                
274                                 
275                                 delete list;
276                         }
277                         
278                         delete input;
279                         
280                         for (map<string, bool>::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) {
281                                 string filename = fileroot + itBool->first;
282                                 if ((itBool->first == "rare") || (itBool->first == "abund")) {
283                                         filename = fileroot + itBool->first + ".list";
284                                 }
285                                 if (itBool->second) { //we wrote to this file
286                                         outputNames.push_back(filename);
287                                 }else{
288                                         remove(filename.c_str());
289                                 }
290                         }
291                         
292                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
293
294                                                                         
295                 }else { //you are using the namefile to determine abundance
296                         if (outputDir == "") { outputDir = hasPath(namefile); }
297                         
298                         splitNames(); 
299                         writeNames();
300                         
301                         string tag = "";
302                         if (groupfile != "")                            {  parseGroup(tag);             }
303                         if (accnos)                                                     {  writeAccnos(tag);    }
304                         if (fastafile != "")                            {  parseFasta(tag);             }
305                 }
306
307                 m->mothurOutEndLine();
308                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
309                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
310                 m->mothurOutEndLine();
311                 
312                 return 0;
313         }
314         catch(exception& e) {
315                 m->errorOut(e, "SplitAbundCommand", "execute");
316                 exit(1);
317         }
318 }
319 /**********************************************************************************************************************/
320 int SplitAbundCommand::splitList(ListVector* thisList) {
321         try {
322                 rareNames.clear();
323                 abundNames.clear();
324                 
325                 //get rareNames and abundNames
326                 for (int i = 0; i < thisList->getNumBins(); i++) {
327                         if (m->control_pressed) { return 0; }
328                         
329                         string bin = thisList->get(i);
330                                                 
331                         vector<string> names;
332                         splitAtComma(bin, names);  //parses bin into individual sequence names
333                         int size = names.size();
334                                 
335                         if (size <= cutoff) {
336                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
337                         }else{
338                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
339                         }
340                 }//end for
341
342                 writeList(thisList);
343                 
344                 string tag = thisList->getLabel() + ".";
345                 if (groupfile != "")                            {  parseGroup(tag);             }
346                 if (accnos)                                                     {  writeAccnos(tag);    }
347                 if (fastafile != "")                            {  parseFasta(tag);             }
348                 
349                 return 0;
350
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "SplitAbundCommand", "splitList");
354                 exit(1);
355         }
356 }
357 /**********************************************************************************************************************/
358 int SplitAbundCommand::writeList(ListVector* thisList) { 
359         try {
360                 
361                 map<string, ofstream*> filehandles;
362                 
363                 if (Groups.size() == 0) {
364                         SAbundVector* sabund = new SAbundVector();
365                         *sabund = thisList->getSAbundVector();
366                 
367                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
368                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
369                         int numRareBins = 0;
370                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
371                                 if (i > cutoff) { break; }
372                                 numRareBins += sabund->get(i);
373                         }
374                         int numAbundBins = thisList->getNumBins() - numRareBins;
375                         delete sabund;
376
377                         ofstream aout;
378                         ofstream rout;
379                         
380                         if (rareNames.size() != 0) {
381                                 string rare = outputDir + getRootName(getSimpleName(listfile))  + "rare.list";
382                                 wroteListFile["rare"] = true;
383                                 openOutputFileAppend(rare, rout);
384                                 rout << thisList->getLabel() << '\t' << numRareBins << '\t';
385                         }
386                         
387                         if (abundNames.size() != 0) {
388                                 string abund = outputDir + getRootName(getSimpleName(listfile))  + "abund.list";
389                                 wroteListFile["abund"] = true;
390                                 openOutputFileAppend(abund, aout);
391                                 aout << thisList->getLabel() << '\t' << numAbundBins << '\t';
392                         }
393
394                         for (int i = 0; i < thisList->getNumBins(); i++) {
395                                 if (m->control_pressed) { break; }
396                         
397                                 string bin = list->get(i); 
398                         
399                                 int size = getNumNames(bin);
400                         
401                                 if (size <= cutoff) {  rout << bin << '\t';  }
402                                 else                            {  aout << bin << '\t'; }
403                         }
404                         
405                         if (rareNames.size() != 0)      { rout << endl; rout.close(); }
406                         if (abundNames.size() != 0) { aout << endl; aout.close(); }
407
408                 }else{ //parse names by abundance and group
409                         string fileroot =  outputDir + getRootName(getSimpleName(listfile));
410                         ofstream* temp;
411                         ofstream* temp2;
412                         map<string, bool> wroteFile;
413                         map<string, ofstream*> filehandles;
414                         map<string, ofstream*>::iterator it3;
415
416                         for (int i=0; i<Groups.size(); i++) {
417                                 temp = new ofstream;
418                                 filehandles[Groups[i]+".rare"] = temp;
419                                 temp2 = new ofstream;
420                                 filehandles[Groups[i]+".abund"] = temp2;
421                                 
422                                 openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
423                                 openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
424                         }
425                         
426                         map<string, string> groupVector;
427                         map<string, string>::iterator itGroup;
428                         map<string, int> groupNumBins;
429                 
430                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
431                                 groupNumBins[it3->first] = 0;
432                                 groupVector[it3->first] = "";
433                         }
434                 
435                         for (int i = 0; i < thisList->getNumBins(); i++) {
436                                 if (m->control_pressed) { break; }
437                         
438                                 map<string, string> groupBins;
439                                 string bin = list->get(i); 
440                         
441                                 vector<string> names;
442                                 splitAtComma(bin, names);  //parses bin into individual sequence names
443                         
444                                 //parse bin into list of sequences in each group
445                                 for (int j = 0; j < names.size(); j++) {
446                                         string rareAbund;
447                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
448                                                 rareAbund = ".rare";
449                                         }else{ //you are a abund name
450                                                 rareAbund = ".abund";
451                                         }
452                                         
453                                         string group = groupMap->getGroup(names[j]);
454                                 
455                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
456                                                 itGroup = groupBins.find(group+rareAbund);
457                                                 if(itGroup == groupBins.end()) {
458                                                         groupBins[group+rareAbund] = names[j];  //add first name
459                                                         groupNumBins[group+rareAbund]++;
460                                                 }else{ //add another name
461                                                         groupBins[group+rareAbund] +=  "," + names[j];
462                                                 }
463                                         }else if(group == "not found") {
464                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
465                                         }
466                                 }
467                         
468                         
469                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
470                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
471                                 }
472                         }
473                         
474                         //end list vector
475                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
476                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
477                                 wroteListFile[it3->first] = true;
478                                 (*(filehandles[it3->first])).close();
479                                 delete it3->second;
480                         }
481                 }
482                 
483                 return 0;
484
485         }
486         catch(exception& e) {
487                 m->errorOut(e, "SplitAbundCommand", "writeList");
488                 exit(1);
489         }
490 }
491 /**********************************************************************************************************************/
492 int SplitAbundCommand::splitNames() { //namefile
493         try {
494                 
495                 rareNames.clear();
496                 abundNames.clear();     
497                         
498                 //open input file
499                 ifstream in;
500                 openInputFile(namefile, in);
501                 
502                 while (!in.eof()) {
503                         if (m->control_pressed) { break; }
504                         
505                         string firstCol, secondCol;
506                         in >> firstCol >> secondCol; gobble(in);
507                         
508                         nameMap[firstCol] = secondCol;
509                         
510                         int size = getNumNames(secondCol);
511                                 
512                         if (size <= cutoff) {
513                                 rareNames.insert(firstCol); 
514                         }else{
515                                 abundNames.insert(firstCol); 
516                         }
517                 }
518                 in.close();
519                                 
520                 return 0;
521
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "SplitAbundCommand", "splitNames");
525                 exit(1);
526         }
527 }
528 /**********************************************************************************************************************/
529 int SplitAbundCommand::readNamesFile() { 
530         try {
531                 //open input file
532                 ifstream in;
533                 openInputFile(namefile, in);
534                 
535                 while (!in.eof()) {
536                         if (m->control_pressed) { break; }
537                         
538                         string firstCol, secondCol;
539                         in >> firstCol >> secondCol; gobble(in);
540                         
541                         nameMap[firstCol] = secondCol;
542                 }
543                 in.close();
544                                 
545                 return 0;
546
547         }
548         catch(exception& e) {
549                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
550                 exit(1);
551         }
552 }
553 /**********************************************************************************************************************/
554 int SplitAbundCommand::createNameMap(ListVector* thisList) {
555         try {
556                 
557                 if (thisList != NULL) {
558                         for (int i = 0; i < thisList->getNumBins(); i++) {
559                                 if (m->control_pressed) { return 0; }
560                                 
561                                 string bin = thisList->get(i);
562                                                         
563                                 vector<string> names;
564                                 splitAtComma(bin, names);  //parses bin into individual sequence names
565                                 
566                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
567                         }//end for
568                 }
569                 
570                 return 0;
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
574                 exit(1);
575         }
576 }
577 /**********************************************************************************************************************/
578 int SplitAbundCommand::writeNames() { //namefile
579         try {
580                 
581                 map<string, ofstream*> filehandles;
582
583                 if (Groups.size() == 0) {
584                         ofstream aout;
585                         ofstream rout;
586                         
587                         if (rareNames.size() != 0) {
588                                 string rare = outputDir + getRootName(getSimpleName(namefile))  + "rare.names";
589                                 openOutputFile(rare, rout);
590                                 outputNames.push_back(rare);
591                                 
592                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
593                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
594                                 }
595                                 rout.close();
596                         }
597                         
598                         if (abundNames.size() != 0) {
599                                 string abund = outputDir + getRootName(getSimpleName(namefile))  + "abund.names";
600                                 openOutputFile(abund, aout);
601                                 outputNames.push_back(abund);
602                                 
603                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
604                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
605                                 }
606                                 aout.close();
607                         }
608
609                 }else{ //parse names by abundance and group
610                         string fileroot =  outputDir + getRootName(getSimpleName(namefile));
611                         ofstream* temp;
612                         ofstream* temp2;
613                         map<string, bool> wroteFile;
614                         map<string, ofstream*> filehandles;
615                         map<string, ofstream*>::iterator it3;
616
617                         for (int i=0; i<Groups.size(); i++) {
618                                 temp = new ofstream;
619                                 filehandles[Groups[i]+".rare"] = temp;
620                                 temp2 = new ofstream;
621                                 filehandles[Groups[i]+".abund"] = temp2;
622                                 
623                                 openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
624                                 openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
625                                 
626                                 wroteFile[Groups[i] + ".rare"] = false;
627                                 wroteFile[Groups[i] + ".abund"] = false;
628                         }
629                         
630                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
631                                 vector<string> names;
632                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
633                                 
634                                 string rareAbund;
635                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
636                                                 rareAbund = ".rare";
637                                 }else{ //you are a abund name
638                                                 rareAbund = ".abund";
639                                 }
640                                 
641                                 map<string, string> outputStrings;
642                                 map<string, string>::iterator itout;
643                                 for (int i = 0; i < names.size(); i++) {
644                                         
645                                         string group = groupMap->getGroup(names[i]);
646                                         
647                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
648                                                 itout = outputStrings.find(group+rareAbund);
649                                                 if (itout == outputStrings.end()) {  
650                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
651                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
652                                         }else if(group == "not found") {
653                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
654                                         }
655                                 }
656                                 
657                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { 
658                                         *(filehandles[itout->first]) << itout->second << endl;
659                                         wroteFile[itout->first] = true;
660                                 }
661                         }
662                         
663                         
664                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
665                                 (*(filehandles[it3->first])).close();
666                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + it3->first + ".names");  }
667                                 else { remove((it3->first).c_str()); }
668                                 delete it3->second;
669                         }
670                 }
671                                 
672                 return 0;
673
674         }
675         catch(exception& e) {
676                 m->errorOut(e, "SplitAbundCommand", "writeNames");
677                 exit(1);
678         }
679 }
680 /**********************************************************************************************************************/
681 //just write the unique names - if a namesfile is given
682 int SplitAbundCommand::writeAccnos(string tag) { 
683         try {
684                 
685                 map<string, ofstream*> filehandles;
686                 
687                 if (Groups.size() == 0) {
688                         ofstream aout;
689                         ofstream rout;
690                         
691                         if (rareNames.size() != 0) {
692                                 string rare = outputDir + getRootName(getSimpleName(inputFile))  + tag + "rare.accnos";
693                                 openOutputFile(rare, rout);
694                                 outputNames.push_back(rare);
695                                 
696                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
697                                         rout << (*itRare) << endl;
698                                 }
699                                 rout.close();
700                         }
701                         
702                         if (abundNames.size() != 0) {
703                                 string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag  + "abund.accnos";
704                                 openOutputFile(abund, aout);
705                                 outputNames.push_back(abund);
706                                 
707                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
708                                         aout << (*itAbund) << endl;
709                                 }
710                                 aout.close();
711                         }
712                 }else{ //parse names by abundance and group
713                         string fileroot =  outputDir + getRootName(getSimpleName(inputFile));
714                         ofstream* temp;
715                         ofstream* temp2;
716                         map<string, bool> wroteFile;
717                         map<string, ofstream*> filehandles;
718                         map<string, ofstream*>::iterator it3;
719                         
720                         for (int i=0; i<Groups.size(); i++) {
721                                 temp = new ofstream;
722                                 filehandles[Groups[i]+".rare"] = temp;
723                                 temp2 = new ofstream;
724                                 filehandles[Groups[i]+".abund"] = temp2;
725                                 
726                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
727                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
728                                 
729                                 wroteFile[Groups[i] + ".rare"] = false;
730                                 wroteFile[Groups[i] + ".abund"] = false;
731                         }
732                         
733                         //write rare
734                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
735                                         string group = groupMap->getGroup(*itRare);
736                                         
737                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
738                                                 *(filehandles[group+".rare"]) << *itRare << endl;
739                                                 wroteFile[group+".rare"] = true;
740                                         }
741                         }
742                                 
743                         //write abund   
744                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
745                                         string group = groupMap->getGroup(*itAbund);
746                                         
747                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
748                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
749                                                 wroteFile[group+".abund"] = true;
750                                         }
751                         }
752                         
753                         //close files
754                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
755                                 (*(filehandles[it3->first])).close();
756                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".accnos");  }
757                                 else { remove((fileroot + tag + it3->first + ".accnos").c_str()); }
758                                 delete it3->second;
759                         }
760                 }
761                                 
762                 return 0;
763
764         }
765         catch(exception& e) {
766                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
767                 exit(1);
768         }
769 }
770 /**********************************************************************************************************************/
771 int SplitAbundCommand::parseGroup(string tag) { //namefile
772         try {
773                 
774                 map<string, ofstream*> filehandles;
775         
776                 if (Groups.size() == 0) {
777                         ofstream aout;
778                         ofstream rout;
779                         
780                         if (rareNames.size() != 0) {
781                                 string rare = outputDir + getRootName(getSimpleName(groupfile))  + tag + "rare.groups";
782                                 openOutputFile(rare, rout);
783                                 outputNames.push_back(rare);
784                         }
785                         
786                         if (abundNames.size() != 0) {
787                                 string abund = outputDir + getRootName(getSimpleName(groupfile))  + tag + "abund.groups";
788                                 openOutputFile(abund, aout);
789                                 outputNames.push_back(abund);
790                         }
791
792                                 
793                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
794                                 vector<string> names;
795                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
796                                 
797                                 for (int i = 0; i < names.size(); i++) {
798                                 
799                                         string group = groupMap->getGroup(names[i]);
800                                 
801                                         if (group == "not found") { 
802                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
803                                         }else {
804                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
805                                                         rout << names[i] << '\t' << group << endl;
806                                                 }else{ //you are a abund name
807                                                         aout << names[i] << '\t' << group << endl;
808                                                 }
809                                         }
810                                 }
811                         }
812                         
813                         if (rareNames.size() != 0)      { rout.close(); }
814                         if (abundNames.size() != 0) { aout.close(); }
815
816                 }else{ //parse names by abundance and group
817                         string fileroot =  outputDir + getRootName(getSimpleName(groupfile));
818                         ofstream* temp;
819                         ofstream* temp2;
820                         map<string, bool> wroteFile;
821                         map<string, ofstream*> filehandles;
822                         map<string, ofstream*>::iterator it3;
823
824                         for (int i=0; i<Groups.size(); i++) {
825                                 temp = new ofstream;
826                                 filehandles[Groups[i]+".rare"] = temp;
827                                 temp2 = new ofstream;
828                                 filehandles[Groups[i]+".abund"] = temp2;
829                                 
830                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
831                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
832                                 
833                                 wroteFile[Groups[i] + ".rare"] = false;
834                                 wroteFile[Groups[i] + ".abund"] = false;
835                         }
836                         
837                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
838                                 vector<string> names;
839                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
840                                 
841                                 string rareAbund;
842                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
843                                         rareAbund = ".rare";
844                                 }else{ //you are a abund name
845                                         rareAbund = ".abund";
846                                 }
847                                 
848                                 for (int i = 0; i < names.size(); i++) {
849                                 
850                                         string group = groupMap->getGroup(names[i]);
851                                                                         
852                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
853                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
854                                                 wroteFile[group+rareAbund] = true;
855                                         }
856                                 }
857                         }
858                         
859                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
860                                 (*(filehandles[it3->first])).close();
861                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".groups");  }
862                                 else { remove((fileroot + tag + it3->first + ".groups").c_str()); }
863                                 delete it3->second;
864                         }
865                 }
866                                 
867                 return 0;
868
869         }
870         catch(exception& e) {
871                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
872                 exit(1);
873         }
874 }
875 /**********************************************************************************************************************/
876 int SplitAbundCommand::parseFasta(string tag) { //namefile
877         try {
878                 
879                 map<string, ofstream*> filehandles;
880                 
881                 if (Groups.size() == 0) {
882                         ofstream aout;
883                         ofstream rout;
884                         
885                         if (rareNames.size() != 0) {
886                                 string rare = outputDir + getRootName(getSimpleName(fastafile))  + tag + "rare.fasta";
887                                 openOutputFile(rare, rout);
888                                 outputNames.push_back(rare);
889                         }
890                         
891                         if (abundNames.size() != 0) {
892                                 string abund = outputDir + getRootName(getSimpleName(fastafile))  + tag + "abund.fasta";
893                                 openOutputFile(abund, aout);
894                                 outputNames.push_back(abund);
895                         }
896
897                                 
898                         //open input file
899                         ifstream in;
900                         openInputFile(fastafile, in);
901         
902                         while (!in.eof()) {
903                                 if (m->control_pressed) { break; }
904                 
905                                 Sequence seq(in); gobble(in);
906                                 
907                                 if (seq.getName() != "") { 
908                                         
909                                         map<string, string>::iterator itNames;
910                                         
911                                         itNames = nameMap.find(seq.getName());
912                                         
913                                         if (itNames == nameMap.end()) {
914                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
915                                         }else{
916                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
917                                                         seq.printSequence(rout);
918                                                 }else{ //you are a abund name
919                                                         seq.printSequence(aout);
920                                                 }
921                                         }
922                                 }
923                         }
924                         in.close();
925                         if (rareNames.size() != 0)      { rout.close(); }
926                         if (abundNames.size() != 0) { aout.close(); }
927
928                 }else{ //parse names by abundance and group
929                         string fileroot =  outputDir + getRootName(getSimpleName(fastafile));
930                         ofstream* temp;
931                         ofstream* temp2;
932                         map<string, bool> wroteFile;
933                         map<string, ofstream*> filehandles;
934                         map<string, ofstream*>::iterator it3;
935
936                         for (int i=0; i<Groups.size(); i++) {
937                                 temp = new ofstream;
938                                 filehandles[Groups[i]+".rare"] = temp;
939                                 temp2 = new ofstream;
940                                 filehandles[Groups[i]+".abund"] = temp2;
941                                 
942                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
943                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
944                                 
945                                 wroteFile[Groups[i] + ".rare"] = false;
946                                 wroteFile[Groups[i] + ".abund"] = false;
947                         }
948                         
949                         //open input file
950                         ifstream in;
951                         openInputFile(fastafile, in);
952         
953                         while (!in.eof()) {
954                                 if (m->control_pressed) { break; }
955                 
956                                 Sequence seq(in); gobble(in);
957                                 
958                                 if (seq.getName() != "") { 
959                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
960                                         
961                                         if (itNames == nameMap.end()) {
962                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
963                                         }else{
964                                                 vector<string> names;
965                                                 splitAtComma(itNames->second, names);  //parses bin into individual sequence names
966                                 
967                                                 string rareAbund;
968                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
969                                                         rareAbund = ".rare";
970                                                 }else{ //you are a abund name
971                                                         rareAbund = ".abund";
972                                                 }
973                                 
974                                                 for (int i = 0; i < names.size(); i++) {
975                                 
976                                                         string group = groupMap->getGroup(seq.getName());
977                                         
978                                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
979                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
980                                                                 wroteFile[group+rareAbund] = true;
981                                                         }else if(group == "not found") {
982                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
983                                                         }
984                                                 }
985                                         }
986                                 }
987                         }
988                         in.close();
989                         
990                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
991                                 (*(filehandles[it3->first])).close();
992                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".fasta");  }
993                                 else { remove((fileroot + tag + it3->first + ".fasta").c_str()); }
994                                 delete it3->second;
995                         }
996                 }
997                                 
998                 return 0;
999
1000         }
1001         catch(exception& e) {
1002                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1003                 exit(1);
1004         }
1005 }
1006 /**********************************************************************************************************************/
1007