]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
changes while testing 1.11
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option)  {
14         try {
15                 abort = false;
16                 allLines = 1;
17                         
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string, string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                                                 
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("list");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
48                                 }
49                                 
50                                 it = parameters.find("group");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
56                                 }
57                                 
58                                 it = parameters.find("fasta");
59                                 //user has given a template file
60                                 if(it != parameters.end()){ 
61                                         path = hasPath(it->second);
62                                         //if the user has not given a path then, add inputdir. else leave path alone.
63                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
64                                 }
65                                 
66                                 it = parameters.find("name");
67                                 //user has given a template file
68                                 if(it != parameters.end()){ 
69                                         path = hasPath(it->second);
70                                         //if the user has not given a path then, add inputdir. else leave path alone.
71                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
72                                 }
73
74                         }
75
76                         
77                         //if the user changes the output directory command factory will send this info to us in the output parameter 
78                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
79
80                         //check for required parameters
81                         listfile = validParameter.validFile(parameters, "list", true);
82                         if (listfile == "not open") { abort = true; }
83                         else if (listfile == "not found") { listfile = ""; }
84                         else{ inputFile = listfile; }   
85                         
86                         namefile = validParameter.validFile(parameters, "name", true);
87                         if (namefile == "not open") { abort = true; }
88                         else if (namefile == "not found") { namefile = ""; }    
89                         else{ inputFile = namefile; }   
90                 
91                         fastafile = validParameter.validFile(parameters, "fasta", true);
92                         if (fastafile == "not open") { abort = true; }
93                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
94                         
95                         groupfile = validParameter.validFile(parameters, "group", true);
96                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
97                         else if (groupfile == "not found") { groupfile = ""; }
98                         else {  
99                                 groupMap = new GroupMap(groupfile);
100                                 
101                                 int error = groupMap->readMap();
102                                 if (error == 1) { abort = true; }
103         
104                         }
105                         
106                         groups = validParameter.validFile(parameters, "groups", false);         
107                         if (groups == "not found") { groups = ""; }
108                         else if (groups == "all") { 
109                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
110                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
111                         }else { 
112                                 splitAtDash(groups, Groups);
113                         }
114                         
115                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
116                         
117                         //do you have all files needed
118                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
119                         
120                         //check for optional parameter and set defaults
121                         // ...at some point should added some additional type checking...
122                         label = validParameter.validFile(parameters, "label", false);                   
123                         if (label == "not found") { label = "";  allLines = 1; }
124                         else { 
125                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
126                                 else { allLines = 1;  }
127                         }
128                         
129                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
130                         accnos = isTrue(temp); 
131                         
132                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
133                         convert(temp, cutoff); 
134
135                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
136
137                 }
138
139         }
140         catch(exception& e) {
141                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
142                 exit(1);
143         }
144 }
145 //**********************************************************************************************************************
146 void SplitAbundCommand::help(){
147         try {
148                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
149                 m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label, groups and accnos.\n");
150                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
156                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
157                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
158                 m->mothurOut("The split.abund command should be used in the following format: split.abund(list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159                 m->mothurOut("Example: split.abundt(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
161
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "SplitAbundCommand", "help");
165                 exit(1);
166         }
167 }
168 //**********************************************************************************************************************
169 SplitAbundCommand::~SplitAbundCommand(){ 
170         if (groupfile != "") {  delete groupMap;  } 
171 }
172 //**********************************************************************************************************************
173 int SplitAbundCommand::execute(){
174         try {
175         
176                 if (abort == true) {    return 0;       }
177                 
178                 if (listfile != "") { //you are using a listfile to determine abundance
179                 
180                         //remove old files so you can append later....
181                         string fileroot = outputDir + getRootName(getSimpleName(listfile));
182                         if (Groups.size() == 0) {
183                                 remove((fileroot + "rare.list").c_str());
184                                 remove((fileroot + "abund.list").c_str());
185                                 
186                                 wroteListFile["rare"] = false;
187                                 wroteListFile["abund"] = false;
188                         }else{
189                                 for (int i=0; i<Groups.size(); i++) {
190                                         remove((fileroot + Groups[i] + ".rare.list").c_str());
191                                         remove((fileroot + Groups[i] + ".abund.list").c_str());
192                         
193                                         wroteListFile[(Groups[i] + ".rare")] = false;
194                                         wroteListFile[(Groups[i] + ".abund")] = false;
195                                 }
196                         }
197                         
198                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
199                         set<string> processedLabels;
200                         set<string> userLabels = labels;        
201                         
202                         input = new InputData(listfile, "list");
203                         list = input->getListVector();
204                         string lastLabel = list->getLabel();
205                         
206                         //do you have a namefile or do we need to similate one?
207                         if (namefile != "") {  readNamesFile();         }
208                         else                            { createNameMap(list);  }
209                         
210                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
211                         
212                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
213                         
214                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
215                                 
216                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
217                                                 
218                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
219                                                 splitList(list);
220                                                                                         
221                                                 processedLabels.insert(list->getLabel());
222                                                 userLabels.erase(list->getLabel());
223                                 }
224                                 
225                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
226                                                 string saveLabel = list->getLabel();
227                                                 
228                                                 delete list;
229                                                 list = input->getListVector(lastLabel); //get new list vector to process
230                                                 
231                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
232                                                 splitList(list);
233                                                 
234                                                 processedLabels.insert(list->getLabel());
235                                                 userLabels.erase(list->getLabel());
236                                                 
237                                                 //restore real lastlabel to save below
238                                                 list->setLabel(saveLabel);
239                                 }
240                                 
241                         
242                                 lastLabel = list->getLabel();
243                                         
244                                 delete list;
245                                 list = input->getListVector(); //get new list vector to process
246                         }
247                         
248                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
249                         
250                         //output error messages about any remaining user labels
251                         set<string>::iterator it;
252                         bool needToRun = false;
253                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
254                                 m->mothurOut("Your file does not include the label " + *it); 
255                                 if (processedLabels.count(lastLabel) != 1) {
256                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
257                                         needToRun = true;
258                                 }else {
259                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
260                                 }
261
262                         }
263                         
264                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
265                         
266                         //run last label if you need to
267                         if (needToRun == true)  {
268                                 if (list != NULL) {     delete list;    }
269                                 list = input->getListVector(lastLabel); //get new list vector to process
270                                 
271                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
272                                 splitList(list);                
273                                 
274                                 delete list;
275                         }
276                         
277                         delete input;
278                         
279                         for (map<string, bool>::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) {
280                                 string filename = fileroot + itBool->first;
281                                 if ((itBool->first == "rare") || (itBool->first == "abund")) {
282                                         filename = fileroot + itBool->first + ".list";
283                                 }
284                                 if (itBool->second) { //we wrote to this file
285                                         outputNames.push_back(filename);
286                                 }else{
287                                         remove(filename.c_str());
288                                 }
289                         }
290                         
291                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
292
293                                                                         
294                 }else { //you are using the namefile to determine abundance
295
296                         splitNames(); 
297                         writeNames();
298                         
299                         string tag = "";
300                         if (groupfile != "")                            {  parseGroup(tag);             }
301                         if (accnos)                                                     {  writeAccnos(tag);    }
302                         if (fastafile != "")                            {  parseFasta(tag);             }
303                 }
304
305                 m->mothurOutEndLine();
306                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
307                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
308                 m->mothurOutEndLine();
309                 
310                 return 0;
311         }
312         catch(exception& e) {
313                 m->errorOut(e, "SplitAbundCommand", "execute");
314                 exit(1);
315         }
316 }
317 /**********************************************************************************************************************/
318 int SplitAbundCommand::splitList(ListVector* thisList) {
319         try {
320                 rareNames.clear();
321                 abundNames.clear();
322                 
323                 //get rareNames and abundNames
324                 for (int i = 0; i < thisList->getNumBins(); i++) {
325                         if (m->control_pressed) { return 0; }
326                         
327                         string bin = thisList->get(i);
328                                                 
329                         vector<string> names;
330                         splitAtComma(bin, names);  //parses bin into individual sequence names
331                         int size = names.size();
332                                 
333                         if (size <= cutoff) {
334                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
335                         }else{
336                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
337                         }
338                 }//end for
339
340                 writeList(thisList);
341                 
342                 string tag = thisList->getLabel() + ".";
343                 if (groupfile != "")                            {  parseGroup(tag);             }
344                 if (accnos)                                                     {  writeAccnos(tag);    }
345                 if (fastafile != "")                            {  parseFasta(tag);             }
346                 
347                 return 0;
348
349         }
350         catch(exception& e) {
351                 m->errorOut(e, "SplitAbundCommand", "splitList");
352                 exit(1);
353         }
354 }
355 /**********************************************************************************************************************/
356 int SplitAbundCommand::writeList(ListVector* thisList) { 
357         try {
358                 
359                 map<string, ofstream*> filehandles;
360                 
361                 if (Groups.size() == 0) {
362                         SAbundVector* sabund = new SAbundVector();
363                         *sabund = thisList->getSAbundVector();
364                 
365                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
366                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
367                         int numRareBins = 0;
368                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
369                                 if (i > cutoff) { break; }
370                                 numRareBins += sabund->get(i);
371                         }
372                         int numAbundBins = thisList->getNumBins() - numRareBins;
373                         delete sabund;
374
375                         ofstream aout;
376                         ofstream rout;
377                         
378                         if (rareNames.size() != 0) {
379                                 string rare = outputDir + getRootName(getSimpleName(listfile))  + "rare.list";
380                                 wroteListFile["rare"] = true;
381                                 openOutputFileAppend(rare, rout);
382                                 rout << thisList->getLabel() << '\t' << numRareBins << '\t';
383                         }
384                         
385                         if (abundNames.size() != 0) {
386                                 string abund = outputDir + getRootName(getSimpleName(listfile))  + "abund.list";
387                                 wroteListFile["abund"] = true;
388                                 openOutputFileAppend(abund, aout);
389                                 aout << thisList->getLabel() << '\t' << numAbundBins << '\t';
390                         }
391
392                         for (int i = 0; i < thisList->getNumBins(); i++) {
393                                 if (m->control_pressed) { break; }
394                         
395                                 string bin = list->get(i); 
396                         
397                                 int size = getNumNames(bin);
398                         
399                                 if (size <= cutoff) {  rout << bin << '\t';  }
400                                 else                            {  aout << bin << '\t'; }
401                         }
402                         
403                         if (rareNames.size() != 0)      { rout << endl; rout.close(); }
404                         if (abundNames.size() != 0) { aout << endl; aout.close(); }
405
406                 }else{ //parse names by abundance and group
407                         string fileroot =  outputDir + getRootName(getSimpleName(listfile));
408                         ofstream* temp;
409                         ofstream* temp2;
410                         map<string, bool> wroteFile;
411                         map<string, ofstream*> filehandles;
412                         map<string, ofstream*>::iterator it3;
413
414                         for (int i=0; i<Groups.size(); i++) {
415                                 temp = new ofstream;
416                                 filehandles[Groups[i]+".rare"] = temp;
417                                 temp2 = new ofstream;
418                                 filehandles[Groups[i]+".abund"] = temp2;
419                                 
420                                 openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
421                                 openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
422                         }
423                         
424                         map<string, string> groupVector;
425                         map<string, string>::iterator itGroup;
426                         map<string, int> groupNumBins;
427                 
428                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
429                                 groupNumBins[it3->first] = 0;
430                                 groupVector[it3->first] = "";
431                         }
432                 
433                         for (int i = 0; i < thisList->getNumBins(); i++) {
434                                 if (m->control_pressed) { break; }
435                         
436                                 map<string, string> groupBins;
437                                 string bin = list->get(i); 
438                         
439                                 vector<string> names;
440                                 splitAtComma(bin, names);  //parses bin into individual sequence names
441                         
442                                 //parse bin into list of sequences in each group
443                                 for (int j = 0; j < names.size(); j++) {
444                                         string rareAbund;
445                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
446                                                 rareAbund = ".rare";
447                                         }else{ //you are a abund name
448                                                 rareAbund = ".abund";
449                                         }
450                                         
451                                         string group = groupMap->getGroup(names[j]);
452                                 
453                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
454                                                 itGroup = groupBins.find(group+rareAbund);
455                                                 if(itGroup == groupBins.end()) {
456                                                         groupBins[group+rareAbund] = names[j];  //add first name
457                                                         groupNumBins[group+rareAbund]++;
458                                                 }else{ //add another name
459                                                         groupBins[group+rareAbund] +=  "," + names[j];
460                                                 }
461                                         }else if(group == "not found") {
462                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
463                                         }
464                                 }
465                         
466                         
467                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
468                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
469                                 }
470                         }
471                         
472                         //end list vector
473                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
474                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
475                                 wroteListFile[it3->first] = true;
476                                 (*(filehandles[it3->first])).close();
477                                 delete it3->second;
478                         }
479                 }
480                 
481                 return 0;
482
483         }
484         catch(exception& e) {
485                 m->errorOut(e, "SplitAbundCommand", "writeList");
486                 exit(1);
487         }
488 }
489 /**********************************************************************************************************************/
490 int SplitAbundCommand::splitNames() { //namefile
491         try {
492                 
493                 rareNames.clear();
494                 abundNames.clear();     
495                         
496                 //open input file
497                 ifstream in;
498                 openInputFile(namefile, in);
499                 
500                 while (!in.eof()) {
501                         if (m->control_pressed) { break; }
502                         
503                         string firstCol, secondCol;
504                         in >> firstCol >> secondCol; gobble(in);
505                         
506                         nameMap[firstCol] = secondCol;
507                         
508                         int size = getNumNames(secondCol);
509                                 
510                         if (size <= cutoff) {
511                                 rareNames.insert(firstCol); 
512                         }else{
513                                 abundNames.insert(firstCol); 
514                         }
515                 }
516                 in.close();
517                                 
518                 return 0;
519
520         }
521         catch(exception& e) {
522                 m->errorOut(e, "SplitAbundCommand", "splitNames");
523                 exit(1);
524         }
525 }
526 /**********************************************************************************************************************/
527 int SplitAbundCommand::readNamesFile() { 
528         try {
529                 //open input file
530                 ifstream in;
531                 openInputFile(namefile, in);
532                 
533                 while (!in.eof()) {
534                         if (m->control_pressed) { break; }
535                         
536                         string firstCol, secondCol;
537                         in >> firstCol >> secondCol; gobble(in);
538                         
539                         nameMap[firstCol] = secondCol;
540                 }
541                 in.close();
542                                 
543                 return 0;
544
545         }
546         catch(exception& e) {
547                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
548                 exit(1);
549         }
550 }
551 /**********************************************************************************************************************/
552 int SplitAbundCommand::createNameMap(ListVector* thisList) {
553         try {
554                 
555                 if (thisList != NULL) {
556                         for (int i = 0; i < thisList->getNumBins(); i++) {
557                                 if (m->control_pressed) { return 0; }
558                                 
559                                 string bin = thisList->get(i);
560                                                         
561                                 vector<string> names;
562                                 splitAtComma(bin, names);  //parses bin into individual sequence names
563                                 
564                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
565                         }//end for
566                 }
567                 
568                 return 0;
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
572                 exit(1);
573         }
574 }
575 /**********************************************************************************************************************/
576 int SplitAbundCommand::writeNames() { //namefile
577         try {
578                 
579                 map<string, ofstream*> filehandles;
580
581                 if (Groups.size() == 0) {
582                         ofstream aout;
583                         ofstream rout;
584                         
585                         if (rareNames.size() != 0) {
586                                 string rare = outputDir + getRootName(getSimpleName(namefile))  + "rare.names";
587                                 openOutputFile(rare, rout);
588                                 outputNames.push_back(rare);
589                                 
590                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
591                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
592                                 }
593                                 rout.close();
594                         }
595                         
596                         if (abundNames.size() != 0) {
597                                 string abund = outputDir + getRootName(getSimpleName(namefile))  + "abund.names";
598                                 openOutputFile(abund, aout);
599                                 outputNames.push_back(abund);
600                                 
601                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
602                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
603                                 }
604                                 aout.close();
605                         }
606
607                 }else{ //parse names by abundance and group
608                         string fileroot =  outputDir + getRootName(getSimpleName(namefile));
609                         ofstream* temp;
610                         ofstream* temp2;
611                         map<string, bool> wroteFile;
612                         map<string, ofstream*> filehandles;
613                         map<string, ofstream*>::iterator it3;
614
615                         for (int i=0; i<Groups.size(); i++) {
616                                 temp = new ofstream;
617                                 filehandles[Groups[i]+".rare"] = temp;
618                                 temp2 = new ofstream;
619                                 filehandles[Groups[i]+".abund"] = temp2;
620                                 
621                                 openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
622                                 openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
623                                 
624                                 wroteFile[Groups[i] + ".rare"] = false;
625                                 wroteFile[Groups[i] + ".abund"] = false;
626                         }
627                         
628                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
629                                 vector<string> names;
630                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
631                                 
632                                 string rareAbund;
633                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
634                                                 rareAbund = ".rare";
635                                 }else{ //you are a abund name
636                                                 rareAbund = ".abund";
637                                 }
638                                 
639                                 map<string, string> outputStrings;
640                                 map<string, string>::iterator itout;
641                                 for (int i = 0; i < names.size(); i++) {
642                                         
643                                         string group = groupMap->getGroup(names[i]);
644                                         
645                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
646                                                 itout = outputStrings.find(group+rareAbund);
647                                                 if (itout == outputStrings.end()) {  
648                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
649                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
650                                         }else if(group == "not found") {
651                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
652                                         }
653                                 }
654                                 
655                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { 
656                                         *(filehandles[itout->first]) << itout->second << endl;
657                                         wroteFile[itout->first] = true;
658                                 }
659                         }
660                         
661                         
662                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
663                                 (*(filehandles[it3->first])).close();
664                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + it3->first + ".names");  }
665                                 else { remove((it3->first).c_str()); }
666                                 delete it3->second;
667                         }
668                 }
669                                 
670                 return 0;
671
672         }
673         catch(exception& e) {
674                 m->errorOut(e, "SplitAbundCommand", "writeNames");
675                 exit(1);
676         }
677 }
678 /**********************************************************************************************************************/
679 //just write the unique names - if a namesfile is given
680 int SplitAbundCommand::writeAccnos(string tag) { 
681         try {
682                 
683                 map<string, ofstream*> filehandles;
684                 
685                 if (Groups.size() == 0) {
686                         ofstream aout;
687                         ofstream rout;
688                         
689                         if (rareNames.size() != 0) {
690                                 string rare = outputDir + getRootName(getSimpleName(inputFile))  + tag + "rare.accnos";
691                                 openOutputFile(rare, rout);
692                                 outputNames.push_back(rare);
693                                 
694                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
695                                         rout << (*itRare) << endl;
696                                 }
697                                 rout.close();
698                         }
699                         
700                         if (abundNames.size() != 0) {
701                                 string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag  + "abund.accnos";
702                                 openOutputFile(abund, aout);
703                                 outputNames.push_back(abund);
704                                 
705                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
706                                         aout << (*itAbund) << endl;
707                                 }
708                                 aout.close();
709                         }
710                 }else{ //parse names by abundance and group
711                         string fileroot =  outputDir + getRootName(getSimpleName(inputFile));
712                         ofstream* temp;
713                         ofstream* temp2;
714                         map<string, bool> wroteFile;
715                         map<string, ofstream*> filehandles;
716                         map<string, ofstream*>::iterator it3;
717                         
718                         for (int i=0; i<Groups.size(); i++) {
719                                 temp = new ofstream;
720                                 filehandles[Groups[i]+".rare"] = temp;
721                                 temp2 = new ofstream;
722                                 filehandles[Groups[i]+".abund"] = temp2;
723                                 
724                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
725                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
726                                 
727                                 wroteFile[Groups[i] + ".rare"] = false;
728                                 wroteFile[Groups[i] + ".abund"] = false;
729                         }
730                         
731                         //write rare
732                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
733                                         string group = groupMap->getGroup(*itRare);
734                                         
735                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
736                                                 *(filehandles[group+".rare"]) << *itRare << endl;
737                                                 wroteFile[group+".rare"] = true;
738                                         }
739                         }
740                                 
741                         //write abund   
742                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
743                                         string group = groupMap->getGroup(*itAbund);
744                                         
745                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
746                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
747                                                 wroteFile[group+".abund"] = true;
748                                         }
749                         }
750                         
751                         //close files
752                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
753                                 (*(filehandles[it3->first])).close();
754                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".accnos");  }
755                                 else { remove((fileroot + tag + it3->first + ".accnos").c_str()); }
756                                 delete it3->second;
757                         }
758                 }
759                                 
760                 return 0;
761
762         }
763         catch(exception& e) {
764                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
765                 exit(1);
766         }
767 }
768 /**********************************************************************************************************************/
769 int SplitAbundCommand::parseGroup(string tag) { //namefile
770         try {
771                 
772                 map<string, ofstream*> filehandles;
773         
774                 if (Groups.size() == 0) {
775                         ofstream aout;
776                         ofstream rout;
777                         
778                         if (rareNames.size() != 0) {
779                                 string rare = outputDir + getRootName(getSimpleName(groupfile))  + tag + "rare.groups";
780                                 openOutputFile(rare, rout);
781                                 outputNames.push_back(rare);
782                         }
783                         
784                         if (abundNames.size() != 0) {
785                                 string abund = outputDir + getRootName(getSimpleName(groupfile))  + tag + "abund.groups";
786                                 openOutputFile(abund, aout);
787                                 outputNames.push_back(abund);
788                         }
789
790                                 
791                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
792                                 vector<string> names;
793                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
794                                 
795                                 for (int i = 0; i < names.size(); i++) {
796                                 
797                                         string group = groupMap->getGroup(names[i]);
798                                 
799                                         if (group == "not found") { 
800                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
801                                         }else {
802                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
803                                                         rout << names[i] << '\t' << group << endl;
804                                                 }else{ //you are a abund name
805                                                         rout << names[i] << '\t' << group << endl;
806                                                 }
807                                         }
808                                 }
809                         }
810                         
811                         if (rareNames.size() != 0)      { rout.close(); }
812                         if (abundNames.size() != 0) { aout.close(); }
813
814                 }else{ //parse names by abundance and group
815                         string fileroot =  outputDir + getRootName(getSimpleName(groupfile));
816                         ofstream* temp;
817                         ofstream* temp2;
818                         map<string, bool> wroteFile;
819                         map<string, ofstream*> filehandles;
820                         map<string, ofstream*>::iterator it3;
821
822                         for (int i=0; i<Groups.size(); i++) {
823                                 temp = new ofstream;
824                                 filehandles[Groups[i]+".rare"] = temp;
825                                 temp2 = new ofstream;
826                                 filehandles[Groups[i]+".abund"] = temp2;
827                                 
828                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
829                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
830                                 
831                                 wroteFile[Groups[i] + ".rare"] = false;
832                                 wroteFile[Groups[i] + ".abund"] = false;
833                         }
834                         
835                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
836                                 vector<string> names;
837                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
838                                 
839                                 string rareAbund;
840                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
841                                         rareAbund = ".rare";
842                                 }else{ //you are a abund name
843                                         rareAbund = ".abund";
844                                 }
845                                 
846                                 for (int i = 0; i < names.size(); i++) {
847                                 
848                                         string group = groupMap->getGroup(names[i]);
849                                                                         
850                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
851                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
852                                                 wroteFile[group+rareAbund] = true;
853                                         }
854                                 }
855                         }
856                         
857                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
858                                 (*(filehandles[it3->first])).close();
859                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".groups");  }
860                                 else { remove((fileroot + tag + it3->first + ".groups").c_str()); }
861                                 delete it3->second;
862                         }
863                 }
864                                 
865                 return 0;
866
867         }
868         catch(exception& e) {
869                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
870                 exit(1);
871         }
872 }
873 /**********************************************************************************************************************/
874 int SplitAbundCommand::parseFasta(string tag) { //namefile
875         try {
876                 
877                 map<string, ofstream*> filehandles;
878                 
879                 if (Groups.size() == 0) {
880                         ofstream aout;
881                         ofstream rout;
882                         
883                         if (rareNames.size() != 0) {
884                                 string rare = outputDir + getRootName(getSimpleName(fastafile))  + tag + "rare.fasta";
885                                 openOutputFile(rare, rout);
886                                 outputNames.push_back(rare);
887                         }
888                         
889                         if (abundNames.size() != 0) {
890                                 string abund = outputDir + getRootName(getSimpleName(fastafile))  + tag + "abund.fasta";
891                                 openOutputFile(abund, aout);
892                                 outputNames.push_back(abund);
893                         }
894
895                                 
896                         //open input file
897                         ifstream in;
898                         openInputFile(fastafile, in);
899         
900                         while (!in.eof()) {
901                                 if (m->control_pressed) { break; }
902                 
903                                 Sequence seq(in); gobble(in);
904                                 
905                                 if (seq.getName() != "") { 
906                                         
907                                         map<string, string>::iterator itNames;
908                                         
909                                         itNames = nameMap.find(seq.getName());
910                                         
911                                         if (itNames == nameMap.end()) {
912                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
913                                         }else{
914                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
915                                                         seq.printSequence(rout);
916                                                 }else{ //you are a abund name
917                                                         seq.printSequence(aout);
918                                                 }
919                                         }
920                                 }
921                         }
922                         in.close();
923                         if (rareNames.size() != 0)      { rout.close(); }
924                         if (abundNames.size() != 0) { aout.close(); }
925
926                 }else{ //parse names by abundance and group
927                         string fileroot =  outputDir + getRootName(getSimpleName(fastafile));
928                         ofstream* temp;
929                         ofstream* temp2;
930                         map<string, bool> wroteFile;
931                         map<string, ofstream*> filehandles;
932                         map<string, ofstream*>::iterator it3;
933
934                         for (int i=0; i<Groups.size(); i++) {
935                                 temp = new ofstream;
936                                 filehandles[Groups[i]+".rare"] = temp;
937                                 temp2 = new ofstream;
938                                 filehandles[Groups[i]+".abund"] = temp2;
939                                 
940                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
941                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
942                                 
943                                 wroteFile[Groups[i] + ".rare"] = false;
944                                 wroteFile[Groups[i] + ".abund"] = false;
945                         }
946                         
947                         //open input file
948                         ifstream in;
949                         openInputFile(fastafile, in);
950         
951                         while (!in.eof()) {
952                                 if (m->control_pressed) { break; }
953                 
954                                 Sequence seq(in); gobble(in);
955                                 
956                                 if (seq.getName() != "") { 
957                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
958                                         
959                                         if (itNames == nameMap.end()) {
960                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
961                                         }else{
962                                                 vector<string> names;
963                                                 splitAtComma(itNames->second, names);  //parses bin into individual sequence names
964                                 
965                                                 string rareAbund;
966                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
967                                                         rareAbund = ".rare";
968                                                 }else{ //you are a abund name
969                                                         rareAbund = ".abund";
970                                                 }
971                                 
972                                                 for (int i = 0; i < names.size(); i++) {
973                                 
974                                                         string group = groupMap->getGroup(seq.getName());
975                                         
976                                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
977                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
978                                                                 wroteFile[group+rareAbund] = true;
979                                                         }else if(group == "not found") {
980                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
981                                                         }
982                                                 }
983                                         }
984                                 }
985                         }
986                         in.close();
987                         
988                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
989                                 (*(filehandles[it3->first])).close();
990                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".fasta");  }
991                                 else { remove((fileroot + tag + it3->first + ".fasta").c_str()); }
992                                 delete it3->second;
993                         }
994                 }
995                                 
996                 return 0;
997
998         }
999         catch(exception& e) {
1000                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1001                 exit(1);
1002         }
1003 }
1004 /**********************************************************************************************************************/
1005