]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
changes to blastdb to make filenames given to blast unique, changes to split.abund...
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option)  {
14         try {
15                 abort = false;
16                 allLines = 1;
17                         
18                 //allow user to run help
19                 if(option == "help") { help(); abort = true; }
20                 
21                 else {
22                         //valid paramters for this command
23                         string Array[] =  {"name","group","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //"list",
24                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
25                         
26                         OptionParser parser(option);
27                         map<string, string> parameters = parser.getParameters();
28                         
29                         ValidParameters validParameter;
30                         map<string, string>::iterator it;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                                                 
37                         //if the user changes the input directory command factory will send this info to us in the output parameter 
38                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
39                         if (inputDir == "not found"){   inputDir = "";          }
40                         else {
41                                 string path;
42                                 it = parameters.find("list");
43                                 //user has given a template file
44                                 if(it != parameters.end()){ 
45                                         path = hasPath(it->second);
46                                         //if the user has not given a path then, add inputdir. else leave path alone.
47                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
48                                 }
49                                 
50                                 it = parameters.find("group");
51                                 //user has given a template file
52                                 if(it != parameters.end()){ 
53                                         path = hasPath(it->second);
54                                         //if the user has not given a path then, add inputdir. else leave path alone.
55                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
56                                 }
57                                 
58                                 it = parameters.find("fasta");
59                                 //user has given a template file
60                                 if(it != parameters.end()){ 
61                                         path = hasPath(it->second);
62                                         //if the user has not given a path then, add inputdir. else leave path alone.
63                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
64                                 }
65                                 
66                                 it = parameters.find("name");
67                                 //user has given a template file
68                                 if(it != parameters.end()){ 
69                                         path = hasPath(it->second);
70                                         //if the user has not given a path then, add inputdir. else leave path alone.
71                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
72                                 }
73
74                         }
75
76                         
77                         //if the user changes the output directory command factory will send this info to us in the output parameter 
78                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
79
80                         //check for required parameters
81                         listfile = validParameter.validFile(parameters, "list", true);
82                         if (listfile == "not open") { abort = true; }
83                         else if (listfile == "not found") { listfile = ""; }
84                         else{ inputFile = listfile; }   
85                         
86                         namefile = validParameter.validFile(parameters, "name", true);
87                         if (namefile == "not open") { abort = true; }
88                         else if (namefile == "not found") { namefile = ""; }    
89                         else{ inputFile = namefile; }   
90                 
91                         fastafile = validParameter.validFile(parameters, "fasta", true);
92                         if (fastafile == "not open") { abort = true; }
93                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
94                         
95                         groupfile = validParameter.validFile(parameters, "group", true);
96                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
97                         else if (groupfile == "not found") { groupfile = ""; }
98                         else {  
99                                 groupMap = new GroupMap(groupfile);
100                                 
101                                 int error = groupMap->readMap();
102                                 if (error == 1) { abort = true; }
103         
104                         }
105                         
106                         groups = validParameter.validFile(parameters, "groups", false);         
107                         if (groups == "not found") { groups = ""; }
108                         else if (groups == "all") { 
109                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
110                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
111                         }else { 
112                                 splitAtDash(groups, Groups);
113                         }
114                         
115                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
116                         
117                         //do you have all files needed
118                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
119                         
120                         //check for optional parameter and set defaults
121                         // ...at some point should added some additional type checking...
122                         label = validParameter.validFile(parameters, "label", false);                   
123                         if (label == "not found") { label = "";  allLines = 1; }
124                         else { 
125                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
126                                 else { allLines = 1;  }
127                         }
128                         
129                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
130                         accnos = isTrue(temp); 
131                         
132                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
133                         convert(temp, cutoff); 
134
135                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
136
137                 }
138
139         }
140         catch(exception& e) {
141                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
142                 exit(1);
143         }
144 }
145 //**********************************************************************************************************************
146 void SplitAbundCommand::help(){
147         try {
148                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
149                 m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label, groups and accnos.\n");
150                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
156                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
157                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
158                 m->mothurOut("The split.abund command should be used in the following format: split.abund(list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159                 m->mothurOut("Example: split.abundt(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
161
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "SplitAbundCommand", "help");
165                 exit(1);
166         }
167 }
168 //**********************************************************************************************************************
169 SplitAbundCommand::~SplitAbundCommand(){ 
170         if (groupfile != "") {  delete groupMap;  } 
171 }
172 //**********************************************************************************************************************
173 int SplitAbundCommand::execute(){
174         try {
175         
176                 if (abort == true) {    return 0;       }
177                 
178                 if (listfile != "") { //you are using a listfile to determine abundance
179                 
180                         //remove old files so you can append later....
181                         string fileroot = outputDir + getRootName(getSimpleName(listfile));
182                         if (Groups.size() == 0) {
183                                 remove((fileroot + "rare.list").c_str());
184                                 remove((fileroot + "abund.list").c_str());
185                                 
186                                 wroteListFile["rare"] = false;
187                                 wroteListFile["abund"] = false;
188                         }else{
189                                 for (int i=0; i<Groups.size(); i++) {
190                                         remove((fileroot + Groups[i] + ".rare.list").c_str());
191                                         remove((fileroot + Groups[i] + ".abund.list").c_str());
192                         
193                                         wroteListFile[(Groups[i] + ".rare")] = false;
194                                         wroteListFile[(Groups[i] + ".abund")] = false;
195                                 }
196                         }
197                         
198                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
199                         set<string> processedLabels;
200                         set<string> userLabels = labels;        
201                         
202                         input = new InputData(listfile, "list");
203                         list = input->getListVector();
204                         string lastLabel = list->getLabel();
205                         
206                         //do you have a namefile or do we need to similate one?
207                         if (namefile != "") {  readNamesFile();         }
208                         else                            { createNameMap(list);  }
209                         
210                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
211                         
212                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
213                         
214                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
215                                 
216                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
217                                                 
218                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
219                                                 splitList(list);
220                                                                                         
221                                                 processedLabels.insert(list->getLabel());
222                                                 userLabels.erase(list->getLabel());
223                                 }
224                                 
225                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
226                                                 string saveLabel = list->getLabel();
227                                                 
228                                                 delete list;
229                                                 list = input->getListVector(lastLabel); //get new list vector to process
230                                                 
231                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
232                                                 splitList(list);
233                                                 
234                                                 processedLabels.insert(list->getLabel());
235                                                 userLabels.erase(list->getLabel());
236                                                 
237                                                 //restore real lastlabel to save below
238                                                 list->setLabel(saveLabel);
239                                 }
240                                 
241                         
242                                 lastLabel = list->getLabel();
243                                         
244                                 delete list;
245                                 list = input->getListVector(); //get new list vector to process
246                         }
247                         
248                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
249                         
250                         //output error messages about any remaining user labels
251                         set<string>::iterator it;
252                         bool needToRun = false;
253                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
254                                 m->mothurOut("Your file does not include the label " + *it); 
255                                 if (processedLabels.count(lastLabel) != 1) {
256                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
257                                         needToRun = true;
258                                 }else {
259                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
260                                 }
261
262                         }
263                         
264                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
265                         
266                         //run last label if you need to
267                         if (needToRun == true)  {
268                                 if (list != NULL) {     delete list;    }
269                                 list = input->getListVector(lastLabel); //get new list vector to process
270                                 
271                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
272                                 splitList(list);                
273                                 
274                                 delete list;
275                         }
276                         
277                         delete input;
278                         
279                         for (map<string, bool>::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) {
280                                 string filename = fileroot + itBool->first;
281                                 if (itBool->second) { //we wrote to this file
282                                         outputNames.push_back(filename);
283                                 }else{
284                                         remove(filename.c_str());
285                                 }
286                         }
287                         
288                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
289
290                                                                         
291                 }else { //you are using the namefile to determine abundance
292
293                         splitNames(); 
294                         writeNames();
295                         
296                         string tag = "";
297                         if (groupfile != "")                            {  parseGroup(tag);             }
298                         if (accnos)                                                     {  writeAccnos(tag);    }
299                         if (fastafile != "")                            {  parseFasta(tag);             }
300                 }
301
302                 m->mothurOutEndLine();
303                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
304                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
305                 m->mothurOutEndLine();
306                 
307                 return 0;
308         }
309         catch(exception& e) {
310                 m->errorOut(e, "SplitAbundCommand", "execute");
311                 exit(1);
312         }
313 }
314 /**********************************************************************************************************************/
315 int SplitAbundCommand::splitList(ListVector* thisList) {
316         try {
317                 rareNames.clear();
318                 abundNames.clear();
319                 
320                 //get rareNames and abundNames
321                 for (int i = 0; i < thisList->getNumBins(); i++) {
322                         if (m->control_pressed) { return 0; }
323                         
324                         string bin = thisList->get(i);
325                                                 
326                         vector<string> names;
327                         splitAtComma(bin, names);  //parses bin into individual sequence names
328                         int size = names.size();
329                                 
330                         if (size <= cutoff) {
331                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
332                         }else{
333                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
334                         }
335                 }//end for
336
337                 writeList(thisList);
338                 
339                 string tag = thisList->getLabel() + ".";
340                 if (groupfile != "")                            {  parseGroup(tag);             }
341                 if (accnos)                                                     {  writeAccnos(tag);    }
342                 if (fastafile != "")                            {  parseFasta(tag);             }
343                 
344                 return 0;
345
346         }
347         catch(exception& e) {
348                 m->errorOut(e, "SplitAbundCommand", "splitList");
349                 exit(1);
350         }
351 }
352 /**********************************************************************************************************************/
353 int SplitAbundCommand::writeList(ListVector* thisList) { 
354         try {
355                 
356                 map<string, ofstream*> filehandles;
357                 
358                 if (Groups.size() == 0) {
359                         SAbundVector* sabund = new SAbundVector();
360                         *sabund = thisList->getSAbundVector();
361                 
362                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
363                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
364                         int numRareBins = 0;
365                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
366                                 if (i > cutoff) { break; }
367                                 numRareBins += sabund->get(i);
368                         }
369                         int numAbundBins = thisList->getNumBins() - numRareBins;
370                         delete sabund;
371
372                         ofstream aout;
373                         ofstream rout;
374                         
375                         if (rareNames.size() != 0) {
376                                 string rare = outputDir + getRootName(getSimpleName(listfile))  + ".rare.list";
377                                 wroteListFile["rare"] = true;
378                                 openOutputFileAppend(rare, rout);
379                                 rout << thisList->getLabel() << '\t' << numRareBins << '\t';
380                         }
381                         
382                         if (abundNames.size() != 0) {
383                                 string abund = outputDir + getRootName(getSimpleName(listfile))  + ".abund.list";
384                                 wroteListFile["abund"] = true;
385                                 openOutputFileAppend(abund, aout);
386                                 rout << thisList->getLabel() << '\t' << numAbundBins << '\t';
387                         }
388
389                         for (int i = 0; i < thisList->getNumBins(); i++) {
390                                 if (m->control_pressed) { break; }
391                         
392                                 string bin = list->get(i); 
393                         
394                                 int size = getNumNames(bin);
395                         
396                                 if (size <= cutoff) {  rout << bin << '\t';  }
397                                 else                            {  aout << bin << '\t'; }
398                         }
399                         
400                         if (rareNames.size() != 0)      { rout << endl; rout.close(); }
401                         if (abundNames.size() != 0) { aout << endl; aout.close(); }
402
403                 }else{ //parse names by abundance and group
404                         string fileroot =  outputDir + getRootName(getSimpleName(listfile));
405                         ofstream* temp;
406                         ofstream* temp2;
407                         map<string, bool> wroteFile;
408                         map<string, ofstream*> filehandles;
409                         map<string, ofstream*>::iterator it3;
410
411                         for (int i=0; i<Groups.size(); i++) {
412                                 temp = new ofstream;
413                                 filehandles[Groups[i]+".rare"] = temp;
414                                 temp2 = new ofstream;
415                                 filehandles[Groups[i]+".abund"] = temp2;
416                                 
417                                 openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
418                                 openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
419                         }
420                         
421                         map<string, string> groupVector;
422                         map<string, string>::iterator itGroup;
423                         map<string, int> groupNumBins;
424                 
425                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
426                                 groupNumBins[it3->first] = 0;
427                                 groupVector[it3->first] = "";
428                         }
429                 
430                         for (int i = 0; i < thisList->getNumBins(); i++) {
431                                 if (m->control_pressed) { break; }
432                         
433                                 map<string, string> groupBins;
434                                 string bin = list->get(i); 
435                         
436                                 vector<string> names;
437                                 splitAtComma(bin, names);  //parses bin into individual sequence names
438                         
439                                 //parse bin into list of sequences in each group
440                                 for (int j = 0; j < names.size(); j++) {
441                                         string rareAbund;
442                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
443                                                 rareAbund = ".rare";
444                                         }else{ //you are a abund name
445                                                 rareAbund = ".abund";
446                                         }
447                                         
448                                         string group = groupMap->getGroup(names[j]);
449                                 
450                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
451                                                 itGroup = groupBins.find(group+rareAbund);
452                                                 if(itGroup == groupBins.end()) {
453                                                         groupBins[group+rareAbund] = names[j];  //add first name
454                                                         groupNumBins[group+rareAbund]++;
455                                                 }else{ //add another name
456                                                         groupBins[group+rareAbund] +=  "," + names[j];
457                                                 }
458                                         }else if(group == "not found") {
459                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
460                                         }
461                                 }
462                         
463                         
464                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
465                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
466                                 }
467                         }
468                         
469                         //end list vector
470                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
471                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
472                                 wroteListFile[it3->first] = true;
473                                 (*(filehandles[it3->first])).close();
474                                 delete it3->second;
475                         }
476                 }
477                 
478                 return 0;
479
480         }
481         catch(exception& e) {
482                 m->errorOut(e, "SplitAbundCommand", "writeList");
483                 exit(1);
484         }
485 }
486 /**********************************************************************************************************************/
487 int SplitAbundCommand::splitNames() { //namefile
488         try {
489                 
490                 rareNames.clear();
491                 abundNames.clear();     
492                         
493                 //open input file
494                 ifstream in;
495                 openInputFile(namefile, in);
496                 
497                 while (!in.eof()) {
498                         if (m->control_pressed) { break; }
499                         
500                         string firstCol, secondCol;
501                         in >> firstCol >> secondCol; gobble(in);
502                         
503                         nameMap[firstCol] = secondCol;
504                         
505                         int size = getNumNames(secondCol);
506                                 
507                         if (size <= cutoff) {
508                                 rareNames.insert(firstCol); 
509                         }else{
510                                 abundNames.insert(firstCol); 
511                         }
512                 }
513                 in.close();
514                                 
515                 return 0;
516
517         }
518         catch(exception& e) {
519                 m->errorOut(e, "SplitAbundCommand", "splitNames");
520                 exit(1);
521         }
522 }
523 /**********************************************************************************************************************/
524 int SplitAbundCommand::readNamesFile() { 
525         try {
526                 //open input file
527                 ifstream in;
528                 openInputFile(namefile, in);
529                 
530                 while (!in.eof()) {
531                         if (m->control_pressed) { break; }
532                         
533                         string firstCol, secondCol;
534                         in >> firstCol >> secondCol; gobble(in);
535                         
536                         nameMap[firstCol] = secondCol;
537                 }
538                 in.close();
539                                 
540                 return 0;
541
542         }
543         catch(exception& e) {
544                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
545                 exit(1);
546         }
547 }
548 /**********************************************************************************************************************/
549 int SplitAbundCommand::createNameMap(ListVector* thisList) {
550         try {
551                 
552                 if (thisList != NULL) {
553                         for (int i = 0; i < thisList->getNumBins(); i++) {
554                                 if (m->control_pressed) { return 0; }
555                                 
556                                 string bin = thisList->get(i);
557                                                         
558                                 vector<string> names;
559                                 splitAtComma(bin, names);  //parses bin into individual sequence names
560                                 
561                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
562                         }//end for
563                 }
564                 
565                 return 0;
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
569                 exit(1);
570         }
571 }
572 /**********************************************************************************************************************/
573 int SplitAbundCommand::writeNames() { //namefile
574         try {
575                 
576                 map<string, ofstream*> filehandles;
577
578                 if (Groups.size() == 0) {
579                         ofstream aout;
580                         ofstream rout;
581                         
582                         if (rareNames.size() != 0) {
583                                 string rare = outputDir + getRootName(getSimpleName(namefile))  + "rare.names";
584                                 openOutputFile(rare, rout);
585                                 outputNames.push_back(rare);
586                                 
587                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
588                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
589                                 }
590                                 rout.close();
591                         }
592                         
593                         if (abundNames.size() != 0) {
594                                 string abund = outputDir + getRootName(getSimpleName(namefile))  + "abund.names";
595                                 openOutputFile(abund, aout);
596                                 outputNames.push_back(abund);
597                                 
598                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
599                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
600                                 }
601                                 aout.close();
602                         }
603
604                 }else{ //parse names by abundance and group
605                         string fileroot =  outputDir + getRootName(getSimpleName(namefile));
606                         ofstream* temp;
607                         ofstream* temp2;
608                         map<string, bool> wroteFile;
609                         map<string, ofstream*> filehandles;
610                         map<string, ofstream*>::iterator it3;
611
612                         for (int i=0; i<Groups.size(); i++) {
613                                 temp = new ofstream;
614                                 filehandles[Groups[i]+".rare"] = temp;
615                                 temp2 = new ofstream;
616                                 filehandles[Groups[i]+".abund"] = temp2;
617                                 
618                                 openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
619                                 openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
620                                 
621                                 wroteFile[Groups[i] + ".rare"] = false;
622                                 wroteFile[Groups[i] + ".abund"] = false;
623                         }
624                         
625                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
626                                 vector<string> names;
627                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
628                                 
629                                 string rareAbund;
630                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
631                                                 rareAbund = ".rare";
632                                 }else{ //you are a abund name
633                                                 rareAbund = ".abund";
634                                 }
635                                 
636                                 map<string, string> outputStrings;
637                                 map<string, string>::iterator itout;
638                                 for (int i = 0; i < names.size(); i++) {
639                                         
640                                         string group = groupMap->getGroup(names[i]);
641                                         
642                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
643                                                 itout = outputStrings.find(group+rareAbund);
644                                                 if (itout == outputStrings.end()) {  
645                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
646                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
647                                         }else if(group == "not found") {
648                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
649                                         }
650                                 }
651                                 
652                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { 
653                                         *(filehandles[itout->first]) << itout->second << endl;
654                                         wroteFile[itout->first] = true;
655                                 }
656                         }
657                         
658                         
659                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
660                                 (*(filehandles[it3->first])).close();
661                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + it3->first + ".names");  }
662                                 else { remove((it3->first).c_str()); }
663                                 delete it3->second;
664                         }
665                 }
666                                 
667                 return 0;
668
669         }
670         catch(exception& e) {
671                 m->errorOut(e, "SplitAbundCommand", "writeNames");
672                 exit(1);
673         }
674 }
675 /**********************************************************************************************************************/
676 //just write the unique names - if a namesfile is given
677 int SplitAbundCommand::writeAccnos(string tag) { 
678         try {
679                 
680                 map<string, ofstream*> filehandles;
681                 
682                 if (Groups.size() == 0) {
683                         ofstream aout;
684                         ofstream rout;
685                         
686                         if (rareNames.size() != 0) {
687                                 string rare = outputDir + getRootName(getSimpleName(inputFile))  + tag + "rare.accnos";
688                                 openOutputFile(rare, rout);
689                                 outputNames.push_back(rare);
690                                 
691                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
692                                         rout << (*itRare) << endl;
693                                 }
694                                 rout.close();
695                         }
696                         
697                         if (abundNames.size() != 0) {
698                                 string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag  + "abund.accnos";
699                                 openOutputFile(abund, aout);
700                                 outputNames.push_back(abund);
701                                 
702                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
703                                         aout << (*itAbund) << endl;
704                                 }
705                                 aout.close();
706                         }
707                 }else{ //parse names by abundance and group
708                         string fileroot =  outputDir + getRootName(getSimpleName(inputFile));
709                         ofstream* temp;
710                         ofstream* temp2;
711                         map<string, bool> wroteFile;
712                         map<string, ofstream*> filehandles;
713                         map<string, ofstream*>::iterator it3;
714                         
715                         for (int i=0; i<Groups.size(); i++) {
716                                 temp = new ofstream;
717                                 filehandles[Groups[i]+".rare"] = temp;
718                                 temp2 = new ofstream;
719                                 filehandles[Groups[i]+".abund"] = temp2;
720                                 
721                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
722                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
723                                 
724                                 wroteFile[Groups[i] + ".rare"] = false;
725                                 wroteFile[Groups[i] + ".abund"] = false;
726                         }
727                         
728                         //write rare
729                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
730                                         string group = groupMap->getGroup(*itRare);
731                                         
732                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
733                                                 *(filehandles[group+".rare"]) << *itRare << endl;
734                                                 wroteFile[group+".rare"] = true;
735                                         }
736                         }
737                                 
738                         //write abund   
739                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
740                                         string group = groupMap->getGroup(*itAbund);
741                                         
742                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
743                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
744                                                 wroteFile[group+".abund"] = true;
745                                         }
746                         }
747                         
748                         //close files
749                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
750                                 (*(filehandles[it3->first])).close();
751                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".accnos");  }
752                                 else { remove((fileroot + tag + it3->first + ".accnos").c_str()); }
753                                 delete it3->second;
754                         }
755                 }
756                                 
757                 return 0;
758
759         }
760         catch(exception& e) {
761                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
762                 exit(1);
763         }
764 }
765 /**********************************************************************************************************************/
766 int SplitAbundCommand::parseGroup(string tag) { //namefile
767         try {
768                 
769                 map<string, ofstream*> filehandles;
770         
771                 if (Groups.size() == 0) {
772                         ofstream aout;
773                         ofstream rout;
774                         
775                         if (rareNames.size() != 0) {
776                                 string rare = outputDir + getRootName(getSimpleName(groupfile))  + tag + "rare.groups";
777                                 openOutputFile(rare, rout);
778                                 outputNames.push_back(rare);
779                         }
780                         
781                         if (abundNames.size() != 0) {
782                                 string abund = outputDir + getRootName(getSimpleName(groupfile))  + tag + "abund.groups";
783                                 openOutputFile(abund, aout);
784                                 outputNames.push_back(abund);
785                         }
786
787                                 
788                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
789                                 vector<string> names;
790                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
791                                 
792                                 for (int i = 0; i < names.size(); i++) {
793                                 
794                                         string group = groupMap->getGroup(names[i]);
795                                 
796                                         if (group == "not found") { 
797                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
798                                         }else {
799                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
800                                                         rout << names[i] << '\t' << group << endl;
801                                                 }else{ //you are a abund name
802                                                         rout << names[i] << '\t' << group << endl;
803                                                 }
804                                         }
805                                 }
806                         }
807                         
808                         if (rareNames.size() != 0)      { rout.close(); }
809                         if (abundNames.size() != 0) { aout.close(); }
810
811                 }else{ //parse names by abundance and group
812                         string fileroot =  outputDir + getRootName(getSimpleName(groupfile));
813                         ofstream* temp;
814                         ofstream* temp2;
815                         map<string, bool> wroteFile;
816                         map<string, ofstream*> filehandles;
817                         map<string, ofstream*>::iterator it3;
818
819                         for (int i=0; i<Groups.size(); i++) {
820                                 temp = new ofstream;
821                                 filehandles[Groups[i]+".rare"] = temp;
822                                 temp2 = new ofstream;
823                                 filehandles[Groups[i]+".abund"] = temp2;
824                                 
825                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
826                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
827                                 
828                                 wroteFile[Groups[i] + ".rare"] = false;
829                                 wroteFile[Groups[i] + ".abund"] = false;
830                         }
831                         
832                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
833                                 vector<string> names;
834                                 splitAtComma(itName->second, names);  //parses bin into individual sequence names
835                                 
836                                 string rareAbund;
837                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
838                                         rareAbund = ".rare";
839                                 }else{ //you are a abund name
840                                         rareAbund = ".abund";
841                                 }
842                                 
843                                 for (int i = 0; i < names.size(); i++) {
844                                 
845                                         string group = groupMap->getGroup(names[i]);
846                                                                         
847                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
848                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
849                                                 wroteFile[group+rareAbund] = true;
850                                         }
851                                 }
852                         }
853                         
854                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
855                                 (*(filehandles[it3->first])).close();
856                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".groups");  }
857                                 else { remove((fileroot + tag + it3->first + ".groups").c_str()); }
858                                 delete it3->second;
859                         }
860                 }
861                                 
862                 return 0;
863
864         }
865         catch(exception& e) {
866                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
867                 exit(1);
868         }
869 }
870 /**********************************************************************************************************************/
871 int SplitAbundCommand::parseFasta(string tag) { //namefile
872         try {
873                 
874                 map<string, ofstream*> filehandles;
875                 
876                 if (Groups.size() == 0) {
877                         ofstream aout;
878                         ofstream rout;
879                         
880                         if (rareNames.size() != 0) {
881                                 string rare = outputDir + getRootName(getSimpleName(fastafile))  + tag + "rare.fasta";
882                                 openOutputFile(rare, rout);
883                                 outputNames.push_back(rare);
884                         }
885                         
886                         if (abundNames.size() != 0) {
887                                 string abund = outputDir + getRootName(getSimpleName(fastafile))  + tag + "abund.fasta";
888                                 openOutputFile(abund, aout);
889                                 outputNames.push_back(abund);
890                         }
891
892                                 
893                         //open input file
894                         ifstream in;
895                         openInputFile(fastafile, in);
896         
897                         while (!in.eof()) {
898                                 if (m->control_pressed) { break; }
899                 
900                                 Sequence seq(in); gobble(in);
901                                 
902                                 if (seq.getName() != "") { 
903                                         
904                                         map<string, string>::iterator itNames;
905                                         
906                                         itNames = nameMap.find(seq.getName());
907                                         
908                                         if (itNames == nameMap.end()) {
909                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
910                                         }else{
911                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
912                                                         seq.printSequence(rout);
913                                                 }else{ //you are a abund name
914                                                         seq.printSequence(aout);
915                                                 }
916                                         }
917                                 }
918                         }
919                         in.close();
920                         if (rareNames.size() != 0)      { rout.close(); }
921                         if (abundNames.size() != 0) { aout.close(); }
922
923                 }else{ //parse names by abundance and group
924                         string fileroot =  outputDir + getRootName(getSimpleName(fastafile));
925                         ofstream* temp;
926                         ofstream* temp2;
927                         map<string, bool> wroteFile;
928                         map<string, ofstream*> filehandles;
929                         map<string, ofstream*>::iterator it3;
930
931                         for (int i=0; i<Groups.size(); i++) {
932                                 temp = new ofstream;
933                                 filehandles[Groups[i]+".rare"] = temp;
934                                 temp2 = new ofstream;
935                                 filehandles[Groups[i]+".abund"] = temp2;
936                                 
937                                 openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
938                                 openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
939                                 
940                                 wroteFile[Groups[i] + ".rare"] = false;
941                                 wroteFile[Groups[i] + ".abund"] = false;
942                         }
943                         
944                         //open input file
945                         ifstream in;
946                         openInputFile(fastafile, in);
947         
948                         while (!in.eof()) {
949                                 if (m->control_pressed) { break; }
950                 
951                                 Sequence seq(in); gobble(in);
952                                 
953                                 if (seq.getName() != "") { 
954                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
955                                         
956                                         if (itNames == nameMap.end()) {
957                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
958                                         }else{
959                                                 vector<string> names;
960                                                 splitAtComma(itNames->second, names);  //parses bin into individual sequence names
961                                 
962                                                 string rareAbund;
963                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
964                                                         rareAbund = ".rare";
965                                                 }else{ //you are a abund name
966                                                         rareAbund = ".abund";
967                                                 }
968                                 
969                                                 for (int i = 0; i < names.size(); i++) {
970                                 
971                                                         string group = groupMap->getGroup(seq.getName());
972                                         
973                                                         if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
974                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
975                                                                 wroteFile[group+rareAbund] = true;
976                                                         }else if(group == "not found") {
977                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
978                                                         }
979                                                 }
980                                         }
981                                 }
982                         }
983                         in.close();
984                         
985                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
986                                 (*(filehandles[it3->first])).close();
987                                 if (wroteFile[it3->first] == true) {  outputNames.push_back(fileroot + tag + it3->first + ".fasta");  }
988                                 else { remove((fileroot + tag + it3->first + ".fasta").c_str()); }
989                                 delete it3->second;
990                         }
991                 }
992                                 
993                 return 0;
994
995         }
996         catch(exception& e) {
997                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
998                 exit(1);
999         }
1000 }
1001 /**********************************************************************************************************************/
1002