git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::getValidParameters(){ 
14         try {
15                 string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SplitAbundCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SplitAbundCommand::SplitAbundCommand(){ 
26         try {
27                 abort = true; calledHelp = true; 
28                 vector<string> tempOutNames;
29                 outputTypes["list"] = tempOutNames;
30                 outputTypes["name"] = tempOutNames;
31                 outputTypes["accnos"] = tempOutNames;
32                 outputTypes["group"] = tempOutNames;
33                 outputTypes["fasta"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SplitAbundCommand::getRequiredParameters(){      
42         try {
43                 string Array[] =  {"fasta","list","name","or"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SplitAbundCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SplitAbundCommand::getRequiredFiles(){   
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SplitAbundCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 SplitAbundCommand::SplitAbundCommand(string option)  {
65         try {
66                 abort = false; calledHelp = false;   
67                 allLines = 1;
68                         
69                 //allow user to run help
70                 if(option == "help") { help(); abort = true; calledHelp = true; }
71                 
72                 else {
73                         //valid paramters for this command
74                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
75                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string, string> parameters = parser.getParameters();
79                         
80                         ValidParameters validParameter;
81                         map<string, string>::iterator it;
82                 
83                         //check to make sure all parameters are valid for command
84                         for (it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         
88                         //initialize outputTypes
89                         vector<string> tempOutNames;
90                         outputTypes["list"] = tempOutNames;
91                         outputTypes["name"] = tempOutNames;
92                         outputTypes["accnos"] = tempOutNames;
93                         outputTypes["group"] = tempOutNames;
94                         outputTypes["fasta"] = tempOutNames;                    
95                                                                                                 
96                         //if the user changes the input directory command factory will send this info to us in the output parameter 
97                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
98                         if (inputDir == "not found"){   inputDir = "";          }
99                         else {
100                                 string path;
101                                 it = parameters.find("list");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
107                                 }
108                                 
109                                 it = parameters.find("group");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
115                                 }
116                                 
117                                 it = parameters.find("fasta");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
123                                 }
124                                 
125                                 it = parameters.find("name");
126                                 //user has given a template file
127                                 if(it != parameters.end()){ 
128                                         path = m->hasPath(it->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
131                                 }
132
133                         }
134
135                         
136                         //if the user changes the output directory command factory will send this info to us in the output parameter 
137                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
138
139                         //check for required parameters
140                         listfile = validParameter.validFile(parameters, "list", true);
141                         if (listfile == "not open") { abort = true; }
142                         else if (listfile == "not found") { listfile = ""; }
143                         else{ inputFile = listfile; }   
144                         
145                         namefile = validParameter.validFile(parameters, "name", true);
146                         if (namefile == "not open") { abort = true; }
147                         else if (namefile == "not found") { namefile = ""; }    
148                         else{ inputFile = namefile; }   
149                 
150                         fastafile = validParameter.validFile(parameters, "fasta", true);
151                         if (fastafile == "not open") { abort = true; }
152                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
153                         
154                         groupfile = validParameter.validFile(parameters, "group", true);
155                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
156                         else if (groupfile == "not found") { groupfile = ""; }
157                         else {  
158                                 groupMap = new GroupMap(groupfile);
159                                 
160                                 int error = groupMap->readMap();
161                                 if (error == 1) { abort = true; }
162         
163                         }
164                         
165                         groups = validParameter.validFile(parameters, "groups", false);         
166                         if (groups == "not found") { groups = ""; }
167                         else if (groups == "all") { 
168                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
169                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
170                         }else { 
171                                 m->splitAtDash(groups, Groups);
172                         }
173                         
174                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
175                         
176                         //do you have all files needed
177                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
178                         
179                         //check for optional parameter and set defaults
180                         // ...at some point should added some additional type checking...
181                         label = validParameter.validFile(parameters, "label", false);                   
182                         if (label == "not found") { label = "";  allLines = 1; }
183                         else { 
184                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
185                                 else { allLines = 1;  }
186                         }
187                         
188                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
189                         accnos = m->isTrue(temp); 
190                         
191                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
192                         convert(temp, cutoff); 
193
194                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
195
196                 }
197
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
201                 exit(1);
202         }
203 }
204 //**********************************************************************************************************************
205 void SplitAbundCommand::help(){
206         try {
207                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
208                 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
209                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
210                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
211                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
212                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
213                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
214                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
215                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
216                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
217                 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
218                 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
219                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
220
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "SplitAbundCommand", "help");
224                 exit(1);
225         }
226 }
227 //**********************************************************************************************************************
228 SplitAbundCommand::~SplitAbundCommand(){ 
229         if (groupfile != "") {  delete groupMap;  } 
230 }
231 //**********************************************************************************************************************
232 int SplitAbundCommand::execute(){
233         try {
234         
235                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
236                 
237                 if (listfile != "") { //you are using a listfile to determine abundance
238                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
239                         
240                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
241                         set<string> processedLabels;
242                         set<string> userLabels = labels;        
243                         
244                         input = new InputData(listfile, "list");
245                         list = input->getListVector();
246                         string lastLabel = list->getLabel();
247                         
248                         //do you have a namefile or do we need to similate one?
249                         if (namefile != "") {  readNamesFile();         }
250                         else                            { createNameMap(list);  }
251                         
252                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
253                         
254                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
255                         
256                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
257                                 
258                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
259                                                 
260                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
261                                                 splitList(list);
262                                                                                         
263                                                 processedLabels.insert(list->getLabel());
264                                                 userLabels.erase(list->getLabel());
265                                 }
266                                 
267                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
268                                                 string saveLabel = list->getLabel();
269                                                 
270                                                 delete list;
271                                                 list = input->getListVector(lastLabel); //get new list vector to process
272                                                 
273                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
274                                                 splitList(list);
275                                                 
276                                                 processedLabels.insert(list->getLabel());
277                                                 userLabels.erase(list->getLabel());
278                                                 
279                                                 //restore real lastlabel to save below
280                                                 list->setLabel(saveLabel);
281                                 }
282                                 
283                         
284                                 lastLabel = list->getLabel();
285                                         
286                                 delete list;
287                                 list = input->getListVector(); //get new list vector to process
288                         }
289                         
290                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
291                         
292                         //output error messages about any remaining user labels
293                         set<string>::iterator it;
294                         bool needToRun = false;
295                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
296                                 m->mothurOut("Your file does not include the label " + *it); 
297                                 if (processedLabels.count(lastLabel) != 1) {
298                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
299                                         needToRun = true;
300                                 }else {
301                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
302                                 }
303
304                         }
305                         
306                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
307                         
308                         //run last label if you need to
309                         if (needToRun == true)  {
310                                 if (list != NULL) {     delete list;    }
311                                 list = input->getListVector(lastLabel); //get new list vector to process
312                                 
313                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
314                                 splitList(list);                
315                                 
316                                 delete list;
317                         }
318                         
319                         delete input;
320                         
321                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
322                                                                         
323                 }else { //you are using the namefile to determine abundance
324                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
325                         
326                         splitNames(); 
327                         writeNames();
328                         
329                         string tag = "";
330                         if (groupfile != "")                            {  parseGroup(tag);             }
331                         if (accnos)                                                     {  writeAccnos(tag);    }
332                         if (fastafile != "")                            {  parseFasta(tag);             }
333                 }
334                 
335                 m->mothurOutEndLine();
336                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
337                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
338                 m->mothurOutEndLine();
339                 
340                 return 0;
341         }
342         catch(exception& e) {
343                 m->errorOut(e, "SplitAbundCommand", "execute");
344                 exit(1);
345         }
346 }
347 /**********************************************************************************************************************/
348 int SplitAbundCommand::splitList(ListVector* thisList) {
349         try {
350                 rareNames.clear();
351                 abundNames.clear();
352                 
353                 //get rareNames and abundNames
354                 for (int i = 0; i < thisList->getNumBins(); i++) {
355                         if (m->control_pressed) { return 0; }
356                         
357                         string bin = thisList->get(i);
358                                                 
359                         vector<string> names;
360                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
361                         int size = names.size();
362                                 
363                         if (size <= cutoff) {
364                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
365                         }else{
366                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
367                         }
368                 }//end for
369
370                 
371                 string tag = thisList->getLabel() + ".";
372                 
373                 writeList(thisList, tag);
374                 
375                 if (groupfile != "")                            {  parseGroup(tag);             }
376                 if (accnos)                                                     {  writeAccnos(tag);    }
377                 if (fastafile != "")                            {  parseFasta(tag);             }
378                 
379                 return 0;
380
381         }
382         catch(exception& e) {
383                 m->errorOut(e, "SplitAbundCommand", "splitList");
384                 exit(1);
385         }
386 }
387 /**********************************************************************************************************************/
388 int SplitAbundCommand::writeList(ListVector* thisList, string tag) { 
389         try {
390                 
391                 map<string, ofstream*> filehandles;
392                 
393                 if (Groups.size() == 0) {
394                         SAbundVector* sabund = new SAbundVector();
395                         *sabund = thisList->getSAbundVector();
396                 
397                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
398                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
399                         int numRareBins = 0;
400                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
401                                 if (i > cutoff) { break; }
402                                 numRareBins += sabund->get(i);
403                         }
404                         int numAbundBins = thisList->getNumBins() - numRareBins;
405                         delete sabund;
406
407                         ofstream aout;
408                         ofstream rout;
409                         
410                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
411                         m->openOutputFile(rare, rout);
412                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
413                         
414                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
415                         m->openOutputFile(abund, aout);
416                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
417
418                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
419                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
420
421                         for (int i = 0; i < thisList->getNumBins(); i++) {
422                                 if (m->control_pressed) { break; }
423                         
424                                 string bin = list->get(i); 
425                         
426                                 int size = m->getNumNames(bin);
427                         
428                                 if (size <= cutoff) {  rout << bin << '\t';  }
429                                 else                            {  aout << bin << '\t'; }
430                         }
431                         
432                         if (rareNames.size() != 0)      { rout << endl; }
433                         if (abundNames.size() != 0) { aout << endl; }
434                         
435                         rout.close();
436                         aout.close();
437                         
438                 }else{ //parse names by abundance and group
439                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
440                         ofstream* temp;
441                         ofstream* temp2;
442                         //map<string, bool> wroteFile;
443                         map<string, ofstream*> filehandles;
444                         map<string, ofstream*>::iterator it3;
445
446                         for (int i=0; i<Groups.size(); i++) {
447                                 temp = new ofstream;
448                                 filehandles[Groups[i]+".rare"] = temp;
449                                 temp2 = new ofstream;
450                                 filehandles[Groups[i]+".abund"] = temp2;
451                                 
452                                 m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
453                                 m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
454                                 outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
455                                 outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
456                         }
457                         
458                         map<string, string> groupVector;
459                         map<string, string>::iterator itGroup;
460                         map<string, int> groupNumBins;
461                 
462                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
463                                 groupNumBins[it3->first] = 0;
464                                 groupVector[it3->first] = "";
465                         }
466                 
467                         for (int i = 0; i < thisList->getNumBins(); i++) {
468                                 if (m->control_pressed) { break; }
469                         
470                                 map<string, string> groupBins;
471                                 string bin = list->get(i); 
472                         
473                                 vector<string> names;
474                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
475                         
476                                 //parse bin into list of sequences in each group
477                                 for (int j = 0; j < names.size(); j++) {
478                                         string rareAbund;
479                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
480                                                 rareAbund = ".rare";
481                                         }else{ //you are a abund name
482                                                 rareAbund = ".abund";
483                                         }
484                                         
485                                         string group = groupMap->getGroup(names[j]);
486                                 
487                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
488                                                 itGroup = groupBins.find(group+rareAbund);
489                                                 if(itGroup == groupBins.end()) {
490                                                         groupBins[group+rareAbund] = names[j];  //add first name
491                                                         groupNumBins[group+rareAbund]++;
492                                                 }else{ //add another name
493                                                         groupBins[group+rareAbund] +=  "," + names[j];
494                                                 }
495                                         }else if(group == "not found") {
496                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
497                                         }
498                                 }
499                         
500                         
501                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
502                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
503                                 }
504                         }
505                         
506                         //end list vector
507                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
508                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
509                                 (*(filehandles[it3->first])).close();
510                                 delete it3->second;
511                         }
512                 }
513                 
514                 return 0;
515
516         }
517         catch(exception& e) {
518                 m->errorOut(e, "SplitAbundCommand", "writeList");
519                 exit(1);
520         }
521 }
522 /**********************************************************************************************************************/
523 int SplitAbundCommand::splitNames() { //namefile
524         try {
525                 
526                 rareNames.clear();
527                 abundNames.clear();     
528                         
529                 //open input file
530                 ifstream in;
531                 m->openInputFile(namefile, in);
532                 
533                 while (!in.eof()) {
534                         if (m->control_pressed) { break; }
535                         
536                         string firstCol, secondCol;
537                         in >> firstCol >> secondCol; m->gobble(in);
538                         
539                         nameMap[firstCol] = secondCol;
540                         
541                         int size = m->getNumNames(secondCol);
542                                 
543                         if (size <= cutoff) {
544                                 rareNames.insert(firstCol); 
545                         }else{
546                                 abundNames.insert(firstCol); 
547                         }
548                 }
549                 in.close();
550                                 
551                 return 0;
552
553         }
554         catch(exception& e) {
555                 m->errorOut(e, "SplitAbundCommand", "splitNames");
556                 exit(1);
557         }
558 }
559 /**********************************************************************************************************************/
560 int SplitAbundCommand::readNamesFile() { 
561         try {
562                 //open input file
563                 ifstream in;
564                 m->openInputFile(namefile, in);
565                 
566                 while (!in.eof()) {
567                         if (m->control_pressed) { break; }
568                         
569                         string firstCol, secondCol;
570                         in >> firstCol >> secondCol; m->gobble(in);
571                         
572                         nameMap[firstCol] = secondCol;
573                 }
574                 in.close();
575                                 
576                 return 0;
577
578         }
579         catch(exception& e) {
580                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
581                 exit(1);
582         }
583 }
584 /**********************************************************************************************************************/
585 int SplitAbundCommand::createNameMap(ListVector* thisList) {
586         try {
587                 
588                 if (thisList != NULL) {
589                         for (int i = 0; i < thisList->getNumBins(); i++) {
590                                 if (m->control_pressed) { return 0; }
591                                 
592                                 string bin = thisList->get(i);
593                                                         
594                                 vector<string> names;
595                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
596                                 
597                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
598                         }//end for
599                 }
600                 
601                 return 0;
602         }
603         catch(exception& e) {
604                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
605                 exit(1);
606         }
607 }
608 /**********************************************************************************************************************/
609 int SplitAbundCommand::writeNames() { //namefile
610         try {
611                 
612                 map<string, ofstream*> filehandles;
613
614                 if (Groups.size() == 0) {
615                         ofstream aout;
616                         ofstream rout;
617                         
618                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
619                         m->openOutputFile(rare, rout);
620                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
621                         
622                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
623                         m->openOutputFile(abund, aout);
624                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
625                         
626                         if (rareNames.size() != 0) {
627                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
628                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
629                                 }
630                         }
631                         rout.close();
632                         
633                         if (abundNames.size() != 0) {
634                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
635                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
636                                 }
637                         }
638                         aout.close();
639                         
640                 }else{ //parse names by abundance and group
641                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
642                         ofstream* temp;
643                         ofstream* temp2;
644                         map<string, ofstream*> filehandles;
645                         map<string, ofstream*>::iterator it3;
646
647                         for (int i=0; i<Groups.size(); i++) {
648                                 temp = new ofstream;
649                                 filehandles[Groups[i]+".rare"] = temp;
650                                 temp2 = new ofstream;
651                                 filehandles[Groups[i]+".abund"] = temp2;
652                                 
653                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
654                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
655                         }
656                         
657                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
658                                 vector<string> names;
659                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
660                                 
661                                 string rareAbund;
662                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
663                                                 rareAbund = ".rare";
664                                 }else{ //you are a abund name
665                                                 rareAbund = ".abund";
666                                 }
667                                 
668                                 map<string, string> outputStrings;
669                                 map<string, string>::iterator itout;
670                                 for (int i = 0; i < names.size(); i++) {
671                                         
672                                         string group = groupMap->getGroup(names[i]);
673                                         
674                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
675                                                 itout = outputStrings.find(group+rareAbund);
676                                                 if (itout == outputStrings.end()) {  
677                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
678                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
679                                         }else if(group == "not found") {
680                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
681                                         }
682                                 }
683                                 
684                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
685                         }
686                         
687                         
688                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
689                                 (*(filehandles[it3->first])).close();
690                                 outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
691                                 delete it3->second;
692                         }
693                 }
694                                 
695                 return 0;
696
697         }
698         catch(exception& e) {
699                 m->errorOut(e, "SplitAbundCommand", "writeNames");
700                 exit(1);
701         }
702 }
703 /**********************************************************************************************************************/
704 //just write the unique names - if a namesfile is given
705 int SplitAbundCommand::writeAccnos(string tag) { 
706         try {
707                 
708                 map<string, ofstream*> filehandles;
709                 
710                 if (Groups.size() == 0) {
711                         ofstream aout;
712                         ofstream rout;
713                         
714                         
715                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
716                         m->openOutputFile(rare, rout);
717                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
718                         
719                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
720                                 rout << (*itRare) << endl;
721                         }
722                         rout.close();
723                 
724                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
725                         m->openOutputFile(abund, aout);
726                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
727                         
728                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
729                                 aout << (*itAbund) << endl;
730                         }
731                         aout.close();
732                         
733                 }else{ //parse names by abundance and group
734                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
735                         ofstream* temp;
736                         ofstream* temp2;
737                         map<string, ofstream*> filehandles;
738                         map<string, ofstream*>::iterator it3;
739                         
740                         for (int i=0; i<Groups.size(); i++) {
741                                 temp = new ofstream;
742                                 filehandles[Groups[i]+".rare"] = temp;
743                                 temp2 = new ofstream;
744                                 filehandles[Groups[i]+".abund"] = temp2;
745                                 
746                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
747                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
748                         }
749                         
750                         //write rare
751                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
752                                         string group = groupMap->getGroup(*itRare);
753                                         
754                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
755                                                 *(filehandles[group+".rare"]) << *itRare << endl;
756                                         }
757                         }
758                                 
759                         //write abund   
760                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
761                                         string group = groupMap->getGroup(*itAbund);
762                                         
763                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
764                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
765                                         }
766                         }
767                         
768                         //close files
769                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
770                                 (*(filehandles[it3->first])).close();
771                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
772                                 delete it3->second;
773                         }
774                 }
775                                 
776                 return 0;
777
778         }
779         catch(exception& e) {
780                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
781                 exit(1);
782         }
783 }
784 /**********************************************************************************************************************/
785 int SplitAbundCommand::parseGroup(string tag) { //namefile
786         try {
787                 
788                 map<string, ofstream*> filehandles;
789         
790                 if (Groups.size() == 0) {
791                         ofstream aout;
792                         ofstream rout;
793                         
794                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
795                         m->openOutputFile(rare, rout);
796                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
797                 
798                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
799                         m->openOutputFile(abund, aout);
800                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
801                         
802                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
803                                 vector<string> names;
804                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
805                                 
806                                 for (int i = 0; i < names.size(); i++) {
807                                 
808                                         string group = groupMap->getGroup(names[i]);
809                                 
810                                         if (group == "not found") { 
811                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
812                                         }else {
813                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
814                                                         rout << names[i] << '\t' << group << endl;
815                                                 }else{ //you are a abund name
816                                                         aout << names[i] << '\t' << group << endl;
817                                                 }
818                                         }
819                                 }
820                         }
821                         
822                         rout.close(); 
823                         aout.close(); 
824
825                 }else{ //parse names by abundance and group
826                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
827                         ofstream* temp;
828                         ofstream* temp2;
829                         map<string, ofstream*> filehandles;
830                         map<string, ofstream*>::iterator it3;
831
832                         for (int i=0; i<Groups.size(); i++) {
833                                 temp = new ofstream;
834                                 filehandles[Groups[i]+".rare"] = temp;
835                                 temp2 = new ofstream;
836                                 filehandles[Groups[i]+".abund"] = temp2;
837                                 
838                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
839                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
840                         }
841                         
842                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
843                                 vector<string> names;
844                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
845                                 
846                                 string rareAbund;
847                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
848                                         rareAbund = ".rare";
849                                 }else{ //you are a abund name
850                                         rareAbund = ".abund";
851                                 }
852                                 
853                                 for (int i = 0; i < names.size(); i++) {
854                                 
855                                         string group = groupMap->getGroup(names[i]);
856                                                                         
857                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
858                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
859                                         }
860                                 }
861                         }
862                         
863                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
864                                 (*(filehandles[it3->first])).close();
865                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
866                                 delete it3->second;
867                         }
868                 }
869                                 
870                 return 0;
871
872         }
873         catch(exception& e) {
874                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
875                 exit(1);
876         }
877 }
878 /**********************************************************************************************************************/
879 int SplitAbundCommand::parseFasta(string tag) { //namefile
880         try {
881                 
882                 map<string, ofstream*> filehandles;
883                 
884                 if (Groups.size() == 0) {
885                         ofstream aout;
886                         ofstream rout;
887                         
888                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
889                         m->openOutputFile(rare, rout);
890                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
891                 
892                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
893                         m->openOutputFile(abund, aout);
894                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
895                 
896                         //open input file
897                         ifstream in;
898                         m->openInputFile(fastafile, in);
899         
900                         while (!in.eof()) {
901                                 if (m->control_pressed) { break; }
902                 
903                                 Sequence seq(in); m->gobble(in);
904                                 
905                                 if (seq.getName() != "") { 
906                                         
907                                         map<string, string>::iterator itNames;
908                                         
909                                         itNames = nameMap.find(seq.getName());
910                                         
911                                         if (itNames == nameMap.end()) {
912                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
913                                         }else{
914                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
915                                                         seq.printSequence(rout);
916                                                 }else{ //you are a abund name
917                                                         seq.printSequence(aout);
918                                                 }
919                                         }
920                                 }
921                         }
922                         in.close();
923                         rout.close(); 
924                         aout.close(); 
925
926                 }else{ //parse names by abundance and group
927                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
928                         ofstream* temp;
929                         ofstream* temp2;
930                         map<string, ofstream*> filehandles;
931                         map<string, ofstream*>::iterator it3;
932
933                         for (int i=0; i<Groups.size(); i++) {
934                                 temp = new ofstream;
935                                 filehandles[Groups[i]+".rare"] = temp;
936                                 temp2 = new ofstream;
937                                 filehandles[Groups[i]+".abund"] = temp2;
938                                 
939                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
940                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
941                         }
942                         
943                         //open input file
944                         ifstream in;
945                         m->openInputFile(fastafile, in);
946         
947                         while (!in.eof()) {
948                                 if (m->control_pressed) { break; }
949                 
950                                 Sequence seq(in); m->gobble(in);
951                                 
952                                 if (seq.getName() != "") { 
953                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
954                                         
955                                         if (itNames == nameMap.end()) {
956                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
957                                         }else{
958                                                 vector<string> names;
959                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
960                                 
961                                                 string rareAbund;
962                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
963                                                         rareAbund = ".rare";
964                                                 }else{ //you are a abund name
965                                                         rareAbund = ".abund";
966                                                 }
967                                 
968                                                 for (int i = 0; i < names.size(); i++) {
969                                 
970                                                         string group = groupMap->getGroup(seq.getName());
971                                         
972                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
973                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
974                                                         }else if(group == "not found") {
975                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
976                                                         }
977                                                 }
978                                         }
979                                 }
980                         }
981                         in.close();
982                         
983                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
984                                 (*(filehandles[it3->first])).close();
985                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
986                                 delete it3->second;
987                         }
988                 }
989                                 
990                 return 0;
991
992         }
993         catch(exception& e) {
994                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
995                 exit(1);
996         }
997 }
998 /**********************************************************************************************************************/
999